In [49]:

# matplotlib
%matplotlib inline
from matplotlib import style
style.use('fivethirtyeight')
import matplotlib.pyplot as plt

# Pandas and Numpy
import numpy as np
import pandas as pd

# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func
import psycopg2

In [50]:
#import connection string (youll need to create this)
from config import conn

# create the engine
engine = create_engine(conn)
Base = automap_base()

# reflect an existing database into a new model
Base.prepare(engine, reflect=True)

# reflect the tables
#Recession_Indicator = Base.classes.recession_indicator_by_day
Recession_Indicator = Base.classes.us_recession
Historical_Price = Base.classes.historical_price_by_day

In [32]:
# Create our session (link) from Python to the DB
session = Session(engine)

In [35]:
#create df from Recession_Indicator table
results = []

results = session.query(Recession_Indicator.date, \
                        Recession_Indicator.recession_indicator, \
                        Recession_Indicator.country)


recession_df = pd.DataFrame(results).set_index(['country'])
recession_df.head()

Unnamed: 0_level_0,date,recession_indicator
country,Unnamed: 1_level_1,Unnamed: 2_level_1
us,25569,True
us,25570,True
us,25571,True
us,25572,True
us,25573,True


In [34]:
#create df from Historical_Price table

results = []

results = session.query(Historical_Price.date, \
                        Historical_Price.open, \
                        Historical_Price.high, \
                        Historical_Price.low, \
                        Historical_Price.close, \
                        Historical_Price.ticker)


price_df = pd.DataFrame(results).set_index(['date','ticker'])
price_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
36643,SPY,$143.00,$147.34,$143.00,$146.00
36644,SPY,$147.00,$147.86,$145.06,$145.09
36647,SPY,$146.56,$148.48,$145.84,$147.06
36648,SPY,$145.50,$147.13,$144.13,$144.13
36649,SPY,$144.00,$144.00,$139.78,$140.75


In [37]:
# Create our features
X = recession_df.drop(columns='recession_indicator')

# Create our target
y = recession_df["recession_indicator"]

In [39]:
y.describe()

count     18262
unique        2
top       False
freq       9712
Name: recession_indicator, dtype: object

In [41]:
from collections import Counter
# Split the X and y into X_train, X_test, y_train, y_test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, 
                                                    y, 
                                                    random_state=1)
Counter(y_train)




Counter({False: 7256, True: 6440})

In [42]:
# Resample the training data with the RandomOversampler
from imblearn.ensemble import BalancedRandomForestClassifier

model_1 = BalancedRandomForestClassifier(random_state=1)
model_1.fit(X_train, y_train)

Using TensorFlow backend.


BalancedRandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                               criterion='gini', max_depth=None,
                               max_features='auto', max_leaf_nodes=None,
                               max_samples=None, min_impurity_decrease=0.0,
                               min_samples_leaf=2, min_samples_split=2,
                               min_weight_fraction_leaf=0.0, n_estimators=100,
                               n_jobs=None, oob_score=False, random_state=1,
                               replacement=False, sampling_strategy='auto',
                               verbose=0, warm_start=False)

In [44]:

from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced
# Calculated the balanced accuracy score
predict_1 = model_1.predict(X_test)
bas_1 = balanced_accuracy_score(y_test, predict_1)
print(bas_1)

0.9986449665776433


In [46]:
cm_1 = confusion_matrix(y_test, predict_1)

cm_df_1 = pd.DataFrame(cm_1, index=["Actual Recession", "Actual NoRecession"], columns=["Predicted Recession", "Predicted No Recession"])
print(cm_df_1)

                    Predicted Recession  Predicted No Recession
Actual Recession                   2454                       2
Actual NoRecession                    4                    2106


In [47]:
# Print the imbalanced classification report
from imblearn.metrics import classification_report_imbalanced

print(classification_report_imbalanced(y_test, predict_1))

                   pre       rec       spe        f1       geo       iba       sup

      False       1.00      1.00      1.00      1.00      1.00      1.00      2456
       True       1.00      1.00      1.00      1.00      1.00      1.00      2110

avg / total       1.00      1.00      1.00      1.00      1.00      1.00      4566



In [48]:
# Displaying results

print("Balanced Random Forest Classifier Confusion Matrix")
display(cm_df_1)
print(f"\nBalanced Accuracy Score : {bas_1}")
print("\nImbalanced Classification Report")
print(classification_report_imbalanced(y_test, predict_1))

Balanced Random Forest Classifier Confusion Matrix


Unnamed: 0,Predicted Recession,Predicted No Recession
Actual Recession,2454,2
Actual NoRecession,4,2106



Balanced Accuracy Score : 0.9986449665776433

Imbalanced Classification Report
                   pre       rec       spe        f1       geo       iba       sup

      False       1.00      1.00      1.00      1.00      1.00      1.00      2456
       True       1.00      1.00      1.00      1.00      1.00      1.00      2110

avg / total       1.00      1.00      1.00      1.00      1.00      1.00      4566

