In [220]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
from sklearn.model_selection import train_test_split 
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.linear_model import LinearRegression, RidgeCV
from sklearn.neural_network import MLPRegressor 
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PowerTransformer
from sklearn.compose import TransformedTargetRegressor, ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import SelectKBest
from sklearn.metrics import roc_curve
from tqdm.notebook import tqdm

In [82]:
data = pd.read_csv('dec_tree_df_basic_bayes.csv', index_col='Date', parse_dates=True)
data.drop(
        [
            "Unnamed: 0"
        ],
        axis=1, inplace=True
    )
data = data.iloc[245:]
data

Unnamed: 0_level_0,Int_Rate,CPI,UNRATE,VXLookback1,VXLookback2,VXLookback3,VXLookback4,VXLookforward1,VXLookback0,VXUpDown,...,SPLookback2,SPLookback3,SPLookback4,SPLookforward1,SPLookback0,SPUpDown,SP1MonRet,SP3MonRet,SPMomentum_Wes_G,SPMomentumAbs
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1994-02-10,3.56000,146.700,6.6,-2.635432,-2.148997,-8.459016,41.860465,1.544944,7.067669,1,...,0.066205,0.465744,-2.275718,0.332441,-0.856924,1,-0.681979,2.104127,8.845978,8.164000
1994-02-11,3.56000,146.700,6.6,7.067669,-2.635432,-2.148997,-8.459016,-1.244813,1.544944,-1,...,0.397090,0.066205,0.465744,0.132512,0.332441,1,-0.085449,1.964876,11.104693,11.019244
1994-02-14,3.56000,146.700,6.6,1.544944,7.067669,-2.635432,-2.148997,-6.162465,-1.244813,-1,...,-0.856924,0.397090,0.066205,0.529500,0.132512,1,-0.350027,2.164418,11.573699,11.223672
1994-02-15,3.56000,146.700,6.6,-1.244813,1.544944,7.067669,-2.635432,-2.014925,-6.162465,-1,...,0.332441,-0.856924,0.397090,-0.065894,0.529500,-1,0.179473,2.224126,11.645606,11.825079
1994-02-16,3.56000,146.700,6.6,-6.162465,-1.244813,1.544944,7.067669,5.026657,-2.014925,1,...,0.132512,0.332441,-0.856924,-0.592848,-0.065894,-1,-0.018308,2.692638,11.417510,11.399202
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-05-31,0.71920,288.663,3.6,-6.472727,-3.066620,-3.667233,3.405899,-1.909126,1.827372,-1,...,1.998138,0.883490,-0.763378,-0.808853,-0.561098,-1,0.615965,-2.859037,0.714700,1.330665
2022-06-01,0.73621,288.663,3.6,1.827372,-6.472727,-3.066620,-3.667233,-3.775788,-1.909126,-1,...,2.454914,1.998138,0.883490,1.904348,-0.808853,1,-0.794832,-5.507509,1.159393,0.364561
2022-06-02,0.75289,288.663,3.6,-1.909126,1.827372,-6.472727,-3.066620,0.283172,-3.775788,1,...,-0.561098,2.454914,1.998138,-1.641152,1.904348,-1,0.651111,-3.105312,1.988933,2.640044
2022-06-03,0.76923,288.663,3.6,-3.775788,-1.909126,1.827372,-6.472727,1.129488,0.283172,1,...,-0.808853,-0.561098,2.454914,0.304477,-1.641152,1,-4.035334,-3.934001,4.119631,0.084297


In [118]:
X = data[['Int_Rate', 'CPI', 'UNRATE','VXLookback1', 'VXLookback2',
       'VXLookback3', 'VXLookback4', 'VXLookback0', 
          'VX1MonChg', 'VX3MonChg', 'Int_Rate1MonChg', 'Int_Rate3MonChg',
         'SPClose', 'SPLookback1', 'SPLookback2', 'SPLookback3', 'SPLookback4',
         'SPLookback0', 'SP1MonRet', 'SP3MonRet', 'SPMomentum_Wes_G']]
y = data['SPUpDown']

In [120]:
transformed_columns = ['Int_Rate', 'CPI', 'UNRATE','VXLookback1', 'VXLookback2',
       'VXLookback3', 'VXLookback4', 'VXLookback0', 
          'VX1MonChg', 'VX3MonChg', 'Int_Rate1MonChg', 'Int_Rate3MonChg',
         'SPClose', 'SPLookback1', 'SPLookback2', 'SPLookback3', 'SPLookback4',
         'SPLookback0', 'SP1MonRet', 'SP3MonRet', 'SPMomentum_Wes_G']

In [145]:
#Keep to try regression
pipe = Pipeline(
    steps = [
        ('ct', ColumnTransformer(
        transformers = [
            ('power', PowerTransformer(), transformed_columns)
        ],
            remainder = 'passthrough'
        )),
        ('scaler', StandardScaler()),
        ('linear', RidgeCV())
    ]
)

ttr = TransformedTargetRegressor(
    regressor = pipe,
    #func = np.log,
    #inverse_func = np.exp
)

ttr.fit(X_train, y_train)

y_pred = pipeline.predict(X_test)

print(f'Accuracy Score: {(accuracy_score(y_test, y_pred))}')
print(f'MSE: {mean_squared_error(y_test, ttr.predict(X_test))}')

Accuracy Score: 0.4973846885401807
MSE: 0.9977350424057013


In [222]:
#For Classification 
rf =  RandomForestClassifier(max_features=5)

mx_depth =list(np.arange(5, 20, 3))
n_estimators = list(np.arange(10, 200, 5))
params = {
            'max_depth': mx_depth,
            "n_estimators": n_estimators
}

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state = 321)

#get the grid search going
cv = tqdm(GridSearchCV(estimator=rf, param_grid=params, scoring= 'accuracy', cv=len(n_estimators), n_jobs=1))

#fit it
tqdm(cv.fit(X_train, y_train))

#get it 
best = cv.best_estimator_ 

#define y_pred
y_pred = best.predict(X_test)

print("Accuracy: {}".format(cv.score(X_test, y_test)))
print("Tuned Model Parameters: {}".format(cv.best_params_))

ImportError: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html

In [185]:
print("Accuracy: {}".format(cv.score(X_test, y_test)))
#Confusion Matrix
print(f'Confusion Matrix: {(confusion_matrix(y_pred , y_test))}')

<bound method BaseEstimator.get_params of RandomForestClassifier(max_depth=4, n_estimators=75)>

In [136]:
# Use it to predict the labels of the test data
rf_predictions = rf.predict(X_test)

#print(f'R2 Score:{accuracy_score(y_true, rf.predict(y_test))}')
print(f'MAE: {mean_squared_error(y_test, rf_predictions)}')

#Confusion Matrix
print(f'Confusion Matrix: {(confusion_matrix(rf_predictions , y_test))}')

#Hit Rate
print(svm.score(X_test, y_test))

# Assess the accuracy of both classifiers
print(f'Accuracy Score: {(accuracy_score(y_test, rf_predictions))}')


Accuracy Score: 0.5192582025677603
MAE: 1.9229671897289586
Confusion Matrix: [[147 191]
 [820 945]]


In [131]:
importances = pd.DataFrame({
    'variable': X_train.columns,
    'importance': rf.feature_importances_
})
importances.sort_values('importance', ascending = False)

Unnamed: 0,variable,importance
17,SPLookback0,0.057399
14,SPLookback2,0.055033
7,VXLookback0,0.053004
16,SPLookback4,0.052454
3,VXLookback1,0.052381
6,VXLookback4,0.05235
18,SP1MonRet,0.051815
5,VXLookback3,0.051801
8,VX1MonChg,0.051574
13,SPLookback1,0.049994
