In [0]:
import pandas as pd
import numpy as np
from scipy.io import loadmat
import time

#import functions as fn
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

from sklearn.metrics import make_scorer, r2_score, mean_squared_error, auc, mean_absolute_error, accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier, GradientBoostingRegressor, GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import RandomizedSearchCV, KFold, GridSearchCV
from sklearn.svm import SVC, SVR
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
        
from numpy.linalg import norm

In [0]:
def calculateReturns(prices, lag):
    
    #prices is a pandasDataFrame type
    prevPrices = prices.shift(lag)
    return ((prices - prevPrices) / prevPrices)

def thresh_func(x, lb, ub):
    
    if x <= lb:
        return 0
    elif lb < x < ub:
        return 1
    
    elif x > ub:
        return 2
    
    else:
        return np.nan

In [0]:
def ETF_predictions(ticker, MLmodel='rf', MLtype='regression'):
    
    ########################## DATA IMPORT ##############################################
    
    file=ticker + ".csv"
    
    df=pd.read_csv(file)
    df['date'] = pd.to_datetime(df['date'])
    df.sort_values(by=["PERMNO","date"], inplace=True)
    
    #Remove row where return=C and convert the rest of the column to float type
    #df['RET'] = np.where(df.RET == "C", 0, df.RET)
    df = df[df.RET != "C"]
    df['RET'] = df['RET'].astype(float)
    
    ########################## FEATURE ENGINEERING #######################################
    
    df["price"]=(df.ASK+df.BID)/2
    df["bidAskSpread"]=df.ASK-df.BID
    df["marketValue"] = df.SHROUT*df.price
    df["dollarVolume"] = df.VOL*df.price
    
    df["RET_1Mo"] = np.where(df.PERMNO == df.PERMNO.shift(21), (df.price-df.price.shift(21))/df.price.shift(21), np.nan)
    df["RET_3Mo"]  = np.where(df.PERMNO == df.PERMNO.shift(63), (df.price-df.price.shift(63))/df.price.shift(63), np.nan)
    df["RET_12Mo"]  = np.where(df.PERMNO == df.PERMNO.shift(253), (df.price-df.price.shift(253))/df.price.shift(253), np.nan)
    

    df["VolRET"] = np.where(df.PERMNO == df.PERMNO.shift(63), df.RET.rolling(window=63).std()*np.sqrt(253), np.nan)
   
    df["StdRET_12Mo"]=np.where(df.PERMNO == df.PERMNO.shift(253), df.RET.rolling(window=253).std(), np.nan)
    df["StdMARKET_12Mo"]=np.where(df.PERMNO == df.PERMNO.shift(253), df.sprtrn.rolling(window=253).std(), np.nan)
    df["CovRETMARKET_12Mo"]=np.where(df.PERMNO == df.PERMNO.shift(253),df.RET.rolling(window=253).cov(df.sprtrn.rolling(window=253)), np.nan)
    
    df["Beta_12Mo"]=df.CovRETMARKET_12Mo/(df.StdMARKET_12Mo**2)
    df["IvolRET_12Mo"]=np.sqrt((df.StdRET_12Mo**2-(df.Beta_12Mo*df.StdMARKET_12Mo)**2))*np.sqrt(253)
    df["BetaSq_12Mo"]=df.Beta_12Mo**2

    df["StdRET_3Mo"]=np.where(df.PERMNO == df.PERMNO.shift(63), df.RET.rolling(window=63).std(), np.nan)
    df["StdMARKET_3Mo"]=np.where(df.PERMNO == df.PERMNO.shift(63), df.sprtrn.rolling(window=63).std(), np.nan)
    df["CovRETMARKET_3Mo"]=np.where(df.PERMNO == df.PERMNO.shift(63),df.RET.rolling(window=63).cov(df.sprtrn.rolling(window=63)), np.nan)
    df["Beta_3Mo"]=df.CovRETMARKET_3Mo/(df.StdMARKET_3Mo**2)
    df["IvolRET_3Mo"]=np.sqrt((df.StdRET_3Mo**2-(df.Beta_3Mo*df.StdMARKET_3Mo)**2))*np.sqrt(253)
    df["BetaSq_3Mo"]=df.Beta_3Mo**2
    
    dfprice = df.price
    dfprice.index = df.date
    dfprice.replace([np.inf, -np.inf, np.NINF, -np.NINF], np.nan, inplace=True)
    
    df.drop(['PERMNO', 'date','PRC','BID','ASK', 'price'], axis=1, inplace=True)
    
    ########################## DATA PREPARATION ##############################################
    
    dates = dfprice.index
    #df_list = df_list
    mid = dfprice

    #holdingDays = 252 // 4; #hold a quarter
    holdingDays = 100
    #Returns
    Ret1 = calculateReturns(mid.copy(), 1).fillna(value=0)
    #20 week return
    RetQ = calculateReturns(mid.copy(), holdingDays) 

    #shifted next quarter's return to today's row to use as response variable. 
    #Can enter only at next day's close. 
    RetFut = RetQ.copy().shift(-(holdingDays+1))
    
    trainSize = 2014
    
    #flattening and repetition of the date vector
    flat_train = RetFut[:trainSize].size
    flat_test = RetFut[trainSize:].size
    flat = RetFut.size
    m0 = RetFut.shape[0] #original number of observations
    #nStocks = RetFut.shape[1] #original number of stocks

    datesTrain = dates.values[:trainSize].flatten()
    datesTest = dates.values[trainSize:].flatten()
    
    if MLtype == 'regression':
        Y0train = RetFut.values[:trainSize].flatten()[:,np.newaxis]
        Y0test = RetFut.values[trainSize:].flatten()[:,np.newaxis]
        
    elif MLtype == 'classification':
        a = RetFut.values
        lb = np.nanpercentile(a, 33)
        ub = np.nanpercentile(a, 66)
        Y = []
        for ret in RetFut:
            ret = thresh_func(ret, lb, ub)
            Y.append(ret)
        Yarray = np.array(Y).reshape(len(Y),1)
        Y0train = Yarray[:trainSize].flatten()[:,np.newaxis]
        Y0test = Yarray[trainSize:].flatten()[:,np.newaxis]
    
    #number of features - there are 23 features in total
    nInd = df.shape[1]
    
    X0train = np.zeros((flat_train, nInd))
    X0test = np.zeros((flat_test, nInd))
    X0 = np.zeros((flat, nInd))
    
    #######################
    #n = 0
    #df = df.values
    #X0train[:, n] = df[:trainSize].flatten()
    #X0test[:, n] = df[trainSize:].flatten()
    
    X0train = df.values[:trainSize]#.flatten()[:,np.newaxis]
    X0test = df.values[trainSize:]#.flatten()[:,np.newaxis]
    ########################
    
    dataCleanTrain = pd.DataFrame(np.hstack([X0train, Y0train]))
    dataCleanTrain.index = datesTrain
    
    dataCleanTest = pd.DataFrame(np.hstack([X0test, Y0test]))
    dataCleanTest.index = datesTest
    
    dataCleanTrain.replace([np.inf, -np.inf, np.NINF, -np.NINF], np.nan, inplace=True)
    dataCleanTest.replace([np.inf, -np.inf, np.NINF, -np.NINF], np.nan, inplace=True)
    
    dataCleanTrain.dropna(how='any', axis=0, inplace=True)
    dataCleanTest.dropna(how='any', axis=0, inplace=True)
    
    ixCleanTrain = dataCleanTrain.index.values
    ixCleanTest = dataCleanTest.index.values
    #print("First training day: {}".format(ixCleanTrain[0]))
    #print("Last training day: {}".format(ixCleanTrain[-1]))
    #print("First testing day: {}".format(ixCleanTest[0]))
    #print("Last testing day: {}".format(ixCleanTest[-1]))

    dataCleanTrain = dataCleanTrain.values
    dataCleanTest = dataCleanTest.values

    Xtrain, ytrain = dataCleanTrain[:, 0:-1], dataCleanTrain[:, -1][:,np.newaxis]
    Xtest, ytest = dataCleanTest[:, 0:-1], dataCleanTest[:, -1][:,np.newaxis]
    
    ########################## RF GRID ###########################################
    
    if MLmodel == 'rf':
        
        if MLtype == 'regression':
            model = RandomForestRegressor()
            
        elif MLtype == 'classification':
            model = RandomForestClassifier()
    
        np.random.seed(2)
        random_state=20

        # Create the pipe to tune

        pipe = Pipeline([("scaler",StandardScaler()),("sfm",SelectFromModel(model)),("rf",model)])

        #prepare parameter_grid

        # Number of trees in random forest
        n_estimators = [int(x) for x in np.linspace(start = 100, stop = 1000, num = 10)]

        # Number of features to consider at every split
        max_features = [round(x,2) for x in np.linspace(start = 0.1, stop = 0.5, num = 5)]

        # Max depth of the tree
        max_depth = [2,3,4]
        
        # Level of threshold
        threshold = [".5*median", "median"]

        # Minimum number of samples required at each leaf node
        #min_samples_leaf = [int(x) for x in np.linspace(start = 15, stop = 100, num = 5)]

        # Method of selecting training subset for training each tree
        #bootstrap = [True]


        # Save these parameters in a dictionary
        param_grid = {'rf__n_estimators': n_estimators, 
                      'rf__max_features': max_features, 
                      'rf__max_depth': max_depth, 
                      'sfm__threshold': threshold
                      } 

        #Random search of parameters by searching across different combinations

        kfold = KFold(n_splits = 5, shuffle = True)
        rso = RandomizedSearchCV(pipe, 
                                       param_distributions = param_grid, 
                                       n_iter = 10,                               
                                       cv = kfold,
                                       random_state=random_state
                                       )
        # Fit the model to find the best hyperparameter values
        rso.fit(Xtrain, ytrain.ravel())

        print(rso.best_params_)
        #{'rf__max_features': 0.4, 'rf__min_samples_leaf': 15, 'rf__max_depth': 4, 'rf__bootstrap': True, 'sfm__threshold': 'median', 'rf__n_estimators': 225}

        # Assign the best model to best_random_forest
        best_random_forest = rso.best_estimator_

        # Initialize random_state, this is possible but it takes too long to do the search:
        best_random_forest.random_state = random_state 

        predictions = best_random_forest.predict(Xtest)
        predictions_train = best_random_forest.predict(Xtrain)

        #print('\n')
        #print("{} {} {} model\n".format(ticker, MLmodel, MLtype))
        #print("train accuracy: {:.4f}\ntest accuracy: {:.4f}\ntest error {:.4f}\n".format(accuracy_train, accuracy, error))

        
    ########################## SVM GRID ###########################################
        
    elif MLmodel == 'svm':
        
        if MLtype == 'classification':
            model = SVC()
            
        elif MLtype == 'regression':
            model = SVR()
            
        pipe = Pipeline([("scaler", MinMaxScaler()),("svm", model)])

        param_grid = [{'svm__kernel': ['rbf'], 
                       'svm__C': [0.001, 0.01, 0.1, 1, 10, 100], 
                        'svm__gamma': [0.001, 0.01, 0.1, 1, 10, 100]},
                      {'svm__kernel': ['linear'],
                      'svm__C': [0.001, 0.01, 0.1, 1, 10, 100]},
                      {'svm__kernel': ['poly'],
                       'svm__degree': [0, 1, 2, 3, 4, 5, 6]}]

        grid_search = GridSearchCV(pipe, param_grid, iid=False, cv=5, return_train_score=True)

        grid_search.fit(Xtrain, ytrain)

        #results = pd.DataFrame(grid_search.cv_results_)
        #print(results.T)
        best_parameters = grid_search.best_params_
        print("Best parameters scaling grid: {}".format(best_parameters))
        print("Best cross-validation score scaling grid: {:.2f}".format(grid_search.best_score_*100)) #default is accuracy
        print("Test score scaling grid (grid object): {:.2f}\n".format(grid_search.score(Xtest,ytest)*100)) #default is accuracy

        results = pd.DataFrame(grid_search.cv_results_)
        #print(results.T)
        #results.to_csv("results_svc_norm.csv")
        
        #set up and run the model with the best parameters
        scl = MinMaxScaler()
        scl.fit(Xtrain)
        Xtrain = scl.transform(Xtrain)
        Xtest = scl.transform(Xtest)
        
        if MLtype == 'classification':
            if best_parameters['svm__kernel'] == 'rbf':
                cls = SVC(kernel=best_parameters['svm__kernel'], C=best_parameters['svm__C'], 
                          gamma=best_parameters['svm__gamma'])

            elif best_parameters['svm__kernel'] == 'linear':
                cls = SVC(kernel=best_parameters['svm__kernel'], C=best_parameters['svm__C'])

            elif best_parameters['svm__kernel'] == 'poly':
                cls = SVC(kernel=best_parameters['svm__kernel'], C=best_parameters['svm__degree'])
                
        elif MLtype == 'regression':
            if best_parameters['svm__kernel'] == 'rbf':
                cls = SVR(kernel=best_parameters['svm__kernel'], C=best_parameters['svm__C'], 
                          gamma=best_parameters['svm__gamma'])

            elif best_parameters['svm__kernel'] == 'linear':
                cls = SVR(kernel=best_parameters['svm__kernel'], C=best_parameters['svm__C'])

            elif best_parameters['svm__kernel'] == 'poly':
                cls = SVR(kernel=best_parameters['svm__kernel'], C=best_parameters['svm__degree'])
                
        
        cls.fit(Xtrain, ytrain)
        
        predictions = cls.predict(Xtest)
        predictions_train = cls.predict(Xtrain)
        
        #if MLtype == 'classification':
        #    train_score = accuracy_score(ytrain, predictions_train)
        #    test_score = accuracy_score(ytest, predictions)
        
        #train_r2 = r2_score(ytrain, predictions_train)
        #test_r2 = r2_score(ytest, predictions)
        #error = mean_squared_error(ytest, predictions)
        
    ########################## GB GRID ###########################################
        
    elif MLmodel == 'gb':
        
        if MLtype == 'classification':
            model = GradientBoostingClassifier()
            pipe = Pipeline([("scaler", MinMaxScaler()),("gb", model)])
            
            param_grid = [{'gb__loss': ['deviance'],
                           'gb__n_estimators': [100, 500, 1000],
                           'gb__max_depth': [3, 5, 10]},
                         ]
            
        elif MLtype == 'regression':
            model = GradientBoostingRegressor()
            pipe = Pipeline([("scaler", MinMaxScaler()),("gb", model)])
            param_grid = [{'gb__loss': ['ls'],
                           'gb__n_estimators': [100, 500, 1000],
                           'gb__max_depth': [3, 5, 10]},
              
                          {'gb__loss': ['quantile'],
                           'gb__n_estimators': [100, 500, 1000],
                           'gb__max_depth': [3, 5, 10],
                           'gb__alpha': [0.9, 0.95, 0.99]}
                         ]

        grid_search = GridSearchCV(pipe, param_grid, iid=False, cv=5, return_train_score=True)

        grid_search.fit(Xtrain, ytrain)

        #results = pd.DataFrame(grid_search.cv_results_)
        #print(results.T)
        best_parameters = grid_search.best_params_
        print("Best parameters scaling grid: {}".format(best_parameters))
        print("Best cross-validation score scaling grid: {:.2f}".format(grid_search.best_score_*100)) #default is accuracy
        print("Test score scaling grid (grid object): {:.2f}\n".format(grid_search.score(Xtest,ytest)*100)) #default is accuracy
        
        #set up and run the model with the best parameters
        scl = MinMaxScaler()
        scl.fit(Xtrain)
        Xtrain = scl.transform(Xtrain)
        Xtest = scl.transform(Xtest)
        
        if MLtype == 'regression':
            
            if best_parameters['gb__loss'] == 'ls':
                cls = GradientBoostingRegressor(loss=best_parameters['gb__loss'], 
                                                n_estimators=best_parameters['gb__n_estimators'], 
                                                max_depth=best_parameters['gb__max_depth'])

            elif best_parameters['gb__loss'] == 'quantile':
                cls = GradientBoostingRegressor(loss=best_parameters['gb__loss'], 
                                                n_estimators=best_parameters['gb__n_estimators'], 
                                                max_depth=best_parameters['gb__max_depth'], 
                                                alpha=best_parameters['gb__alpha'])
                
        elif MLtype == 'classification':
            cls = GradientBoostingClassifier(loss=best_parameters['gb__loss'], 
                                               n_estimators=best_parameters['gb__n_estimators'], 
                                               max_depth=best_parameters['gb__max_depth'])

        cls.fit(Xtrain, ytrain)
        
        predictions = cls.predict(Xtest)
        predictions_train = cls.predict(Xtrain)
        
      
    #################################################################################
    
    train_r2 = r2_score(ytrain, predictions_train)
    test_r2 = r2_score(ytest, predictions) 
    error = mean_squared_error(ytest, predictions)
    
    if MLtype == 'classification':
        train_score = accuracy_score(ytrain, predictions_train)
        test_score = accuracy_score(ytest, predictions)
        
        df_scores = pd.DataFrame({ticker: {'train_accuracy': train_score, 'test_accuracy': test_score, 'trainr2': train_r2, 
                                           'testr2': test_r2, 'error': error}})
    else:
        df_scores = pd.DataFrame({ticker: {'trainr2': train_r2, 'testr2': test_r2, 'error': error}})
    
    print("{} {} {} model\n".format(ticker, MLmodel, MLtype))
    if MLtype == 'classification':
        print("Train accuracy: {:.2f}%".format(train_score*100))
        print("Test accuracy: {:.2f}%".format(test_score*100))
    print("Train r2 score: {:.4f}".format(train_r2))
    print("Test r2 score: {:.4f}".format(test_r2))
    print("Test MSE: {:.4f}".format(error))
     
    df_predictions = pd.DataFrame({ticker:predictions})
    df_train_predictions = pd.DataFrame({ticker:predictions_train})    
    
    return Xtrain, Xtest, df, dfprice, df_scores, df_predictions, df_train_predictions

In [0]:
def df_creator(tickers, MLmodel, MLtype, create_csv=True):

    predictions_dict = {}
    train_predictions_dict = {}
    dflist = []
    dflist_train = []
    dfscorelist = []

    for t in tickers:

        Xtrain, Xtest, df, dfprice, df_scores, df_predictions, df_train_predictions = ETF_predictions(t, MLmodel, MLtype)
        dflist.append(df_predictions)
        dflist_train.append(df_train_predictions)
        dfscorelist.append(df_scores)
        
    df_all = pd.concat(dflist, axis=1, ignore_index=True)
    df_all.columns = tickers

    df_all_train = pd.concat(dflist_train, axis=1, ignore_index=True)
    df_all_train.columns = tickers
    
    df_all_scores = pd.concat(dfscorelist, axis=1, ignore_index=True)
    df_all_scores.columns = tickers
    
    if create_csv == True:
        filename = MLmodel + '-' + MLtype + '-ETF-predictions.csv'
        filename2 = MLmodel + '-' + MLtype + '-ETF-train-predictions.csv'
        filename3 = MLmodel + '-' + MLtype + '-ETF-model-scores.csv'
        
        df_all.to_csv(filename)
        df_all_train.to_csv(filename2)
        df_all_scores.to_csv(filename3)
        

In [0]:
t1 = time.time()

In [0]:
tickers = ['FXU', 'GDX', 'KBE', 'RXI', 'VNQ', 'XLP', 'XRT', 'DIA', 'IHI', 'IYW', 'SHY', 'VIS', 'XLE', 'XLF', 'XLK', 'XLU', 
           'XLV', 'XLY', 'SIL']

In [0]:
# Run Gradient Boosting

#df_creator(tickers, 'gb', 'regression', create_csv=False)
#df_creator(tickers, 'gb', 'classification', create_csv=False)

Best parameters scaling grid: {'gb__loss': 'deviance', 'gb__max_depth': 5, 'gb__n_estimators': 1000}
Best cross-validation score scaling grid: 43.85
Test score scaling grid (grid object): 26.43

FXU gb classification model

Train accuracy: 100.00%
Test accuracy: 27.43%
Train r2 score: 1.0000
Test r2 score: -0.3949
Test MSE: 0.9202
Best parameters scaling grid: {'gb__loss': 'deviance', 'gb__max_depth': 10, 'gb__n_estimators': 100}
Best cross-validation score scaling grid: 46.87
Test score scaling grid (grid object): 30.67

GDX gb classification model

Train accuracy: 100.00%
Test accuracy: 37.41%
Train r2 score: 1.0000
Test r2 score: -1.6390
Test MSE: 0.9476
Best parameters scaling grid: {'gb__loss': 'deviance', 'gb__max_depth': 10, 'gb__n_estimators': 1000}
Best cross-validation score scaling grid: 37.92
Test score scaling grid (grid object): 39.65

KBE gb classification model

Train accuracy: 100.00%
Test accuracy: 43.64%
Train r2 score: 1.0000
Test r2 score: -1.0278
Test MSE: 1.0723


In [0]:
# Run Random Forest model

#df_creator(tickers, 'rf', 'regression', create_csv=False)
#df_creator(tickers, 'rf', 'classification', create_csv=False)

{'sfm__threshold': 'median', 'rf__n_estimators': 300, 'rf__max_features': 0.5, 'rf__max_depth': 4}
FXU rf regression model

Train r2 score: 0.6693
Test r2 score: -0.4971
Test MSE: 0.0061
{'sfm__threshold': 'median', 'rf__n_estimators': 300, 'rf__max_features': 0.5, 'rf__max_depth': 4}
GDX rf regression model

Train r2 score: 0.8842
Test r2 score: -4.2747
Test MSE: 0.0278
{'sfm__threshold': 'median', 'rf__n_estimators': 1000, 'rf__max_features': 0.2, 'rf__max_depth': 4}
KBE rf regression model

Train r2 score: 0.5664
Test r2 score: -0.2884
Test MSE: 0.0112
{'sfm__threshold': 'median', 'rf__n_estimators': 900, 'rf__max_features': 0.5, 'rf__max_depth': 4}
RXI rf regression model

Train r2 score: 0.7261
Test r2 score: -0.2863
Test MSE: 0.0054
{'sfm__threshold': 'median', 'rf__n_estimators': 300, 'rf__max_features': 0.5, 'rf__max_depth': 4}
VNQ rf regression model

Train r2 score: 0.7268
Test r2 score: -0.4598
Test MSE: 0.0061
{'sfm__threshold': 'median', 'rf__n_estimators': 300, 'rf__max_f

In [0]:
# Run SVM model

#df_creator(tickers, 'svm', 'regression', create_csv=False)
#df_creator(tickers, 'svm', 'classification', create_csv=False)

Best parameters scaling grid: {'svm__C': 0.1, 'svm__gamma': 10, 'svm__kernel': 'rbf'}
Best cross-validation score scaling grid: -29.54
Test score scaling grid (grid object): -27.05

FXU svm regression model

Train r2 score: 0.3052
Test r2 score: -0.2705
Test MSE: 0.0052
Best parameters scaling grid: {'svm__C': 0.01, 'svm__kernel': 'linear'}
Best cross-validation score scaling grid: -53.76
Test score scaling grid (grid object): -861.04

GDX svm regression model

Train r2 score: 0.4645
Test r2 score: -8.6104
Test MSE: 0.0507
Best parameters scaling grid: {'svm__C': 1, 'svm__gamma': 100, 'svm__kernel': 'rbf'}
Best cross-validation score scaling grid: -29.36
Test score scaling grid (grid object): -8.74

KBE svm regression model

Train r2 score: 0.6856
Test r2 score: -0.0874
Test MSE: 0.0094
Best parameters scaling grid: {'svm__C': 0.1, 'svm__gamma': 1, 'svm__kernel': 'rbf'}
Best cross-validation score scaling grid: -17.81
Test score scaling grid (grid object): -0.79

RXI svm regression mod

Best parameters scaling grid: {'svm__C': 0.1, 'svm__gamma': 0.1, 'svm__kernel': 'rbf'}
Best cross-validation score scaling grid: 51.02
Test score scaling grid (grid object): 10.72

SHY svm classification model

Train accuracy: 57.57%
Test accuracy: 10.72%
Train r2 score: -0.5376
Test r2 score: -5.4155
Test MSE: 2.9426
Best parameters scaling grid: {'svm__degree': 2, 'svm__kernel': 'poly'}
Best cross-validation score scaling grid: 43.82
Test score scaling grid (grid object): 33.17

VIS svm classification model

Train accuracy: 45.54%
Test accuracy: 28.93%
Train r2 score: -0.8189
Test r2 score: -1.5713
Test MSE: 1.0100
Best parameters scaling grid: {'svm__C': 100, 'svm__gamma': 0.1, 'svm__kernel': 'rbf'}
Best cross-validation score scaling grid: 49.57
Test score scaling grid (grid object): 37.16

XLE svm classification model

Train accuracy: 89.04%
Test accuracy: 37.16%
Train r2 score: 0.8320
Test r2 score: -1.2690
Test MSE: 1.3466
Best parameters scaling grid: {'svm__C': 100, 'svm__gamm

In [0]:
t2 = time.time()
t = t2-t1
print("total time in minutes: {}".format(t/60))
print("total time in hours: {}".format(t/3600))

total time in minutes: 135.42916345993677
total time in hours: 2.2571527243322795
