# AMS 520 Classifier Tests

In [1]:
import functools as fn

import pytz
import datetime
from datetime import datetime, timedelta
import math

import pandas as pd
import numpy as np
import scipy.stats as scs
from matplotlib import pyplot as plt

pd.set_option('display.max_columns', None)

import warnings
warnings.filterwarnings("ignore")

# from sklearnex import patch_sklearn ## Must run patch before importing other sklearn functions!!!
# patch_sklearn()
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.svm import NuSVC
from sklearn.svm import SVR
from sklearn.svm import LinearSVR
from xgboost import XGBClassifier
from sklearn.model_selection import StratifiedShuffleSplit, StratifiedKFold, RandomizedSearchCV, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score
from sklearn.preprocessing import LabelEncoder
from sklearn.inspection import DecisionBoundaryDisplay

## Load in Data

In [12]:
df_clean = pd.read_feather('clean_features_30e.f')
df_clean.columns

Index(['index', 'time', 'midup', 'middown', 'mideq', 'spreadup', 'spreaddown',
       'spreadeq', 'ask_price', 'bid_price', 'ask_volume', 'bid_volume',
       'mid_price', 'spread', 'dPask_dt', 'dPbid_dt', 'dVask_dt', 'dVbid_dt',
       'avg_trade_price_10ms', 'avg_trade_price_100ms', 'avg_trade_price_1s',
       'avg_trade_price_10s', 'avg_trade_volume_10ms',
       'avg_trade_volume_100ms', 'avg_trade_volume_1s',
       'avg_trade_volume_10s'],
      dtype='object')

# Fit XGBClassifier for Mid-Price Movement

### Format Data

In [3]:
# split  = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=3407)
# for train_index, test_index in split.split(df_clean, df_clean['midup']):
#     train_set = df_clean.loc[train_index]
#     test_set = df_clean.loc[test_index]
    
split  = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=3407)
n = 10000
ind_select =  np.random.choice(np.arange(len(df_clean)),size=n)
df_select = df_clean.iloc[ind_select,:]
for train_index, test_index in split.split(df_select, df_select['midup']):
    train_set = df_clean[:n].loc[train_index]
    test_set = df_clean[:n].loc[test_index]

encoder = LabelEncoder()
#y_train = encoder.fit_transform(train_set['midup'])
y_train = np.where(np.array(train_set['midup']), 0, 1)
X_train = train_set[['ask_price', 'bid_price', 'ask_volume', 'bid_volume', 
                   'mid_price', 'spread', 
                   'dPask_dt', 'dPbid_dt', 'dVbid_dt', 'dVask_dt',
                   'avg_trade_price_10ms','avg_trade_volume_10ms',
                   'avg_trade_price_100ms','avg_trade_volume_100ms',
                   'avg_trade_price_1s','avg_trade_volume_1s',
                   'avg_trade_price_10s','avg_trade_volume_10s' 
                  ]]

### Perform Hyperparameter Grid Search

In [4]:
folds = 5
param_comb = 5
params = {
    "max_depth": [3, 5, 7],
    "learning_rate": [0.2, 0.1, 0.01, 0.05],
    "gamma": [0, 0.25, 1],
    "reg_lambda": [0, 1, 10],
    #"scale_pos_weight": [1, 3, 5],
    "subsample": [0.8],
    "colsample_bytree": [0.5],
    'reg_alpha':[0,0.01, 0.05]

}

xgb = XGBClassifier(booster='gbtree')

#skf = StratifiedKFold(n_splits=folds, shuffle = True, random_state = 1001)
#random_search = RandomizedSearchCV(xgb, param_distributions=params, n_iter=param_comb, scoring='roc_auc', n_jobs=4, cv=skf.split(X_train,y_train), verbose=3, random_state=1001 )
clf = GridSearchCV(estimator=xgb, param_grid=params, scoring='roc_auc', verbose=4)

clf.fit(X_train,y_train)

print('\n Best estimator:')
print(clf.best_estimator_)

Fitting 5 folds for each of 324 candidates, totalling 1620 fits
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.873 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.896 total time=   0.0s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.862 total time=   0.0s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.888 total time=   0.0s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.867 total time=   0.0s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.880 total time=   0.0s
[CV 2/5] END colsample_bytree=0.5, g

[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.944 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.956 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.925 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.946 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.934 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.950 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0.01, 

[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.966 total time=   0.2s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.970 total time=   0.2s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.976 total time=   0.2s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.959 total time=   0.2s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.971 total time=   0.2s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.966 total time=   0.2s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_al

[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.821 total time=   0.0s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.839 total time=   0.0s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.859 total time=   0.0s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.831 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.850 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.821 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_a

[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.935 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.922 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.918 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.930 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.898 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.924 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=5, re

[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.744 total time=   0.0s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.708 total time=   0.0s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.740 total time=   0.0s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.753 total time=   0.0s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.732 total time=   0.0s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.743 total time=   0.0s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_

[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.820 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.842 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.818 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.847 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.867 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.827 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_a

[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.939 total time=   0.2s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.901 total time=   0.2s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.930 total time=   0.2s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.915 total time=   0.2s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.898 total time=   0.2s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.922 total time=   0.2s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=

[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.811 total time=   0.0s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.794 total time=   0.0s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.803 total time=   0.0s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.786 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.797 total time=   0.0s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.809 total time=   0.0s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3,

[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.888 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.901 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.866 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.901 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.872 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.955 total time=   0.2s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=

[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.880 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.897 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.860 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.886 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.859 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.865 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, re

[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.950 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.934 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.952 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.960 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.936 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.959 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, ma

[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.978 total time=   0.2s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.959 total time=   0.2s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.971 total time=   0.2s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.965 total time=   0.2s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.967 total time=   0.2s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.975 total time=   0.2s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0

[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.818 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.838 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.854 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.831 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.850 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.821 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.

[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.911 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.936 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.918 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.914 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.930 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.898 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=

[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.753 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.732 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.744 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.708 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.740 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.753 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth

[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.830 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.831 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.852 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.820 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.842 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.818 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_

[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.905 total time=   0.2s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.936 total time=   0.2s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.917 total time=   0.2s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.923 total time=   0.2s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.939 total time=   0.2s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.901 total time=   0.2s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_ra

[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.796 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.805 total time=   0.0s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.783 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.793 total time=   0.0s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.772 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.798 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learni

[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.916 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.886 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.904 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.919 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.880 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.910 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_ra

[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.951 total time=   0.2s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.921 total time=   0.2s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.945 total time=   0.2s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.927 total time=   0.2s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.873 total time=   0.0s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.897 total time=   0.0s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_d

[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.948 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.959 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.939 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.961 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.939 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.944 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda

[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.953 total time=   0.2s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.970 total time=   0.2s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.975 total time=   0.2s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.958 total time=   0.2s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.972 total time=   0.2s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.963 total time=   0.2s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alph

[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=3, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.847 total time=   0.0s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=3, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.827 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.833 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.849 total time=   0.0s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.818 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.836 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=3, re

[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.912 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.939 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.918 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.925 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.940 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.907 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=5, reg_al

[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.962 total time=   0.2s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.939 total time=   0.2s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.956 total time=   0.2s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.945 total time=   0.2s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.743 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.753 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=3, reg_a

[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.844 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.865 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.824 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.854 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.829 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.830 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0, reg

[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.891 total time=   0.2s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.927 total time=   0.2s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.941 total time=   0.2s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.903 total time=   0.2s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.935 total time=   0.2s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.915 total time=   0.3s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, r

[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=3, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.804 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=3, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.779 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.796 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.805 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.783 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.796 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_dept

[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.881 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.912 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.886 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.899 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.918 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.881 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5,

[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.951 total time=   0.2s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.917 total time=   0.2s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.943 total time=   0.2s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.926 total time=   0.2s

 Best estimator:
XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=0.5,
              early_stopping_rounds=None, enable_categorical=False,
              eval_metric=None, feature_types=None, gamma=0, gpu_id=-1,
              grow_policy='depthwise', importance_type=None,
              intera

### Fit Best Estimator Model

In [7]:
xgb = clf.best_estimator_
xgb.fit(X_train, y_train)

xgb_predict = xgb.predict(test_set[[
                   'ask_price', 'bid_price', 'ask_volume', 'bid_volume', 
                   'mid_price', 'spread', 
                   'dPask_dt', 'dPbid_dt', 'dVbid_dt', 'dVask_dt',
                   'avg_trade_price_10ms','avg_trade_volume_10ms',
                   'avg_trade_price_100ms','avg_trade_volume_100ms',
                   'avg_trade_price_1s','avg_trade_volume_1s',
                   'avg_trade_price_10s','avg_trade_volume_10s' 
                  ]])

#y_test = encoder.fit_transform(test_set['y_mid'])
y_test = np.where(np.array(test_set['midup']), 0, 1)
print("XGBClassifier Results for Up/Down")
print(confusion_matrix(y_test, xgb_predict))
print(classification_report(y_test, xgb_predict))
print()
train_set['midup'].value_counts()/len(y_train)

XGBClassifier Results for Up/Down
[[949  68]
 [105 878]]
              precision    recall  f1-score   support

           0       0.90      0.93      0.92      1017
           1       0.93      0.89      0.91       983

    accuracy                           0.91      2000
   macro avg       0.91      0.91      0.91      2000
weighted avg       0.91      0.91      0.91      2000




True     0.518125
False    0.481875
Name: midup, dtype: float64

# Fit XGBClassifier for Spread Movement Up/Down

### Format Data

In [13]:
# split  = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=3407)
# for train_index, test_index in split.split(df_clean, df_clean['midup']):
#     train_set = df_clean.loc[train_index]
#     test_set = df_clean.loc[test_index]
    
split  = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=3407)
n = 10000
ind_select =  np.random.choice(np.arange(len(df_clean)),size=n)
df_select = df_clean.iloc[ind_select,:]
for train_index, test_index in split.split(df_select, df_select['spreadup']):
    train_set = df_clean[:n].loc[train_index]
    test_set = df_clean[:n].loc[test_index]

encoder = LabelEncoder()
#y_train = encoder.fit_transform(train_set['midup'])
y_train = np.where(np.array(train_set['spreadup']), 0, 1)
X_train = train_set[['ask_price', 'bid_price', 'ask_volume', 'bid_volume', 
                   'mid_price', 'spread', 
                   'dPask_dt', 'dPbid_dt', 'dVbid_dt', 'dVask_dt',
                   'avg_trade_price_10ms','avg_trade_volume_10ms',
                   'avg_trade_price_100ms','avg_trade_volume_100ms',
                   'avg_trade_price_1s','avg_trade_volume_1s',
                   'avg_trade_price_10s','avg_trade_volume_10s' 
                  ]]

### Perform Hyperparameter Grid Search

In [14]:
folds = 5
param_comb = 5
params = {
    "max_depth": [3, 5, 7],
    "learning_rate": [0.2, 0.1, 0.01, 0.05],
    "gamma": [0, 0.25, 1],
    "reg_lambda": [0, 1, 10],
    #"scale_pos_weight": [1, 3, 5],
    "subsample": [0.8],
    "colsample_bytree": [0.5],
    'reg_alpha':[0,0.01, 0.05]

}

xgb = XGBClassifier(booster='gbtree')

#skf = StratifiedKFold(n_splits=folds, shuffle = True, random_state = 1001)
#random_search = RandomizedSearchCV(xgb, param_distributions=params, n_iter=param_comb, scoring='roc_auc', n_jobs=4, cv=skf.split(X_train,y_train), verbose=3, random_state=1001 )
clf = GridSearchCV(estimator=xgb, param_grid=params, scoring='roc_auc', verbose=4)

clf.fit(X_train,y_train)

print('\n Best estimator:')
print(clf.best_estimator_)

Fitting 5 folds for each of 324 candidates, totalling 1620 fits
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.917 total time=   0.0s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.912 total time=   0.0s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.910 total time=   0.0s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.912 total time=   0.0s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.914 total time=   0.0s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.930 total time=   0.0s
[CV 2/5] END colsample_bytree=0.5, g

[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.959 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.964 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.944 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.932 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.940 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.954 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg

[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.959 total time=   0.2s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.971 total time=   0.2s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.972 total time=   0.2s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.967 total time=   0.2s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.955 total time=   0.2s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.955 total time=   0.2s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_al

[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.860 total time=   0.0s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.863 total time=   0.0s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.863 total time=   0.0s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.890 total time=   0.0s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.880 total time=   0.0s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.878 total time=   0.0s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg

[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.933 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.934 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.948 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.943 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.927 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.923 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=5, reg

[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.751 total time=   0.0s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.757 total time=   0.0s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.779 total time=   0.0s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.756 total time=   0.0s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.770 total time=   0.0s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.745 total time=   0.0s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_

[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.753 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.776 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.781 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.754 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.868 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.856 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alp

[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.920 total time=   0.2s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.904 total time=   0.2s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.900 total time=   0.2s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.902 total time=   0.2s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.903 total time=   0.2s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.806 total time=   0.2s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7

[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.821 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.848 total time=   0.0s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.830 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.840 total time=   0.0s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.841 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.842 total time=   0.0s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3

[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.913 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.927 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.913 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.887 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.891 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.895 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_dept

[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.912 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.914 total time=   0.0s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.925 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.915 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.894 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.905 total time=   0.0s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg

[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.951 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.934 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.939 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.950 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.971 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.953 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_de

[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.969 total time=   0.2s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.972 total time=   0.2s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.969 total time=   0.2s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.954 total time=   0.2s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.955 total time=   0.2s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.968 total time=   0.2s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2

[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.861 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.859 total time=   0.0s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.867 total time=   0.0s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.890 total time=   0.0s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.880 total time=   0.0s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.878 total time=   0.0s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=

[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.940 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.937 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.936 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.951 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.943 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.928 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0

[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.954 total time=   0.2s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.773 total time=   0.0s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.751 total time=   0.0s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.757 total time=   0.0s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.779 total time=   0.0s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.756 total time=   0.0s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_de

[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.846 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.850 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.841 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.767 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.752 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.779 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_de

[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.933 total time=   0.2s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.918 total time=   0.2s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.913 total time=   0.2s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.918 total time=   0.2s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.919 total time=   0.2s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.919 total time=   0.2s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_ra

[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=3, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.834 total time=   0.0s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=3, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.839 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.829 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.807 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.816 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.820 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learnin

[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.916 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.913 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.916 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.925 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.933 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.911 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_ra

[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=7, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.956 total time=   0.2s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.944 total time=   0.2s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.919 total time=   0.2s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.926 total time=   0.2s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.924 total time=   0.2s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.932 total time=   0.2s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_

[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.946 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.956 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.963 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.952 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.944 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.949 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=

[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.951 total time=   0.2s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.950 total time=   0.2s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.967 total time=   0.2s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.969 total time=   0.2s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.962 total time=   0.2s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.950 total time=   0.2s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0.

[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=3, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.874 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=3, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.873 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=3, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.880 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=3, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.883 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.878 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.847 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=3, reg_

[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.962 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.939 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.937 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.941 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.947 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.958 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=5, reg_al

[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=7, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.966 total time=   0.2s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.962 total time=   0.2s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.947 total time=   0.2s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.943 total time=   0.2s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.940 total time=   0.2s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.951 total time=   0.2s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=3, 

[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.862 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=0.855 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.862 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.838 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.846 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=0.850 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0, reg_

[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.813 total time=   0.2s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.820 total time=   0.2s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=0.792 total time=   0.2s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.931 total time=   0.2s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.916 total time=   0.2s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=0.913 total time=   0.2s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_a

[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=3, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.823 total time=   0.0s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=3, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.835 total time=   0.0s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=3, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.834 total time=   0.0s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=3, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=0.839 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.829 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=3, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=0.807 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=

[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.942 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.916 total time=   0.1s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.919 total time=   0.1s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.915 total time=   0.1s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=0.924 total time=   0.1s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.932 total time=   0.1s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5,

[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=7, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=0.953 total time=   0.2s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.944 total time=   0.2s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.921 total time=   0.2s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.925 total time=   0.2s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.922 total time=   0.2s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=0.932 total time=   0.2s

 Best estimator:
XGBClassifier(base_score=0.5, booster='gbtree', callb

### Fit Best Estimator Model

In [15]:
xgb = clf.best_estimator_
xgb.fit(X_train, y_train)

xgb_predict = xgb.predict(test_set[[
                   'ask_price', 'bid_price', 'ask_volume', 'bid_volume', 
                   'mid_price', 'spread', 
                   'dPask_dt', 'dPbid_dt', 'dVbid_dt', 'dVask_dt',
                   'avg_trade_price_10ms','avg_trade_volume_10ms',
                   'avg_trade_price_100ms','avg_trade_volume_100ms',
                   'avg_trade_price_1s','avg_trade_volume_1s',
                   'avg_trade_price_10s','avg_trade_volume_10s' 
                  ]])

#y_test = encoder.fit_transform(test_set['y_mid'])
y_test = np.where(np.array(test_set['spreadup']), 0, 1)
print("XGBClassifier Results for Up/Down")
print(confusion_matrix(y_test, xgb_predict))
print(classification_report(y_test, xgb_predict))
print()
train_set['spreadup'].value_counts()/len(y_train)

XGBClassifier Results for Up/Down
[[ 157   85]
 [  22 1736]]
              precision    recall  f1-score   support

           0       0.88      0.65      0.75       242
           1       0.95      0.99      0.97      1758

    accuracy                           0.95      2000
   macro avg       0.92      0.82      0.86      2000
weighted avg       0.94      0.95      0.94      2000




False    0.876375
True     0.123625
Name: spreadup, dtype: float64

## Load in Data for New Clean Features

In [16]:
df_clean = pd.read_feather('new_clean_features_30e.f')
df_clean.columns

Index(['time', 'y_mid', 'y_spread', 'ask_price', 'bid_price', 'ask_volume',
       'bid_volume', 'mid_price', 'spread', 'dPask_dt', 'dPbid_dt', 'dVask_dt',
       'dVbid_dt', 'avg_trade_price_10ms', 'avg_trade_price_100ms',
       'avg_trade_price_1s', 'avg_trade_price_10s', 'avg_trade_volume_10ms',
       'avg_trade_volume_100ms', 'avg_trade_volume_1s',
       'avg_trade_volume_10s'],
      dtype='object')

# Fit XGBClassifier for Mid-Price Movement - 3

### Format Data

In [9]:
# split  = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=3407)
# for train_index, test_index in split.split(df_clean, df_clean['midup']):
#     train_set = df_clean.loc[train_index]
#     test_set = df_clean.loc[test_index]
    
split  = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=3407)
n = 10000
ind_select =  np.random.choice(np.arange(len(df_clean)),size=n)
df_select = df_clean.iloc[ind_select,:]
for train_index, test_index in split.split(df_select, df_select['y_mid']):
    train_set = df_clean[:n].loc[train_index]
    test_set = df_clean[:n].loc[test_index]

encoder = LabelEncoder()
y_train = encoder.fit_transform(train_set['y_mid'])
#y_train = np.where(np.array(train_set['midup']), 0, 1)
X_train = train_set[['ask_price', 'bid_price', 'ask_volume', 'bid_volume', 
                   'mid_price', 'spread', 
                   'dPask_dt', 'dPbid_dt', 'dVbid_dt', 'dVask_dt',
                   'avg_trade_price_10ms','avg_trade_volume_10ms',
                   'avg_trade_price_100ms','avg_trade_volume_100ms',
                   'avg_trade_price_1s','avg_trade_volume_1s',
                   'avg_trade_price_10s','avg_trade_volume_10s' 
                  ]]

### Perform Hyperparameter Grid Search

In [10]:
folds = 5
param_comb = 5
params = {
    "max_depth": [3, 5, 7],
    "learning_rate": [0.2, 0.1, 0.01, 0.05],
    "gamma": [0, 0.25, 1],
    "reg_lambda": [0, 1, 10],
    #"scale_pos_weight": [1, 3, 5],
    "subsample": [0.8],
    "colsample_bytree": [0.5],
    'reg_alpha':[0,0.01, 0.05]

}

xgb = XGBClassifier(booster='gbtree')

#skf = StratifiedKFold(n_splits=folds, shuffle = True, random_state = 1001)
#random_search = RandomizedSearchCV(xgb, param_distributions=params, n_iter=param_comb, scoring='roc_auc', n_jobs=4, cv=skf.split(X_train,y_train), verbose=3, random_state=1001 )
clf = GridSearchCV(estimator=xgb, param_grid=params, scoring='roc_auc', verbose=4)

clf.fit(X_train,y_train)

print('\n Best estimator:')
print(clf.best_estimator_)

Fitting 5 folds for each of 324 candidates, totalling 1620 fits
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0, lear

[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.4s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.4s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=0

[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, r

[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.05, re

[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=7, reg_alpha=0, reg

[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=10, 

[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0

[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.6s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_al

[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=

[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.8s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=1, su

[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alp

[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=

[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_dept

[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_dep

[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=7, reg_alpha=0, re

[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=3, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=3, 

[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max

[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.8s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.8s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.8s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.8s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, m

[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, 

[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=7, reg_alph

[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=3, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=3, reg_alpha=0.01, reg_lambda=0

[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0.01, r

[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0.05, re

[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=5, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=5, reg_alpha=0, reg

[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=10, subsamp

[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=3, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=3, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=3, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=3, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=3, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=3, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=3, reg_alpha=0

[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_al

[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.8s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=

[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0, reg_lambda=1, su

[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=7, reg_alpha=0.01, reg_l

### Fit Best Estimator Model

In [11]:
xgb = clf.best_estimator_
xgb.fit(X_train, y_train)

xgb_predict = xgb.predict(test_set[[
                   'ask_price', 'bid_price', 'ask_volume', 'bid_volume', 
                   'mid_price', 'spread', 
                   'dPask_dt', 'dPbid_dt', 'dVbid_dt', 'dVask_dt',
                   'avg_trade_price_10ms','avg_trade_volume_10ms',
                   'avg_trade_price_100ms','avg_trade_volume_100ms',
                   'avg_trade_price_1s','avg_trade_volume_1s',
                   'avg_trade_price_10s','avg_trade_volume_10s' 
                  ]])

y_test = encoder.fit_transform(test_set['y_mid'])
#y_test = np.where(np.array(test_set['midup']), 0, 1)
print("XGBClassifier Results for Up/Down/Stationary")
print(confusion_matrix(y_test, xgb_predict))
print(classification_report(y_test, xgb_predict))
print()
train_set['y_mid'].value_counts()/len(y_train)

XGBClassifier Results for Up/Down/Stationary
[[625   1 217]
 [ 39   3  81]
 [134   0 900]]
              precision    recall  f1-score   support

           0       0.78      0.74      0.76       843
           1       0.75      0.02      0.05       123
           2       0.75      0.87      0.81      1034

    accuracy                           0.76      2000
   macro avg       0.76      0.55      0.54      2000
weighted avg       0.76      0.76      0.74      2000




Up            0.516000
Down          0.417125
Stationary    0.066875
Name: y_mid, dtype: float64

# Fit XGBClassifier for Spread Movement - 3

### Format Data

In [17]:
# split  = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=3407)
# for train_index, test_index in split.split(df_clean, df_clean['midup']):
#     train_set = df_clean.loc[train_index]
#     test_set = df_clean.loc[test_index]
    
split  = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=3407)
n = 10000
ind_select =  np.random.choice(np.arange(len(df_clean)),size=n)
df_select = df_clean.iloc[ind_select,:]
for train_index, test_index in split.split(df_select, df_select['y_spread']):
    train_set = df_clean[:n].loc[train_index]
    test_set = df_clean[:n].loc[test_index]

encoder = LabelEncoder()
y_train = encoder.fit_transform(train_set['y_spread'])
#y_train = np.where(np.array(train_set['midup']), 0, 1)
X_train = train_set[['ask_price', 'bid_price', 'ask_volume', 'bid_volume', 
                   'mid_price', 'spread', 
                   'dPask_dt', 'dPbid_dt', 'dVbid_dt', 'dVask_dt',
                   'avg_trade_price_10ms','avg_trade_volume_10ms',
                   'avg_trade_price_100ms','avg_trade_volume_100ms',
                   'avg_trade_price_1s','avg_trade_volume_1s',
                   'avg_trade_price_10s','avg_trade_volume_10s' 
                  ]]

### Perform Hyperparameter Grid Search

In [18]:
folds = 5
param_comb = 5
params = {
    "max_depth": [3, 5, 7],
    "learning_rate": [0.2, 0.1, 0.01, 0.05],
    "gamma": [0, 0.25, 1],
    "reg_lambda": [0, 1, 10],
    #"scale_pos_weight": [1, 3, 5],
    "subsample": [0.8],
    "colsample_bytree": [0.5],
    'reg_alpha':[0,0.01, 0.05]

}

xgb = XGBClassifier(booster='gbtree')

#skf = StratifiedKFold(n_splits=folds, shuffle = True, random_state = 1001)
#random_search = RandomizedSearchCV(xgb, param_distributions=params, n_iter=param_comb, scoring='roc_auc', n_jobs=4, cv=skf.split(X_train,y_train), verbose=3, random_state=1001 )
clf = GridSearchCV(estimator=xgb, param_grid=params, scoring='roc_auc', verbose=4)

clf.fit(X_train,y_train)

print('\n Best estimator:')
print(clf.best_estimator_)

Fitting 5 folds for each of 324 candidates, totalling 1620 fits
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0, lear

[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=0

[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=7, reg_alpha=0.01, r

[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=3, reg_alpha=0.05, re

[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=5, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.1, max_depth=7, reg_alpha=0, reg

[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=10, 

[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=5, reg_alpha=0

[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.6s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.01, max_depth=7, reg_al

[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=3, reg_alpha=

[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.8s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 3/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.8s
[CV 4/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 5/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 1/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=1, su

[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=3, reg_alp

[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=

[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=7, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.8s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.2, max_dept

[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_dep

[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.1, max_depth=7, reg_alpha=0, re

[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=3, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=3, 

[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max

[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=7, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.8s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.01, m

[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=3, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, 

[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.8s
[CV 4/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 5/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 1/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 3/5] END colsample_bytree=0.5, gamma=0.25, learning_rate=0.05, max_depth=7, reg_alph

[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=3, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=3, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=3, reg_alpha=0.01, reg_lambda=0

[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=5, reg_alpha=0.01, r

[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.2, max_depth=7, reg_alpha=0.05, re

[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=3, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.3s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=5, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=5, reg_alpha=0, reg

[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.8s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.8s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=7, reg_alpha=0, reg_lambda=10, subsamp

[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=3, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=3, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=3, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=3, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=3, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.3s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=3, reg_alpha=0.01, reg_lambda=1, subsample=0.8;, score=nan total time=   0.3s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=3, reg_alpha=0

[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0.01, reg_lambda=10, subsample=0.8;, score=nan total time=   0.5s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_alpha=0.05, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=5, reg_al

[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=0.05, reg_lambda=1, subsample=0.8;, score=nan total time=   0.7s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=0.05, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.01, max_depth=7, reg_alpha=

[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0, reg_lambda=0, subsample=0.8;, score=nan total time=   0.5s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0, reg_lambda=1, subsample=0.8;, score=nan total time=   0.5s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=5, reg_alpha=0, reg_lambda=1, su

[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 3/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 4/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.8s
[CV 5/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=7, reg_alpha=0, reg_lambda=10, subsample=0.8;, score=nan total time=   0.7s
[CV 1/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=7, reg_alpha=0.01, reg_lambda=0, subsample=0.8;, score=nan total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=7, reg_alpha=0.01, reg_l

### Fit Best Estimator Model

In [19]:
xgb = clf.best_estimator_
xgb.fit(X_train, y_train)

xgb_predict = xgb.predict(test_set[[
                   'ask_price', 'bid_price', 'ask_volume', 'bid_volume', 
                   'mid_price', 'spread', 
                   'dPask_dt', 'dPbid_dt', 'dVbid_dt', 'dVask_dt',
                   'avg_trade_price_10ms','avg_trade_volume_10ms',
                   'avg_trade_price_100ms','avg_trade_volume_100ms',
                   'avg_trade_price_1s','avg_trade_volume_1s',
                   'avg_trade_price_10s','avg_trade_volume_10s' 
                  ]])

y_test = encoder.fit_transform(test_set['y_spread'])
#y_test = np.where(np.array(test_set['midup']), 0, 1)
print("XGBClassifier Results for Up/Down/Stationary")
print(confusion_matrix(y_test, xgb_predict))
print(classification_report(y_test, xgb_predict))
print()
train_set['y_spread'].value_counts()/len(y_train)

XGBClassifier Results for Up/Down/Stationary
[[1545    7    8]
 [ 159   19   15]
 [ 157    5   85]]
              precision    recall  f1-score   support

           0       0.83      0.99      0.90      1560
           1       0.61      0.10      0.17       193
           2       0.79      0.34      0.48       247

    accuracy                           0.82      2000
   macro avg       0.74      0.48      0.52      2000
weighted avg       0.80      0.82      0.78      2000




Down          0.7805
Up            0.1230
Stationary    0.0965
Name: y_spread, dtype: float64

# Fit XGBClassifier for Spread Movement

### Format Data

In [3]:
split  = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=3407)
for train_index, test_index in split.split(df_clean, df_clean['y_spread']):
    train_set = df_clean.loc[train_index]
    test_set = df_clean.loc[test_index]

encoder = LabelEncoder()
y_train = encoder.fit_transform(train_set['y_spread'])
X_train = train_set[['ask_price', 'bid_price', 'ask_volume', 'bid_volume', 
                   'mid_price', 'spread', 
                   'dPask_dt', 'dPbid_dt', 'dVbid_dt', 'dVask_dt',
                   'avg_trade_price_10ms','avg_trade_volume_10ms',
                   'avg_trade_price_100ms','avg_trade_volume_100ms',
                   'avg_trade_price_1s','avg_trade_volume_1s',
                   'avg_trade_price_10s','avg_trade_volume_10s' 
                  ]]

### Perform Hyperparameter Grid Search

In [None]:
folds = 5
param_comb = 5
params = {
    "max_depth": [3, 4, 5, 7, 8],
    "learning_rate": [0.2,0.1, 0.01, 0.05],
    "gamma": [0, 0.25, 1],
    "reg_lambda": [0, 1, 10, 11],
    #"scale_pos_weight": [1, 3, 5],
    "subsample": [0.8],
    "colsample_bytree": [0.5],
    'reg_alpha':[0,0.01, 0.05]

}

xgb = XGBClassifier(booster='gbtree')

#skf = StratifiedKFold(n_splits=folds, shuffle = True, random_state = 1001)
#random_search = RandomizedSearchCV(xgb, param_distributions=params, n_iter=param_comb, scoring='roc_auc', n_jobs=4, cv=skf.split(X_train,y_train), verbose=3, random_state=1001 )
clf = GridSearchCV(estimator=xgb, param_grid=params, scoring='roc_auc', verbose=1)

clf.fit(X_train,y_train)

print('\n Best estimator:')
print(clf.best_estimator_)

Fitting 5 folds for each of 720 candidates, totalling 3600 fits


### Fit Best Estimator Model

In [9]:
xgb = random_search.best_estimator_
xgb.fit(X_train, y_train)

xgb_predict = xgb.predict(test_set[[
                   'ask_price', 'bid_price', 'ask_volume', 'bid_volume', 
                   'mid_price', 'spread', 
                   'dPask_dt', 'dPbid_dt', 'dVbid_dt', 'dVask_dt',
                   'avg_trade_price_10ms','avg_trade_volume_10ms',
                   'avg_trade_price_100ms','avg_trade_volume_100ms',
                   'avg_trade_price_1s','avg_trade_volume_1s',
                   'avg_trade_price_10s','avg_trade_volume_10s' 
                  ]])

y_test = encoder.fit_transform(test_set['y_spread'])
print("XGBClassifier Results")
print(confusion_matrix(y_test, xgb_predict))
print(classification_report(y_test, xgb_predict))
print()
train_set['y_spread'].value_counts()/len(y_train)

Parameters: { "scale_pos_weight" } are not used.

XGBClassifier Results
[[64662     0     1]
 [ 7565     7     4]
 [ 5297     0    47]]
              precision    recall  f1-score   support

           0       0.83      1.00      0.91     64663
           1       1.00      0.00      0.00      7576
           2       0.90      0.01      0.02      5344

    accuracy                           0.83     77583
   macro avg       0.91      0.34      0.31     77583
weighted avg       0.86      0.83      0.76     77583




Down          0.833475
Stationary    0.097651
Up            0.068875
Name: y_spread, dtype: float64

# Fit scikit Models for Mid-Price Movement - "Up"

In [3]:
split  = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=3407)
n = 10000
ind_select =  np.random.choice(np.arange(len(df_clean)),size=n)
df_select = df_clean.iloc[ind_select,:]
for train_index, test_index in split.split(df_select, df_select['y_mid']):
    train_set = df_clean[:n].loc[train_index]
    test_set = df_clean[:n].loc[test_index]

y_train = np.where(np.array(train_set['y_mid'])=='Up', 0, 1)

X_train = train_set[['ask_price', 'bid_price', 'ask_volume', 'bid_volume', 
                   'mid_price', 'spread', 
                   'dPask_dt', 'dPbid_dt', 'dVbid_dt', 'dVask_dt',
                   'avg_trade_price_10ms','avg_trade_volume_10ms',
                   'avg_trade_price_100ms','avg_trade_volume_100ms',
                   'avg_trade_price_1s','avg_trade_volume_1s',
                   'avg_trade_price_10s','avg_trade_volume_10s' 
                  ]]

models = (
    SVC(kernel="linear", C=0.25, verbose=True),
    LinearSVC(C=0.25, max_iter=10000, verbose=True),
    SVC(kernel="rbf", gamma=1.0, C=0.25, verbose=True),
    SVC(kernel="poly", degree=2, gamma=1.0, C=0.25,  coef0=1.0, verbose=True)
    #SVC(kernel="poly", degree=3, gamma=1.0, C=0.25, verbose=True),
)
models = (clf.fit(X_train, y_train) for clf in models)

In [None]:
y_test = np.where(np.array(test_set['y_mid'])=='Up', 0, 1)

X_test = test_set[[
                   'ask_price', 'bid_price', 'ask_volume', 'bid_volume', 
                   'mid_price', 'spread', 
                   'dPask_dt', 'dPbid_dt', 'dVbid_dt', 'dVask_dt',
                   'avg_trade_price_10ms','avg_trade_volume_10ms',
                   'avg_trade_price_100ms','avg_trade_volume_100ms',
                   'avg_trade_price_1s','avg_trade_volume_1s',
                   'avg_trade_price_10s','avg_trade_volume_10s' 
                  ]]

titles = (
    "SVC with linear kernel",
    "LinearSVC (linear kernel)",
    "SVC with RBF kernel",
    "SVC with polynomial (degree 2) kernel"
)


train_set['y_mid'].value_counts()/len(y_train)
print()

for clf, title in zip(models, titles):
    clf_predict = clf.predict(X_test)
    print(title)
    print(confusion_matrix(y_test, clf_predict))
    print(classification_report(y_test, clf_predict))
    print()


[LibSVM]SVC with linear kernel
[[493 545]
 [337 625]]
              precision    recall  f1-score   support

           0       0.59      0.47      0.53      1038
           1       0.53      0.65      0.59       962

    accuracy                           0.56      2000
   macro avg       0.56      0.56      0.56      2000
weighted avg       0.57      0.56      0.56      2000


[LibLinear]LinearSVC (linear kernel)
[[1038    0]
 [ 961    1]]
              precision    recall  f1-score   support

           0       0.52      1.00      0.68      1038
           1       1.00      0.00      0.00       962

    accuracy                           0.52      2000
   macro avg       0.76      0.50      0.34      2000
weighted avg       0.75      0.52      0.36      2000


[LibSVM]SVC with RBF kernel
[[1038    0]
 [ 962    0]]
              precision    recall  f1-score   support

           0       0.52      1.00      0.68      1038
           1       0.00      0.00      0.00       962

    ac

# Fit scikit Models for Mid-Price Movement - "Stationary"

In [None]:
split  = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=3407)
n = 10000
ind_select =  np.random.choice(np.arange(len(df_clean)),size=n)
df_select = df_clean.iloc[ind_select,:]
for train_index, test_index in split.split(df_select, df_select['y_mid']):
    train_set = df_clean[:n].loc[train_index]
    test_set = df_clean[:n].loc[test_index]

y_train = np.where(np.array(train_set['y_mid'])=='Stationary', 0, 1)

X_train = train_set[['ask_price', 'bid_price', 'ask_volume', 'bid_volume', 
                   'mid_price', 'spread', 
                   'dPask_dt', 'dPbid_dt', 'dVbid_dt', 'dVask_dt',
                   'avg_trade_price_10ms','avg_trade_volume_10ms',
                   'avg_trade_price_100ms','avg_trade_volume_100ms',
                   'avg_trade_price_1s','avg_trade_volume_1s',
                   'avg_trade_price_10s','avg_trade_volume_10s' 
                  ]]

models = (
    SVC(kernel="linear", C=0.25, verbose=True),
    LinearSVC(C=0.25, max_iter=10000, verbose=True),
    SVC(kernel="rbf", gamma=1.0, C=0.25, verbose=True),
    SVC(kernel="poly", degree=2, gamma=1.0, C=0.25,  coef0=1.0, verbose=True)
    #SVC(kernel="poly", degree=3, gamma=1.0, C=0.25, verbose=True),
)
models = (clf.fit(X_train, y_train) for clf in models)

In [None]:
y_test = np.where(np.array(test_set['y_mid'])=='Stationary', 0, 1)

X_test = test_set[[
                   'ask_price', 'bid_price', 'ask_volume', 'bid_volume', 
                   'mid_price', 'spread', 
                   'dPask_dt', 'dPbid_dt', 'dVbid_dt', 'dVask_dt',
                   'avg_trade_price_10ms','avg_trade_volume_10ms',
                   'avg_trade_price_100ms','avg_trade_volume_100ms',
                   'avg_trade_price_1s','avg_trade_volume_1s',
                   'avg_trade_price_10s','avg_trade_volume_10s' 
                  ]]

titles = (
    "SVC with linear kernel",
    "LinearSVC (linear kernel)",
    "SVC with RBF kernel",
    "SVC with polynomial (degree 2) kernel"
)


train_set['y_mid'].value_counts()/len(y_train)
print()

for clf, title in zip(models, titles):
    clf_predict = clf.predict(X_test)
    print(title)
    print(confusion_matrix(y_test, clf_predict))
    print(classification_report(y_test, clf_predict))
    print()

# Fit scikit Models for Mid-Price Movement - "Down"

In [None]:
split  = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=3407)
n = 10000
ind_select =  np.random.choice(np.arange(len(df_clean)),size=n)
df_select = df_clean.iloc[ind_select,:]
for train_index, test_index in split.split(df_select, df_select['y_mid']):
    train_set = df_clean[:n].loc[train_index]
    test_set = df_clean[:n].loc[test_index]

y_train = np.where(np.array(train_set['y_mid'])=='Down', 0, 1)

X_train = train_set[['ask_price', 'bid_price', 'ask_volume', 'bid_volume', 
                   'mid_price', 'spread', 
                   'dPask_dt', 'dPbid_dt', 'dVbid_dt', 'dVask_dt',
                   'avg_trade_price_10ms','avg_trade_volume_10ms',
                   'avg_trade_price_100ms','avg_trade_volume_100ms',
                   'avg_trade_price_1s','avg_trade_volume_1s',
                   'avg_trade_price_10s','avg_trade_volume_10s' 
                  ]]

models = (
    SVC(kernel="linear", C=0.25, verbose=True),
    LinearSVC(C=0.25, max_iter=10000, verbose=True),
    SVC(kernel="rbf", gamma=1.0, C=0.25, verbose=True),
    SVC(kernel="poly", degree=2, gamma=1.0, C=0.25,  coef0=1.0, verbose=True)
    #SVC(kernel="poly", degree=3, gamma=1.0, C=0.25, verbose=True),
)
models = (clf.fit(X_train, y_train) for clf in models)

In [None]:
y_test = np.where(np.array(test_set['y_mid'])=='Down', 0, 1)

X_test = test_set[[
                   'ask_price', 'bid_price', 'ask_volume', 'bid_volume', 
                   'mid_price', 'spread', 
                   'dPask_dt', 'dPbid_dt', 'dVbid_dt', 'dVask_dt',
                   'avg_trade_price_10ms','avg_trade_volume_10ms',
                   'avg_trade_price_100ms','avg_trade_volume_100ms',
                   'avg_trade_price_1s','avg_trade_volume_1s',
                   'avg_trade_price_10s','avg_trade_volume_10s' 
                  ]]

titles = (
    "SVC with linear kernel",
    "LinearSVC (linear kernel)",
    "SVC with RBF kernel",
    "SVC with polynomial (degree 2) kernel"
)


train_set['y_mid'].value_counts()/len(y_train)
print()

for clf, title in zip(models, titles):
    clf_predict = clf.predict(X_test)
    print(title)
    print(confusion_matrix(y_test, clf_predict))
    print(classification_report(y_test, clf_predict))
    print()

## Fit XGBClassifier for Spread Movement

In [51]:
split  = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=3407)
for train_index, test_index in split.split(df_clean, df_clean['y_spread']):
    train_set = df_clean.loc[train_index]
    test_set = df_clean.loc[test_index]

encoder = LabelEncoder()
y_train = encoder.fit_transform(train_set['y_spread'])

xgb = XGBClassifier(booster='gbtree')

xgb.fit(train_set[['ask_price', 'bid_price', 'ask_volume', 'bid_volume', 
                   'mid_price', 'spread', 
                   'dPask_dt', 'dPbid_dt', 'dVbid_dt', 'dVask_dt',
                   'avg_trade_price_10ms','avg_trade_volume_10ms',
                   'avg_trade_price_100ms','avg_trade_volume_100ms',
                   'avg_trade_price_1s','avg_trade_volume_1s',
                   'avg_trade_price_10s','avg_trade_volume_10s' 
                  ]], 
        y_train
        )

[0 2 2 ... 1 2 2]


In [52]:
xgb_predict = xgb.predict(test_set[[
                   'ask_price', 'bid_price', 'ask_volume', 'bid_volume', 
                   'mid_price', 'spread', 
                   'dPask_dt', 'dPbid_dt', 'dVbid_dt', 'dVask_dt',
                   'avg_trade_price_10ms','avg_trade_volume_10ms',
                   'avg_trade_price_100ms','avg_trade_volume_100ms',
                   'avg_trade_price_1s','avg_trade_volume_1s',
                   'avg_trade_price_10s','avg_trade_volume_10s' 
                  ]])

y_test = encoder.fit_transform(test_set['y_spread'])
print("XGBClassifier Results")
print(confusion_matrix(y_test, xgb_predict))
print(classification_report(y_test, xgb_predict))
print()
train_set['y_spread'].value_counts()/len(y_train)

XGBClassifier Results
[[19542    42 13393]
 [ 3446   215  5384]
 [ 8389    49 27123]]
              precision    recall  f1-score   support

           0       0.62      0.59      0.61     32977
           1       0.70      0.02      0.05      9045
           2       0.59      0.76      0.67     35561

    accuracy                           0.60     77583
   macro avg       0.64      0.46      0.44     77583
weighted avg       0.62      0.60      0.57     77583




Up            0.458365
Down          0.425056
Stationary    0.116579
Name: y_mid, dtype: float64