### library

In [257]:
import warnings
warnings.filterwarnings('ignore')
import glob
import os
import datetime
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate
from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from xgboost import XGBClassifier
from xgboost import plot_importance
import lightgbm as lgb
from lightgbm import LGBMClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import accuracy_score
from sklearn import svm
import seaborn as sns; sns.set()

import FinanceDataReader as fdr
import talib

### data loading

In [258]:
df_price = fdr.DataReader(symbol="005930", start='2011', end='2021') # 2011~2020년 삼성전자 주가데이터

In [259]:
data_dir = 'C:/Users/김건우/Desktop/UNIST/4학년 1학기/기계학습 응용/'
df_fundamental = pd.read_excel(data_dir + 'Data_1.xlsx', index_col = 0)

In [260]:
df = pd.merge(df_price, df_fundamental, on='Date')

In [261]:
# 거래정지로 인한 결측치 제거
mask = df[df['Open']==0]
df = df.drop(mask.index)
mask

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Change,외국인지분율,60일변동성(표준편차),60일베타,60일알파,...,"P/E(MAIN, TTM)","P/B(MAIN, TTM)",P/C(TTM),P/CE(TTM),P/S(TTM),P/FCF1(TTM),P/FCF2(TTM),EV/EBITDA(TTM),신용융자잔고금액,60일누적대차거래잔고증감비중
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-04-30,0,0,0,53000,0,0.0,52.06,0.019769,1.505725,0.163186,...,8.68,1.67,5.88,5.69,1.58,35.68,16.82,3.66,344599630,-1.59
2018-05-02,0,0,0,53000,0,0.0,52.06,0.019767,1.504048,0.17758,...,8.68,1.67,5.88,5.69,1.58,35.68,16.82,3.66,389517460,-1.63
2018-05-03,0,0,0,53000,0,0.0,52.06,0.018929,1.434582,0.223318,...,8.68,1.67,5.88,5.69,1.58,35.68,16.82,3.66,389079930,-1.6


### target label

In [262]:
df['log_return'] = np.log(df['Close']/ df['Close'].shift(1))
df['cum_rtn_1M']=df['log_return'].rolling(20).sum()
df['target'] = df['cum_rtn_1M'].shift(-19) # -> 데이터 맨뒤에도 패딩 필요
df['target'].describe(percentiles = [.2, .4, .6, .8])

count    2440.000000
mean        0.011072
std         0.069638
min        -0.334308
20%        -0.047415
40%        -0.005069
50%         0.013921
60%         0.031221
80%         0.070880
max         0.216260
Name: target, dtype: float64

In [263]:
def classify_label(target):
    if target > 0.07:                      # 7% 이상
        target =  2
    elif target > 0.03 and target <= 0.07: # 3~7% 사이
        target = 1
    elif target > -0.01 and target <= 0.03: # --1~3% 사이
        target = 0
    elif target > -0.05 and target <= -0.01:  # -5~-1% 사이
        target = -1
    elif target <= -0.05: # -5% 이하
        target = -2
    else:
        target = np.nan # 예외처리
        
    return target

In [264]:
df['target'] = df['target'].apply(classify_label)

In [265]:
df.drop(columns=['Change', 'log_return', 'cum_rtn_1M'], inplace=True)

### 기술적 지표 구현

#### price indicator

In [266]:
# 이동평균
df['MA_5'] = talib.SMA(df['Close'], timeperiod=5)
df['MA_10'] = talib.SMA(df['Close'], timeperiod=10)
df['MA_20'] = talib.SMA(df['Close'], timeperiod=20)
df['MA_60'] = talib.SMA(df['Close'], timeperiod=60)
df['MA_120'] = talib.SMA(df['Close'], timeperiod=120)

# 볼린저밴드
df['BB_Up'] = talib.BBANDS(df['Close'], timeperiod=5, nbdevup=2, nbdevdn=2, matype=0)[0] # 볼린저밴드 상단
df['BB_Down'] = talib.BBANDS(df['Close'], timeperiod=5, nbdevup=2, nbdevdn=2, matype=0)[2] #볼린저밴드 하단

# 파라볼릭SAR
df['PSAR'] = talib.SAR(df['High'], df['Low'], acceleration=0.02, maximum=0.2)

#### momentum indicator

In [267]:
df['RSI'] = talib.RSI(df['Close'], timeperiod = 14)
df['MACD'] = talib.MACD(df['Close'], fastperiod=12, slowperiod=26, signalperiod=9)[0]

#### volume indicator

In [268]:
df['CO'] = talib.ADOSC(df['High'], df['Low'], df['Close'], df['Volume'], fastperiod=3, slowperiod=10) # Chaikin Oscillator

#### volatility indicator

In [269]:
df['ATR']=talib.ATR(df['High'], df['Low'], df['Close'], timeperiod=14)

#### cycle indicator

In [270]:
df['HT_DCPERIOD']=talib.HT_DCPERIOD(df['Close'])

In [271]:
# 기술적 지표로 인한 결측치 제거
df.dropna(axis=0, how='any', inplace = True)

In [272]:
"""
_open, _high, _low, _close, _volume = df['Open'], df['High'], df['Low'], df['Close'], df['Volume']

df['DEMA'] = talib.DEMA(_close)
df['EMA'] = talib.EMA(_close)
df['HT_TRENDLINE'] = talib.HT_TRENDLINE(_close)
df['KAMA'] = talib.KAMA(_close)
df['MAMA'], df['FAMA'] = talib.MAMA(_close)
df['SAREXT'] = talib.SAREXT(_high, _low)
df['T3'] = talib.T3(_close)
df['ADXR'] = talib.ADXR(_high, _low, _close)
df['APO'] = talib.APO(_close)
df['AROONOSC'] = talib.AROONOSC(_high, _low)
df['BOP'] = talib.BOP(_open, _high, _low, _close)
df['CCI'] = talib.CCI(_high, _low, _close)
df['CMO'] = talib.CMO(_close)
df['DX'] = talib.DX(_high, _low, _close)
df['MFI'] = talib.MFI(_high, _low, _close, _volume)
df['MOM'] = talib.MOM(_close)
df['PPO'] = talib.PPO(_close)
df['STOCH_K'], df['STOCH_D'] = talib.STOCH(_high, _low, _close)
df['TRIX'] = talib.TRIX(_close)
df['ULTOSC'] = talib.ULTOSC(_high, _low, _close)
df['WILLR'] = talib.WILLR(_high, _low, _close)
"""

"\n_open, _high, _low, _close, _volume = df['Open'], df['High'], df['Low'], df['Close'], df['Volume']\n\ndf['DEMA'] = talib.DEMA(_close)\ndf['EMA'] = talib.EMA(_close)\ndf['HT_TRENDLINE'] = talib.HT_TRENDLINE(_close)\ndf['KAMA'] = talib.KAMA(_close)\ndf['MAMA'], df['FAMA'] = talib.MAMA(_close)\ndf['SAREXT'] = talib.SAREXT(_high, _low)\ndf['T3'] = talib.T3(_close)\ndf['ADXR'] = talib.ADXR(_high, _low, _close)\ndf['APO'] = talib.APO(_close)\ndf['AROONOSC'] = talib.AROONOSC(_high, _low)\ndf['BOP'] = talib.BOP(_open, _high, _low, _close)\ndf['CCI'] = talib.CCI(_high, _low, _close)\ndf['CMO'] = talib.CMO(_close)\ndf['DX'] = talib.DX(_high, _low, _close)\ndf['MFI'] = talib.MFI(_high, _low, _close, _volume)\ndf['MOM'] = talib.MOM(_close)\ndf['PPO'] = talib.PPO(_close)\ndf['STOCH_K'], df['STOCH_D'] = talib.STOCH(_high, _low, _close)\ndf['TRIX'] = talib.TRIX(_close)\ndf['ULTOSC'] = talib.ULTOSC(_high, _low, _close)\ndf['WILLR'] = talib.WILLR(_high, _low, _close)\n"

In [273]:
#df['target'] = np.where(df['target']>0,1,-1)
df['target'].value_counts()

 0.0    512
 2.0    495
 1.0    481
-2.0    439
-1.0    395
Name: target, dtype: int64

In [274]:
class_num = len(df['target'].unique())

In [275]:
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,외국인지분율,60일변동성(표준편차),60일베타,60일알파,배당수익률(FY0),...,MA_60,MA_120,BB_Up,BB_Down,PSAR,RSI,MACD,CO,ATR,HT_DCPERIOD
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011-06-28,16800,17060,16620,16640,239178,50.88,0.01798,1.022287,-0.143621,1.2,...,17693.0,18174.166667,17059.437758,16348.562242,17080.0,42.611579,-304.897785,-15341.128118,458.793021,17.738586
2011-06-29,16960,16980,16640,16660,207146,50.92,0.017934,0.993777,-0.170791,1.2,...,17657.333333,18153.333333,17044.5124,16419.4876,17060.0,42.963277,-293.176586,-95319.607354,450.307806,19.161649
2011-06-30,16660,16700,16420,16520,328339,50.95,0.017929,0.992857,-0.20264,1.21,...,17617.666667,18131.333333,17061.262079,16362.737921,17060.0,41.066085,-291.820357,-149221.657282,438.142962,20.537234
2011-07-01,16860,17200,16760,17100,442918,50.99,0.018549,1.035295,-0.152594,1.17,...,17587.666667,18116.833333,17118.684684,16329.315316,15900.0,50.765919,-241.164391,-80836.824566,455.418465,21.841461
2011-07-04,17400,17660,17380,17560,429433,51.01,0.018679,1.051766,-0.088197,1.14,...,17572.666667,18108.166667,17668.072535,16123.927465,15926.0,56.834048,-162.033199,-6473.045312,462.888575,23.196241


In [276]:
df['target'] = df['target'].astype(np.int64)
y_var = df['target']
#x_var = df.drop(['target','OPEN','HIGH','LOW','VOLUME'],axis=1)
x_var = df.drop(['target'],axis=1)

### train/test data splitting

In [277]:
X_train, X_test, y_train, y_test = train_test_split(x_var, y_var, test_size=0.3, shuffle=False, random_state=3)

train_count = y_train.count()
test_count = y_test.count()

print('train set label ratio')
print(y_train.value_counts()/train_count)
print('test set label ratio')
print(y_test.value_counts()/test_count)

train set label ratio
 0    0.219692
 1    0.211692
 2    0.211077
-2    0.190154
-1    0.167385
Name: target, dtype: float64
test set label ratio
 0    0.222382
 2    0.218077
 1    0.196557
-2    0.186514
-1    0.176471
Name: target, dtype: float64


In [278]:
# original data shape -> 33 features
X_train.shape

(1625, 33)

In [279]:
# after PCA -> 4 features
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,외국인지분율,60일변동성(표준편차),60일베타,60일알파,배당수익률(FY0),...,MA_60,MA_120,BB_Up,BB_Down,PSAR,RSI,MACD,CO,ATR,HT_DCPERIOD
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011-06-28,16800,17060,16620,16640,239178,50.88,0.01798,1.022287,-0.143621,1.2,...,17693.0,18174.166667,17059.437758,16348.562242,17080.0,42.611579,-304.897785,-15341.128118,458.793021,17.738586
2011-06-29,16960,16980,16640,16660,207146,50.92,0.017934,0.993777,-0.170791,1.2,...,17657.333333,18153.333333,17044.5124,16419.4876,17060.0,42.963277,-293.176586,-95319.607354,450.307806,19.161649
2011-06-30,16660,16700,16420,16520,328339,50.95,0.017929,0.992857,-0.20264,1.21,...,17617.666667,18131.333333,17061.262079,16362.737921,17060.0,41.066085,-291.820357,-149221.657282,438.142962,20.537234
2011-07-01,16860,17200,16760,17100,442918,50.99,0.018549,1.035295,-0.152594,1.17,...,17587.666667,18116.833333,17118.684684,16329.315316,15900.0,50.765919,-241.164391,-80836.824566,455.418465,21.841461
2011-07-04,17400,17660,17380,17560,429433,51.01,0.018679,1.051766,-0.088197,1.14,...,17572.666667,18108.166667,17668.072535,16123.927465,15926.0,56.834048,-162.033199,-6473.045312,462.888575,23.196241


# Reducing dimension

### PCA

In [165]:
from sklearn.decomposition import PCA

pca = PCA(n_components=4,svd_solver='auto')
scores = pca.fit_transform(x_var)

In [166]:
pca.explained_variance_

array([8.86174783e+15, 3.53330834e+13, 3.41245754e+13, 7.49424809e+08])

In [167]:
ratio = pca.explained_variance_ratio_
ratio

array([9.92222952e-01, 3.95613788e-03, 3.82082492e-03, 8.39108165e-08])

In [168]:
principalComponents = pca.fit_transform(x_var)
principalDf = pd.DataFrame(data = principalComponents
             , columns = ['pc1', 'pc2', 'pc3','pc4'])

In [169]:
x_var = principalDf

In [170]:
X_train, X_test, y_train, y_test = train_test_split(x_var, y_var, test_size=0.3, shuffle=False, random_state=3)

In [308]:
len(X_train)

1625

### evaluation metrics

In [28]:
def get_confusion_matrix(y_test,pred):
    confusion = confusion_matrix(y_test,pred)
    accuracy = accuracy_score(y_test,pred)
    #precision = precision_score(y_test,pred)
    #recall = recall_score(y_test,pred)
    #f1 = f1_score(y_test,pred)
    #roc_score = roc_auc_score(y_test,pred)
    #print(confusion)
    print('accuracy:{0:.4f}'.format(accuracy))
    #print('accuracy:{0:.4f},precision:{1:.4f},recall:{2:.4f},F1:{3:.4f},ROC AUC score:{4:.4f}'.format(accuracy,precision,recall,f1,roc_score))

### xgboost 

In [225]:
n_estimators = range(100,500,100)
params ={
    'objective': ['multi:softmax'],
    'num_class': [class_num],
    'n_estimators':n_estimators,
    'max_depth':[4,6],
    'gamma' : [0,0.1],
    'min_child_weight': [1,3,5],
    'subsample' : [0.6,0.8],
    'colsample_bytree' : [0.6,0.8],
    'learning_rate' : [0.05,0.1],
    
}

In [226]:
my_cv = TimeSeriesSplit(n_splits=5).split(X_train)

In [227]:
xgb = GridSearchCV(XGBClassifier(),params,cv=my_cv,n_jobs=-1)
xgb.fit(X_train,y_train)



GridSearchCV(cv=<generator object TimeSeriesSplit.split at 0x000001B020AF34A0>,
             estimator=XGBClassifier(base_score=None, booster=None,
                                     colsample_bylevel=None,
                                     colsample_bynode=None,
                                     colsample_bytree=None, gamma=None,
                                     gpu_id=None, importance_type='gain',
                                     interaction_constraints=None,
                                     learning_rate=None, max_delta_step=None,
                                     max_depth=None, min_child_weight=None,
                                     miss...
                                     reg_alpha=None, reg_lambda=None,
                                     scale_pos_weight=None, subsample=None,
                                     tree_method=None, validate_parameters=None,
                                     verbosity=None),
             n_jobs=-1,
             p

In [228]:
print('best parameters\n',xgb.best_params_)
print('best prediction:{0:.4f}\n'.format(xgb.best_score_))

best parameters
 {'colsample_bytree': 0.6, 'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 4, 'min_child_weight': 5, 'n_estimators': 100, 'num_class': 5, 'objective': 'multi:softmax', 'subsample': 0.6}
best prediction:0.2630



In [229]:
xgb_pred = xgb.predict(X_test)
accuracy_con = accuracy_score(y_test,xgb_pred)
print('accuracy:{0:.4f}'.format(accuracy_con))
#get_confusion_matrix(y_test,xgb_pred)

accuracy:0.1951


### light gbm

In [286]:
n_estimators = range(100,500,100)
params ={
    #'objective ': ['multiclass'],
    'num_class': [class_num],
    'boosting_type' : ['gbdt'],
    'n_estimators': n_estimators,
    'num_boost_round' : [100,200,300],
    'metric' : ['multi_logloss'],
    'learning_rate ':[0.05,0.1],
    'num_leaves':[20],
    'max_depth':[4,6,8],
    'sub_feature':[0.4,0.6],
    'min_data_in_leaf' : range(30,60,10),
    'feature_fraction': [0.4,0.6]
}

In [287]:
my_cv = TimeSeriesSplit(n_splits=5).split(X_train)

In [288]:
'''
차원축소하지 않은 원본데이터 사용할때, column명 바꿔주기
'''
X_train.columns = [i for i in range(len(X_train.columns))]
X_test.columns = [i for i in range(len(X_test.columns))]

In [289]:
lgb = GridSearchCV(LGBMClassifier(objective='multiclass'),params,cv=my_cv,n_jobs=-1)
lgb.fit(X_train,y_train)

GridSearchCV(cv=<generator object TimeSeriesSplit.split at 0x000001B029E41CF0>,
             estimator=LGBMClassifier(objective='multiclass'), n_jobs=-1,
             param_grid={'boosting_type': ['gbdt'],
                         'feature_fraction': [0.4, 0.6],
                         'learning_rate ': [0.05, 0.1], 'max_depth': [4, 6, 8],
                         'metric': ['multi_logloss'],
                         'min_data_in_leaf': range(30, 60, 10),
                         'n_estimators': range(100, 500, 100),
                         'num_boost_round': [100, 200, 300], 'num_class': [5],
                         'num_leaves': [20], 'sub_feature': [0.4, 0.6]})

In [290]:
print('best parameters\n',lgb.best_params_)
print('best prediction:{0:.4f}\n'.format(lgb.best_score_))

best parameters
 {'boosting_type': 'gbdt', 'feature_fraction': 0.6, 'learning_rate ': 0.05, 'max_depth': 8, 'metric': 'multi_logloss', 'min_data_in_leaf': 30, 'n_estimators': 100, 'num_boost_round': 100, 'num_class': 5, 'num_leaves': 20, 'sub_feature': 0.4}
best prediction:0.2504



In [291]:
lgb_pred = lgb.predict(X_test)
accuracy_con = accuracy_score(y_test,lgb_pred)
print('accuracy:{0:.4f}'.format(accuracy_con))
get_confusion_matrix(y_test,lgb_pred)

accuracy:0.2052
accuracy:0.2052


### random forest

In [239]:
n_estimators = range(100,500,100)
rf_param = {
    'n_estimators':n_estimators,
    'max_depth':[4,6,8],
    'min_samples_leaf':[3,4,5,6],
    'min_samples_split':[2,3,4]
}

In [240]:
my_cv = TimeSeriesSplit(n_splits=5).split(X_train)

In [241]:
rf = GridSearchCV(RandomForestClassifier(), param_grid = rf_param, scoring='accuracy', cv=my_cv,n_jobs=-1, verbose=2)
rf.fit(X_train,y_train)

Fitting 5 folds for each of 144 candidates, totalling 720 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:   23.6s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   52.6s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  3.2min
[Parallel(n_jobs=-1)]: Done 720 out of 720 | elapsed:  3.7min finished


GridSearchCV(cv=<generator object TimeSeriesSplit.split at 0x000001B020AB4900>,
             estimator=RandomForestClassifier(), n_jobs=-1,
             param_grid={'max_depth': [4, 6, 8],
                         'min_samples_leaf': [3, 4, 5, 6],
                         'min_samples_split': [2, 3, 4],
                         'n_estimators': range(100, 500, 100)},
             scoring='accuracy', verbose=2)

In [242]:
print('best parameters\n',rf.best_params_)
print('best prediction:{0:.4f}\n'.format(rf.best_score_))

best parameters
 {'max_depth': 4, 'min_samples_leaf': 5, 'min_samples_split': 4, 'n_estimators': 100}
best prediction:0.2674



In [243]:
rf_pred = rf.predict(X_test)
accuracy_con = accuracy_score(y_test,rf_pred)
print('accuracy:{0:.4f}'.format(accuracy_con))
get_confusion_matrix(y_test,rf_pred)

accuracy:0.2339
accuracy:0.2339


### ExtraTreesClassifier

In [244]:
my_cv = TimeSeriesSplit(n_splits=5).split(X_train)

In [245]:
n_estimators = range(100,500,100)

ext_param = {
    'n_estimators':n_estimators,
    'max_depth':[4,6,8],
    'min_samples_leaf':[3,4,5,6],
    'min_samples_split':[2,3,4]
}

In [246]:
ext = GridSearchCV(ExtraTreesClassifier(), param_grid = ext_param, scoring='accuracy', cv=my_cv,n_jobs=-1, verbose=2)
ext.fit(X_train,y_train)

Fitting 5 folds for each of 144 candidates, totalling 720 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    4.5s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   24.2s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:   53.8s
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 720 out of 720 | elapsed:  1.9min finished


GridSearchCV(cv=<generator object TimeSeriesSplit.split at 0x000001B029C73F20>,
             estimator=ExtraTreesClassifier(), n_jobs=-1,
             param_grid={'max_depth': [4, 6, 8],
                         'min_samples_leaf': [3, 4, 5, 6],
                         'min_samples_split': [2, 3, 4],
                         'n_estimators': range(100, 500, 100)},
             scoring='accuracy', verbose=2)

In [247]:
print('best parameters\n',ext.best_params_)
print('best prediction:{0:.4f}\n'.format(ext.best_score_))

best parameters
 {'max_depth': 6, 'min_samples_leaf': 3, 'min_samples_split': 2, 'n_estimators': 100}
best prediction:0.2711



In [248]:
ext_pred = ext.predict(X_test)
accuracy_con = accuracy_score(y_test,ext_pred)
print('accuracy:{0:.4f}'.format(accuracy_con))
get_confusion_matrix(y_test,ext_pred)

accuracy:0.2009
accuracy:0.2009


### stacking

### XGB + ET

In [249]:
from vecstack import stacking

xgb_clf = XGBClassifier(random_state=0,
                        colsample_bytree = xgb.best_params_['colsample_bytree'],
                        min_child_weight = xgb.best_params_['min_child_weight'],
                        subsample = xgb.best_params_['subsample'],
                        gamma = xgb.best_params_['gamma'],
                        learning_rate = xgb.best_params_['learning_rate'],
                         max_depth = xgb.best_params_['max_depth'],
                         n_estimators = xgb.best_params_['n_estimators'],
                         num_class = xgb.best_params_['num_class'],
                         objective = xgb.best_params_['objective']
                        )

ext_clf = ExtraTreesClassifier(random_state=0,
                              max_depth=ext.best_params_['max_depth'],
                              min_samples_leaf=ext.best_params_['min_samples_leaf'],
                              min_samples_split=ext.best_params_['min_samples_split'],
                              n_estimators=ext.best_params_['n_estimators'])

ensemble_models =[xgb_clf ,ext_clf]
s_train, s_test = stacking(ensemble_models, X_train, y_train, X_test, 
                           mode = 'oof_pred_bag', regression=False, metric=accuracy_score, n_folds=4,
                          shuffle=False, random_state=123, verbose=2)

final_model = XGBClassifier(seed = 0, n_jobs = -1, learning_rate = 0.05, 
                            n_estimators = 200, max_depth = 4)

final_model = final_model.fit(s_train,y_train)

y_pred = final_model.predict(s_test)
accuracy_con = accuracy_score(y_test,y_pred)
print('accuracy:{0:.4f}'.format(accuracy_con))
get_confusion_matrix(y_test,y_pred)

task:         [classification]
n_classes:    [5]
metric:       [accuracy_score]
mode:         [oof_pred_bag]
n_models:     [2]

model  0:     [XGBClassifier]
    fold  0:  [0.35135135]
    fold  1:  [0.15517241]
    fold  2:  [0.25123153]
    fold  3:  [0.16256158]
    ----
    MEAN:     [0.23007922] + [0.07956760]
    FULL:     [0.23015385]

model  1:     [ExtraTreesClassifier]
    fold  0:  [0.31449631]
    fold  1:  [0.12315271]
    fold  2:  [0.22413793]
    fold  3:  [0.13054187]
    ----
    MEAN:     [0.19808221] + [0.07811413]
    FULL:     [0.19815385]

accuracy:0.1722
accuracy:0.1722


In [250]:
y_pred

array([ 2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  2,  2,  1,  2,  2,  2,
        1,  2,  2,  1,  1,  2,  2,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  1,  2,  2,  2,  1,  1,  2,  2,  2,  1,  1,  1,  2,  2,  2,  2,
        2,  2,  2,  2,  1,  2,  2,  2,  2,  2,  1,  1,  2,  2, -2,  2,  1,
        1,  1,  0,  0,  0,  1,  2,  2,  2,  2,  0,  2,  1,  1,  1,  2,  2,
        2,  2,  2,  1,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  0,
        1,  0, -1,  1,  1,  1,  2,  1,  1,  1, -1, -1, -1,  1,  1, -1, -1,
        2,  2,  1,  1,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,
        1,  1,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  2,  1,  2,  1,  2,  2,  2,  2,  2,
        1,  1,  1,  1,  1,  1,  1,  1,  2,  2,  2,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1

### XGB + RF

In [251]:
from vecstack import stacking

xgb_clf = XGBClassifier(random_state=0,
                        colsample_bytree = xgb.best_params_['colsample_bytree'],
                        min_child_weight = xgb.best_params_['min_child_weight'],
                        subsample = xgb.best_params_['subsample'],
                        gamma = xgb.best_params_['gamma'],
                        learning_rate = xgb.best_params_['learning_rate'],
                         max_depth = xgb.best_params_['max_depth'],
                         n_estimators = xgb.best_params_['n_estimators'],
                         num_class = xgb.best_params_['num_class'],
                         objective = xgb.best_params_['objective']
                        )

rf_clf = RandomForestClassifier(random_state=0,
                              max_depth=rf.best_params_['max_depth'],
                              min_samples_leaf=rf.best_params_['min_samples_leaf'],
                              min_samples_split=rf.best_params_['min_samples_split'],
                              n_estimators=rf.best_params_['n_estimators'])

ensemble_models =[xgb_clf, rf_clf]
s_train, s_test = stacking(ensemble_models, X_train, y_train, X_test, 
                           mode = 'oof_pred_bag', regression=False, metric=accuracy_score, n_folds=4,
                          shuffle=False, random_state=123, verbose=2)

final_model = XGBClassifier(seed = 0, n_jobs = -1, learning_rate = 0.05, 
                            n_estimators = 200, max_depth = 4)

final_model = final_model.fit(s_train,y_train)

y_pred = final_model.predict(s_test)
accuracy_con = accuracy_score(y_test,y_pred)
print('accuracy:{0:.4f}'.format(accuracy_con))
get_confusion_matrix(y_test,y_pred)

task:         [classification]
n_classes:    [5]
metric:       [accuracy_score]
mode:         [oof_pred_bag]
n_models:     [2]

model  0:     [XGBClassifier]
    fold  0:  [0.35135135]
    fold  1:  [0.15517241]
    fold  2:  [0.25123153]
    fold  3:  [0.16256158]
    ----
    MEAN:     [0.23007922] + [0.07956760]
    FULL:     [0.23015385]

model  1:     [RandomForestClassifier]
    fold  0:  [0.34152334]
    fold  1:  [0.13300493]
    fold  2:  [0.24137931]
    fold  3:  [0.19211823]
    ----
    MEAN:     [0.22700645] + [0.07644307]
    FULL:     [0.22707692]

accuracy:0.2152
accuracy:0.2152


In [252]:
y_pred

array([ 2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1, -2,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  1,  1,  2,  2,  2,  1,  1,  1,  2,  2,  2,  2,
        2,  2,  2,  1,  1,  2,  2,  2,  1,  2,  1,  1,  1,  1,  0,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  2,  2,
        2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  1,
        1, -2, -2,  0,  0,  0,  1,  1,  1,  1,  1,  1,  1, -2, -2,  2,  1,
        1,  1, -1,  1,  1,  1,  2,  2,  2,  2,  0,  0, -1,  1,  1,  0,  0,
        2,  2,  2,  2,  2,  2, -1, -1, -1,  2,  2,  2,  2,  2,  2,  2, -2,
       -2, -2, -2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1, -2,  1,  1,  1,  1,
        2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  2,  1,  2,  2,  2,  2,  2,
        1,  1,  1,  1,  1,  1,  1,  1,  2,  2,  2,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1

### XGB + LGB

In [292]:
from vecstack import stacking

xgb_clf = XGBClassifier(random_state=0,
                        colsample_bytree = xgb.best_params_['colsample_bytree'],
                        min_child_weight = xgb.best_params_['min_child_weight'],
                        subsample = xgb.best_params_['subsample'],
                        gamma = xgb.best_params_['gamma'],
                        learning_rate = xgb.best_params_['learning_rate'],
                         max_depth = xgb.best_params_['max_depth'],
                         n_estimators = xgb.best_params_['n_estimators'],
                         num_class = xgb.best_params_['num_class'],
                         objective = xgb.best_params_['objective']
                        )

lgb_clf = LGBMClassifier(random_state=0,
                        boosting_type = lgb.best_params_['boosting_type'],
                        feature_fraction = lgb.best_params_['feature_fraction'],
                        min_data_in_leaf = lgb.best_params_['min_data_in_leaf'],
                        sub_feature = lgb.best_params_['sub_feature'],
                        max_depth = lgb.best_params_['max_depth'],
                        metric = lgb.best_params_['metric'],
                        n_estimators = lgb.best_params_['n_estimators'],
                        num_boost_round = lgb.best_params_['num_boost_round'],
                        num_leaves = lgb.best_params_['num_leaves'],
                        )


ensemble_models =[xgb_clf, lgb_clf]
s_train, s_test = stacking(ensemble_models, X_train, y_train, X_test, 
                           mode = 'oof_pred_bag', regression=False, metric=accuracy_score, n_folds=4,
                          shuffle=False, random_state=123, verbose=2)

final_model = XGBClassifier(seed = 0, n_jobs = -1, learning_rate = 0.05, 
                            n_estimators = 200, max_depth = 4)

final_model = final_model.fit(s_train,y_train)

y_pred = final_model.predict(s_test)
accuracy_con = accuracy_score(y_test,y_pred)
print('accuracy:{0:.4f}'.format(accuracy_con))
get_confusion_matrix(y_test,y_pred)

task:         [classification]
n_classes:    [5]
metric:       [accuracy_score]
mode:         [oof_pred_bag]
n_models:     [2]

model  0:     [XGBClassifier]
    fold  0:  [0.35135135]
    fold  1:  [0.15517241]
    fold  2:  [0.25123153]
    fold  3:  [0.16256158]
    ----
    MEAN:     [0.23007922] + [0.07956760]
    FULL:     [0.23015385]

model  1:     [LGBMClassifier]
    fold  0:  [0.33660934]
    fold  1:  [0.17733990]
    fold  2:  [0.25123153]
    fold  3:  [0.18226601]
    ----
    MEAN:     [0.23686169] + [0.06457480]
    FULL:     [0.23692308]

accuracy:0.2123
accuracy:0.2123


In [293]:
y_pred

array([ 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  0,  0,  0,  0,
        0,  0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  2,  1,  1,
        2,  2,  1,  1,  1,  1,  1,  1,  1,  1,  0,  0,  0,  1,  2,  1,  1,
        1,  2,  1,  1,  0,  2,  1,  1,  2,  2,  0,  0,  0,  0,  0,  0,  1,
        0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  1,
        2,  1,  1,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  1,  0,  2,  2,  2,  2,  1,  0,  0,  0,  1,  1,  0,
        2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  0,
        0,  0,  0,  2,  2,  1,  1,  1,  1,  1,  1,  1,  2,  1,  1, -1, -1,
       -1, -1,  1,  1,  1,  1,  1,  1,  0,  1,  1,  0,  0,  0,  0,  1,  1,
        1,  2,  2,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,
        1,  1,  0,  1,  0,  0,  0,  0,  1,  1,  1,  1,  1,  1,  1,  0,  0,
        0,  0,  0,  0,  0

### LGB + RF

In [294]:
from vecstack import stacking

lgb_clf = LGBMClassifier(random_state=0,
                        boosting_type = lgb.best_params_['boosting_type'],
                        feature_fraction = lgb.best_params_['feature_fraction'],
                        min_data_in_leaf = lgb.best_params_['min_data_in_leaf'],
                        sub_feature = lgb.best_params_['sub_feature'],
                        max_depth = lgb.best_params_['max_depth'],
                        metric = lgb.best_params_['metric'],
                        n_estimators = lgb.best_params_['n_estimators'],
                        num_boost_round = lgb.best_params_['num_boost_round'],
                        num_leaves = lgb.best_params_['num_leaves'],
                        )

rf_clf = RandomForestClassifier(random_state=0,
                              max_depth=rf.best_params_['max_depth'],
                              min_samples_leaf=rf.best_params_['min_samples_leaf'],
                              min_samples_split=rf.best_params_['min_samples_split'],
                              n_estimators=rf.best_params_['n_estimators'])

ensemble_models =[lgb_clf, rf_clf]
s_train, s_test = stacking(ensemble_models, X_train, y_train, X_test, 
                           mode = 'oof_pred_bag', regression=False, metric=accuracy_score, n_folds=4,
                          shuffle=False, random_state=123, verbose=2)

final_model = XGBClassifier(seed = 0, n_jobs = -1, learning_rate = 0.05, 
                            n_estimators = 200, max_depth = 4)

final_model = final_model.fit(s_train,y_train)

y_pred = final_model.predict(s_test)
accuracy_con = accuracy_score(y_test,y_pred)
print('accuracy:{0:.4f}'.format(accuracy_con))
get_confusion_matrix(y_test,y_pred)

task:         [classification]
n_classes:    [5]
metric:       [accuracy_score]
mode:         [oof_pred_bag]
n_models:     [2]

model  0:     [LGBMClassifier]
    fold  0:  [0.33660934]
    fold  1:  [0.17733990]
    fold  2:  [0.25123153]
    fold  3:  [0.18226601]
    ----
    MEAN:     [0.23686169] + [0.06457480]
    FULL:     [0.23692308]

model  1:     [RandomForestClassifier]
    fold  0:  [0.34152334]
    fold  1:  [0.13300493]
    fold  2:  [0.24137931]
    fold  3:  [0.19211823]
    ----
    MEAN:     [0.22700645] + [0.07644307]
    FULL:     [0.22707692]

accuracy:0.2425
accuracy:0.2425


In [295]:
y_pred

array([ 2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  2,  2,  0,  0,  0,  0,  2,
        2,  2,  1,  1,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  0,  2,  2,
        0,  0,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  2,  0,  2,  2,
        2,  2,  2,  1,  0,  0,  2,  2,  2,  2,  2,  2,  0,  1,  1,  1,  0,
        2,  1,  2,  2,  2,  2,  2,  2,  0,  0,  2,  0,  0,  0,  0,  0,  2,
        0,  2,  2,  0,  0,  0,  0,  0,  0,  0,  2,  2,  2,  2,  2,  1,  2,
        2,  2,  2,  1,  1,  1,  2,  2,  1,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  0,  2,  2,  2,  2, -1, -1,  0,  2, -1,  0,  0,  2,  2, -1,
       -1, -1, -1, -1, -1, -1, -1, -2, -1,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2, -1,  0,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, -1,  0,  0,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2

### LGB + ET

In [296]:
from vecstack import stacking

xgb_clf = XGBClassifier(random_state=0,
                        colsample_bytree = xgb.best_params_['colsample_bytree'],
                        min_child_weight = xgb.best_params_['min_child_weight'],
                        subsample = xgb.best_params_['subsample'],
                        gamma = xgb.best_params_['gamma'],
                        learning_rate = xgb.best_params_['learning_rate'],
                         max_depth = xgb.best_params_['max_depth'],
                         n_estimators = xgb.best_params_['n_estimators'],
                         num_class = xgb.best_params_['num_class'],
                         objective = xgb.best_params_['objective']
                        )

ext_clf = ExtraTreesClassifier(random_state=0,
                              max_depth=ext.best_params_['max_depth'],
                              min_samples_leaf=ext.best_params_['min_samples_leaf'],
                              min_samples_split=ext.best_params_['min_samples_split'],
                              n_estimators=ext.best_params_['n_estimators'])

ensemble_models =[lgb_clf,ext_clf]
s_train, s_test = stacking(ensemble_models, X_train, y_train, X_test, 
                           mode = 'oof_pred_bag', regression=False, metric=accuracy_score, n_folds=4,
                          shuffle=False, random_state=123, verbose=2)

final_model = XGBClassifier(seed = 0, n_jobs = -1, learning_rate = 0.05, 
                            n_estimators = 200, max_depth = 4)

final_model = final_model.fit(s_train,y_train)

y_pred = final_model.predict(s_test)
accuracy_con = accuracy_score(y_test,y_pred)
print('accuracy:{0:.4f}'.format(accuracy_con))
get_confusion_matrix(y_test,y_pred)

task:         [classification]
n_classes:    [5]
metric:       [accuracy_score]
mode:         [oof_pred_bag]
n_models:     [2]

model  0:     [LGBMClassifier]
    fold  0:  [0.33660934]
    fold  1:  [0.17733990]
    fold  2:  [0.25123153]
    fold  3:  [0.18226601]
    ----
    MEAN:     [0.23686169] + [0.06457480]
    FULL:     [0.23692308]

model  1:     [ExtraTreesClassifier]
    fold  0:  [0.31449631]
    fold  1:  [0.12315271]
    fold  2:  [0.22413793]
    fold  3:  [0.13054187]
    ----
    MEAN:     [0.19808221] + [0.07811413]
    FULL:     [0.19815385]

accuracy:0.2310
accuracy:0.2310


In [297]:
y_pred

array([ 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  2,  1,  2,  2,  2,
        1,  2,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  2,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  2,  1,  1,  1,  1,  1,  1,  1,  2,  2,  1,  2,  1,  1,  1,  0,
        2,  0,  2,  2,  2,  2,  1,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  2,  2,  2,  2,  2,  2,  1,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  1,  2,  1,  1,  2,  2,  2,  2,  2,  2,
        2,  2,  1,  2,  1,  2,  2, -2, -2, -2,  1,  1,  1,  1,  1,  1,  1,
        1,  1, -2, -2, -2, -2,  1,  2,  1,  2,  1,  1,  1,  1,  1,  1,  2,
        2,  2,  2, -2,  1,  1,  1,  1,  1,  1,  1,  1,  2,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  2,  1,  1,  2,  1,  1,  1,  1,  1,
        1,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  2,  1,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  2,  2,
        2,  2,  2,  2,  2

### RF + ET

In [298]:
from vecstack import stacking

rf_clf = RandomForestClassifier(random_state=0,
                              max_depth=rf.best_params_['max_depth'],
                              min_samples_leaf=rf.best_params_['min_samples_leaf'],
                              min_samples_split=rf.best_params_['min_samples_split'],
                              n_estimators=rf.best_params_['n_estimators'])

ext_clf = ExtraTreesClassifier(random_state=0,
                              max_depth=ext.best_params_['max_depth'],
                              min_samples_leaf=ext.best_params_['min_samples_leaf'],
                              min_samples_split=ext.best_params_['min_samples_split'],
                              n_estimators=ext.best_params_['n_estimators'])

ensemble_models =[rf_clf,ext_clf]
s_train, s_test = stacking(ensemble_models, X_train, y_train, X_test, 
                           mode = 'oof_pred_bag', regression=False, metric=accuracy_score, n_folds=4,
                          shuffle=False, random_state=123, verbose=2)

final_model = XGBClassifier(seed = 0, n_jobs = -1, learning_rate = 0.05, 
                            n_estimators = 200, max_depth = 4)

final_model = final_model.fit(s_train,y_train)

y_pred = final_model.predict(s_test)
accuracy_con = accuracy_score(y_test,y_pred)
print('accuracy:{0:.4f}'.format(accuracy_con))
get_confusion_matrix(y_test,y_pred)

task:         [classification]
n_classes:    [5]
metric:       [accuracy_score]
mode:         [oof_pred_bag]
n_models:     [2]

model  0:     [RandomForestClassifier]
    fold  0:  [0.34152334]
    fold  1:  [0.13300493]
    fold  2:  [0.24137931]
    fold  3:  [0.19211823]
    ----
    MEAN:     [0.22700645] + [0.07644307]
    FULL:     [0.22707692]

model  1:     [ExtraTreesClassifier]
    fold  0:  [0.31449631]
    fold  1:  [0.12315271]
    fold  2:  [0.22413793]
    fold  3:  [0.13054187]
    ----
    MEAN:     [0.19808221] + [0.07811413]
    FULL:     [0.19815385]

accuracy:0.2310
accuracy:0.2310


In [299]:
y_pred

array([ 2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  2, -2, -2,  2, -2, -2,  1,
        0,  1,  1,  0,  0,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2, -2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  0,  2,  2,  2,  2,  0,  2,  2,  0, -2,  1,  1,  1,  0,
        2,  2,  2,  2,  2,  2, -2,  1, -2, -2,  2, -2,  2,  2,  2,  2,  2,
        2,  2,  2,  2, -2, -2, -2, -2, -2, -2,  2,  2,  2,  2,  2,  1,  2,
        2,  2,  2,  1,  1,  1,  2,  0,  0,  0,  0,  2,  2,  2,  2,  2,  2,
        2,  1,  2,  2,  2,  2,  2,  2,  2,  1,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2, -1, -1, -1,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        1,  2,  1,  2,  2,  1,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2

### XGB + LGB + RF

In [300]:
from vecstack import stacking

xgb_clf = XGBClassifier(random_state=0,
                        colsample_bytree = xgb.best_params_['colsample_bytree'],
                        min_child_weight = xgb.best_params_['min_child_weight'],
                        subsample = xgb.best_params_['subsample'],
                        gamma = xgb.best_params_['gamma'],
                        learning_rate = xgb.best_params_['learning_rate'],
                         max_depth = xgb.best_params_['max_depth'],
                         n_estimators = xgb.best_params_['n_estimators'],
                         num_class = xgb.best_params_['num_class'],
                         objective = xgb.best_params_['objective']
                        )

lgb_clf = LGBMClassifier(random_state=0,
                        boosting_type = lgb.best_params_['boosting_type'],
                        feature_fraction = lgb.best_params_['feature_fraction'],
                        min_data_in_leaf = lgb.best_params_['min_data_in_leaf'],
                        sub_feature = lgb.best_params_['sub_feature'],
                        max_depth = lgb.best_params_['max_depth'],
                        metric = lgb.best_params_['metric'],
                        n_estimators = lgb.best_params_['n_estimators'],
                        num_boost_round = lgb.best_params_['num_boost_round'],
                        num_leaves = lgb.best_params_['num_leaves'],
                        )

rf_clf = RandomForestClassifier(random_state=0,
                              max_depth=rf.best_params_['max_depth'],
                              min_samples_leaf=rf.best_params_['min_samples_leaf'],
                              min_samples_split=rf.best_params_['min_samples_split'],
                              n_estimators=rf.best_params_['n_estimators'])

ensemble_models =[xgb_clf, lgb_clf, rf_clf]
s_train, s_test = stacking(ensemble_models, X_train, y_train, X_test, 
                           mode = 'oof_pred_bag', regression=False, metric=accuracy_score, n_folds=4,
                          shuffle=False, random_state=123, verbose=2)

final_model = XGBClassifier(seed = 0, n_jobs = -1, learning_rate = 0.05, 
                            n_estimators = 200, max_depth = 4)

final_model = final_model.fit(s_train,y_train)

y_pred = final_model.predict(s_test)
accuracy_con = accuracy_score(y_test,y_pred)
print('accuracy:{0:.4f}'.format(accuracy_con))
get_confusion_matrix(y_test,y_pred)

task:         [classification]
n_classes:    [5]
metric:       [accuracy_score]
mode:         [oof_pred_bag]
n_models:     [3]

model  0:     [XGBClassifier]
    fold  0:  [0.35135135]
    fold  1:  [0.15517241]
    fold  2:  [0.25123153]
    fold  3:  [0.16256158]
    ----
    MEAN:     [0.23007922] + [0.07956760]
    FULL:     [0.23015385]

model  1:     [LGBMClassifier]
    fold  0:  [0.33660934]
    fold  1:  [0.17733990]
    fold  2:  [0.25123153]
    fold  3:  [0.18226601]
    ----
    MEAN:     [0.23686169] + [0.06457480]
    FULL:     [0.23692308]

model  2:     [RandomForestClassifier]
    fold  0:  [0.34152334]
    fold  1:  [0.13300493]
    fold  2:  [0.24137931]
    fold  3:  [0.19211823]
    ----
    MEAN:     [0.22700645] + [0.07644307]
    FULL:     [0.22707692]

accuracy:0.2425
accuracy:0.2425


In [301]:
y_pred

array([ 2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  2,  2,  2,  2,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  1,  2,  2,  2,  2,  2,  2,  2,  1,  2,  0,  0,  0,  1,
        2,  1,  2,  2,  2,  2,  2,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  2,  2,  2,  2,  0,  2,
        2,  2,  2,  0,  0,  0,  2,  1,  0,  1,  1,  2,  2,  2,  2,  2,  2,
        2,  2,  0,  2,  1,  2,  2, -2, -2,  2,  1,  0,  0,  2,  1,  1,  0,
       -2, -2, -2, -2, -2, -2, -2,  1, -2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2, -2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, -2, -2,
       -2, -2,  2,  2,  2,  1,  1,  1,  2,  1,  1,  2, -2,  2,  2,  1,  1,
        2,  2,  2,  2,  2,  2,  2,  2,  2,  1, -2,  1,  2,  2,  2,  2,  2,
        1,  1,  2,  1,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  2,  2,
        2,  2,  2,  2,  2

### XGB + LGB + ET

In [302]:
from vecstack import stacking

xgb_clf = XGBClassifier(random_state=0,
                        colsample_bytree = xgb.best_params_['colsample_bytree'],
                        min_child_weight = xgb.best_params_['min_child_weight'],
                        subsample = xgb.best_params_['subsample'],
                        gamma = xgb.best_params_['gamma'],
                        learning_rate = xgb.best_params_['learning_rate'],
                         max_depth = xgb.best_params_['max_depth'],
                         n_estimators = xgb.best_params_['n_estimators'],
                         num_class = xgb.best_params_['num_class'],
                         objective = xgb.best_params_['objective']
                        )

lgb_clf = LGBMClassifier(random_state=0,
                        boosting_type = lgb.best_params_['boosting_type'],
                        feature_fraction = lgb.best_params_['feature_fraction'],
                        min_data_in_leaf = lgb.best_params_['min_data_in_leaf'],
                        sub_feature = lgb.best_params_['sub_feature'],
                        max_depth = lgb.best_params_['max_depth'],
                        metric = lgb.best_params_['metric'],
                        n_estimators = lgb.best_params_['n_estimators'],
                        num_boost_round = lgb.best_params_['num_boost_round'],
                        num_leaves = lgb.best_params_['num_leaves'],
                        )

ext_clf = ExtraTreesClassifier(random_state=0,
                              max_depth=ext.best_params_['max_depth'],
                              min_samples_leaf=ext.best_params_['min_samples_leaf'],
                              min_samples_split=ext.best_params_['min_samples_split'],
                              n_estimators=ext.best_params_['n_estimators'])

ensemble_models =[xgb_clf, lgb_clf,ext_clf]
s_train, s_test = stacking(ensemble_models, X_train, y_train, X_test, 
                           mode = 'oof_pred_bag', regression=False, metric=accuracy_score, n_folds=4,
                          shuffle=False, random_state=123, verbose=2)

final_model = XGBClassifier(seed = 0, n_jobs = -1, learning_rate = 0.05, 
                            n_estimators = 200, max_depth = 4)

final_model = final_model.fit(s_train,y_train)

y_pred = final_model.predict(s_test)
accuracy_con = accuracy_score(y_test,y_pred)
print('accuracy:{0:.4f}'.format(accuracy_con))
get_confusion_matrix(y_test,y_pred)

task:         [classification]
n_classes:    [5]
metric:       [accuracy_score]
mode:         [oof_pred_bag]
n_models:     [3]

model  0:     [XGBClassifier]
    fold  0:  [0.35135135]
    fold  1:  [0.15517241]
    fold  2:  [0.25123153]
    fold  3:  [0.16256158]
    ----
    MEAN:     [0.23007922] + [0.07956760]
    FULL:     [0.23015385]

model  1:     [LGBMClassifier]
    fold  0:  [0.33660934]
    fold  1:  [0.17733990]
    fold  2:  [0.25123153]
    fold  3:  [0.18226601]
    ----
    MEAN:     [0.23686169] + [0.06457480]
    FULL:     [0.23692308]

model  2:     [ExtraTreesClassifier]
    fold  0:  [0.31449631]
    fold  1:  [0.12315271]
    fold  2:  [0.22413793]
    fold  3:  [0.13054187]
    ----
    MEAN:     [0.19808221] + [0.07811413]
    FULL:     [0.19815385]

accuracy:0.2138
accuracy:0.2138


In [303]:
y_pred

array([ 2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1, -1,  2,  1,  2,  2,  2,
        1,  2, -1,  1,  1, -1, -1,  1,  2,  2,  2,  2,  2,  2,  1,  2,  2,
        1,  2,  2,  2,  2,  1,  1,  2,  2,  2,  1,  1,  1,  2,  1,  2,  2,
        2,  2,  2,  2,  1,  1,  2,  2,  1,  2,  2,  1,  2,  2,  1,  2,  1,
        2,  1, -1, -1, -1,  2,  2,  2,  2,  2, -1,  2,  1,  1,  1,  1,  2,
        1,  2,  2,  1,  2,  2,  2,  2,  2,  2,  1,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  1,  2,  1,  1,  2,  2,  2,  2, -1, -1,
        2,  0,  0,  2,  1,  2,  2,  1,  1,  1,  1,  0,  0,  1,  1,  1,  0,
        1,  1,  1,  1,  1,  1,  1,  2,  1,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  1,  1,  1,  2,  1,  1,  2,  1,  1,  1,  1,  1,
        2,  1,  1,  2,  2,  2,  2,  2,  2,  1,  2,  1,  2,  2,  2,  2,  2,
        1,  1,  2,  1,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  2,  2,
        2,  2,  2,  2,  2

### XGB + RF +ET

In [253]:
from vecstack import stacking

xgb_clf = XGBClassifier(random_state=0,
                        colsample_bytree = xgb.best_params_['colsample_bytree'],
                        min_child_weight = xgb.best_params_['min_child_weight'],
                        subsample = xgb.best_params_['subsample'],
                        gamma = xgb.best_params_['gamma'],
                        learning_rate = xgb.best_params_['learning_rate'],
                         max_depth = xgb.best_params_['max_depth'],
                         n_estimators = xgb.best_params_['n_estimators'],
                         num_class = xgb.best_params_['num_class'],
                         objective = xgb.best_params_['objective']
                        )

rf_clf = RandomForestClassifier(random_state=0,
                              max_depth=rf.best_params_['max_depth'],
                              min_samples_leaf=rf.best_params_['min_samples_leaf'],
                              min_samples_split=rf.best_params_['min_samples_split'],
                              n_estimators=rf.best_params_['n_estimators'])

ext_clf = ExtraTreesClassifier(random_state=0,
                              max_depth=ext.best_params_['max_depth'],
                              min_samples_leaf=ext.best_params_['min_samples_leaf'],
                              min_samples_split=ext.best_params_['min_samples_split'],
                              n_estimators=ext.best_params_['n_estimators'])

ensemble_models =[xgb_clf, rf_clf,ext_clf]
s_train, s_test = stacking(ensemble_models, X_train, y_train, X_test, 
                           mode = 'oof_pred_bag', regression=False, metric=accuracy_score, n_folds=4,
                          shuffle=False, random_state=123, verbose=2)

final_model = XGBClassifier(seed = 0, n_jobs = -1, learning_rate = 0.05, 
                            n_estimators = 200, max_depth = 4)

final_model = final_model.fit(s_train,y_train)

y_pred = final_model.predict(s_test)
accuracy_con = accuracy_score(y_test,y_pred)
print('accuracy:{0:.4f}'.format(accuracy_con))
get_confusion_matrix(y_test,y_pred)

task:         [classification]
n_classes:    [5]
metric:       [accuracy_score]
mode:         [oof_pred_bag]
n_models:     [3]

model  0:     [XGBClassifier]
    fold  0:  [0.35135135]
    fold  1:  [0.15517241]
    fold  2:  [0.25123153]
    fold  3:  [0.16256158]
    ----
    MEAN:     [0.23007922] + [0.07956760]
    FULL:     [0.23015385]

model  1:     [RandomForestClassifier]
    fold  0:  [0.34152334]
    fold  1:  [0.13300493]
    fold  2:  [0.24137931]
    fold  3:  [0.19211823]
    ----
    MEAN:     [0.22700645] + [0.07644307]
    FULL:     [0.22707692]

model  2:     [ExtraTreesClassifier]
    fold  0:  [0.31449631]
    fold  1:  [0.12315271]
    fold  2:  [0.22413793]
    fold  3:  [0.13054187]
    ----
    MEAN:     [0.19808221] + [0.07811413]
    FULL:     [0.19815385]

accuracy:0.2052
accuracy:0.2052


In [254]:
y_pred

array([ 2,  2,  2,  2,  2,  2,  2,  0,  1,  1,  0,  1,  1,  0,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  2,  0,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2, -2,  2,  2,  2,  0,  0,  2,  2,  2,  0,  0,  0,  2,  2,  2,  2,
        2,  2,  2,  0,  0,  2,  2,  2,  0,  2,  0,  1,  1,  1, -1,  1,  1,
        0,  0,  2,  2,  2,  0,  1,  1,  1,  1,  2,  1,  0,  0,  0,  2,  2,
        2,  2,  2,  0,  1,  1,  1,  1,  1,  1,  0,  0,  0,  0,  0,  1,  0,
        0,  2,  2,  1,  1,  1,  0,  1,  1,  1,  1,  0,  0,  2,  2,  2,  2,
        0,  1, -1,  0,  0,  0,  2,  1,  1,  2,  2,  2, -1,  0,  0,  2,  2,
        2,  2,  1,  1,  1,  1, -1, -1, -1,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0,
        2,  1,  2,  1,  1,  2,  2,  2,  2,  0,  2,  0,  2,  2,  2,  2,  2,
        0,  0,  0,  0,  0,  0,  0,  0,  2,  2,  2,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0

### LGB + RF + ET

In [304]:
from vecstack import stacking

lgb_clf = LGBMClassifier(random_state=0,
                        boosting_type = lgb.best_params_['boosting_type'],
                        feature_fraction = lgb.best_params_['feature_fraction'],
                        min_data_in_leaf = lgb.best_params_['min_data_in_leaf'],
                        sub_feature = lgb.best_params_['sub_feature'],
                        max_depth = lgb.best_params_['max_depth'],
                        metric = lgb.best_params_['metric'],
                        n_estimators = lgb.best_params_['n_estimators'],
                        num_boost_round = lgb.best_params_['num_boost_round'],
                        num_leaves = lgb.best_params_['num_leaves'],
                        )

rf_clf = RandomForestClassifier(random_state=0,
                              max_depth=rf.best_params_['max_depth'],
                              min_samples_leaf=rf.best_params_['min_samples_leaf'],
                              min_samples_split=rf.best_params_['min_samples_split'],
                              n_estimators=rf.best_params_['n_estimators'])

ext_clf = ExtraTreesClassifier(random_state=0,
                              max_depth=ext.best_params_['max_depth'],
                              min_samples_leaf=ext.best_params_['min_samples_leaf'],
                              min_samples_split=ext.best_params_['min_samples_split'],
                              n_estimators=ext.best_params_['n_estimators'])

ensemble_models =[lgb_clf, rf_clf,ext_clf]
s_train, s_test = stacking(ensemble_models, X_train, y_train, X_test, 
                           mode = 'oof_pred_bag', regression=False, metric=accuracy_score, n_folds=4,
                          shuffle=False, random_state=123, verbose=2)

final_model = XGBClassifier(seed = 0, n_jobs = -1, learning_rate = 0.05, 
                            n_estimators = 200, max_depth = 4)

final_model = final_model.fit(s_train,y_train)

y_pred = final_model.predict(s_test)
accuracy_con = accuracy_score(y_test,y_pred)
print('accuracy:{0:.4f}'.format(accuracy_con))
get_confusion_matrix(y_test,y_pred)

task:         [classification]
n_classes:    [5]
metric:       [accuracy_score]
mode:         [oof_pred_bag]
n_models:     [3]

model  0:     [LGBMClassifier]
    fold  0:  [0.33660934]
    fold  1:  [0.17733990]
    fold  2:  [0.25123153]
    fold  3:  [0.18226601]
    ----
    MEAN:     [0.23686169] + [0.06457480]
    FULL:     [0.23692308]

model  1:     [RandomForestClassifier]
    fold  0:  [0.34152334]
    fold  1:  [0.13300493]
    fold  2:  [0.24137931]
    fold  3:  [0.19211823]
    ----
    MEAN:     [0.22700645] + [0.07644307]
    FULL:     [0.22707692]

model  2:     [ExtraTreesClassifier]
    fold  0:  [0.31449631]
    fold  1:  [0.12315271]
    fold  2:  [0.22413793]
    fold  3:  [0.13054187]
    ----
    MEAN:     [0.19808221] + [0.07811413]
    FULL:     [0.19815385]

accuracy:0.2310
accuracy:0.2310


In [305]:
y_pred

array([ 2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  2, -2,  2,  0,  2,  2,  2,
        1,  2,  1,  0,  0,  1,  2,  2,  2,  2,  2,  2,  2,  2,  0,  2,  2,
        0,  2,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  2,  0,  2,  2,
        2,  2,  2,  0,  0,  0,  2,  2,  1,  2,  2,  1,  2,  2,  2,  2,  0,
        2, -2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  2,
        0,  2,  2,  0,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, -1,  2,
        2,  2,  2, -1, -1, -1,  2,  1,  2,  1,  1,  2,  2,  2,  2,  2,  2,
        2,  1,  0,  2,  2,  2,  2, -1, -1, -2,  2,  2,  0,  0,  2,  2,  2,
        2,  2, -1, -1, -1, -1, -1,  2, -1,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2, -1,  0,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  2,  2,
        2,  2,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2

### XGB + LGB + RF + ET

In [306]:
from vecstack import stacking

xgb_clf = XGBClassifier(random_state=0,
                        colsample_bytree = xgb.best_params_['colsample_bytree'],
                        min_child_weight = xgb.best_params_['min_child_weight'],
                        subsample = xgb.best_params_['subsample'],
                        gamma = xgb.best_params_['gamma'],
                        learning_rate = xgb.best_params_['learning_rate'],
                         max_depth = xgb.best_params_['max_depth'],
                         n_estimators = xgb.best_params_['n_estimators'],
                         num_class = xgb.best_params_['num_class'],
                         objective = xgb.best_params_['objective']
                        )

lgb_clf = LGBMClassifier(random_state=0,
                        boosting_type = lgb.best_params_['boosting_type'],
                        feature_fraction = lgb.best_params_['feature_fraction'],
                        min_data_in_leaf = lgb.best_params_['min_data_in_leaf'],
                        sub_feature = lgb.best_params_['sub_feature'],
                        max_depth = lgb.best_params_['max_depth'],
                        metric = lgb.best_params_['metric'],
                        n_estimators = lgb.best_params_['n_estimators'],
                        num_boost_round = lgb.best_params_['num_boost_round'],
                        num_leaves = lgb.best_params_['num_leaves'],
                        )

rf_clf = RandomForestClassifier(random_state=0,
                              max_depth=rf.best_params_['max_depth'],
                              min_samples_leaf=rf.best_params_['min_samples_leaf'],
                              min_samples_split=rf.best_params_['min_samples_split'],
                              n_estimators=rf.best_params_['n_estimators'])

ext_clf = ExtraTreesClassifier(random_state=0,
                              max_depth=ext.best_params_['max_depth'],
                              min_samples_leaf=ext.best_params_['min_samples_leaf'],
                              min_samples_split=ext.best_params_['min_samples_split'],
                              n_estimators=ext.best_params_['n_estimators'])

ensemble_models =[xgb_clf, lgb_clf, rf_clf,ext_clf]
s_train, s_test = stacking(ensemble_models, X_train, y_train, X_test, 
                           mode = 'oof_pred_bag', regression=False, metric=accuracy_score, n_folds=4,
                          shuffle=False, random_state=123, verbose=2)

final_model = XGBClassifier(seed = 0, n_jobs = -1, learning_rate = 0.05, 
                            n_estimators = 200, max_depth = 4)

final_model = final_model.fit(s_train,y_train)

y_pred = final_model.predict(s_test)
accuracy_con = accuracy_score(y_test,y_pred)
print('accuracy:{0:.4f}'.format(accuracy_con))
get_confusion_matrix(y_test,y_pred)

task:         [classification]
n_classes:    [5]
metric:       [accuracy_score]
mode:         [oof_pred_bag]
n_models:     [4]

model  0:     [XGBClassifier]
    fold  0:  [0.35135135]
    fold  1:  [0.15517241]
    fold  2:  [0.25123153]
    fold  3:  [0.16256158]
    ----
    MEAN:     [0.23007922] + [0.07956760]
    FULL:     [0.23015385]

model  1:     [LGBMClassifier]
    fold  0:  [0.33660934]
    fold  1:  [0.17733990]
    fold  2:  [0.25123153]
    fold  3:  [0.18226601]
    ----
    MEAN:     [0.23686169] + [0.06457480]
    FULL:     [0.23692308]

model  2:     [RandomForestClassifier]
    fold  0:  [0.34152334]
    fold  1:  [0.13300493]
    fold  2:  [0.24137931]
    fold  3:  [0.19211823]
    ----
    MEAN:     [0.22700645] + [0.07644307]
    FULL:     [0.22707692]

model  3:     [ExtraTreesClassifier]
    fold  0:  [0.31449631]
    fold  1:  [0.12315271]
    fold  2:  [0.22413793]
    fold  3:  [0.13054187]
    ----
    MEAN:     [0.19808221] + [0.07811413]
    FULL:     [

In [307]:
y_pred

array([ 2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  2,  0,  2,  2,  2,
        1,  2, -1,  1,  1, -1,  2,  1,  2,  2,  2,  2,  2,  2, -2,  2,  2,
       -2,  2,  2,  2,  2,  1,  1,  2,  2,  2,  0,  0,  0,  2, -2,  2,  2,
        2,  2,  2,  1,  0, -2,  2,  2,  1,  2,  2,  1,  2,  1,  1,  1,  1,
        2,  1,  2,  2,  2,  2,  1,  2,  2,  2,  2,  2,  0,  0,  0, -2,  2,
       -2,  2,  2,  0,  2,  2,  2,  2,  2,  2,  1,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  2,  2,  2,  2,  2,  2,
        2,  0,  0,  2,  1,  2,  2, -2, -2,  1,  1,  2,  0,  0,  1,  1,  2,
        2,  2, -2, -2, -2, -2, -1,  2, -1,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2, -2, -2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, -2, -2,
       -2, -2,  2,  2,  2,  1,  1,  1,  2,  1,  1,  2,  2,  0,  0,  1,  1,
        2,  1,  1,  2,  2,  2,  2,  2,  2,  1, -2,  1,  2,  2,  2,  2,  2,
        1,  1,  2,  1,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  2,  2,
        2,  2,  2,  2,  2