# Summary of the methodology and outcomes:  

### Outcomes

With new groups of variants, the nested strategy still underperforms relative to the SVM ran on the full dataset.  

# Nested Models

In [1]:
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
import graphviz
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_validate
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

In [2]:
# loading both sets
full_df = pd.read_pickle('datasets/full_df')
df_feat_eng = pd.read_pickle('datasets/df_feat_eng')

In [3]:
variants = ['antichess', 'atomic', 'blitz', 'chess960', 'crazyhouse', 'horde', 'kingOfTheHill', 'racingKings', 'threeCheck']
var_grps = ['easy_grp', 'middle_grp', 'hard_grp']

easy_grp = ['horde', 'racingKings']
middle_grp = ['chess960', 'antichess']
hard_grp = [e for e in variants if e not in [item for sublist in [easy_grp, middle_grp] for item in sublist]] # the remaining variants

In [4]:
df_feat_eng.loc[df_feat_eng['variant'].isin(easy_grp), 'var_grp'] = 'easy_grp'
df_feat_eng.loc[df_feat_eng['variant'].isin(middle_grp), 'var_grp'] = 'middle_grp'
df_feat_eng.loc[df_feat_eng['variant'].isin(hard_grp), 'var_grp'] = 'hard_grp'

In [5]:
df_feat_eng['var_grp'].value_counts()

hard_grp      5000
middle_grp    2000
easy_grp      2000
Name: var_grp, dtype: int64

# Creating Train/Validation/Test set

In [6]:
X_tr, X_te, y_tr, y_te = train_test_split(
     df_feat_eng.drop(['var_grp'], axis=1), df_feat_eng['var_grp'], test_size=1/10, random_state=0, stratify =  df_feat_eng['variant'])

# X_tr, X_va, y_tr, y_va = train_test_split(
#    X_tr, y_tr, test_size=1/4, random_state=1, stratify = y_tr)

# cross validation strategy
kfold = KFold(
    n_splits=5, 
    random_state=0) 

In [7]:
print("Shape X_tr:" , X_tr.shape, 
      "\nShape X_te:", X_te.shape,      
      "\nShape y_tr:" , y_tr.shape, 
      "\nShape y_te:" , y_te.shape)

Shape X_tr: (8100, 30) 
Shape X_te: (900, 30) 
Shape y_tr: (8100,) 
Shape y_te: (900,)


In [8]:
y_tr.value_counts()

hard_grp      4500
middle_grp    1800
easy_grp      1800
Name: var_grp, dtype: int64

In [9]:
y_te.value_counts()

hard_grp      500
easy_grp      200
middle_grp    200
Name: var_grp, dtype: int64

In [10]:
tr_variant = X_tr['variant']
X_tr = X_tr.drop(['variant'],axis=1)

In [11]:
te_variant = X_te['variant']
X_te = X_te.drop(['variant'],axis=1)

In [12]:
X_tr.head()

Unnamed: 0,nb_moves,nb_K,nb_Q,nb_R,nb_B,nb_N,nb_P,nb_k,nb_q,nb_r,...,nb_pawns_ext_center,nb_pawns_third_row_c_f,nb_pawns_b_g_col,nb_pieces_center,nb_pieces_ext_center,nb_pieces_third_row_c_f,nb_pieces_b_g_col,knight_squares,king_castle_squares,nb_checks
1743,10,1,1,2,2,1,7,1,1,2,...,0,2,0,1,0,2,1,1,0,1
7772,10,0,0,0,0,0,32,1,1,2,...,4,3,6,0,0,0,0,1,0,0
8400,10,1,1,1,0,2,0,1,0,1,...,0,0,0,0,0,0,1,1,0,0
2760,10,1,1,2,1,2,7,1,1,2,...,0,3,2,0,2,1,0,1,0,0
7741,10,0,0,0,0,0,32,1,1,2,...,3,2,6,0,0,1,0,0,0,0


# Modelling

# Simple Decision Tree

In [13]:
# model 
model = DecisionTreeClassifier()

# grid search
grid = {
    'criterion': ['gini','entropy'], 
    'max_depth': [2,5,8,10,12,15,20,50,100,150]
}

# grid-search + CV
dt_cv = GridSearchCV(
    model, 
    grid, 
    cv=kfold,    
    verbose = 1,
    n_jobs = -1,
    return_train_score=True)

In [14]:
start_time = time.time()
dt_cv.fit(X_tr, y_tr)
dt_tune_time = time.time() - start_time

Fitting 5 folds for each of 20 candidates, totalling 100 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    2.1s finished


Next, we look at the best parameters that lead to the best accuracy.

In [15]:
# Collect results in a DataFrame
cv_results = pd.DataFrame(dt_cv.cv_results_)

# getting the columns of interest:
cols = []
for i in range(len(grid.keys())):
    cols.append('param_'+list(grid.keys())[i]) 
cols.append('mean_test_score')
cols.append('std_test_score')

# printing the results
display(cv_results[cols].sort_values('mean_test_score', ascending=False).head(10))
print("We can expect an accuracy close to {:.1f}% on the testing set with the following parameters: \n{}. \nThe tuning time is {} sec.".format(max(cv_results['mean_test_score']*100), dt_cv.best_params_, round(dt_tune_time, 1)))

Unnamed: 0,param_criterion,param_max_depth,mean_test_score,std_test_score
3,gini,10,0.904321,0.002733
4,gini,12,0.903704,0.00362
14,entropy,12,0.900247,0.004727
13,entropy,10,0.900123,0.004914
2,gini,8,0.896914,0.001562
12,entropy,8,0.896667,0.00458
5,gini,15,0.894198,0.010095
15,entropy,15,0.889506,0.002438
7,gini,50,0.887037,0.007376
11,entropy,5,0.886049,0.006266


We can expect an accuracy close to 90.4% on the testing set with the following parameters: 
{'criterion': 'gini', 'max_depth': 10}. 
The tuning time is 2.2 sec.


In [16]:
dt = DecisionTreeClassifier(
    **dt_cv.best_params_, random_state=0)

In [17]:
start_time = time.time()
dt.fit(X_tr, y_tr)
dt_train_time = time.time() - start_time

In [18]:
dt_accuracy = dt.score(X_te, y_te)
print('Accuracy on Testing Set: accuracy: {:.1f}%.'.format(dt_accuracy*100))

Accuracy on Testing Set: accuracy: 89.7%.


In [19]:
dt_conf=pd.DataFrame(confusion_matrix(dt.predict(X_te), y_te), index= var_grps, columns=var_grps)
round(dt_conf / dt_conf.astype(np.float).sum(axis=1),3)

Unnamed: 0,easy_grp,middle_grp,hard_grp
easy_grp,0.995,0.002,0.0
middle_grp,0.005,0.871,0.464
hard_grp,0.0,0.038,0.861


In [20]:
dt_metrics=pd.DataFrame(classification_report(dt.predict(X_te), y_te, output_dict=True)).T.round(3)
dt_metrics

Unnamed: 0,f1-score,precision,recall,support
easy_grp,0.995,0.995,0.995,200.0
hard_grp,0.911,0.956,0.871,549.0
middle_grp,0.741,0.65,0.861,151.0
micro avg,0.897,0.897,0.897,900.0
macro avg,0.882,0.867,0.909,900.0
weighted avg,0.901,0.913,0.897,900.0


# Logistic Regression

In [21]:
# Define our steps
model = LogisticRegression()

# grid 
grid = {
    'C': np.logspace(-3, 4, 8),
}

# Grid search
log_cv = GridSearchCV(
    model, 
    grid, 
    cv=kfold,    
    verbose = 1,
    n_jobs = -1,
    return_train_score=True)

In [22]:
start_time = time.time()
log_cv.fit(X_tr, y_tr)
logistic_tune_time = time.time() - start_time

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 5 folds for each of 8 candidates, totalling 40 fits


[Parallel(n_jobs=-1)]: Done  40 out of  40 | elapsed:    2.3s finished


In [23]:
# Collect results in a DataFrame
cv_results = pd.DataFrame(log_cv.cv_results_)

# getting the columns of interest:
cols = []
for i in range(len(grid.keys())):
    cols.append('param_'+list(grid.keys())[i]) 
cols.append('mean_test_score')
cols.append('std_test_score')

# printing the results
display(cv_results[cols].sort_values('mean_test_score', ascending=False).head(10))
print("We can expect an accuracy close to {:.1f}% on the testing set with the following parameters: \n{}. \nThe tuning time is {} sec.".format(max(cv_results['mean_test_score']*100), log_cv.best_params_, round(logistic_tune_time, 1)))

Unnamed: 0,param_C,mean_test_score,std_test_score
4,10.0,0.918765,0.003257
5,100.0,0.918519,0.00362
6,1000.0,0.918519,0.00362
7,10000.0,0.918519,0.00362
3,1.0,0.917901,0.003724
2,0.1,0.915926,0.004835
1,0.01,0.912963,0.005934
0,0.001,0.892469,0.006288


We can expect an accuracy close to 91.9% on the testing set with the following parameters: 
{'C': 10.0}. 
The tuning time is 2.5 sec.


In [24]:
logistic = LogisticRegression(
    **log_cv.best_params_, random_state=0)

In [25]:
start_time = time.time()
logistic.fit(X_tr, y_tr)
logistic_train_time = time.time() - start_time



In [26]:
logistic_accuracy = logistic.score(X_te, y_te)
print('Accuracy on Testing Set: accuracy: {:.1f}%.'.format(logistic_accuracy*100))

Accuracy on Testing Set: accuracy: 91.6%.


In [27]:
logistic_conf=pd.DataFrame(confusion_matrix(logistic.predict(X_te), y_te), index=var_grps, columns=var_grps)/100
round(logistic_conf / logistic_conf.astype(np.float).sum(axis=1),3)

Unnamed: 0,easy_grp,middle_grp,hard_grp
easy_grp,1.0,0.0,0.0
middle_grp,0.0,0.894,0.352
hard_grp,0.0,0.035,0.883


In [28]:
logistic_metrics=pd.DataFrame(classification_report(logistic.predict(X_te), y_te, output_dict=True)).T.round(3)
logistic_metrics

Unnamed: 0,f1-score,precision,recall,support
easy_grp,1.0,1.0,1.0,200.0
hard_grp,0.927,0.962,0.894,538.0
middle_grp,0.79,0.715,0.883,162.0
micro avg,0.916,0.916,0.916,900.0
macro avg,0.906,0.892,0.926,900.0
weighted avg,0.918,0.926,0.916,900.0


# Random Forest

In [29]:
# Define our steps
model = RandomForestClassifier()

# grid 
grid = {'n_estimators': [20,100,500,750,1000],
               'max_depth': [5,10,20,50,75,100]
       } 

# Grid search
rf_cv = GridSearchCV(
    model, 
    grid, 
    cv=kfold,    
    verbose = 1,
    n_jobs = -1)

In [30]:
start_time = time.time()
rf_cv.fit(X_tr, y_tr)
rf_tune_time = time.time() - start_time

Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed:   16.6s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  1.4min finished


In [31]:
# Collect results in a DataFrame
cv_results = pd.DataFrame(rf_cv.cv_results_)

# getting the columns of interest:
cols = []
for i in range(len(grid.keys())):
    cols.append('param_'+list(grid.keys())[i]) 
cols.append('mean_test_score')
cols.append('std_test_score')

# printing the results
display(cv_results[cols].sort_values('mean_test_score', ascending=False).head(10))
print("We can expect an accuracy close to {:.1f}% on the testing set with the following parameters: \n{}. \nThe tuning time is {} sec.".format(max(cv_results['mean_test_score']*100), rf_cv.best_params_, round(rf_tune_time, 1)))



Unnamed: 0,param_n_estimators,param_max_depth,mean_test_score,std_test_score
14,1000,20,0.928025,0.005639
18,750,50,0.927654,0.005513
12,500,20,0.927654,0.004441
13,750,20,0.927531,0.005692
17,500,50,0.927407,0.004513
27,500,100,0.927284,0.005835
24,1000,75,0.92716,0.005223
11,100,20,0.92716,0.005165
19,1000,50,0.927037,0.005796
29,1000,100,0.926914,0.005584


We can expect an accuracy close to 92.8% on the testing set with the following parameters: 
{'max_depth': 20, 'n_estimators': 1000}. 
The tuning time is 86.0 sec.


In [32]:
rf = RandomForestClassifier(
    **rf_cv.best_params_, random_state=0)

In [33]:
start_time = time.time()
rf.fit(X_tr, y_tr)
rf_train_time = time.time() - start_time

In [34]:
rf_accuracy = rf.score(X_te, y_te)
print('Accuracy on Testing Set: accuracy: {:.1f}%.'.format(rf_accuracy*100))

Accuracy on Testing Set: accuracy: 93.6%.


In [35]:
rf_conf=pd.DataFrame(confusion_matrix(rf.predict(X_te), y_te), index=var_grps, columns=var_grps)/100
round(rf_conf / rf_conf.astype(np.float).sum(axis=1),3)

Unnamed: 0,easy_grp,middle_grp,hard_grp
easy_grp,1.0,0.0,0.0
middle_grp,0.0,0.908,0.316
hard_grp,0.0,0.015,0.949


In [36]:
rf_metrics=pd.DataFrame(classification_report(rf.predict(X_te), y_te, output_dict=True)).T.round(3)
rf_metrics

Unnamed: 0,f1-score,precision,recall,support
easy_grp,1.0,1.0,1.0,200.0
hard_grp,0.944,0.984,0.908,542.0
middle_grp,0.838,0.75,0.949,158.0
micro avg,0.936,0.936,0.936,900.0
macro avg,0.927,0.911,0.952,900.0
weighted avg,0.938,0.946,0.936,900.0


# SVM

In [37]:
# Define our steps
model = SVC()

# grid 
grid = {'C': np.logspace(-0,5,5), # others sets of param were tested, higher C generally performed better
               'kernel': ['sigmoid', 'rbf'], # computational performance of linear kernel too bad to be considered as a good candidate
              'gamma': np.logspace(-5,0,5) # others sets of parameters were tested, lower gammas generally performed better
       }  

# Grid search
svm_cv = GridSearchCV(
    model, 
    grid, 
    cv=kfold,    
    verbose = 1,
    n_jobs = -1)

In [38]:
start_time = time.time()
svm_cv.fit(X_tr, y_tr)
svm_tune_time = time.time() - start_time

Fitting 5 folds for each of 50 candidates, totalling 250 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   20.8s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 250 out of 250 | elapsed:  3.1min finished


In [39]:
# Collect results in a DataFrame
cv_results = pd.DataFrame(svm_cv.cv_results_)

# getting the columns of interest:
cols = []
for i in range(len(grid.keys())):
    cols.append('param_'+list(grid.keys())[i]) 
cols.append('mean_test_score')
cols.append('std_test_score')

# printing the results
display(cv_results[cols].sort_values('mean_test_score', ascending=False).head(10))
print("We can expect an accuracy close to {:.1f}% on the testing set with the following parameters: \n{}. \nThe tuning time is {} sec.".format(max(cv_results['mean_test_score']*100), svm_cv.best_params_, round(svm_tune_time, 1)))



Unnamed: 0,param_C,param_kernel,param_gamma,mean_test_score,std_test_score
25,316.228,rbf,0.00316228,0.936049,0.004963
43,100000.0,rbf,0.000177828,0.936049,0.004727
7,1.0,rbf,0.0562341,0.935309,0.006641
33,5623.41,rbf,0.000177828,0.931358,0.002826
35,5623.41,rbf,0.00316228,0.930864,0.006474
15,17.7828,rbf,0.00316228,0.930741,0.004121
17,17.7828,rbf,0.0562341,0.924074,0.005339
23,316.228,rbf,0.000177828,0.923457,0.003403
41,100000.0,rbf,1e-05,0.92321,0.003138
31,5623.41,rbf,1e-05,0.920617,0.004727


We can expect an accuracy close to 93.6% on the testing set with the following parameters: 
{'C': 316.22776601683796, 'gamma': 0.0031622776601683794, 'kernel': 'rbf'}. 
The tuning time is 190.0 sec.


In [40]:
svm = SVC(
    **svm_cv.best_params_, random_state=0, probability=True
)

In [41]:
start_time = time.time()
svm.fit(X_tr, y_tr)
svm_train_time = time.time() - start_time

In [42]:
svm_accuracy = svm.score(X_te, y_te)
print('Accuracy on Testing Set: accuracy: {:.1f}%.'.format(svm_accuracy*100))

Accuracy on Testing Set: accuracy: 94.9%.


In [43]:
svm_conf=pd.DataFrame(confusion_matrix(svm.predict(X_te), y_te), index=var_grps, columns=var_grps)/100
round(svm_conf / svm_conf.astype(np.float).sum(axis=1),3)

Unnamed: 0,easy_grp,middle_grp,hard_grp
easy_grp,1.0,0.0,0.0
middle_grp,0.0,0.927,0.232
hard_grp,0.0,0.013,0.958


In [44]:
svm_metrics=pd.DataFrame(classification_report(svm.predict(X_te), y_te, output_dict=True)).T.round(3)
svm_metrics

Unnamed: 0,f1-score,precision,recall,support
easy_grp,1.0,1.0,1.0,200.0
hard_grp,0.955,0.986,0.927,532.0
middle_grp,0.875,0.805,0.958,168.0
micro avg,0.949,0.949,0.949,900.0
macro avg,0.943,0.93,0.962,900.0
weighted avg,0.95,0.955,0.949,900.0


# Neural Network with Sklearn

In [45]:
# Define our steps
model = MLPClassifier()

# grid 
grid = {'hidden_layer_sizes': [5,10,15,20,50,100],
        'alpha': np.logspace(-5,5,5),
        'solver': ['adam', 'sgd'],
        'activation': ['logistic', 'relu'],
        'early_stopping': [True]
       }  

# Grid search
nnet_cv = GridSearchCV(
    model, 
    grid, 
    cv=kfold,    
    verbose = 1,
    n_jobs = -1)

In [46]:
start_time = time.time()
nnet_cv.fit(X_tr, y_tr)
nnet_tune_time = time.time() - start_time

Fitting 5 folds for each of 120 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    5.9s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:   37.8s
[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:  1.4min finished


In [47]:
# Collect results in a DataFrame
cv_results = pd.DataFrame(nnet_cv.cv_results_)

# getting the columns of interest:
cols = []
for i in range(len(grid.keys())):
    cols.append('param_'+list(grid.keys())[i]) 
cols.append('mean_test_score')
cols.append('std_test_score')

# printing the results
display(cv_results[cols].sort_values('mean_test_score', ascending=False).head(10))
print("We can expect an accuracy close to {:.1f}% on the testing set with the following parameters: \n{}. \nThe tuning time is {} sec.".format(max(cv_results['mean_test_score']*100), nnet_cv.best_params_, round(nnet_tune_time, 1)))



Unnamed: 0,param_hidden_layer_sizes,param_alpha,param_solver,param_activation,param_early_stopping,mean_test_score,std_test_score
82,100,0.00316228,adam,relu,True,0.927284,0.002572
80,50,0.00316228,adam,relu,True,0.926049,0.006664
70,100,1e-05,adam,relu,True,0.924074,0.005223
76,15,0.00316228,adam,relu,True,0.923951,0.001968
68,50,1e-05,adam,relu,True,0.922716,0.011412
94,100,1.0,adam,relu,True,0.922099,0.003892
92,50,1.0,adam,relu,True,0.920617,0.004978
78,20,0.00316228,adam,relu,True,0.919753,0.004923
66,20,1e-05,adam,relu,True,0.919383,0.007636
90,20,1.0,adam,relu,True,0.919012,0.00787


We can expect an accuracy close to 92.7% on the testing set with the following parameters: 
{'activation': 'relu', 'alpha': 0.0031622776601683794, 'early_stopping': True, 'hidden_layer_sizes': 100, 'solver': 'adam'}. 
The tuning time is 84.2 sec.


In [48]:
nnet = MLPClassifier(
    **nnet_cv.best_params_, random_state=0)

In [49]:
start_time = time.time()
nnet.fit(X_tr, y_tr)
nnet_train_time = time.time() - start_time

In [50]:
nnet_accuracy = nnet.score(X_te, y_te)
print('Accuracy on Testing Set: accuracy: {:.1f}%.'.format(nnet_accuracy*100))

Accuracy on Testing Set: accuracy: 92.8%.


In [51]:
nnet_conf=pd.DataFrame(confusion_matrix(nnet.predict(X_te), y_te), index=var_grps, columns=var_grps)/100
round(nnet_conf / nnet_conf.astype(np.float).sum(axis=1),3)

Unnamed: 0,easy_grp,middle_grp,hard_grp
easy_grp,1.0,0.0,0.0
middle_grp,0.0,0.911,0.275
hard_grp,0.0,0.034,0.895


In [52]:
nnet_metrics=pd.DataFrame(classification_report(nnet.predict(X_te), y_te, output_dict=True)).T.round(3)
nnet_metrics

Unnamed: 0,f1-score,precision,recall,support
easy_grp,1.0,1.0,1.0,200.0
hard_grp,0.937,0.964,0.911,529.0
middle_grp,0.825,0.765,0.895,171.0
micro avg,0.928,0.928,0.928,900.0
macro avg,0.921,0.91,0.935,900.0
weighted avg,0.93,0.934,0.928,900.0


# Intermediary results

In [53]:
dt_scores_avg=list(dt_metrics.loc['weighted avg',:])[:3]
#dt_scores_blitz=list(dt_metrics.loc['blitz',:])
logistic_scores_avg=list(logistic_metrics.loc['weighted avg',:])[:3]
#logistic_scores_blitz=list(logistic_metrics.loc['blitz',:])
rf_scores_avg=list(rf_metrics.loc['weighted avg',:])[:3]
#rf_scores_blitz=list(rf_metrics.loc['blitz',:])
svm_scores_avg=list(svm_metrics.loc['weighted avg',:])[:3]
#svm_scores_blitz=list(svm_metrics.loc['blitz',:])
nnet_scores_avg=list(nnet_metrics.loc['weighted avg',:])[:3]
#nnet_scores_blitz=list(nnet_metrics.loc['blitz',:])

In [54]:
results_full=pd.DataFrame({
              'decision tree': [dt_accuracy, dt_tune_time, dt_train_time, dt_scores_avg[0], dt_scores_avg[1], dt_scores_avg[2]],
              'logistic': [logistic_accuracy, logistic_tune_time, logistic_train_time, logistic_scores_avg[0], logistic_scores_avg[1], logistic_scores_avg[2]],
              'random forest': [rf_accuracy, rf_tune_time, rf_train_time, rf_scores_avg[0], rf_scores_avg[1], rf_scores_avg[2]],
              'SVM': [svm_accuracy, svm_tune_time, svm_train_time, svm_scores_avg[0], svm_scores_avg[1], svm_scores_avg[2]],
              'NN': [nnet_accuracy, nnet_tune_time, nnet_train_time, nnet_scores_avg[0], nnet_scores_avg[1], nnet_scores_avg[2]],
}, index = ['Accuracy', 'Tuning time', 'Training time', 'f1_avg', 'precision_avg', 'recall_avg']).round(3).T.sort_values('Accuracy', ascending=False)
results_full

Unnamed: 0,Accuracy,Tuning time,Training time,f1_avg,precision_avg,recall_avg
SVM,0.949,190.029,8.339,0.95,0.955,0.949
random forest,0.936,86.047,4.82,0.938,0.946,0.936
NN,0.928,84.247,0.419,0.93,0.934,0.928
logistic,0.916,2.512,0.229,0.918,0.926,0.916
decision tree,0.897,2.171,0.027,0.901,0.913,0.897


# 2nd step

In [55]:
model_step1 = SVC(
    **svm_cv.best_params_, random_state=0)

In [56]:
model_step1.fit(X_tr, y_tr)

SVC(C=316.22776601683796, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.0031622776601683794,
  kernel='rbf', max_iter=-1, probability=False, random_state=0,
  shrinking=True, tol=0.001, verbose=False)

As explained above, we create our new dataset of interest by splitting the data accordingly. 

In [57]:
X_tr['var_grp'] = y_tr
X_tr.head()

Unnamed: 0,nb_moves,nb_K,nb_Q,nb_R,nb_B,nb_N,nb_P,nb_k,nb_q,nb_r,...,nb_pawns_third_row_c_f,nb_pawns_b_g_col,nb_pieces_center,nb_pieces_ext_center,nb_pieces_third_row_c_f,nb_pieces_b_g_col,knight_squares,king_castle_squares,nb_checks,var_grp
1743,10,1,1,2,2,1,7,1,1,2,...,2,0,1,0,2,1,1,0,1,hard_grp
7772,10,0,0,0,0,0,32,1,1,2,...,3,6,0,0,0,0,1,0,0,easy_grp
8400,10,1,1,1,0,2,0,1,0,1,...,0,0,0,0,0,1,1,0,0,easy_grp
2760,10,1,1,2,1,2,7,1,1,2,...,3,2,0,2,1,0,1,0,0,middle_grp
7741,10,0,0,0,0,0,32,1,1,2,...,2,6,0,0,1,0,0,0,0,easy_grp


In [58]:
y_tr = tr_variant
y_tr.head()

1743     crazyhouse
7772          horde
8400    racingKings
2760       chess960
7741          horde
Name: variant, dtype: object

In [59]:
X_te['var_grp'] = logistic.predict(X_te)
X_te.head()

Unnamed: 0,nb_moves,nb_K,nb_Q,nb_R,nb_B,nb_N,nb_P,nb_k,nb_q,nb_r,...,nb_pawns_third_row_c_f,nb_pawns_b_g_col,nb_pieces_center,nb_pieces_ext_center,nb_pieces_third_row_c_f,nb_pieces_b_g_col,knight_squares,king_castle_squares,nb_checks,var_grp
3035,10,1,1,2,1,1,8,1,1,2,...,2,0,0,0,1,1,1,0,0,hard_grp
8710,10,1,1,1,1,1,0,1,0,2,...,0,0,0,0,1,1,2,0,0,easy_grp
3382,10,1,1,2,2,2,7,1,1,2,...,2,0,0,1,2,2,2,0,0,hard_grp
236,10,1,0,2,2,2,5,1,0,2,...,1,0,0,1,4,1,4,1,1,hard_grp
620,10,1,1,2,2,2,8,1,1,2,...,3,2,0,0,2,2,3,0,0,hard_grp


In [60]:
y_te = te_variant
y_te.head()

3035    kingOfTheHill
8710      racingKings
3382    kingOfTheHill
236             blitz
620             blitz
Name: variant, dtype: object

In [61]:
X_tr_easy = X_tr[X_tr['var_grp'] == 'easy_grp']
X_tr_middle = X_tr[X_tr['var_grp'] == 'middle_grp']
X_tr_hard = X_tr[X_tr['var_grp'] == 'hard_grp']

In [62]:
X_tr_easy.head()

Unnamed: 0,nb_moves,nb_K,nb_Q,nb_R,nb_B,nb_N,nb_P,nb_k,nb_q,nb_r,...,nb_pawns_third_row_c_f,nb_pawns_b_g_col,nb_pieces_center,nb_pieces_ext_center,nb_pieces_third_row_c_f,nb_pieces_b_g_col,knight_squares,king_castle_squares,nb_checks,var_grp
7772,10,0,0,0,0,0,32,1,1,2,...,3,6,0,0,0,0,1,0,0,easy_grp
8400,10,1,1,1,0,2,0,1,0,1,...,0,0,0,0,0,1,1,0,0,easy_grp
7741,10,0,0,0,0,0,32,1,1,2,...,2,6,0,0,1,0,0,0,0,easy_grp
7470,10,0,0,0,0,0,30,1,1,2,...,5,7,0,0,0,0,1,0,0,easy_grp
8448,5,1,0,1,0,2,0,1,1,2,...,0,0,0,0,0,0,2,1,0,easy_grp


In [63]:
y_tr_easy = y_tr.loc[y_tr.index.isin(X_tr_easy.index)] 
y_tr_middle = y_tr.loc[y_tr.index.isin(X_tr_middle.index)] 
y_tr_hard = y_tr.loc[y_tr.index.isin(X_tr_hard.index)] 

In [64]:
X_tr_easy = X_tr_easy.drop(['var_grp'], axis=1)
X_tr_middle = X_tr_middle.drop(['var_grp'], axis=1)
X_tr_hard = X_tr_hard.drop(['var_grp'], axis=1)

In [65]:
y_tr_easy.value_counts()

horde          900
racingKings    900
Name: variant, dtype: int64

In [66]:
y_tr_middle.value_counts()

antichess    900
chess960     900
Name: variant, dtype: int64

In [67]:
y_tr_hard.value_counts()

crazyhouse       900
threeCheck       900
kingOfTheHill    900
blitz            900
atomic           900
Name: variant, dtype: int64

In [68]:
X_te_easy = X_te[X_te['var_grp'] == 'easy_grp']
X_te_middle = X_te[X_te['var_grp'] == 'middle_grp']
X_te_hard = X_te[X_te['var_grp'] == 'hard_grp']

In [69]:
y_te_easy = y_te.loc[y_te.index.isin(X_te_easy.index)] 
y_te_middle = y_te.loc[y_te.index.isin(X_te_middle.index)] 
y_te_hard = y_te.loc[y_te.index.isin(X_te_hard.index)] 

In [70]:
X_te_easy = X_te_easy.drop(['var_grp'], axis=1)
X_te_middle = X_te_middle.drop(['var_grp'], axis=1)
X_te_hard = X_te_hard.drop(['var_grp'], axis=1)

In [71]:
y_te_easy.value_counts()

horde          100
racingKings    100
Name: variant, dtype: int64

In [72]:
y_te_middle.value_counts()

antichess        93
chess960         50
blitz             7
crazyhouse        5
atomic            3
threeCheck        2
kingOfTheHill     2
Name: variant, dtype: int64

In [73]:
y_te_hard.value_counts()

threeCheck       98
kingOfTheHill    98
atomic           97
crazyhouse       95
blitz            93
chess960         50
antichess         7
Name: variant, dtype: int64

We now have our different sets. In the training set, we know to which groups of variants an observation belongs, so we have "clean" sets with only the relevant variants. However, we see that for the testing set, we already have some missclassified units. For instance, about half of the crazyhouse games have already been missclassified...

In [74]:
# reminder
print(' easy_grp:', easy_grp , '\n', 
      'middle_grp:', middle_grp , '\n',
      'hard_grp:' , hard_grp)

 easy_grp: ['horde', 'racingKings'] 
 middle_grp: ['chess960', 'antichess'] 
 hard_grp: ['atomic', 'blitz', 'crazyhouse', 'kingOfTheHill', 'threeCheck']


The next steps consists of fitting the different models on the three subsets.

# Logistic Regression

In [75]:
# Define our steps
model = LogisticRegression()

# grid 
grid = {
    'C': np.logspace(-3, 4, 8),
}

# Grid search
grid_cv = GridSearchCV(
    model, 
    grid, 
    cv=kfold,    
    verbose = 1,
    n_jobs = -1,
    return_train_score=True)

In [76]:
start_time = time.time()
grid_cv.fit(X_tr_hard, y_tr_hard)
logistic_hard_tune_time = time.time() - start_time

Fitting 5 folds for each of 8 candidates, totalling 40 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  40 out of  40 | elapsed:    4.3s finished


In [77]:
# Collect results in a DataFrame
cv_results = pd.DataFrame(grid_cv.cv_results_)

# getting the columns of interest:
cols = []
for i in range(len(grid.keys())):
    cols.append('param_'+list(grid.keys())[i]) 
cols.append('mean_test_score')
cols.append('std_test_score')

# printing the results
display(cv_results[cols].sort_values('mean_test_score', ascending=False).head(10))
print("We can expect an accuracy close to {:.1f}% on the testing set with the following parameters: \n{}. \nThe tuning time is {} sec.".format(max(cv_results['mean_test_score']*100), grid_cv.best_params_, round(logistic_hard_tune_time, 1)))

Unnamed: 0,param_C,mean_test_score,std_test_score
7,10000.0,0.745333,0.020226
6,1000.0,0.744889,0.019659
5,100.0,0.739111,0.019856
4,10.0,0.722222,0.018244
3,1.0,0.682222,0.014384
2,0.1,0.648,0.015322
1,0.01,0.592,0.020673
0,0.001,0.540222,0.021157


We can expect an accuracy close to 74.5% on the testing set with the following parameters: 
{'C': 10000.0}. 
The tuning time is 5.1 sec.


In [78]:
logistic_hard = LogisticRegression(
    **grid_cv.best_params_, random_state=0)

In [79]:
start_time = time.time()
logistic_hard.fit(X_tr_hard, y_tr_hard)
logistic_hard_train_time = time.time() - start_time



In [80]:
logistic_hard_accuracy = logistic_hard.score(X_te_hard, y_te_hard)
print('Accuracy on Testing Set: accuracy: {:.1f}%.'.format(logistic_hard_accuracy*100))

Accuracy on Testing Set: accuracy: 67.7%.


In [81]:
logistic_hard_conf=pd.DataFrame(confusion_matrix(logistic_hard.predict(X_te_hard), y_te_hard), index=sorted(list(set(y_te_hard))), columns=sorted(list(set(y_te_hard))))/100
logistic_hard_conf

Unnamed: 0,antichess,atomic,blitz,chess960,crazyhouse,kingOfTheHill,threeCheck
antichess,0.0,0.0,0.0,0.0,0.0,0.0,0.0
atomic,0.02,0.94,0.0,0.01,0.0,0.0,0.0
blitz,0.0,0.01,0.69,0.25,0.14,0.27,0.14
chess960,0.0,0.0,0.0,0.0,0.0,0.0,0.0
crazyhouse,0.02,0.0,0.0,0.07,0.75,0.0,0.06
kingOfTheHill,0.01,0.0,0.16,0.08,0.01,0.63,0.15
threeCheck,0.02,0.02,0.08,0.09,0.05,0.08,0.63


In [82]:
logistic_hard_metrics=pd.DataFrame(classification_report(logistic_hard.predict(X_te_hard), y_te_hard, output_dict=True)).T.round(3)
logistic_hard_metrics[(logistic_hard_metrics.index.isin(hard_grp)) | (logistic_hard_metrics.index == 'weighted avg')]

  'recall', 'true', average, warn_for)


Unnamed: 0,f1-score,precision,recall,support
atomic,0.969,0.969,0.969,97.0
blitz,0.568,0.742,0.46,150.0
crazyhouse,0.811,0.789,0.833,90.0
kingOfTheHill,0.624,0.643,0.606,104.0
threeCheck,0.646,0.643,0.649,97.0
weighted avg,0.706,0.754,0.677,538.0


# Middle

In [83]:
# Define our steps
model = LogisticRegression()

# grid 
grid = {
    'C': np.logspace(-3, 4, 8),
}

# Grid search
grid_cv = GridSearchCV(
    model, 
    grid, 
    cv=kfold,    
    verbose = 1,
    n_jobs = -1,
    return_train_score=True)

In [84]:
start_time = time.time()
grid_cv.fit(X_tr_middle, y_tr_middle)
logistic_middle_tune_time = time.time() - start_time

Fitting 5 folds for each of 8 candidates, totalling 40 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  40 out of  40 | elapsed:    0.5s finished


In [85]:
# Collect results in a DataFrame
cv_results = pd.DataFrame(grid_cv.cv_results_)

# getting the columns of interest:
cols = []
for i in range(len(grid.keys())):
    cols.append('param_'+list(grid.keys())[i]) 
cols.append('mean_test_score')
cols.append('std_test_score')

# printing the results
display(cv_results[cols].sort_values('mean_test_score', ascending=False).head(10))
print("We can expect an accuracy close to {:.1f}% on the testing set with the following parameters: \n{}. \nThe tuning time is {} sec.".format(max(cv_results['mean_test_score']*100), grid_cv.best_params_, round(logistic_middle_tune_time, 1)))

Unnamed: 0,param_C,mean_test_score,std_test_score
7,10000.0,0.98,0.005386
3,1.0,0.979444,0.006713
5,100.0,0.979444,0.006713
6,1000.0,0.979444,0.006236
4,10.0,0.978333,0.008498
2,0.1,0.972222,0.006804
1,0.01,0.963333,0.009362
0,0.001,0.945,0.009196


We can expect an accuracy close to 98.0% on the testing set with the following parameters: 
{'C': 10000.0}. 
The tuning time is 0.6 sec.


In [86]:
logistic_middle = LogisticRegression(
    **grid_cv.best_params_, random_state=0)

In [87]:
start_time = time.time()
logistic_middle.fit(X_tr_middle, y_tr_middle)
logistic_middle_train_time = time.time() - start_time



In [88]:
logistic_middle_accuracy = logistic_middle.score(X_te_middle, y_te_middle)
print('Accuracy on Testing Set: accuracy: {:.1f}%.'.format(logistic_middle_accuracy*100))

Accuracy on Testing Set: accuracy: 88.3%.


In [89]:
logistic_middle_conf=pd.DataFrame(confusion_matrix(logistic_middle.predict(X_te_middle), y_te_middle), index=sorted(list(set(y_te_middle))), columns=sorted(list(set(y_te_middle))))/100
logistic_middle_conf

Unnamed: 0,antichess,atomic,blitz,chess960,crazyhouse,kingOfTheHill,threeCheck
antichess,0.93,0.0,0.0,0.0,0.0,0.0,0.0
atomic,0.0,0.0,0.0,0.0,0.0,0.0,0.0
blitz,0.0,0.0,0.0,0.0,0.0,0.0,0.0
chess960,0.0,0.03,0.07,0.5,0.05,0.02,0.02
crazyhouse,0.0,0.0,0.0,0.0,0.0,0.0,0.0
kingOfTheHill,0.0,0.0,0.0,0.0,0.0,0.0,0.0
threeCheck,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [90]:
logistic_middle_metrics=pd.DataFrame(classification_report(logistic_middle.predict(X_te_middle), y_te_middle, output_dict=True)).T.round(3)
logistic_middle_metrics[(logistic_middle_metrics.index.isin(middle_grp)) | (logistic_middle_metrics.index == 'weighted avg')]

  'recall', 'true', average, warn_for)


Unnamed: 0,f1-score,precision,recall,support
antichess,1.0,1.0,1.0,93.0
chess960,0.84,1.0,0.725,69.0
weighted avg,0.932,1.0,0.883,162.0


# Easy

In [91]:
# Define our steps
model = LogisticRegression()

# grid 
grid = {
    'C': np.logspace(-3, 4, 8),
}

# Grid search
grid_cv = GridSearchCV(
    model, 
    grid, 
    cv=kfold,    
    verbose = 1,
    n_jobs = -1,
    return_train_score=True)

In [92]:
start_time = time.time()
grid_cv.fit(X_tr_easy, y_tr_easy)
logistic_easy_tune_time = time.time() - start_time

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 5 folds for each of 8 candidates, totalling 40 fits


[Parallel(n_jobs=-1)]: Done  40 out of  40 | elapsed:    0.3s finished


In [93]:
# Collect results in a DataFrame
cv_results = pd.DataFrame(grid_cv.cv_results_)

# getting the columns of interest:
cols = []
for i in range(len(grid.keys())):
    cols.append('param_'+list(grid.keys())[i]) 
cols.append('mean_test_score')
cols.append('std_test_score')

# printing the results
display(cv_results[cols].sort_values('mean_test_score', ascending=False).head(10))
print("We can expect an accuracy close to {:.1f}% on the testing set with the following parameters: \n{}. \nThe tuning time is {} sec.".format(max(cv_results['mean_test_score']*100), grid_cv.best_params_, round(logistic_easy_tune_time, 1)))

Unnamed: 0,param_C,mean_test_score,std_test_score
0,0.001,1.0,0.0
1,0.01,1.0,0.0
2,0.1,1.0,0.0
3,1.0,1.0,0.0
4,10.0,1.0,0.0
5,100.0,1.0,0.0
6,1000.0,1.0,0.0
7,10000.0,1.0,0.0


We can expect an accuracy close to 100.0% on the testing set with the following parameters: 
{'C': 0.001}. 
The tuning time is 0.3 sec.


In [94]:
logistic_easy = LogisticRegression(
    **grid_cv.best_params_, random_state=0)

In [95]:
start_time = time.time()
logistic_easy.fit(X_tr_easy, y_tr_easy)
logistic_easy_train_time = time.time() - start_time



In [96]:
logistic_easy_accuracy = logistic_easy.score(X_te_easy, y_te_easy)
print('Accuracy on Testing Set: accuracy: {:.1f}%.'.format(logistic_easy_accuracy*100))

Accuracy on Testing Set: accuracy: 100.0%.


In [97]:
logistic_easy_conf=pd.DataFrame(confusion_matrix(logistic_easy.predict(X_te_easy), y_te_easy), index=sorted(list(set(y_te_easy))), columns=sorted(list(set(y_te_easy))))/100
logistic_easy_conf

Unnamed: 0,horde,racingKings
horde,1.0,0.0
racingKings,0.0,1.0


In [98]:
logistic_easy_metrics=pd.DataFrame(classification_report(logistic_easy.predict(X_te_easy), y_te_easy, output_dict=True)).T.round(3)
logistic_easy_metrics[(logistic_easy_metrics.index.isin(easy_grp)) | (logistic_easy_metrics.index == 'weighted avg')]

Unnamed: 0,f1-score,precision,recall,support
horde,1.0,1.0,1.0,100.0
racingKings,1.0,1.0,1.0,100.0
weighted avg,1.0,1.0,1.0,200.0


# Accuracy logistic

In [99]:
y_pred = np.concatenate([logistic_easy.predict(X_te_easy), logistic_middle.predict(X_te_middle), logistic_hard.predict(X_te_hard)])
true_y = np.concatenate([y_te_easy, y_te_middle, y_te_hard])

We are then ready to calculate our relevant metrics.

In [100]:
logistic_nested_conf=pd.DataFrame(confusion_matrix(y_pred, true_y), index=sorted(list(set(variants))), columns=sorted(list(set(variants))))/100
logistic_nested_conf

Unnamed: 0,antichess,atomic,blitz,chess960,crazyhouse,horde,kingOfTheHill,racingKings,threeCheck
antichess,0.93,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
atomic,0.02,0.94,0.0,0.01,0.0,0.0,0.0,0.0,0.0
blitz,0.0,0.01,0.69,0.25,0.14,0.0,0.27,0.0,0.14
chess960,0.0,0.03,0.07,0.5,0.05,0.0,0.02,0.0,0.02
crazyhouse,0.02,0.0,0.0,0.07,0.75,0.0,0.0,0.0,0.06
horde,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
kingOfTheHill,0.01,0.0,0.16,0.08,0.01,0.0,0.63,0.0,0.15
racingKings,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
threeCheck,0.02,0.02,0.08,0.09,0.05,0.0,0.08,0.0,0.63


In [101]:
accuracy_score(y_pred, true_y)

0.7855555555555556

In [102]:
logistic_nested_metrics=pd.DataFrame(classification_report(y_pred, true_y, output_dict=True)).T.round(3)
logistic_nested_metrics

Unnamed: 0,f1-score,precision,recall,support
antichess,0.964,0.93,1.0,93.0
atomic,0.954,0.94,0.969,97.0
blitz,0.552,0.69,0.46,150.0
chess960,0.592,0.5,0.725,69.0
crazyhouse,0.789,0.75,0.833,90.0
horde,1.0,1.0,1.0,100.0
kingOfTheHill,0.618,0.63,0.606,104.0
racingKings,1.0,1.0,1.0,100.0
threeCheck,0.64,0.63,0.649,97.0
micro avg,0.786,0.786,0.786,900.0


# Going to the point

Now, let's clean the code to be able to run this procedure quickly and with the only goal of maximising the accuracy and retrieving the different metrics for a given model.

## Logistic

In [103]:
# "Easy model"
model = LogisticRegression()
grid = {'C': np.logspace(-3, 4, 8),}
log_easy_cv = GridSearchCV(model, grid, cv=kfold, verbose = 1,n_jobs = -1,return_train_score=True)
log_easy_cv.fit(X_tr_easy, y_tr_easy)
log_easy = LogisticRegression(**log_easy_cv.best_params_, random_state=0)

start_time = time.time()
log_easy.fit(X_tr_easy, y_tr_easy)
logistic_2nd_train_time = time.time() - start_time

# "Middle model"
model = LogisticRegression()
grid = {'C': np.logspace(-3, 4, 8),}
log_middle_cv = GridSearchCV(model, grid, cv=kfold, verbose = 1,n_jobs = -1,return_train_score=True)
log_middle_cv.fit(X_tr_middle, y_tr_middle)
log_middle = LogisticRegression(**log_middle_cv.best_params_, random_state=0)

start_time = time.time()
log_middle.fit(X_tr_middle, y_tr_middle)
logistic_2nd_train_time = logistic_2nd_train_time + time.time() - start_time

# "Hard model"
model = LogisticRegression()
grid = {'C': np.logspace(-3, 4, 8),}
log_hard_cv = GridSearchCV(model, grid, cv=kfold, verbose = 1,n_jobs = -1,return_train_score=True)
log_hard_cv.fit(X_tr_hard, y_tr_hard)
log_hard = LogisticRegression(**log_hard_cv.best_params_, random_state=0)

start_time = time.time()
log_hard.fit(X_tr_hard, y_tr_hard)
logistic_2nd_train_time = logistic_2nd_train_time + time.time() - start_time


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 5 folds for each of 8 candidates, totalling 40 fits


[Parallel(n_jobs=-1)]: Done  40 out of  40 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 5 folds for each of 8 candidates, totalling 40 fits


[Parallel(n_jobs=-1)]: Done  40 out of  40 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 5 folds for each of 8 candidates, totalling 40 fits


[Parallel(n_jobs=-1)]: Done  40 out of  40 | elapsed:    4.2s finished


In [104]:
y_pred = np.concatenate([log_easy.predict(X_te_easy), log_middle.predict(X_te_middle), log_hard.predict(X_te_hard)])
true_y = np.concatenate([y_te_easy, y_te_middle, y_te_hard])

In [105]:
logistic_nested_accuracy = accuracy_score(y_pred, true_y)
logistic_nested_accuracy

0.7855555555555556

In [106]:
logistic_nested_metrics=pd.DataFrame(classification_report(y_pred, true_y, output_dict=True)).T.round(3)

## SVM

In [107]:
# "Easy model"
model = SVC()
grid = {'C': np.logspace(-0,5,5), 'kernel': ['sigmoid', 'rbf'], 'gamma': np.logspace(-5,0,5)} 
svm_easy_cv = GridSearchCV(model, grid, cv=kfold, verbose = 1,n_jobs = -1,return_train_score=True)
svm_easy_cv.fit(X_tr_easy, y_tr_easy)
svm_easy = SVC(**svm_easy_cv.best_params_, random_state=0)

start_time = time.time()
svm_easy.fit(X_tr_easy, y_tr_easy)
svm_2nd_train_time = time.time() - start_time

# "Middle model"
model = SVC()
grid = {'C': np.logspace(-0,5,5), 'kernel': ['sigmoid', 'rbf'], 'gamma': np.logspace(-5,0,5)} 
svm_middle_cv = GridSearchCV(model, grid, cv=kfold, verbose = 1,n_jobs = -1,return_train_score=True)
svm_middle_cv.fit(X_tr_middle, y_tr_middle)
svm_middle = SVC(**svm_middle_cv.best_params_, random_state=0)

start_time = time.time()
svm_middle.fit(X_tr_middle, y_tr_middle)
svm_2nd_train_time = svm_2nd_train_time + time.time() - start_time

# "Hard model"
model = SVC()
grid = {'C': np.logspace(-0,5,5), 'kernel': ['sigmoid', 'rbf'], 'gamma': np.logspace(-5,0,5)} 
svm_hard_cv = GridSearchCV(model, grid, cv=kfold, verbose = 1,n_jobs = -1,return_train_score=True)
svm_hard_cv.fit(X_tr_hard, y_tr_hard)
svm_hard = SVC(**svm_hard_cv.best_params_, random_state=0)

start_time = time.time()
svm_hard.fit(X_tr_hard, y_tr_hard)
svm_2nd_train_time = svm_2nd_train_time + time.time() - start_time

Fitting 5 folds for each of 50 candidates, totalling 250 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done 100 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-1)]: Done 250 out of 250 | elapsed:    3.8s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 5 folds for each of 50 candidates, totalling 250 fits


[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 235 out of 250 | elapsed:    4.2s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done 250 out of 250 | elapsed:    4.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 5 folds for each of 50 candidates, totalling 250 fits


[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   10.6s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 250 out of 250 | elapsed:  3.4min finished


In [108]:
y_pred = np.concatenate([svm_easy.predict(X_te_easy), svm_middle.predict(X_te_middle), svm_hard.predict(X_te_hard)])
true_y = np.concatenate([y_te_easy, y_te_middle, y_te_hard])

In [109]:
svm_nested_accuracy = accuracy_score(y_pred, true_y)
svm_nested_accuracy

0.7911111111111111

In [110]:
svm_nested_metrics=pd.DataFrame(classification_report(y_pred, true_y, output_dict=True)).T.round(3)

## Random Forest

In [111]:
# "Easy model"
model = RandomForestClassifier()
grid = {'n_estimators': [20,100,500,750,1000], 'max_depth': [5,10,20,50,75,100]} 
rf_easy_cv = GridSearchCV(model, grid, cv=kfold, verbose = 1,n_jobs = -1,return_train_score=True)
rf_easy_cv.fit(X_tr_easy, y_tr_easy)
rf_easy = RandomForestClassifier(**rf_easy_cv.best_params_, random_state=0)

start_time = time.time()
rf_easy.fit(X_tr_easy, y_tr_easy)
rf_2nd_train_time = time.time() - start_time

# "Middle model"
model = RandomForestClassifier()
grid = {'n_estimators': [20,100,500,750,1000], 'max_depth': [5,10,20,50,75,100]} 
rf_middle_cv = GridSearchCV(model, grid, cv=kfold, verbose = 1,n_jobs = -1,return_train_score=True)
rf_middle_cv.fit(X_tr_middle, y_tr_middle)
rf_middle = RandomForestClassifier(**rf_middle_cv.best_params_, random_state=0)

start_time = time.time()
rf_middle.fit(X_tr_middle, y_tr_middle)
rf_2nd_train_time = rf_2nd_train_time + time.time() - start_time

# "Hard model"
model = RandomForestClassifier()
grid = {'n_estimators': [20,100,500,750,1000], 'max_depth': [5,10,20,50,75,100]} 
rf_hard_cv = GridSearchCV(model, grid, cv=kfold, verbose = 1,n_jobs = -1,return_train_score=True)
rf_hard_cv.fit(X_tr_hard, y_tr_hard)
rf_hard = RandomForestClassifier(**rf_hard_cv.best_params_, random_state=0)

start_time = time.time()
rf_hard.fit(X_tr_hard, y_tr_hard)
rf_2nd_train_time = rf_2nd_train_time + time.time() - start_time

Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:   32.9s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  1.0min finished


Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed:   34.3s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  3.5min finished


In [112]:
y_pred = np.concatenate([rf_easy.predict(X_te_easy), rf_middle.predict(X_te_middle), rf_hard.predict(X_te_hard)])
true_y = np.concatenate([y_te_easy, y_te_middle, y_te_hard])

In [113]:
rf_nested_accuracy = accuracy_score(y_pred, true_y)
rf_nested_accuracy 

0.7577777777777778

In [114]:
rf_nested_metrics=pd.DataFrame(classification_report(y_pred, true_y, output_dict=True)).T.round(3)

## Neural network

In [115]:
# "Easy model"
model = MLPClassifier()
grid = {'hidden_layer_sizes': [5,10,15,20,50,100],
        'alpha': np.logspace(-5,5,5),
        'solver': ['adam', 'sgd'],
        'activation': ['logistic', 'relu'],
        'early_stopping': [True]
       } 
nnet_easy_cv = GridSearchCV(model, grid, cv=kfold, verbose = 1,n_jobs = -1,return_train_score=True)
nnet_easy_cv.fit(X_tr_easy, y_tr_easy)
nnet_easy = MLPClassifier(**nnet_easy_cv.best_params_, random_state=0)

start_time = time.time()
nnet_easy.fit(X_tr_easy, y_tr_easy)
nnet_2nd_train_time = time.time() - start_time

# "Middle model"
model = MLPClassifier()
grid = {'hidden_layer_sizes': [5,10,15,20,50,100],
        'alpha': np.logspace(-5,5,5),
        'solver': ['adam', 'sgd'],
        'activation': ['logistic', 'relu'],
        'early_stopping': [True]
       } 
nnet_middle_cv = GridSearchCV(model, grid, cv=kfold, verbose = 1,n_jobs = -1,return_train_score=True)
nnet_middle_cv.fit(X_tr_middle, y_tr_middle)
nnet_middle = MLPClassifier(**nnet_middle_cv.best_params_, random_state=0)

start_time = time.time()
nnet_middle.fit(X_tr_middle, y_tr_middle)
nnet_2nd_train_time = nnet_2nd_train_time + time.time() - start_time

# "Hard model"
model = MLPClassifier()
grid = {'hidden_layer_sizes': [5,10,15,20,50,100],
        'alpha': np.logspace(-5,5,5),
        'solver': ['adam', 'sgd'],
        'activation': ['logistic', 'relu'],
        'early_stopping': [True]
       } 
nnet_hard_cv = GridSearchCV(model, grid, cv=kfold, verbose = 1,n_jobs = -1,return_train_score=True)
nnet_hard_cv.fit(X_tr_hard, y_tr_hard)
nnet_hard = MLPClassifier(**nnet_hard_cv.best_params_, random_state=0)

start_time = time.time()
nnet_hard.fit(X_tr_hard, y_tr_hard)
nnet_2nd_train_time = nnet_2nd_train_time + time.time() - start_time



Fitting 5 folds for each of 120 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:   33.9s
[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:  2.8min finished


Fitting 5 folds for each of 120 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  52 tasks      | elapsed:   20.6s
[Parallel(n_jobs=-1)]: Done 212 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 462 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:  2.8min finished


Fitting 5 folds for each of 120 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    9.6s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed:  3.3min
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:  4.0min finished


In [116]:
y_pred = np.concatenate([nnet_easy.predict(X_te_easy), nnet_middle.predict(X_te_middle), nnet_hard.predict(X_te_hard)])
true_y = np.concatenate([y_te_easy, y_te_middle, y_te_hard])

In [117]:
nnet_nested_accuracy=accuracy_score(y_pred, true_y)
nnet_nested_accuracy

0.7511111111111111

In [118]:
nnet_nested_metrics=pd.DataFrame(classification_report(y_pred, true_y, output_dict=True)).T.round(3)

# Results

In [119]:
logistic_nested_scores_avg=list(logistic_nested_metrics.loc['weighted avg',:])[:3]
logistic_nested_scores_blitz=list(logistic_nested_metrics.loc['blitz',:])
rf_nested_scores_avg=list(rf_nested_metrics.loc['weighted avg',:])[:3]
rf_nested_scores_blitz=list(rf_nested_metrics.loc['blitz',:])
svm_nested_scores_avg=list(svm_nested_metrics.loc['weighted avg',:])[:3]
svm_nested_scores_blitz=list(svm_nested_metrics.loc['blitz',:])
nnet_nested_scores_avg=list(logistic_nested_metrics.loc['weighted avg',:])[:3]
nnet_nested_scores_blitz=list(logistic_nested_metrics.loc['blitz',:])

In [120]:
# total train time = train time in the first iteration + sum of all training time for all 3 subsets in the 2nd iteration
logistic_nested_train_time = svm_train_time + logistic_2nd_train_time
rf_nested_train_time = svm_train_time + rf_2nd_train_time
svm_nested_train_time = svm_train_time + svm_2nd_train_time
nnet_nested_train_time = svm_train_time + nnet_2nd_train_time

In [121]:
results_nested_1=pd.DataFrame({
              'logistic': [logistic_nested_accuracy, logistic_nested_train_time, logistic_nested_scores_avg[0], logistic_nested_scores_avg[1], logistic_nested_scores_avg[2], logistic_nested_scores_blitz[0], logistic_nested_scores_blitz[1], logistic_nested_scores_blitz[2], logistic_nested_scores_blitz[3]],
              'random forest': [rf_nested_accuracy, rf_nested_train_time, rf_nested_scores_avg[0], rf_nested_scores_avg[1], rf_nested_scores_avg[2], rf_nested_scores_avg[0], rf_nested_scores_blitz[1], rf_nested_scores_blitz[2], rf_nested_scores_blitz[3]],
              'SVM': [svm_nested_accuracy, svm_nested_train_time, svm_nested_scores_avg[0], svm_nested_scores_avg[1], svm_nested_scores_avg[2], svm_nested_scores_blitz[0], svm_nested_scores_blitz[1], svm_nested_scores_blitz[2], svm_nested_scores_blitz[3]],
              'NN': [nnet_nested_accuracy, nnet_nested_train_time, nnet_nested_scores_avg[0], nnet_nested_scores_avg[1], nnet_nested_scores_avg[2], nnet_nested_scores_blitz[0], nnet_nested_scores_blitz[1], nnet_nested_scores_blitz[2], nnet_nested_scores_blitz[3]],
}, index = ['Accuracy', 'Training time', 'f1_avg', 'precision_avg', 'recall_avg', 'f1_blitz', 'precision_blitz', 'recall_blitz', 'support_blitz']).round(3).T.sort_values('Accuracy', ascending=False)
results_nested_1

Unnamed: 0,Accuracy,Training time,f1_avg,precision_avg,recall_avg,f1_blitz,precision_blitz,recall_blitz,support_blitz
SVM,0.791,9.414,0.786,0.798,0.791,0.559,0.73,0.453,161.0
logistic,0.786,9.152,0.781,0.789,0.786,0.552,0.69,0.46,150.0
random forest,0.758,23.757,0.755,0.761,0.758,0.755,0.6,0.432,139.0
NN,0.751,33.932,0.781,0.789,0.786,0.552,0.69,0.46,150.0


In [122]:
results_nested_1.to_pickle('datasets/results_nested_1')

The results are clearly better than for the first iteration, but still far from our best performing models.