In [1]:
import pandas as pd
from scipy.io import arff
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, f1_score, roc_auc_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

In [2]:
data = arff.loadarff('EEG Eye State.arff')
eeg_data = pd.DataFrame(data[0])
eeg_data.head()

Unnamed: 0,AF3,F7,F3,FC5,T7,P7,O1,O2,P8,T8,FC6,F4,F8,AF4,eyeDetection
0,4329.23,4009.23,4289.23,4148.21,4350.26,4586.15,4096.92,4641.03,4222.05,4238.46,4211.28,4280.51,4635.9,4393.85,b'0'
1,4324.62,4004.62,4293.85,4148.72,4342.05,4586.67,4097.44,4638.97,4210.77,4226.67,4207.69,4279.49,4632.82,4384.1,b'0'
2,4327.69,4006.67,4295.38,4156.41,4336.92,4583.59,4096.92,4630.26,4207.69,4222.05,4206.67,4282.05,4628.72,4389.23,b'0'
3,4328.72,4011.79,4296.41,4155.9,4343.59,4582.56,4097.44,4630.77,4217.44,4235.38,4210.77,4287.69,4632.31,4396.41,b'0'
4,4326.15,4011.79,4292.31,4151.28,4347.69,4586.67,4095.9,4627.69,4210.77,4244.1,4212.82,4288.21,4632.82,4398.46,b'0'


In [3]:
#Have to change 'eyeDetection' values to stricly be 1 and -1
eeg_data['eyeDetection'] = np.where((eeg_data['eyeDetection'] == "b'0'"), -1, eeg_data['eyeDetection'])
eeg_data['eyeDetection'] = np.where((eeg_data['eyeDetection'] == "b'1'"), 1, eeg_data['eyeDetection'])

#Ensure all values are numeric
eeg_data['eyeDetection'] = pd.to_numeric(eeg_data['eyeDetection'])

#Value counts
eeg_data['eyeDetection'].value_counts()

0    8257
1    6723
Name: eyeDetection, dtype: int64

In [4]:
#Change 0 values to -1
eeg_data['eyeDetection'] = np.where((eeg_data['eyeDetection'] == 0), -1, eeg_data['eyeDetection'])
eeg_data['eyeDetection'].value_counts()

-1    8257
 1    6723
Name: eyeDetection, dtype: int64

In [7]:
#Now I have to standardize all numerical columns
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

def scale_columns(dataset, cols_scale):
    for col in cols_scale:
        dataset[col] = pd.DataFrame(scaler.fit_transform(pd.DataFrame(eeg_data[col])), columns=[col])
    return dataset

In [8]:
scaled_eeg_data = scale_columns(eeg_data, [i for i in list(eeg_data.columns) if i not in ['eyeDetection']])
scaled_eeg_data.head()

Unnamed: 0,AF3,F7,F3,FC5,T7,P7,O1,O2,P8,T8,FC6,F4,F8,AF4,eyeDetection
0,0.002934,-0.011704,0.567398,-0.003209,0.245236,-0.019788,-0.00293,0.852568,0.001509,0.18775,0.23351,0.030745,0.017127,-0.003834,-1
1,0.001084,-0.112052,0.67139,-0.003111,0.008893,-0.01961,-0.002817,0.782241,-0.003771,-0.122109,0.138498,0.006192,0.014578,-0.005489,-1
2,0.002316,-0.067429,0.705829,-0.001636,-0.138785,-0.020663,-0.00293,0.484886,-0.005213,-0.243529,0.111503,0.067815,0.011185,-0.004618,-1
3,0.00273,0.04402,0.729013,-0.001734,0.053225,-0.021015,-0.002817,0.502297,-0.000649,0.106803,0.220012,0.203578,0.014156,-0.003399,-1
4,0.001698,0.04402,0.636726,-0.00262,0.171253,-0.01961,-0.003152,0.397148,-0.003771,0.335977,0.274267,0.216095,0.014578,-0.003051,-1


In [10]:
#Collecting all the samples we will need for each algorithm
all_samples = []

for sample in range(0,5):
    all_samples.append(scaled_eeg_data.sample(n=5000, replace=False))

In [11]:
len(all_samples)

5

In [46]:
len(scaled_eeg_data)

14980

In [47]:
scaled_eeg_data['eyeDetection'].value_counts()

-1    8257
 1    6723
Name: eyeDetection, dtype: int64

# Logistic Regression

In [12]:
#Lists to save models and training/testing sets
all_models = []
X_training_sets = []
y_training_sets = []
X_testing_sets = []
y_testing_sets = []

#lists to save best params
accuracy_best_params = []
roc_auc_best_params = []
f1_best_params = []

#looping across each sample for each trial
for sample in all_samples:
    X_train = sample.iloc[:, :-1]
    y_train = sample.iloc[:, -1]
    X_training_sets.append(X_train)
    y_training_sets.append(y_train)
    
    #Separating rows for the test set that were not in the sample
    ix = [i for i in scaled_eeg_data.index if i not in sample.index]
    test_set = scaled_eeg_data.loc[ix]
    X_test = test_set.iloc[:, :-1]
    y_test = test_set.iloc[:, -1]
    X_testing_sets.append(X_test)
    y_testing_sets.append(y_test)
    
    #Initiating classifier
    log_reg = LogisticRegression(max_iter=10000)
    
    grid_values = [{'solver':['saga'],
                   'penalty': ['l1', 'l2'],
                   'C': [.00000001, .0000001, .000001, .00001, .0001, .001, .01, .1, 1, 10, 100, 1000, 10000]},
                   {'solver':['lbfgs'],
                   'penalty': ['l2'],
                   'C': [.00000001, .0000001, .000001, .00001, .0001, .001, .01, .1, 1, 10, 100, 1000, 10000]},
                   {'solver':['lbfgs', 'saga'],
                   'penalty': ['none'],}
                   ]
    clf = GridSearchCV(estimator = log_reg, param_grid = grid_values, cv = StratifiedKFold(n_splits=5),
                      scoring = ['accuracy', 'roc_auc_ovr', 'f1_micro'], refit=False, verbose=0)
    
    model = clf.fit(X_train, y_train)
    
    accuracy_best_params.append(model.cv_results_['params'][np.argmin(model.cv_results_['rank_test_accuracy'])])
    roc_auc_best_params.append(model.cv_results_['params'][np.argmin(model.cv_results_['rank_test_roc_auc_ovr'])])
    f1_best_params.append(model.cv_results_['params'][np.argmin(model.cv_results_['rank_test_f1_micro'])])
    
    all_models.append(model)

In [13]:
accuracy_best_params

[{'penalty': 'none', 'solver': 'lbfgs'},
 {'C': 1000, 'penalty': 'l2', 'solver': 'lbfgs'},
 {'penalty': 'none', 'solver': 'lbfgs'},
 {'C': 10000, 'penalty': 'l2', 'solver': 'lbfgs'},
 {'penalty': 'none', 'solver': 'lbfgs'}]

In [14]:
roc_auc_best_params

[{'penalty': 'none', 'solver': 'lbfgs'},
 {'penalty': 'none', 'solver': 'lbfgs'},
 {'penalty': 'none', 'solver': 'lbfgs'},
 {'penalty': 'none', 'solver': 'lbfgs'},
 {'penalty': 'none', 'solver': 'lbfgs'}]

In [15]:
f1_best_params

[{'penalty': 'none', 'solver': 'lbfgs'},
 {'C': 1000, 'penalty': 'l2', 'solver': 'lbfgs'},
 {'penalty': 'none', 'solver': 'lbfgs'},
 {'C': 10000, 'penalty': 'l2', 'solver': 'lbfgs'},
 {'penalty': 'none', 'solver': 'lbfgs'}]

In [48]:
#Now we need to loop across all models from each metric, train on entire sample, and then predict on test set
accuracy_test_errors = []
accuracy_train_errors = []
for param in accuracy_best_params:
    
    n = 0
    if 'C' in param:
        
        logreg_clf = LogisticRegression(penalty = param['penalty'], solver = param['solver'], C = param['C'], max_iter = 10000)
        model = logreg_clf.fit(X_training_sets[n], y_training_sets[n])
    else:
        
        logreg_clf = LogisticRegression(penalty = param['penalty'], solver = param['solver'],  max_iter = 10000)
        model = logreg_clf.fit(X_training_sets[n], y_training_sets[n])
    
    y_pred = model.predict(X_testing_sets[n])
    accuracy_test_errors.append(accuracy_score(y_testing_sets[n], y_pred))
    
    y_pred_train = model.predict(X_training_sets[n])
    accuracy_train_errors.append(accuracy_score(y_training_sets[n], y_pred_train))
    
    n += 1

In [49]:
accuracy_test_errors

[0.6366733466933868,
 0.6333667334669338,
 0.6366733466933868,
 0.6371743486973948,
 0.6366733466933868]

In [50]:
accuracy_train_errors

[0.6432, 0.6358, 0.6432, 0.6402, 0.6432]

In [51]:
roc_auc_test_errors = []
roc_auc_train_errors = []
for param in roc_auc_best_params:
    
    n = 0
    
    if 'C' in param:
        
        logreg_clf = LogisticRegression(penalty = param['penalty'], solver = param['solver'], C = param['C'], max_iter = 10000)
        model = logreg_clf.fit(X_training_sets[n], y_training_sets[n])
    else:
        
        logreg_clf = LogisticRegression(penalty = param['penalty'], solver = param['solver'],  max_iter = 10000)
        model = logreg_clf.fit(X_training_sets[n], y_training_sets[n])
    
    y_pred2 = model.predict(X_testing_sets[n])
    roc_auc_test_errors.append(roc_auc_score(y_testing_sets[n], y_pred2))
    
    y_pred_train2 = model.predict(X_training_sets[n])
    roc_auc_train_errors.append(roc_auc_score(y_training_sets[n], y_pred_train2))
    
    n += 1

In [52]:
roc_auc_test_errors

[0.6231276136836025,
 0.6231276136836025,
 0.6231276136836025,
 0.6231276136836025,
 0.6231276136836025]

In [53]:
roc_auc_train_errors 

[0.6324617101413792,
 0.6324617101413792,
 0.6324617101413792,
 0.6324617101413792,
 0.6324617101413792]

In [54]:
f1_score_test_errors = []
f1_score_train_errors = []
for param in f1_best_params:
    
    n = 0
    
    if 'C' in param:
        
        logreg_clf = LogisticRegression(penalty = param['penalty'], solver = param['solver'], C = param['C'], max_iter = 10000)
        model = logreg_clf.fit(X_training_sets[n], y_training_sets[n])
    else:
        
        logreg_clf = LogisticRegression(penalty = param['penalty'], solver = param['solver'],  max_iter = 10000)
        model = logreg_clf.fit(X_training_sets[n], y_training_sets[n])
    
    y_pred3 = model.predict(X_testing_sets[n])
    f1_score_test_errors.append(f1_score(y_testing_sets[n], y_pred3))
    
    y_pred_train3 = model.predict(X_training_sets[n])
    f1_score_train_errors.append(f1_score(y_training_sets[n], y_pred_train3))
    
    
    n += 1

In [55]:
f1_score_test_errors

[0.5502356735301415,
 0.5433670285785599,
 0.5502356735301415,
 0.5505771378925157,
 0.5502356735301415]

In [56]:
f1_score_train_errors

[0.5665694849368319,
 0.5546588407923698,
 0.5665694849368319,
 0.5623935782048164,
 0.5665694849368319]

# KNN

In [25]:
#Lists to save models and training/testing sets
all_models_knn = []
X_training_sets_knn = []
y_training_sets_knn = []
X_testing_sets_knn = []
y_testing_sets_knn = []

#lists to save best params
accuracy_best_params_knn = []
roc_auc_best_params_knn = []
f1_best_params_knn = []

#looping across each sample for each trial
for sample in all_samples:
    X_train_knn = sample.iloc[:, :-1]
    y_train_knn = sample.iloc[:, -1]
    X_training_sets_knn.append(X_train_knn)
    y_training_sets_knn.append(y_train_knn)
    
    #Separating rows for the test set that were not in the sample
    ix_knn = [i for i in scaled_eeg_data.index if i not in sample.index]
    test_set_knn = scaled_eeg_data.loc[ix]
    X_test_knn = test_set_knn.iloc[:, :-1]
    y_test_knn = test_set_knn.iloc[:, -1]
    X_testing_sets_knn.append(X_test_knn)
    y_testing_sets_knn.append(y_test_knn)
    
    #Initiating classifier
    knn = KNeighborsClassifier()
    
    grid_values = {'n_neighbors' : list(range(1,105,4))}
    
    clf_knn = GridSearchCV(estimator = knn, param_grid = grid_values, cv = StratifiedKFold(n_splits=5),
                      scoring = ['accuracy', 'roc_auc_ovr', 'f1_micro'], refit=False, verbose=0)
    
    model_knn = clf_knn.fit(X_train_knn, y_train_knn)
    
    accuracy_best_params_knn.append(model_knn.cv_results_['params'][np.argmin(model_knn.cv_results_['rank_test_accuracy'])])
    roc_auc_best_params_knn.append(model_knn.cv_results_['params'][np.argmin(model_knn.cv_results_['rank_test_roc_auc_ovr'])])
    f1_best_params_knn.append(model_knn.cv_results_['params'][np.argmin(model_knn.cv_results_['rank_test_f1_micro'])])
    
    all_models_knn.append(model_knn)
    

In [26]:
accuracy_best_params_knn

[{'n_neighbors': 5},
 {'n_neighbors': 1},
 {'n_neighbors': 1},
 {'n_neighbors': 5},
 {'n_neighbors': 1}]

In [27]:
roc_auc_best_params_knn

[{'n_neighbors': 5},
 {'n_neighbors': 5},
 {'n_neighbors': 9},
 {'n_neighbors': 9},
 {'n_neighbors': 5}]

In [28]:
f1_best_params_knn

[{'n_neighbors': 5},
 {'n_neighbors': 1},
 {'n_neighbors': 1},
 {'n_neighbors': 5},
 {'n_neighbors': 1}]

In [57]:
#Now we need to loop across all models from each metric, train on entire sample, and then predict on test set
accuracy_test_errors_knn = []
accuracy_train_errors_knn = []
for param in accuracy_best_params_knn:
    
    n_knn = 0
    
    knn_clf = KNeighborsClassifier(n_neighbors = param['n_neighbors'])
    model = knn_clf.fit(X_training_sets_knn[n_knn], y_training_sets_knn[n_knn])
    
    y_pred_knn = model.predict(X_testing_sets_knn[n_knn])
    accuracy_test_errors_knn.append(accuracy_score(y_testing_sets_knn[n_knn], y_pred_knn))
    
    y_pred_train_knn = model.predict(X_training_sets_knn[n_knn])
    accuracy_train_errors_knn.append(accuracy_score(y_training_sets_knn[n_knn], y_pred_train_knn))
    
    n_knn += 1

In [58]:
accuracy_test_errors_knn

[0.8207414829659319,
 0.8652304609218436,
 0.8652304609218436,
 0.8207414829659319,
 0.8652304609218436]

In [59]:
accuracy_train_errors_knn 

[0.8766, 1.0, 1.0, 0.8766, 1.0]

In [60]:
roc_auc_test_errors_knn = []
roc_auc_train_errors_knn = []
for param in roc_auc_best_params_knn:
    
    n_knn = 0
    
    knn_clf2 = KNeighborsClassifier(n_neighbors = param['n_neighbors'])
    model = knn_clf2.fit(X_training_sets_knn[n_knn], y_training_sets_knn[n_knn])
    
    y_pred_knn2 = model.predict(X_testing_sets_knn[n_knn])
    roc_auc_test_errors_knn.append(roc_auc_score(y_testing_sets_knn[n_knn], y_pred_knn2))
    
    y_pred_train_knn2 = model.predict(X_training_sets_knn[n_knn])
    roc_auc_train_errors_knn.append(roc_auc_score(y_training_sets_knn[n_knn], y_pred_train_knn2))
    
    n_knn += 1

In [61]:
roc_auc_test_errors_knn

[0.8175869828047664,
 0.8175869828047664,
 0.8029711174314488,
 0.8029711174314488,
 0.8175869828047664]

In [62]:
roc_auc_train_errors_knn

[0.8744413752237604,
 0.8744413752237604,
 0.8463682474524097,
 0.8463682474524097,
 0.8744413752237604]

In [63]:
f1_score_test_errors_knn = []
f1_score_train_errors_knn = []
for param in f1_best_params_knn:
    
    n_knn = 0
    
    knn_clf3 = KNeighborsClassifier(n_neighbors = param['n_neighbors'])
    model = knn_clf3.fit(X_training_sets_knn[n_knn], y_training_sets_knn[n_knn])
    
    y_pred_knn3 = model.predict(X_testing_sets_knn[n_knn])
    f1_score_test_errors_knn.append(f1_score(y_testing_sets_knn[n_knn], y_pred_knn3))
    
    y_pred_train_knn3 = model.predict(X_training_sets_knn[n_knn])
    f1_score_train_errors_knn.append(f1_score(y_training_sets_knn[n_knn], y_pred_train_knn3))
    
    
    n_knn += 1

In [64]:
f1_score_test_errors_knn

[0.7975557315831164,
 0.850339379103149,
 0.850339379103149,
 0.7975557315831164,
 0.850339379103149]

In [65]:
f1_score_train_errors_knn

[0.8625529071062598, 1.0, 1.0, 0.8625529071062598, 1.0]

# Decision Tree

In [36]:
#Lists to save models and training/testing sets
all_models_dt = []
X_training_sets_dt = []
y_training_sets_dt = []
X_testing_sets_dt = []
y_testing_sets_dt = []

#lists to save best params
accuracy_best_params_dt = []
roc_auc_best_params_dt = []
f1_best_params_dt = []

#looping across each sample for each trial
for sample in all_samples:
    X_train_dt = sample.iloc[:, :-1]
    y_train_dt = sample.iloc[:, -1]
    X_training_sets_dt.append(X_train_dt)
    y_training_sets_dt.append(y_train_dt)
    
    #Separating rows for the test set that were not in the sample
    ix_dt = [i for i in scaled_eeg_data.index if i not in sample.index]
    test_set_dt = scaled_eeg_data.loc[ix_dt]
    X_test_dt = test_set_dt.iloc[:, :-1]
    y_test_dt = test_set_dt.iloc[:, -1]
    X_testing_sets_dt.append(X_test_dt)
    y_testing_sets_dt.append(y_test_dt)
    
    #Initiating classifier
    dt = DecisionTreeClassifier()
    
    grid_values = [{'criterion': ['gini', 'entropy'], 'max_depth':list(range(1,100,3)), 'min_samples_leaf': list(range(10,100,10))}]
    
    clf_dt = GridSearchCV(estimator = dt, param_grid = grid_values, cv = StratifiedKFold(n_splits=5),
                      scoring = ['accuracy', 'roc_auc_ovr', 'f1_micro'], refit=False, verbose=0)
    
    model_dt = clf_dt.fit(X_train_dt, y_train_dt)
    
    accuracy_best_params_dt.append(model_dt.cv_results_['params'][np.argmin(model_dt.cv_results_['rank_test_accuracy'])])
    roc_auc_best_params_dt.append(model_dt.cv_results_['params'][np.argmin(model_dt.cv_results_['rank_test_roc_auc_ovr'])])
    f1_best_params_dt.append(model_dt.cv_results_['params'][np.argmin(model_dt.cv_results_['rank_test_f1_micro'])])
    
    all_models_dt.append(model_dt)

In [37]:
accuracy_best_params_dt

[{'criterion': 'entropy', 'max_depth': 13, 'min_samples_leaf': 10},
 {'criterion': 'entropy', 'max_depth': 91, 'min_samples_leaf': 10},
 {'criterion': 'entropy', 'max_depth': 16, 'min_samples_leaf': 10},
 {'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 10},
 {'criterion': 'entropy', 'max_depth': 13, 'min_samples_leaf': 10}]

In [38]:
roc_auc_best_params_dt

[{'criterion': 'entropy', 'max_depth': 13, 'min_samples_leaf': 10},
 {'criterion': 'entropy', 'max_depth': 67, 'min_samples_leaf': 20},
 {'criterion': 'entropy', 'max_depth': 13, 'min_samples_leaf': 10},
 {'criterion': 'gini', 'max_depth': 40, 'min_samples_leaf': 20},
 {'criterion': 'entropy', 'max_depth': 79, 'min_samples_leaf': 10}]

In [39]:
f1_best_params_dt

[{'criterion': 'entropy', 'max_depth': 13, 'min_samples_leaf': 10},
 {'criterion': 'entropy', 'max_depth': 91, 'min_samples_leaf': 10},
 {'criterion': 'entropy', 'max_depth': 16, 'min_samples_leaf': 10},
 {'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 10},
 {'criterion': 'entropy', 'max_depth': 13, 'min_samples_leaf': 10}]

In [66]:
#Now we need to loop across all models from each metric, train on entire sample, and then predict on test set
accuracy_test_errors_dt = []
accuracy_train_errors_dt = []
for param in accuracy_best_params_dt:
    
    n_dt = 0
    
    dt_clf = DecisionTreeClassifier(criterion = param['criterion'], max_depth = param['max_depth'],
                                   min_samples_leaf = param['min_samples_leaf'])
    model = dt_clf.fit(X_training_sets_dt[n_dt], y_training_sets_dt[n_dt])
    
    y_pred_dt = model.predict(X_testing_sets_dt[n_dt])
    accuracy_test_errors_dt.append(accuracy_score(y_testing_sets_dt[n_dt], y_pred_dt))
    
    y_pred_train_dt = model.predict(X_training_sets_dt[n_dt])
    accuracy_train_errors_dt.append(accuracy_score(y_training_sets_dt[n_dt], y_pred_train_dt))
    
    n_dt += 1

In [67]:
accuracy_test_errors_dt

[0.7828657314629258,
 0.7907815631262525,
 0.7862725450901804,
 0.7805611222444889,
 0.7826653306613226]

In [68]:
accuracy_train_errors_dt

[0.8804, 0.8938, 0.8912, 0.8652, 0.8804]

In [69]:
roc_auc_test_errors_dt = []
roc_auc_train_errors_dt = []
for param in roc_auc_best_params_dt:
    
    n_dt = 0
    
    dt_clf2 = DecisionTreeClassifier(criterion = param['criterion'], max_depth = param['max_depth'],
                                   min_samples_leaf = param['min_samples_leaf'])
    model = dt_clf2.fit(X_training_sets_dt[n_dt], y_training_sets_dt[n_dt])
    
    y_pred_dt2 = model.predict(X_testing_sets_dt[n_dt])
    roc_auc_test_errors_dt.append(roc_auc_score(y_testing_sets_dt[n_dt], y_pred_dt2))
    
    y_pred_train_dt2 = model.predict(X_training_sets_dt[n_dt])
    roc_auc_train_errors_dt.append(roc_auc_score(y_training_sets_dt[n_dt], y_pred_train_dt2))
    
    n_dt += 1

In [70]:
roc_auc_test_errors_dt

[0.7758624922187968,
 0.7688116125384306,
 0.7779214526451702,
 0.7713067592948901,
 0.7868790014568267]

In [71]:
roc_auc_train_errors_dt

[0.87460040053088,
 0.8445774386313115,
 0.8783258022109034,
 0.8458987518448791,
 0.8931138655068463]

In [72]:
f1_score_test_errors_dt = []
f1_score_train_errors_dt = []
for param in f1_best_params_dt:
    
    n_dt = 0
    
    dt_clf3 = DecisionTreeClassifier(criterion = param['criterion'], max_depth = param['max_depth'],
                                   min_samples_leaf = param['min_samples_leaf'])
    model = dt_clf3.fit(X_training_sets_dt[n_dt], y_training_sets_dt[n_dt])
    
    y_pred_dt3 = model.predict(X_testing_sets_dt[n_dt])
    f1_score_test_errors_dt.append(f1_score(y_testing_sets_dt[n_dt], y_pred_dt3))
    
    y_pred_train_dt3 = model.predict(X_training_sets_dt[n_dt])
    f1_score_train_errors_dt.append(f1_score(y_training_sets_dt[n_dt], y_pred_train_dt3))
    
    n_dt += 1

In [73]:
f1_score_test_errors_dt

[0.7528334287349743,
 0.7595454545454545,
 0.7554790590935169,
 0.7469740634005763,
 0.7494538346556283]

In [74]:
f1_score_train_errors_dt

[0.867244829886591,
 0.883956043956044,
 0.8792912513842746,
 0.8495535714285715,
 0.8668744434550312]