In [1]:
import os
import pandas as pd
from sklearn.model_selection import StratifiedShuffleSplit

from dimensionality_reduction import reduce_dimension
import load_database
from algorithms import *

In [2]:
# Install a global "ignore" filter: with no category/module arguments this
# suppresses every warning raised for the rest of the kernel session.
# NOTE(review): this also hides any warnings emitted by the models trained
# below — consider narrowing the filter to specific categories.
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Run parameters are read from environment variables; a missing variable
# raises KeyError at the corresponding line.
# Dataset identifier, later passed to load_database.load().
database_name = os.environ['DATABASE']
# Number of components, parsed as int (a non-integer value raises ValueError).
# A value of 0 is falsy and makes the later `if n_components` guard skip
# dimensionality reduction.
n_components = int(os.environ['N_COMPONENTS'])
# Identifier passed as the first argument to reduce_dimension().
dimensionality_algorithm = os.environ['DIMENSIONALITY_ALGORITHM']

In [4]:
# Relative path of the CSV written at the end of the notebook; the file name
# encodes the dataset, the component count and the reduction algorithm.
result_path = 'results/%s_%s_%s.csv' %(database_name, n_components, dimensionality_algorithm)

In [5]:
# Load the features and labels for the selected dataset.
X, y = load_database.load(database_name)

# Reduce dimensionality only when a non-zero component count was requested;
# with n_components == 0 the loaded features are used unchanged.
if n_components:
    X = reduce_dimension(dimensionality_algorithm, X, n_components)

In [6]:
# Display the shape of X after loading and the optional reduction step.
X.shape

(1797, 128)

In [7]:
# Accumulator for the train_test outputs: each later cell merges its return
# value in with results.update(...), and the dict is finally converted to a
# DataFrame and written to result_path.
results = {}

In [8]:
# Hold out 20% of the samples for testing with a single stratified shuffle
# split (stratified on y).
# The splitter is seeded so that re-running the notebook evaluates every
# algorithm on the same partition; the seed can be overridden through the
# optional RANDOM_STATE environment variable (defaults to 42).
random_state = int(os.environ.get('RANDOM_STATE', '42'))
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=random_state)

# n_splits=1, so the generator yields exactly one (train, test) index pair.
train_index, test_index = next(sss.split(X, y))

# Select rows by index array — assumes X and y support NumPy-style fancy
# indexing (TODO confirm against load_database / reduce_dimension).
X_train, X_test = X[train_index], X[test_index]
y_train, y_test = y[train_index], y[test_index]

In [9]:
# Run train_test for the 'ada_boost' algorithm key on the train/test split and
# merge the mapping it returns into `results` (dict.update overwrites same keys).
# NOTE(review): train_test is not defined in this notebook — presumably it comes
# from `from algorithms import *`; confirm its return structure there.
result = train_test(X_train, y_train, X_test, y_test, 'ada_boost')
results.update(result)

1.3284530000000032
{'algorithm': 'SAMME', 'learning_rate': 0.9, 'n_estimators': 90}
0.7856645789839944
0.8469032707028532 0.8305555555555556 0.8475508944437664 0.8303062516097304


In [10]:
# Run train_test for the 'bagging' algorithm key on the train/test split and
# merge the mapping it returns into `results` (dict.update overwrites same keys).
# NOTE(review): train_test is not defined in this notebook — presumably it comes
# from `from algorithms import *`; confirm its return structure there.
result = train_test(X_train, y_train, X_test, y_test, 'bagging')
results.update(result)

1.6497170000000025
{'bootstrap_features': 1, 'n_estimators': 45}
0.9095337508698678
1.0 0.9305555555555556 1.0 0.9295164670949488


In [11]:
# Run train_test for the 'extra_trees' algorithm key on the train/test split and
# merge the mapping it returns into `results` (dict.update overwrites same keys).
# NOTE(review): train_test is not defined in this notebook — presumably it comes
# from `from algorithms import *`; confirm its return structure there.
result = train_test(X_train, y_train, X_test, y_test, 'extra_trees')
results.update(result)

0.11852300000000326
{'criterion': 'gini', 'n_estimators': 40, 'warm_start': 1}
0.9464161447459986
1.0 0.9416666666666667 1.0 0.940436753810146


In [12]:
# Run train_test for the 'random_forest' algorithm key on the train/test split and
# merge the mapping it returns into `results` (dict.update overwrites same keys).
# NOTE(review): train_test is not defined in this notebook — presumably it comes
# from `from algorithms import *`; confirm its return structure there.
result = train_test(X_train, y_train, X_test, y_test, 'random_forest')
results.update(result)

0.2436479999999932
{'criterion': 'gini', 'n_estimators': 40, 'oob_score': 0, 'warm_start': 0}
0.929714683368128
1.0 0.9222222222222223 1.0 0.921368462705266


In [13]:
# Run train_test for the 'logistic_regression' algorithm key on the train/test
# split and merge the mapping it returns into `results` (dict.update overwrites
# same keys).
# NOTE(review): train_test is not defined in this notebook — presumably it comes
# from `from algorithms import *`; confirm its return structure there.
result = train_test(X_train, y_train, X_test, y_test, 'logistic_regression')
results.update(result)

2.623319000000002
{'C': 1.4, 'solver': 'newton-cg', 'tol': 0.0001}
0.9498956158663883
0.9902574808629089 0.9638888888888889 0.990261887537785 0.9640267216005924


In [14]:
# Run train_test for the 'passive_aggressive' algorithm key on the train/test
# split and merge the mapping it returns into `results` (dict.update overwrites
# same keys).
# NOTE(review): train_test is not defined in this notebook — presumably it comes
# from `from algorithms import *`; confirm its return structure there.
result = train_test(X_train, y_train, X_test, y_test, 'passive_aggressive')
results.update(result)

0.7304139999999961
{'early_stopping': True, 'loss': 'hinge', 'tol': 3.3333333333333335e-05, 'warm_start': 1}
0.9519832985386222
0.9944328462073765 0.9722222222222222 0.9944428995786393 0.9724087132113335


In [15]:
# Run train_test for the 'ridge' algorithm key on the train/test split and
# merge the mapping it returns into `results` (dict.update overwrites same keys).
# NOTE(review): train_test is not defined in this notebook — presumably it comes
# from `from algorithms import *`; confirm its return structure there.
result = train_test(X_train, y_train, X_test, y_test, 'ridge')
results.update(result)

0.1382509999999968
{'alpha': 0.8, 'tol': 0.0001}
0.941544885177453
0.9756437021572721 0.9416666666666667 0.975666844377629 0.94172764436066


In [16]:
# Run train_test for the 'sgd' algorithm key on the train/test split and
# merge the mapping it returns into `results` (dict.update overwrites same keys).
# NOTE(review): train_test is not defined in this notebook — presumably it comes
# from `from algorithms import *`; confirm its return structure there.
result = train_test(X_train, y_train, X_test, y_test, 'sgd')
results.update(result)

1.6964600000000019
{'alpha': 0.0011, 'loss': 'hinge', 'penalty': 'l2', 'tol': 2e-05}
0.9589422407794015
0.9895615866388309 0.9722222222222222 0.9895315071736188 0.9722365497065388


In [17]:
# Run train_test for the 'bernoulli' algorithm key on the train/test split and
# merge the mapping it returns into `results` (dict.update overwrites same keys).
# NOTE(review): train_test is not defined in this notebook — presumably it comes
# from `from algorithms import *`; confirm its return structure there.
result = train_test(X_train, y_train, X_test, y_test, 'bernoulli')
results.update(result)

0.08560599999999852
{'alpha': 0.6}
0.8552540013917884
0.8705636743215032 0.85 0.8719456342690497 0.8501869123200783


In [18]:
# Run train_test for the 'gaussian' algorithm key on the train/test split and
# merge the mapping it returns into `results` (dict.update overwrites same keys).
# NOTE(review): train_test is not defined in this notebook — presumably it comes
# from `from algorithms import *`; confirm its return structure there.
result = train_test(X_train, y_train, X_test, y_test, 'gaussian')
results.update(result)

0.24517900000000736
{'var_smoothing': 1e-10}
0.8162839248434238
0.9338900487125957 0.8777777777777778 0.9343367308697624 0.8780686289589362


In [19]:
# Run train_test for the 'k_neighbors' algorithm key on the train/test split and
# merge the mapping it returns into `results` (dict.update overwrites same keys).
# NOTE(review): train_test is not defined in this notebook — presumably it comes
# from `from algorithms import *`; confirm its return structure there.
result = train_test(X_train, y_train, X_test, y_test, 'k_neighbors')
results.update(result)

0.1532179999999954
{'algorithm': 'ball_tree', 'n_neighbors': 4, 'p': 2, 'weights': 'distance'}
0.9617258176757133
1.0 0.9694444444444444 1.0 0.9691544104447444


In [20]:
# Run train_test for the 'nearest_centroid' algorithm key on the train/test
# split and merge the mapping it returns into `results` (dict.update overwrites
# same keys).
# NOTE(review): train_test is not defined in this notebook — presumably it comes
# from `from algorithms import *`; confirm its return structure there.
result = train_test(X_train, y_train, X_test, y_test, 'nearest_centroid')
results.update(result)

0.010638000000000147
{'metric': 'cosine'}
0.9109255393180237
0.9262352122477383 0.9027777777777778 0.9257750857087723 0.903039802861737


In [21]:
# Run train_test for the 'mlp' algorithm key on the train/test split and
# merge the mapping it returns into `results` (dict.update overwrites same keys).
# NOTE(review): train_test is not defined in this notebook — presumably it comes
# from `from algorithms import *`; confirm its return structure there.
result = train_test(X_train, y_train, X_test, y_test, 'mlp')
results.update(result)

2.158282
{'activation': 'tanh', 'alpha': 3.3333333333333333e-06, 'early_stopping': True, 'learning_rate': 'constant', 'solver': 'lbfgs'}
0.954070981210856
1.0 0.9777777777777777 1.0 0.9778507674341008


In [22]:
# Run train_test for the 'linear_svc' algorithm key on the train/test split and
# merge the mapping it returns into `results` (dict.update overwrites same keys).
# NOTE(review): train_test is not defined in this notebook — presumably it comes
# from `from algorithms import *`; confirm its return structure there.
result = train_test(X_train, y_train, X_test, y_test, 'linear_svc')
results.update(result)

0.48618600000000356
{'C': 0.5, 'multi_class': 'crammer_singer', 'penalty': 'l2', 'tol': 0.0001}
0.9547668754349339
0.9972164231036882 0.9805555555555555 0.9972139735004079 0.9806454308928908


In [23]:
# Run train_test for the 'decision_tree' algorithm key on the train/test split and
# merge the mapping it returns into `results` (dict.update overwrites same keys).
# NOTE(review): train_test is not defined in this notebook — presumably it comes
# from `from algorithms import *`; confirm its return structure there.
result = train_test(X_train, y_train, X_test, y_test, 'decision_tree')
results.update(result)

0.4574169999999924
{'criterion': 'entropy', 'splitter': 'best'}
0.7453027139874739
1.0 0.7888888888888889 1.0 0.7861819964552713


In [24]:
# Run train_test for the 'extra_tree' algorithm key on the train/test split and
# merge the mapping it returns into `results` (dict.update overwrites same keys).
# NOTE(review): train_test is not defined in this notebook — presumably it comes
# from `from algorithms import *`; confirm its return structure there.
result = train_test(X_train, y_train, X_test, y_test, 'extra_tree')
results.update(result)

0.029536000000007334
{'criterion': 'entropy', 'splitter': 'best'}
0.6604036186499652
1.0 0.7111111111111111 1.0 0.7107523041055998


In [25]:
# Run train_test for the 'gradient_boosting' algorithm key on the train/test
# split and merge the mapping it returns into `results` (dict.update overwrites
# same keys).
# NOTE(review): train_test is not defined in this notebook — presumably it comes
# from `from algorithms import *`; confirm its return structure there.
result = train_test(X_train, y_train, X_test, y_test, 'gradient_boosting')
results.update(result)

5.767002000000005
{'criterion': 'friedman_mse', 'learning_rate': 0.2, 'loss': 'deviance', 'tol': 5e-06}
0.8886569241475296
1.0 0.9416666666666667 1.0 0.9415191519682315


In [26]:
# Run train_test for the 'hist_gradient_boosting' algorithm key on the train/test
# split and merge the mapping it returns into `results` (dict.update overwrites
# same keys).
# NOTE(review): train_test is not defined in this notebook — presumably it comes
# from `from algorithms import *`; confirm its return structure there.
result = train_test(X_train, y_train, X_test, y_test, 'hist_gradient_boosting')
results.update(result)

28.010173999999992
{'l2_regularization': 0, 'tol': 1e-08}
0.9318023660403618
1.0 0.9472222222222222 1.0 0.9468682331940224


In [27]:
# Tabulate everything collected in `results`. In the recorded output the
# columns are the algorithm names and the rows are hyperparameter/metric
# names, left empty where an algorithm has no value for that row.
df = pd.DataFrame.from_records(results)

In [28]:
# Render the results table as this cell's output.
df

Unnamed: 0,ada_boost,bagging,bernoulli,decision_tree,extra_tree,extra_trees,gaussian,gradient_boosting,hist_gradient_boosting,k_neighbors,linear_svc,logistic_regression,mlp,nearest_centroid,passive_aggressive,random_forest,ridge,sgd
C,,,,,,,,,,,0.5,1.4,,,,,,
activation,,,,,,,,,,,,,tanh,,,,,
algorithm,SAMME,,,,,,,,,ball_tree,,,,,,,,
alpha,,,0.6,,,,,,,,,,3.33333e-06,,,,0.8,0.0011
bootstrap_features,,1.0,,,,,,,,,,,,,,,,
criterion,,,,entropy,entropy,gini,,friedman_mse,,,,,,,,gini,,
early_stopping,,,,,,,,,,,,,True,,True,,,
f1_test,0.830306,0.929516,0.850187,0.786182,0.710752,0.940437,0.8780686,0.941519,0.9468682,0.969154,0.980645,0.964027,0.977851,0.90304,0.972409,0.921368,0.941728,0.972237
f1_train,0.847551,1.0,0.871946,1,1,1,0.9343367,1,1.0,1,0.997214,0.990262,1,0.925775,0.994443,1,0.975667,0.989532
l2_regularization,,,,,,,,,0.0,,,,,,,,,


In [29]:
# Make sure the output directory exists before writing: to_csv does not create
# missing parent directories, and failing at this point would discard the
# results of all the training cells above.
os.makedirs(os.path.dirname(result_path) or '.', exist_ok=True)
# Persist the results table (index included) to the run-specific CSV path.
df.to_csv(result_path)