In [1]:
import os
import pandas as pd
from sklearn.model_selection import StratifiedShuffleSplit

from dimensionality_reduction import reduce_dimension
import load_database
from algorithms import *

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Run configuration is supplied through environment variables; a missing
# variable raises KeyError and a non-integer N_COMPONENTS raises ValueError.
# The tuple is evaluated left to right, so the lookups happen in the listed order.
database_name, n_components, dimensionality_algorithm = (
    os.environ['DATABASE'],
    int(os.environ['N_COMPONENTS']),
    os.environ['DIMENSIONALITY_ALGORITHM'],
)

In [4]:
# Output CSV location: one file per (database, n_components, reduction algorithm)
# combination, written under the relative 'results/' directory.
result_path = 'results/%s_%s_%s.csv' % (
    database_name,
    n_components,
    dimensionality_algorithm,
)

In [5]:
# Load features and labels for the configured database.
X, y = load_database.load(database_name)

# A falsy n_components (i.e. 0) means "skip dimensionality reduction" and X is
# used exactly as loaded.
if n_components:
    X = reduce_dimension(dimensionality_algorithm, X, n_components)

In [6]:
# Sanity check: show the dimensions of X after the optional reduction step.
X.shape

(1797, 32)

In [7]:
# Accumulator that each model cell below merges its outcome into.
results = dict()

In [8]:
# Single stratified 80/20 hold-out split shared by every model cell below.
#
# The seed is fixed so that Restart & Run All reproduces the same partition;
# without it every run draws a different split and the scores written to the
# results file are not comparable between runs.
RANDOM_STATE = 42

sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=RANDOM_STATE)

# n_splits=1 yields exactly one (train, test) index pair, so take it directly
# instead of looping over a one-element generator.
train_index, test_index = next(sss.split(X, y))

X_train, X_test = X[train_index], X[test_index]
y_train, y_test = y[train_index], y[test_index]

In [9]:
# Run the 'ada_boost' entry on the hold-out split and record its outcome.
# train_test is not defined in this notebook (presumably it arrives via
# `from algorithms import *`); it presumably returns a mapping keyed by the
# model name, which update() merges into `results` — confirm.
result = train_test(X_train, y_train, X_test, y_test, 'ada_boost')
results.update(result)

0.9847999999999999
{'algorithm': 'SAMME', 'learning_rate': 0.9, 'n_estimators': 90}
0.6951983298538622
0.7473903966597077 0.7055555555555556 0.7396622729632958 0.6971222185042099


In [10]:
# Run the 'bagging' entry on the hold-out split and record its outcome.
# train_test is not defined in this notebook (presumably it arrives via
# `from algorithms import *`); it presumably returns a mapping keyed by the
# model name, which update() merges into `results` — confirm.
result = train_test(X_train, y_train, X_test, y_test, 'bagging')
results.update(result)

0.7917519999999998
{'bootstrap_features': 1, 'n_estimators': 45}
0.9311064718162839
1.0 0.9694444444444444 1.0 0.9693873732141257


In [11]:
# Run the 'extra_trees' entry on the hold-out split and record its outcome.
# train_test is not defined in this notebook (presumably it arrives via
# `from algorithms import *`); it presumably returns a mapping keyed by the
# model name, which update() merges into `results` — confirm.
result = train_test(X_train, y_train, X_test, y_test, 'extra_trees')
results.update(result)

0.09917700000000007
{'criterion': 'gini', 'n_estimators': 35, 'warm_start': 0}
0.9624217118997912
1.0 0.975 1.0 0.974905702861451


In [12]:
# Run the 'random_forest' entry on the hold-out split and record its outcome.
# train_test is not defined in this notebook (presumably it arrives via
# `from algorithms import *`); it presumably returns a mapping keyed by the
# model name, which update() merges into `results` — confirm.
result = train_test(X_train, y_train, X_test, y_test, 'random_forest')
results.update(result)

0.2344589999999993
{'criterion': 'gini', 'n_estimators': 45, 'oob_score': 0, 'warm_start': 1}
0.9436325678496869
1.0 0.9638888888888889 1.0 0.96376946175916


In [13]:
# Run the 'logistic_regression' entry on the hold-out split and record its outcome.
# train_test is not defined in this notebook (presumably it arrives via
# `from algorithms import *`); it presumably returns a mapping keyed by the
# model name, which update() merges into `results` — confirm.
result = train_test(X_train, y_train, X_test, y_test, 'logistic_regression')
results.update(result)

1.804259
{'C': 1.4, 'solver': 'saga', 'tol': 0.0001}
0.941544885177453
0.9575504523312457 0.9555555555555556 0.9576449405847262 0.9555566572103006


In [14]:
# Run the 'passive_aggressive' entry on the hold-out split and record its outcome.
# train_test is not defined in this notebook (presumably it arrives via
# `from algorithms import *`); it presumably returns a mapping keyed by the
# model name, which update() merges into `results` — confirm.
result = train_test(X_train, y_train, X_test, y_test, 'passive_aggressive')
results.update(result)

0.3278220000000003
{'early_stopping': False, 'loss': 'squared_hinge', 'tol': 2e-05, 'warm_start': 0}
0.941544885177453
0.9631176061238692 0.9638888888888889 0.962851511950115 0.963747693863139


In [15]:
# Run the 'ridge' entry on the hold-out split and record its outcome.
# train_test is not defined in this notebook (presumably it arrives via
# `from algorithms import *`); it presumably returns a mapping keyed by the
# model name, which update() merges into `results` — confirm.
result = train_test(X_train, y_train, X_test, y_test, 'ridge')
results.update(result)

0.21161799999999964
{'alpha': 0.5, 'tol': 0.0001}
0.9227557411273486
0.9394572025052192 0.925 0.9388695890226493 0.9248169771445035


In [16]:
# Run the 'sgd' entry on the hold-out split and record its outcome.
# train_test is not defined in this notebook (presumably it arrives via
# `from algorithms import *`); it presumably returns a mapping keyed by the
# model name, which update() merges into `results` — confirm.
result = train_test(X_train, y_train, X_test, y_test, 'sgd')
results.update(result)

1.0160359999999997
{'alpha': 0.0011, 'loss': 'hinge', 'penalty': 'none', 'tol': 1.25e-05}
0.9491997216423104
0.9659011830201809 0.9555555555555556 0.9658248965258884 0.9560017289636243


In [17]:
# Run the 'bernoulli' entry on the hold-out split and record its outcome.
# train_test is not defined in this notebook (presumably it arrives via
# `from algorithms import *`); it presumably returns a mapping keyed by the
# model name, which update() merges into `results` — confirm.
result = train_test(X_train, y_train, X_test, y_test, 'bernoulli')
results.update(result)

0.322261000000001
{'alpha': 0.1}
0.10299234516353514
0.11969380654140571 0.1 0.0542952509008983 0.018181818181818184


In [18]:
# Run the 'gaussian' entry on the hold-out split and record its outcome.
# train_test is not defined in this notebook (presumably it arrives via
# `from algorithms import *`); it presumably returns a mapping keyed by the
# model name, which update() merges into `results` — confirm.
result = train_test(X_train, y_train, X_test, y_test, 'gaussian')
results.update(result)

0.3601200000000002
{'var_smoothing': 1e-10}
0.918580375782881
0.9380654140570633 0.9416666666666667 0.9385467852410239 0.9422094648990904


In [19]:
# Run the 'k_neighbors' entry on the hold-out split and record its outcome.
# train_test is not defined in this notebook (presumably it arrives via
# `from algorithms import *`); it presumably returns a mapping keyed by the
# model name, which update() merges into `results` — confirm.
result = train_test(X_train, y_train, X_test, y_test, 'k_neighbors')
results.update(result)

0.08491200000000099
{'algorithm': 'ball_tree', 'n_neighbors': 4, 'p': 2, 'weights': 'distance'}
0.9672929714683368
1.0 0.975 1.0 0.9749930550638048


In [20]:
# Run the 'nearest_centroid' entry on the hold-out split and record its outcome.
# train_test is not defined in this notebook (presumably it arrives via
# `from algorithms import *`); it presumably returns a mapping keyed by the
# model name, which update() merges into `results` — confirm.
result = train_test(X_train, y_train, X_test, y_test, 'nearest_centroid')
results.update(result)

0.010041999999998552
{'metric': 'euclidean'}
0.9039665970772442
0.9220598469032707 0.9083333333333333 0.9228000273320254 0.9092314857267553


In [21]:
# Run the 'mlp' entry on the hold-out split and record its outcome.
# train_test is not defined in this notebook (presumably it arrives via
# `from algorithms import *`); it presumably returns a mapping keyed by the
# model name, which update() merges into `results` — confirm.
result = train_test(X_train, y_train, X_test, y_test, 'mlp')
results.update(result)

3.9250120000000006
{'activation': 'tanh', 'alpha': 1.25e-06, 'early_stopping': True, 'learning_rate': 'constant', 'solver': 'lbfgs'}
0.9491997216423104
1.0 0.9638888888888889 1.0 0.9638317286911687


In [22]:
# Run the 'linear_svc' entry on the hold-out split and record its outcome.
# train_test is not defined in this notebook (presumably it arrives via
# `from algorithms import *`); it presumably returns a mapping keyed by the
# model name, which update() merges into `results` — confirm.
result = train_test(X_train, y_train, X_test, y_test, 'linear_svc')
results.update(result)

0.5341770000000015
{'C': 1.3, 'multi_class': 'ovr', 'penalty': 'l2', 'tol': 0.0001}
0.9464161447459986
0.9714683368128044 0.9666666666666667 0.971390928568462 0.9667830091329994


In [23]:
# Run the 'decision_tree' entry on the hold-out split and record its outcome.
# train_test is not defined in this notebook (presumably it arrives via
# `from algorithms import *`); it presumably returns a mapping keyed by the
# model name, which update() merges into `results` — confirm.
result = train_test(X_train, y_train, X_test, y_test, 'decision_tree')
results.update(result)

0.3701350000000012
{'criterion': 'entropy', 'splitter': 'best'}
0.7800974251913709
1.0 0.8527777777777777 1.0 0.8538348486605282


In [24]:
# Run the 'extra_tree' entry on the hold-out split and record its outcome.
# train_test is not defined in this notebook (presumably it arrives via
# `from algorithms import *`); it presumably returns a mapping keyed by the
# model name, which update() merges into `results` — confirm.
result = train_test(X_train, y_train, X_test, y_test, 'extra_tree')
results.update(result)

0.01974999999999838
{'criterion': 'gini', 'splitter': 'best'}
0.6764091858037579
1.0 0.7666666666666667 1.0 0.7652852422114007


In [25]:
# Run the 'gradient_boosting' entry on the hold-out split and record its outcome.
# train_test is not defined in this notebook (presumably it arrives via
# `from algorithms import *`); it presumably returns a mapping keyed by the
# model name, which update() merges into `results` — confirm.
result = train_test(X_train, y_train, X_test, y_test, 'gradient_boosting')
results.update(result)

2.723500999999999
{'criterion': 'friedman_mse', 'learning_rate': 0.2, 'loss': 'deviance', 'tol': 1e-05}
0.9171885873347251
1.0 0.9388888888888889 1.0 0.9382770881589623


In [26]:
# Run the 'hist_gradient_boosting' entry on the hold-out split and record its outcome.
# train_test is not defined in this notebook (presumably it arrives via
# `from algorithms import *`); it presumably returns a mapping keyed by the
# model name, which update() merges into `results` — confirm.
result = train_test(X_train, y_train, X_test, y_test, 'hist_gradient_boosting')
results.update(result)

15.170116999999998
{'l2_regularization': 0, 'tol': 1e-08}
0.9338900487125957
1.0 0.9694444444444444 1.0 0.9693645285377068


In [27]:
# Assemble everything collected in `results` into a single table.
# NOTE(review): from_records is handed a dict here; judging by the rendered
# frame in this notebook, each top-level key becomes a column and the nested
# keys become the row index — confirm this still holds on the installed pandas.
df = pd.DataFrame.from_records(results)

In [28]:
# Display the combined comparison table for visual inspection.
df

Unnamed: 0,ada_boost,bagging,bernoulli,decision_tree,extra_tree,extra_trees,gaussian,gradient_boosting,hist_gradient_boosting,k_neighbors,linear_svc,logistic_regression,mlp,nearest_centroid,passive_aggressive,random_forest,ridge,sgd
C,,,,,,,,,,,1.3,1.4,,,,,,
activation,,,,,,,,,,,,,tanh,,,,,
algorithm,SAMME,,,,,,,,,ball_tree,,,,,,,,
alpha,,,0.1,,,,,,,,,,1.25e-06,,,,0.5,0.0011
bootstrap_features,,1.0,,,,,,,,,,,,,,,,
criterion,,,,entropy,gini,gini,,friedman_mse,,,,,,,,gini,,
early_stopping,,,,,,,,,,,,,True,,False,,,
f1_test,0.697122,0.969387,0.018182,0.853835,0.765285,0.974906,0.9422095,0.938277,0.9693645,0.974993,0.966783,0.955557,0.963832,0.909231,0.963748,0.963769,0.924817,0.956002
f1_train,0.739662,1.0,0.054295,1,1,1,0.9385468,1,1.0,1,0.971391,0.957645,1,0.9228,0.962852,1,0.93887,0.965825
l2_regularization,,,,,,,,,0.0,,,,,,,,,


In [29]:
# Persist the comparison table as CSV at result_path.
#
# to_csv does not create missing parent directories, and this cell runs only
# after every model has been trained, so make sure the target directory exists
# first rather than losing the whole run to a missing folder.
# `or '.'` covers a result_path that has no directory component.
os.makedirs(os.path.dirname(result_path) or '.', exist_ok=True)
df.to_csv(result_path)