In [49]:
%load_ext autoreload
%autoreload 2

import sys
import os
import time


#needed to import utils.py
sys.path.append('../') 

import utils
import utils_preprocessing
import utils_exec_models

import numpy as np
import pandas as pd

from IPython.core.display import HTML

from IPython.display import clear_output

%matplotlib inline  

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Get Training and Validation subsets 

In [50]:
X_train, y_train, X_valid, y_valid = utils.get_train_and_validation_data(scaled=True)

X_train.head()



Unnamed: 0,Sex_Male,Site_Onset,Diagnosis_Delay,Age_at_Onset,Riluzole,FVC_at_Diagnosis,BMI_at_Diagnosis,Q1_Speech_slope_at_Diagnosis,Q2_Salivation_slope_at_Diagnosis,Q3_Swallowing_slope_at_Diagnosis,...,Q7_Turning_in_Bed_slope_at_Diagnosis,Q8_Walking_slope_at_Diagnosis,Q9_Climbing_Stairs_slope_at_Diagnosis,Q10_Respiratory_slope_at_Diagnosis,Qty_Regions_Involved_at_Diagnosis,Region_Involved_Bulbar_at_Diagnosis,Region_Involved_Upper_Limb_at_Diagnosis,Region_Involved_Lower_Limb_at_Diagnosis,Region_Involved_Respiratory_at_Diagnosis,Patient_with_Gastrostomy_at_Diagnosis
0,1.0,1.0,0.0,0.5,0.0,1.0,0.67,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.67,1.0,1.0,1.0,0.0,0.0
1,1.0,1.0,0.5,0.75,0.0,0.0,0.67,0.0,0.0,0.0,...,0.0,0.0,0.5,0.0,0.67,1.0,1.0,1.0,0.0,0.0
2,1.0,1.0,0.0,0.25,0.0,1.0,0.33,0.5,0.0,0.0,...,0.0,0.0,0.5,0.0,0.67,1.0,1.0,1.0,0.0,0.0
3,0.0,0.0,0.5,0.5,0.0,1.0,1.0,0.5,0.5,0.5,...,0.5,0.5,0.5,0.5,0.67,1.0,0.0,1.0,1.0,0.0
4,1.0,0.0,1.0,0.75,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.5,0.0,1.0,1.0,1.0,1.0,1.0,0.0


# Create and execute the models with GridSearch, using the inputs and outputs created in the previous step

In [64]:
%%time

csv_results_saved = os.path.abspath('exec_results/results_Single_Model.csv')


i = 1


# verify if already exists an CSV with the results
overwrite_results_saved_previously = False

if os.path.exists(csv_results_saved) and overwrite_results_saved_previously==False:
    print('Reading results saved previously...')
    df_results = utils.read_csv(csv_file=csv_results_saved)
else:
    df_results = None


kfold = utils_exec_models.get_kfold_splits()


## define the models and hyperparameters for the GridSearch
param_grid = []

# TESTING = False
TESTING = True

utils_exec_models.create_models_SVM_grid(param_grid, testing=TESTING)
utils_exec_models.create_models_NB_grid(param_grid, testing=TESTING)
utils_exec_models.create_models_DT_grid(param_grid, testing=TESTING)
utils_exec_models.create_models_kNN_grid(param_grid, testing=TESTING)
utils_exec_models.create_models_RF_grid(param_grid, testing=TESTING)
utils_exec_models.create_models_NN_grid(qty_features=X_train.shape[1],  param_grid=param_grid, testing=TESTING)

#display(param_grid)

X = X_train

if len(param_grid) > 0:

    ## execute GridSearch
    grid, df_results_aux = utils_exec_models.exec_grid_search(
        param_grid=param_grid, 
        X=X_train, 
        y=y_train,
        cv=kfold,
        verbose=1,
        return_train_score=False,
        sort_results=False,
        dataset_info='Single-Model',
        features_info='All Features',
        #
        n_jobs=2, 
    #     scoring='roc_auc',
    )

    if df_results is None:
        df_results = df_results_aux
    else:
        df_results = pd.concat([df_results, df_results_aux])

    clear_output(wait=True)

    time.sleep(2)

print()
print('FINISHED !!!')


# sort performances results and show results
df_results = utils_exec_models.sort_performances_results(df=df_results)       
display(df_results)

# save the results
utils.save_to_csv(df=df_results, csv_file=csv_results_saved)



FINISHED !!!


Unnamed: 0,Dataset,Features,Model,BalAcc,Sens,Spec,f1,AUC,Acc,Prec,Classifier,Hyperparams
5,Single-Model,All Features,Naïve Bayes,0.81,0.78,0.83,0.53,0.88,0.82,0.41,ComplementNB,"{'alpha': 0.5, 'norm': False}"
4,Single-Model,All Features,Naïve Bayes,0.8,0.77,0.83,0.53,0.88,0.83,0.41,ComplementNB,"{'alpha': 0.1, 'norm': False}"
10,Single-Model,All Features,Random Forest,0.8,0.72,0.87,0.56,0.9,0.85,0.46,RandomForestClassifier,"{'class_weight': 'balanced', 'criterion': 'gin..."
6,Single-Model,All Features,Naïve Bayes,0.78,0.68,0.89,0.56,0.88,0.86,0.48,GaussianNB,{}
7,Single-Model,All Features,Decision Tree,0.77,0.75,0.79,0.47,0.82,0.78,0.35,DecisionTreeClassifier,"{'class_weight': 'balanced', 'criterion': 'gin..."
0,Single-Model,All Features,Neural Networks,0.71,0.44,0.97,0.54,0.89,0.9,0.69,MLPClassifier,"{'activation': 'relu', 'alpha': 0.1, 'hidden_l..."
1,Single-Model,All Features,Neural Networks,0.71,0.44,0.97,0.54,0.89,0.9,0.69,MLPClassifier,"{'activation': 'relu', 'alpha': 0.1, 'hidden_l..."
11,Single-Model,All Features,Neural Networks,0.71,0.44,0.97,0.54,0.89,0.9,0.69,MLPClassifier,"{'activation': 'relu', 'alpha': 0.1, 'hidden_l..."
3,Single-Model,All Features,SVM,0.68,0.38,0.98,0.52,0.91,0.91,0.8,SVC,"{'C': 0.3, 'class_weight': None, 'gamma': 'aut..."
1,Single-Model,All Features,SVM,0.66,0.33,0.99,0.47,0.91,0.9,0.82,SVC,"{'C': 0.1, 'class_weight': None, 'gamma': 'aut..."


14 samples were saved
CPU times: user 92 ms, sys: 10.9 ms, total: 103 ms
Wall time: 4.46 s


---
---
---
# OTHERS

### Show other grid properties

In [65]:
print(f'Best Bal.Acc.: {grid.best_score_:.2f}')
print(f'        Model: {grid.best_params_["classifier"]} ') 
print(f'Performance using the Validation set:  {grid.score(X_valid, y_valid):.2f}')




Best Bal.Acc.: 0.81
        Model: ComplementNB(alpha=0.5) 
Performance using the Validation set:  0.82
