* Hyperparameter tuning of the five classifiers for emotional state detection
* 5 fold cross validation with grid-search
* Multiclass classification

In [26]:
import pandas as pd
import datetime
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np

from pprint import pprint
from sklearn.model_selection import train_test_split

from sklearn import metrics   
from sklearn.feature_selection import SelectFromModel,RFECV
from sklearn.model_selection import cross_validate
from sklearn.model_selection import GridSearchCV 
from sklearn.model_selection import KFold, StratifiedKFold, cross_val_score, PredefinedSplit
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from sklearn.model_selection import GridSearchCV 
from sklearn.model_selection import KFold, StratifiedKFold, cross_val_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler


from sklearn import metrics   
from imblearn.over_sampling import SMOTE
from imblearn.over_sampling import SMOTENC
from imblearn.over_sampling import ADASYN
from imblearn.over_sampling import SVMSMOTE
from imblearn.combine import SMOTEENN
from imblearn.combine import SMOTETomek

pd.options.mode.chained_assignment = None
import re
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
#warnings.filterwarnings('always')
import pickle

from sklearn.compose import ColumnTransformer
from sklearn.decomposition import PCA

from imblearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE

from sklearn.metrics import classification_report
from sklearn.metrics import cohen_kappa_score
from imblearn.metrics import specificity_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import make_scorer, f1_score, roc_auc_score, precision_score, recall_score, confusion_matrix
from sklearn import metrics   

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from xgboost import XGBClassifier

from catboost import CatBoostClassifier, Pool, cv
from sklearn.neural_network import MLPClassifier


#from pandas_ml import ConfusionMatrix

#import collections

In [3]:
def read_input(p):
    #Read input file of each person
    filename='data/NOv_w5_emotionLabel_SelFeat_p'+str(p)+'.csv'
    raw_df= pd.read_csv(filename)
    
    #drop all variables that contain all NANs
    raw_df.dropna(axis=1,how='all', inplace=True)
    #reset the index
    raw_df.reset_index(drop=True, inplace=True)
    #drop columns with all zeros in pandas dataframe
    raw_df=raw_df.T[(raw_df!=0).any()].T
    
    #print("The shape of the dataframe is ",raw_df.shape)
    #print(raw_df['emotion'].value_counts())
    return raw_df

In [4]:
# replace NANs with -999
def prep_data(data):
    return data.fillna(-999)

In [5]:
#drop columns
def drop_cols(data, col_list):
    return data.drop(col_list, axis=1)

In [6]:
# normalize data with minmax
def scale_data(trn_x, tst_x):
    
    sc= StandardScaler()
    scaled_trn_x = sc.fit_transform(trn_x)
    scaled_tst_x = sc.fit_transform(tst_x)
    
    return scaled_trn_x, scaled_tst_x

In [39]:
# oversampling with SMOTE with 'minority' and 'not majority'
def over_sample_SMOTE(X_train, y_train):
    sm=SMOTE(sampling_strategy='not majority', random_state=10) # 'minority'
    X_train_ovr, y_train_ovr=sm.fit_sample(X_train, y_train)

    #print(X_train_ovr.shape, y_train_ovr.shape)
    return X_train_ovr, y_train_ovr

In [42]:
# oversampling with SVMSMOTE 
def over_sample_SVMSMOTE(X_train, y_train):
    sm=SVMSMOTE(random_state=10)
    
    X_train_ovr, y_train_ovr=sm.fit_sample(X_train, y_train)

    #print(X_train_ovr.shape, y_train_ovr.shape)
    return X_train_ovr, y_train_ovr

In [23]:
def select_k_features(X_train_scaled,X_test_scaled,y_train,k):
    selection = SelectKBest(mutual_info_classif, k)
    X_train = selection.fit_transform(X_train_scaled,y_train)
    X_test = selection.transform(X_test_scaled)
    
    return X_train, X_test

In [9]:
# define random state to re-generate the same result
random_state = 43

# total persons
p_list=[8, 10,12,13,15,20,21,25, 27, 33,35,40,46,48,49,52,54,55]
#p_list=[8]

# #of folds
n_fold=5

In [32]:
def print_results(accu, bl_accu, prec, rec_, spec_, roc_, f1_):   
    print('.....................')
    print("Average Accuracy: %.2f%% (%.2f)" % (np.mean(accu), np.std(accu)))
    print("Average Balanced_accuracy: %.2f%% (%.2f)" % (np.mean(bl_accu),np.std(bl_accu)))
    print("Average Precision: %.2f%% (%.2f)" % (np.mean(prec),np.std(prec)))
    print("Average Recall: %.2f%% (%.2f)" % (np.mean(rec_),np.std(rec_)))
    print("Average Specificity: %.2f%% (%.2f)" % (np.mean(spec_),np.std(spec_)))
    print("Average ROC AUC: %.2f%% (%.2f)" % (np.mean(roc_),np.std(roc_)))
    print("Average F1 score: %.2f%% (%.2f)" % (np.mean(f1_),np.std(f1_)))
    print('..................................................')
    print('\n')

In [14]:

p_list=[8, 10,12,13,15,20,21,25, 27, 33,35,40,46,48,49,52,54,55]
p = 12 

# for each person in the dataset, find the best hyperparamters for the model in given range 
#for p in p_list:
df=read_input(p)
#df.head()
df=prep_data(df)
# remove day_of_month variable if present in data
if 'day_of_month' in df.columns:
    drop_col=['day_of_month']
    df=drop_cols(df, drop_col)

 #remove classes that have less then 5 samples
min_c=df['emotion'].value_counts()
if (min_c <= 5).any():
    r_label=min_c[min_c <= 5].index[0]
    df = df.drop(df.index[df.emotion == r_label])

  # find the best model and test
    #print("Person "+str(p))
    #print("-------------------------------------------------------")
     #print report
    #stats(y_test, pred)
   
    #print("-------------------------------------------------------")
    
dataset = df   

y = dataset['emotion'].copy()
X = dataset.loc[:, dataset.columns != 'emotion'].copy()
#X = X.apply(pd.to_numeric)


In [11]:
pipe = Pipeline([('scaler', StandardScaler()), # MinMaxScaler()
                 ('selector', SelectKBest(mutual_info_classif, k=90)), #
                 ('classifier', LogisticRegression())])

search_space = [{'selector__k': [ 50, 70, 90]},
                
                {'classifier': [LogisticRegression(solver='lbfgs')],
                 'classifier__C': [0.01, 0.1, 1.0],
                 'classifier__penalty': ['l1', 'l2', None],
                 'classifier__solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
                 'classifier__max_iter':[100, 150, 200], 
                 'classifier__class_weight':[None, 'balanced']},
                 
                {'classifier': [RandomForestClassifier()],
                 'classifier__max_depth': [5, 10, 30, None],
                 'classifier__criterion':['gini','entropy'], 
                 'classifier__bootstrap': [True],
                 'classifier__max_features':['log2', None],
                 'classifier__n_estimators': [50, 100, 200, 300, 400]},
                
                {'classifier': [MLPClassifier(random_state=1, early_stopping=True)],
                 'classifier__hidden_layer_sizes' : [(50, 50, 50), (50, 100, 50), (20, 20, 20), (30, ), (50,),(100,)], 
                 'classifier__activation' : ['tanh', 'relu', 'logistic'],
                 'classifier__max_iter':[50, 100, 150, 200, 300],
                 'classifier__solver': ['sgd', 'adam', 'lbfgs'],
                 'classifier__alpha': [0.0001, 0.001, 0.05]},
                
                {'classifier': [CatBoostClassifier(random_seed=1)],
                 'classifier__learning_rate': [0.05, 0.1, 0.15, 0.2]},
                
                {'classifier': [xgb.XGBClassifier(random_state=1)],
                 'classifier__learning_rate': [0.05, 0.1, 0.15, 0.2],
                 'classifier__colsample_bytree':[.5, .75, 1],
                 'classifier__max_depth': np.arange(3, 6, 10),
                 'classifier__n_estimators': [50, 100, 200, 300, 400]}]
                  
     
scorers = {
    'precision_score': make_scorer(precision_score, average='macro'),
    'recall_score': make_scorer(recall_score, average='macro'),
    'accuracy_score': make_scorer(accuracy_score, average='macro')
}

scorer = make_scorer(f1_score, average = 'micro')

In [12]:
LR_pipe = Pipeline([('scaler', StandardScaler()), # MinMaxScaler()
                 ('selector', SelectKBest(mutual_info_classif, k=90)), #
                 ('classifier', LogisticRegression())])

LR_search_space = [{'selector__k': [ 50, 70, 90, 110]},
                
                {'classifier': [LogisticRegression(solver='lbfgs')],
                 'classifier__C': [0.01, 0.1, 1.0],
                 'classifier__penalty': ['l1', 'l2', None],
                 'classifier__solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
                 'classifier__max_iter':[100, 150, 200], 
                 'classifier__class_weight':[None, 'balanced']}]
                 
################################################################################          

RF_pipe = Pipeline([('scaler', StandardScaler()), # MinMaxScaler()
                 ('selector', SelectKBest(mutual_info_classif, k=90)), #
                 ('classifier', RandomForestClassifier())])

RF_search_space = [{'selector__k': [ 50, 70, 90, 110]},
                
                {'classifier': [RandomForestClassifier()],
                 'classifier__max_depth': [5, 10, 30, None],
                 'classifier__criterion':['gini','entropy'], 
                 'classifier__bootstrap': [True],
                 'classifier__max_features':['log2', None],
                 'classifier__n_estimators': [50, 100, 200, 300, 400]}]
                  
################################################################################

MLP_pipe = Pipeline([('scaler', StandardScaler()), # MinMaxScaler()
                 ('selector', SelectKBest(mutual_info_classif, k=90)), #
                 ('classifier', MLPClassifier(random_state=1, early_stopping=True))])

MLP_search_space = [{'selector__k': [ 50, 70, 90, 110]},
                
                {'classifier': [MLPClassifier(random_state=1, early_stopping=True)],
                 'classifier__hidden_layer_sizes' : [(50, 50, 50), (50, 100, 50), (20, 20, 20), (30, ), (50,),(100,)], 
                 'classifier__activation' : ['tanh', 'relu', 'logistic'],
                 'classifier__max_iter':[50, 100, 150, 200, 300],
                 'classifier__solver': ['sgd', 'adam', 'lbfgs'],
                 'classifier__alpha': [0.0001, 0.001, 0.05]}]

################################################################################

CB_pipe = Pipeline([('scaler', StandardScaler()), # MinMaxScaler()
                 ('selector', SelectKBest(mutual_info_classif, k=90)), #
                 ('classifier', CatBoostClassifier(random_seed=1))])

CB_search_space = [{'selector__k': [ 50, 70, 90, 110]},
                
                {'classifier': [CatBoostClassifier(random_seed=1, verbose=False)],
                 'classifier__learning_rate': [0.05, 0.1, 0.15, 0.2]}]

################################################################################

XGB_pipe = Pipeline([('scaler', StandardScaler()), # MinMaxScaler()
                 ('selector', SelectKBest(mutual_info_classif, k=90)), #
                 ('classifier', xgb.XGBClassifier(random_state=1))])

XGB_search_space = [{'selector__k': [ 50, 70, 90, 110]},
                
                {'classifier': [xgb.XGBClassifier(random_state=1)],
                 'classifier__learning_rate': [0.05, 0.1, 0.15, 0.2],
                 'classifier__colsample_bytree':[.5, .75, 1],
                 'classifier__max_depth': np.arange(3, 6, 10),
                 'classifier__n_estimators': [50, 100, 200, 300, 400]}]


In [13]:
def grid_search_wrapper(pipe = pipe, search_space = search_space, verbose= False,refit_score=scorer):
    """
    fits a GridSearchCV classifiers using refit_score for optimization
    prints classifier performance metrics
    """
    cross_validation = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)

    grid_search = GridSearchCV(pipe, search_space, cv=cross_validation, verbose=verbose,  n_jobs = -1) #scoring=scorer, refit=scorer
    
    grid_search.fit(X, y)
    
    return grid_search


In [15]:
pipeline_grid_search_LR = grid_search_wrapper(pipe = LR_pipe, search_space = LR_search_space, verbose=2)

Fitting 5 folds for each of 274 candidates, totalling 1370 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:   13.8s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   46.6s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  3.2min
[Parallel(n_jobs=-1)]: Done 997 tasks      | elapsed:  4.9min
[Parallel(n_jobs=-1)]: Done 1370 out of 1370 | elapsed:  6.6min finished
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [214]:
print(pipeline_grid_search_LR.best_estimator_)
print(pipeline_grid_search_LR.best_score_)

Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('selector',
                 SelectKBest(k=90,
                             score_func=<function mutual_info_classif at 0x1a1ff22710>)),
                ('classifier',
                 LogisticRegression(C=1.0, class_weight=None, dual=False,
                                    fit_intercept=True, intercept_scaling=1,
                                    l1_ratio=None, max_iter=100,
                                    multi_class='auto', n_jobs=None,
                                    penalty='l2', random_state=None,
                                    solver='newton-cg', tol=0.0001, verbose=0,
                                    warm_start=False))],
         verbose=False)
0.9166666666666667


In [16]:
pipeline_grid_search_RF = grid_search_wrapper(pipe = RF_pipe, search_space = RF_search_space, verbose=2)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 5 folds for each of 84 candidates, totalling 420 fits


[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    9.0s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 420 out of 420 | elapsed:  3.5min finished


In [64]:
print(pipeline_grid_search_RF.best_estimator_['classifier'])
print(pipeline_grid_search_RF.best_score_)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=None, max_features=None,
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=400,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)
0.9685185185185186


In [17]:
pipeline_grid_search_MLP = grid_search_wrapper(pipe = MLP_pipe, search_space = MLP_search_space, verbose=2)

Fitting 5 folds for each of 814 candidates, totalling 4070 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.9s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   45.8s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  3.1min
[Parallel(n_jobs=-1)]: Done 997 tasks      | elapsed:  4.9min
[Parallel(n_jobs=-1)]: Done 1442 tasks      | elapsed:  7.2min
[Parallel(n_jobs=-1)]: Done 1969 tasks      | elapsed:  9.8min
[Parallel(n_jobs=-1)]: Done 2576 tasks      | elapsed: 13.0min
[Parallel(n_jobs=-1)]: Done 3265 tasks      | elapsed: 16.7min
[Parallel(n_jobs=-1)]: Done 4034 tasks      | elapsed: 20.4min
[Parallel(n_jobs=-1)]: Done 4070 out of 4070 | elapsed: 20.6min finished


In [56]:
print(pipeline_grid_search_MLP.cv_results_['params'][pipeline_grid_search_MLP.best_index_])
print(pipeline_grid_search_MLP['classifier'].best_params_)

{'classifier': MLPClassifier(activation='logistic', alpha=0.0001, batch_size='auto',
              beta_1=0.9, beta_2=0.999, early_stopping=True, epsilon=1e-08,
              hidden_layer_sizes=(50,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=1, shuffle=True, solver='lbfgs',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False), 'classifier__activation': 'logistic', 'classifier__alpha': 0.0001, 'classifier__hidden_layer_sizes': (50,), 'classifier__max_iter': 200, 'classifier__solver': 'lbfgs'}


TypeError: 'GridSearchCV' object is not subscriptable

In [18]:
pipeline_grid_search_XGB = grid_search_wrapper(pipe = XGB_pipe, search_space = XGB_search_space, verbose=2)

Fitting 5 folds for each of 64 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:   13.1s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:  3.3min finished


In [220]:
print(pipeline_grid_search_XGB.best_estimator_)
print(pipeline_grid_search_XGB.best_score_)

Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('selector',
                 SelectKBest(k=90,
                             score_func=<function mutual_info_classif at 0x1a1ff22710>)),
                ('classifier',
                 XGBClassifier(base_score=0.5, booster='gbtree',
                               colsample_bylevel=1, colsample_bynode=1,
                               colsample_bytree=0.5, gamma=0,
                               learning_rate=0.05, max_delta_step=0,
                               max_depth=3, min_child_weight=1, missing=None,
                               n_estimators=400, n_jobs=1, nthread=None,
                               objective='multi:softprob', random_state=1,
                               reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
                               seed=None, silent=None, subsample=1,
                               verbosity=1))],
   

In [19]:
pipeline_grid_search_CB = grid_search_wrapper(pipe = CB_pipe, search_space = CB_search_space, verbose=False)

In [71]:
print(str(pipeline_grid_search_CB.best_estimator_['selector'])[14:16])
print(pipeline_grid_search_CB.best_score_)

90
0.9703703703703702


In [67]:
pps = [pipeline_grid_search_CB, pipeline_grid_search_RF]

In [69]:
pps[0].best_estimator_['classifier']

<catboost.core.CatBoostClassifier at 0x11f7decd0>

In [35]:
best_models = {} # dictionary of best models with best parameters

best_models['Logistic Regression'] = LR_model
best_models['RandomForest Classifier'] = RF_model
best_models['MLP Classifier'] = MLP_model
best_models['XGBoost Classifier'] = XGB_model
best_models['CatBoost Classifier'] = CB_model

n_features = [90, 90, 90, 90, 70]

In [43]:
# this is to get all the detailed performance meterics after selecting the best model parameters

skf=StratifiedKFold(n_splits=nfolds, shuffle=True, random_state=random_state)
k_i = -1

for model_name, model in best_models.items(): 
    k_i = k_i + 1
    accu = []
    prec = []
    rec_ = []
    f1_ = []
    bl_accu = []
    roc_ = []
    spec_ = []

    for p in p_list:
        
        df=read_input(p)
        #df.head()
        df=prep_data(df)
        # remove day_of_month variable if present in data
        if 'day_of_month' in df.columns:
            drop_col=['day_of_month']
            df=drop_cols(df, drop_col)

         #remove classes that have less then 5 samples
        min_c=df['emotion'].value_counts()
        if (min_c <= 5).any():
            r_label=min_c[min_c <= 5].index[0]
            df = df.drop(df.index[df.emotion == r_label])

        dataset = df   

        y = dataset['emotion'].copy()
        X = dataset.loc[:, dataset.columns != 'emotion'].copy()

        
        avg_ac=0.0
        avg_bl_ac=0.0
        avg_rc=0.0
        avg_pr=0.0
        avg_f1=0.0
        avg_spec=0.0
        avg_roc=0.0
        avg_kp=0.0

        i = 1
        for train_index, test_index in skf.split(X ,y):
            #print("fold", i)
            i+=1

            X_train, X_test = X.iloc[train_index], X.iloc[test_index] 
            y_train, y_test = y.iloc[train_index], y.iloc[test_index]

            #scale features
            X_train_scaled, X_test_scaled= scale_data(X_train, X_test) 
            #feature selection
            X_train, X_test = select_k_features(X_train_scaled,X_test_scaled,y_train,k=n_features[k_i])

            #oversample training data
            #X_train_imb,y_train_imb=over_sample_SMOTE(X_train, y_train)
            #X_train_imb,y_train_imb=over_sample_SMOTENC(X_train, y_train, index1, index2)
            X_train_imb,y_train_imb=over_sample_SVMSMOTE(X_train, y_train)


            # train model on imbalance-handled data
            model.fit(X_train_imb, y_train_imb)

            #train model on imbalance data 
            #model.fit(X_train, y_train)

            # test model, measure class label and probability score
            y_pred = model.predict(X_test)
            y_scores = model.predict_proba(X_test)

            #calculate metrices
            accuracy = accuracy_score(y_test, y_pred)
            bl_accuracy = balanced_accuracy_score(y_test, y_pred)
            precision=precision_score(y_test, y_pred,  average='macro',labels=np.unique(y_pred)) #'weighted', 'micro', 'micro'
            recall=recall_score(y_test, y_pred,  average='macro',labels=np.unique(y_pred))
            #kappa=cohen_kappa_score(y_pred, y_test)
            spec=specificity_score(y_test, y_pred, average='macro',labels=np.unique(y_pred))
            #roc=roc_auc_score(y_test, y_scores, multi_class='ovr', average='macro')
            f1=f1_score(y_test, y_pred,  average='macro',labels=np.unique(y_pred))

            # sometimes not all classes are present in the test set
            not_present = list(set(model.classes_)-set(y_test.unique()))
            # get that class
            if not_present:
                not_present=not_present[0] # get the element then its index
                ind= list(model.classes_).index(not_present)
                y_scores = np.delete(y_scores,ind,1) # delete it from the scores
                y_scores = y_scores / y_scores.sum(axis=1)[:,None]  #make sure sum equals ro 0 (sum of probabilities)
            else:
                pass


            roc=roc_auc_score(y_test, y_scores, multi_class='ovr', average='macro')

            ac=accuracy * 100.0
            pr=precision*100
            rc=recall*100
            f1_p=f1*100
            bl_ac=bl_accuracy*100
            roc=roc*100
            spec=spec*100


            #update average metrices in each fold
            avg_ac+=ac
            avg_bl_ac+=bl_ac
            avg_rc+=rc
            avg_pr+=pr
            avg_f1+=f1_p
            avg_roc+=roc
            avg_spec+=spec
    
        avg_ac = avg_ac/nfolds
        avg_bl_ac = avg_bl_ac/nfolds
        avg_rc = avg_rc/nfolds
        avg_pr = avg_pr/nfolds
        avg_f1 = avg_f1/nfolds
        avg_roc = avg_roc/nfolds
        avg_spec = avg_spec/nfolds
            
        accu.append(avg_ac)
        prec.append(avg_pr)
        rec_.append(avg_rc)
        f1_.append(avg_f1)
        bl_accu.append(avg_bl_ac)
        roc_.append(avg_roc)
        spec_.append(avg_spec)
    
    print('Restuls for: ', model_name)
    print_results(accu, bl_accu, prec, rec_, spec_, roc_, f1_)


Restuls for:  Logistic Regression
.....................
Average Accuracy: 64.43% (9.97)
Average Balanced_accuracy: 63.78% (9.97)
Average Precision: 57.88% (10.68)
Average Recall: 66.03% (10.27)
Average Specificity: 86.16% (7.56)
Average ROC AUC: 84.52% (7.09)
Average F1 score: 56.73% (10.58)
..................................................


Restuls for:  RandomForest Classifier
.....................
Average Accuracy: 80.67% (9.27)
Average Balanced_accuracy: 67.23% (16.30)
Average Precision: 82.42% (6.91)
Average Recall: 72.07% (10.89)
Average Specificity: 88.93% (7.63)
Average ROC AUC: 94.43% (3.23)
Average F1 score: 72.33% (10.90)
..................................................


Restuls for:  MLP Classifier
.....................
Average Accuracy: 22.90% (16.90)
Average Balanced_accuracy: 27.46% (5.24)
Average Precision: 23.76% (16.89)
Average Recall: 85.18% (14.03)
Average Specificity: 18.83% (19.29)
Average ROC AUC: 52.45% (4.91)
Average F1 score: 28.28% (15.49)
..............

In [44]:
i=6