# Import Basic Libraries

In [1]:
import pandas as pd
import numpy as np

# Import libraries for ml methods
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.metrics import make_scorer, matthews_corrcoef, balanced_accuracy_score, \
    f1_score, fbeta_score, recall_score, precision_score, average_precision_score, accuracy_score

# import optuna for hyperparameter tuning
import optuna
from optuna.samplers import TPESampler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import StratifiedKFold, cross_val_score

# Data loading

In [2]:
data = pd.read_csv('Hepatitis_C.csv')
data

Unnamed: 0,Age,Sex,ALB,ALP,ALT,AST,BIL,CHE,CHOL,CREA,GGT,PROT,label
0,32,0,43.2,52.0,30.6,22.6,18.9,7.33,4.74,80.0,33.8,75.7,0
1,45,0,41.7,73.2,43.6,29.4,6.4,8.89,5.31,71.0,67.4,70.3,0
2,55,0,41.5,59.5,15.4,16.2,6.8,6.35,5.22,80.0,12.4,69.9,0
3,53,0,37.8,98.1,30.5,21.1,4.0,5.02,4.42,94.0,23.2,65.2,0
4,56,1,39.7,66.0,14.2,20.8,3.5,7.48,5.88,66.0,7.2,67.2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
199,62,1,32.0,416.6,5.9,110.3,50.0,5.57,6.30,55.7,650.9,68.5,1
200,64,1,24.0,102.8,2.9,44.4,20.0,1.54,3.02,63.0,35.9,71.3,1
201,64,1,29.0,87.3,3.5,99.0,48.0,1.66,3.63,66.7,64.2,82.0,1
202,46,1,33.0,62.7,39.0,62.0,20.0,3.56,4.20,52.0,50.0,71.0,1


## Split data to X and y

In [3]:
X = data.drop('label', axis=1).copy()
y = data['label'].copy()

print(X.shape, y.shape)

(204, 12) (204,)


# Normalize data

In [4]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X = scaler.fit_transform(X)

print(X.shape, y.shape)

(204, 12) (204,)


## Train / Test split of data

In [5]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, \
                                                    stratify=y, random_state=42)

### Define the ML models

In [6]:
# Define classifiers
classifiers = {
    'LR':  LogisticRegression(),
    'GNB': GaussianNB(),
    'kNN': KNeighborsClassifier(),
    'LDA': LinearDiscriminantAnalysis(),
    'SVM': SVC()
}

## Define functions

In [7]:
def objective(trial, model):
    # -- Tune estimator algorithm
    # Wrap the objective inside a lambda and call objective inside it
#     params_svc = {
#             'C':trial.suggest_float("C", 0.01, 10),
#             'gamma':trial.suggest_float('gamma', 0.01, 1)
#         }
    
    params_lg = {
         'tol' : trial.suggest_float('tol' , 1e-6 , 1e-3),
         'C' : trial.suggest_float("C", 1e-2, 1),
         'solver' : trial.suggest_categorical('solver' , ['lbfgs','liblinear']), 
    }
    
#     if type(model) is SVC:
#         params = params_svc 
#     elif type(model) is LogisticRegression:
#         params = params_lg
    
    params=params_lg
    
    clf = model.set_params(**params)
    
    # -- Cross-validate the features reduced by dimensionality reduction methods
    cv_inner = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
    
    score = cross_val_score(clf, X_train, y_train, 
                            scoring='f1_macro', cv=cv_inner)
    score = score.mean()
    return score

# Build nested Cross Validation (nCV) pipeline

For the outer loop we will use K=5 folds and for the inner loop L=3 folds.

In [8]:
from sklearn.model_selection import StratifiedKFold, cross_val_score

cv_outer = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_inner = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

In [None]:
test_score_nested = []

N_TRIALS = 3
for i in range(N_TRIALS):

#     clf = SVC()
    clf = LogisticRegression()
    
    _objective = lambda trial: objective(trial, clf)
    
    sampler = TPESampler(seed=42) # create a seed for the sampler for reproducibility
    study = optuna.create_study(direction="maximize", sampler=sampler)
    study.optimize(_objective, n_trials=250)
    model = clf.set_params(**study.best_params)
    
    model.fit(X_train, y_train)
   
    # Nested CV with parameter optimization
    test_score = cross_val_score(model, X_train, y_train, cv=cv_outer, scoring='matthews_corrcoef', n_jobs=2)
    
    test_score_nested.append(test_score.mean())

[32m[I 2023-04-06 19:27:24,834][0m A new study created in memory with name: no-name-3495ff35-5138-4726-b139-1e42cb038e73[0m
[32m[I 2023-04-06 19:27:24,868][0m Trial 0 finished with value: 0.8902359508396659 and parameters: {'tol': 0.0003751655787285152, 'C': 0.951207163345817, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.8902359508396659.[0m
[32m[I 2023-04-06 19:27:24,884][0m Trial 1 finished with value: 0.8727831308476469 and parameters: {'tol': 0.0001568626218019941, 'C': 0.16443457513284063, 'solver': 'liblinear'}. Best is trial 0 with value: 0.8902359508396659.[0m
[32m[I 2023-04-06 19:27:24,977][0m Trial 2 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0006015138967314657, 'C': 0.710991852018085, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:24,997][0m Trial 3 finished with value: 0.8727831308476469 and parameters: {'tol': 0.0008326101981596214, 'C': 0.2202157195714934, 'solver': 'liblinear'}

[32m[I 2023-04-06 19:27:25,766][0m Trial 35 finished with value: 0.8985373862463645 and parameters: {'tol': 0.00035734726573836295, 'C': 0.6777453375390909, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:25,790][0m Trial 36 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0005418969644086019, 'C': 0.8877452262131279, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:25,821][0m Trial 37 finished with value: 0.8821916924664602 and parameters: {'tol': 0.000648545295800266, 'C': 0.7702695161143545, 'solver': 'lbfgs'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:25,844][0m Trial 38 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0005766662726775756, 'C': 0.6007968602850109, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:25,873][0m Trial 39 finished with value: 0

[32m[I 2023-04-06 19:27:26,651][0m Trial 71 finished with value: 0.8985373862463645 and parameters: {'tol': 0.00046006442872303536, 'C': 0.613874056556646, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:26,676][0m Trial 72 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0005291332751630922, 'C': 0.5686701961623192, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:26,698][0m Trial 73 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0005002954683987303, 'C': 0.6607149149079643, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:26,726][0m Trial 74 finished with value: 0.8985373862463645 and parameters: {'tol': 0.00032342412109438517, 'C': 0.7120538645428148, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:26,752][0m Trial 75 finished with val

[32m[I 2023-04-06 19:27:27,577][0m Trial 107 finished with value: 0.8821916924664602 and parameters: {'tol': 0.0009138162232497099, 'C': 0.7135434669846512, 'solver': 'lbfgs'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:27,601][0m Trial 108 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0005609430870977315, 'C': 0.7743283073356466, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:27,626][0m Trial 109 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0004945744356795244, 'C': 0.7524996176905668, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:27,653][0m Trial 110 finished with value: 0.8985373862463645 and parameters: {'tol': 0.000468373409518318, 'C': 0.8564763566172857, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:27,675][0m Trial 111 finished with valu

[32m[I 2023-04-06 19:27:28,463][0m Trial 142 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0006337241258198478, 'C': 0.7698068849349278, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:28,486][0m Trial 143 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0006107451845682508, 'C': 0.7197738559203231, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:28,510][0m Trial 144 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0006614101978059692, 'C': 0.805457931962202, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:28,536][0m Trial 145 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0005609443567759839, 'C': 0.6800865459350738, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:28,560][0m Trial 146 finished with 

[32m[I 2023-04-06 19:27:29,366][0m Trial 177 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0004922174192490084, 'C': 0.533297411265371, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:29,392][0m Trial 178 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0008617216677303326, 'C': 0.7530817529696034, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:29,417][0m Trial 179 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0006202273124999179, 'C': 0.714937497896521, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:29,445][0m Trial 180 finished with value: 0.8985373862463645 and parameters: {'tol': 0.000901959413985394, 'C': 0.9270456562529419, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:29,471][0m Trial 181 finished with va

[32m[I 2023-04-06 19:27:30,308][0m Trial 212 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0006009154118830276, 'C': 0.7355018497909418, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:30,333][0m Trial 213 finished with value: 0.8985373862463645 and parameters: {'tol': 0.000417149793091299, 'C': 0.7538175982796836, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:30,360][0m Trial 214 finished with value: 0.8985373862463645 and parameters: {'tol': 8.677885243214667e-05, 'C': 0.7997683665019419, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:30,389][0m Trial 215 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0003860803558126794, 'C': 0.979833240676418, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:30,414][0m Trial 216 finished with v

[32m[I 2023-04-06 19:27:31,262][0m Trial 247 finished with value: 0.8985373862463645 and parameters: {'tol': 0.000939601177634649, 'C': 0.7765658623422487, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:31,291][0m Trial 248 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0006822578171030207, 'C': 0.9392811680448383, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:31,315][0m Trial 249 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0007220841284347342, 'C': 0.7328155062885441, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:32,286][0m A new study created in memory with name: no-name-28434be5-7183-4005-9d95-f704fb7cc77f[0m
[32m[I 2023-04-06 19:27:32,314][0m Trial 0 finished with value: 0.8902359508396659 and parameters: {'tol': 0.0003751655787285152, 'C': 0.951207163345817, 'solver': 'lb

[32m[I 2023-04-06 19:27:33,021][0m Trial 32 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0005180076088261638, 'C': 0.700852672341627, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:33,043][0m Trial 33 finished with value: 0.8985373862463645 and parameters: {'tol': 0.00047577321168817326, 'C': 0.8838983499455, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:33,069][0m Trial 34 finished with value: 0.8985373862463645 and parameters: {'tol': 0.00043395849308110743, 'C': 0.7660996210209298, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:33,092][0m Trial 35 finished with value: 0.8985373862463645 and parameters: {'tol': 0.00035734726573836295, 'C': 0.6777453375390909, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:33,112][0m Trial 36 finished with value

[32m[I 2023-04-06 19:27:33,888][0m Trial 68 finished with value: 0.8902359508396659 and parameters: {'tol': 0.000410499905659391, 'C': 0.8611154239847328, 'solver': 'lbfgs'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:33,910][0m Trial 69 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0005552297582754955, 'C': 0.8066068492968977, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:33,931][0m Trial 70 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0006752703495983874, 'C': 0.8994632429455924, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:33,952][0m Trial 71 finished with value: 0.8985373862463645 and parameters: {'tol': 0.00046006442872303536, 'C': 0.613874056556646, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:33,972][0m Trial 72 finished with value: 0.

[32m[I 2023-04-06 19:27:34,747][0m Trial 104 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0005065179838434006, 'C': 0.793816211392052, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:34,771][0m Trial 105 finished with value: 0.8985373862463645 and parameters: {'tol': 0.00026235337556846814, 'C': 0.6660872384705353, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:34,792][0m Trial 106 finished with value: 0.8985373862463645 and parameters: {'tol': 0.00044890879292260646, 'C': 0.8321867161838228, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:34,825][0m Trial 107 finished with value: 0.8821916924664602 and parameters: {'tol': 0.0009138162232497099, 'C': 0.7135434669846512, 'solver': 'lbfgs'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:34,847][0m Trial 108 finished with va

[32m[I 2023-04-06 19:27:35,639][0m Trial 139 finished with value: 0.8902359508396659 and parameters: {'tol': 0.0007597002137538744, 'C': 0.9934039527509005, 'solver': 'lbfgs'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:35,664][0m Trial 140 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0007854466859933562, 'C': 0.930033426133673, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:35,690][0m Trial 141 finished with value: 0.8985373862463645 and parameters: {'tol': 0.000837255455008839, 'C': 0.7445514221252614, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:35,715][0m Trial 142 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0006337241258198478, 'C': 0.7698068849349278, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:35,737][0m Trial 143 finished with value

[32m[I 2023-04-06 19:27:36,556][0m Trial 174 finished with value: 0.8985373862463645 and parameters: {'tol': 0.00040650277073176074, 'C': 0.7782415872885357, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:36,579][0m Trial 175 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0007341784569200506, 'C': 0.8127684370311078, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:36,605][0m Trial 176 finished with value: 0.8985373862463645 and parameters: {'tol': 5.5997568493349224e-05, 'C': 0.691023042953905, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:36,627][0m Trial 177 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0004922174192490084, 'C': 0.533297411265371, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:36,651][0m Trial 178 finished with

[32m[I 2023-04-06 19:27:37,457][0m Trial 209 finished with value: 0.8904931278731588 and parameters: {'tol': 0.000448103044836499, 'C': 0.5104904650794382, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:37,490][0m Trial 210 finished with value: 0.8902359508396659 and parameters: {'tol': 0.0007978374346701169, 'C': 0.8394848601899024, 'solver': 'lbfgs'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:37,516][0m Trial 211 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0007572437533747436, 'C': 0.7788590899088377, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:37,544][0m Trial 212 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0006009154118830276, 'C': 0.7355018497909418, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:37,569][0m Trial 213 finished with valu

[32m[I 2023-04-06 19:27:38,384][0m Trial 244 finished with value: 0.8902359508396659 and parameters: {'tol': 0.0003144197934780862, 'C': 0.8104567429867643, 'solver': 'lbfgs'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:38,408][0m Trial 245 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0005686141569845404, 'C': 0.8366574723251827, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:38,436][0m Trial 246 finished with value: 0.8526456811048693 and parameters: {'tol': 0.00047314460150195376, 'C': 0.04089241268487892, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:38,464][0m Trial 247 finished with value: 0.8985373862463645 and parameters: {'tol': 0.000939601177634649, 'C': 0.7765658623422487, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:38,489][0m Trial 248 finished with va

[32m[I 2023-04-06 19:27:39,162][0m Trial 29 finished with value: 0.8902359508396659 and parameters: {'tol': 0.0007862289326179071, 'C': 0.9243109440171117, 'solver': 'lbfgs'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:39,182][0m Trial 30 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0004122682214232088, 'C': 0.8016112025478039, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:39,201][0m Trial 31 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0005393587385208191, 'C': 0.7675167938934515, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:39,225][0m Trial 32 finished with value: 0.8985373862463645 and parameters: {'tol': 0.0005180076088261638, 'C': 0.700852672341627, 'solver': 'liblinear'}. Best is trial 2 with value: 0.8985373862463645.[0m
[32m[I 2023-04-06 19:27:39,247][0m Trial 33 finished with value: 0.

In [None]:
print(model)

model.fit(X_train, y_train)
y_pred = model.predict(X_test)

matthews_corrcoef(y_pred=y_pred, y_true=y_test)

In [None]:
all_scores = pd.DataFrame(test_score_nested)
all_scores

In [None]:
# automatic nested cross-validation for random forest on a classification dataset
from numpy import mean
from numpy import std
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
# create dataset
X, y = make_classification(n_samples=200, n_features=5, random_state=1)
# configure the cross-validation procedure
cv_inner = KFold(n_splits=3, shuffle=True, random_state=1)
# define the model
model = RandomForestClassifier(random_state=1)
# define search space
space = dict()
space['n_estimators'] = [10, 100, 500]
space['max_features'] = [2, 4, 6]
# define search
search = GridSearchCV(model, space, scoring='accuracy', n_jobs=1, cv=cv_inner, refit=True)
# configure the cross-validation procedure
cv_outer = KFold(n_splits=10, shuffle=True, random_state=1)
# execute the nested cross-validation
scores = cross_val_score(search, X, y, scoring='accuracy', cv=cv_outer, n_jobs=-1)
# report performance
print('Accuracy: %.3f (%.3f)' % (mean(scores), std(scores)))