# Data Modelling / Model Assessment
### - evaluate_model_cv_with_tuning

This function performs nested cross-validation (also called double cross-validation): hyper-parameters are
tuned in an inner cross-validation loop, while an outer loop estimates the generalization error. This reduces
the bias that arises when the two tasks of model selection and generalization error estimation are combined.
Note, however, that the purpose of this function is not to select the best model instance of a model
family, but to provide a less biased estimate of a tuned model's performance on the dataset.

In [11]:
# Import the libraries

import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold
from ai_toolbox.data_modelling import evaluate_model_cv_with_tuning_parallel, evaluate_model_cv_with_tuning

In [12]:
# Import dataset

# Load the breast-cancer data as a single DataFrame (features plus 'target')
df = load_breast_cancer(as_frame=True).frame

# Labels come from the 'target' column; features are every column before it
# (the displayed frame shows 'target' as the trailing column)
y = df['target']
X = df.iloc[:, :-1]

# Display the full frame as the cell output
df

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890,0
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902,0
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300,0
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115,0
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637,0
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820,0
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,...,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400,0


In [13]:
%%time

# Run the function and check the elapsed time

# Splitters handed to the function as cv_outer (5 folds) and cv_inner (3 folds);
# both shuffle with a fixed seed so the run is repeatable
cv_splitter_outer = KFold(n_splits=5, shuffle=True, random_state=1)
cv_splitter_inner = KFold(n_splits=3, shuffle=True, random_state=1)

# Hyper-parameter grid for the model family (3 x 3 = 9 candidate combinations)
space = {
    'n_estimators': [10, 100, 500],
    'max_features': [2, 4, 6],
}

# Nested cross-validation of a random forest, scored on three metrics
scores, cv_results = evaluate_model_cv_with_tuning(
    model_family=RandomForestClassifier(random_state=1),
    X_data=X,
    y_data=y,
    parameter_grid=space,
    cv_inner=cv_splitter_inner,
    cv_outer=cv_splitter_outer,
    scoring=['precision', 'recall', 'accuracy'],
)


CPU times: user 2.19 s, sys: 59.7 ms, total: 2.25 s
Wall time: 11.2 s


In [14]:
# Tabulate the tuning results returned by the run above
# (presumably one row per outer fold - 5 rows are displayed; confirm against the function's docs)
pd.DataFrame.from_dict(cv_results, orient='columns')

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,"[0.028559287389119465, 0.17663009961446127, 0....","[0.005444097320095468, 0.00736860608888945, 0....","[0.005029042561848958, 0.01341557502746582, 0....","[0.0009879951681465114, 9.892178104777002e-05,...","[2, 2, 2, 4, 4, 4, 6, 6, 6]","[10, 100, 500, 10, 100, 500, 10, 100, 500]","[{'max_features': 2, 'n_estimators': 10}, {'ma...","[0.9539473684210527, 0.9473684210526315, 0.967...","[0.9736842105263158, 0.9605263157894737, 0.967...","[0.9139072847682119, 0.9337748344370861, 0.927...","[0.947179621238527, 0.9472231904263971, 0.9537...","[0.024868614955634423, 0.010921729337666952, 0...","[9, 8, 7, 3, 5, 1, 6, 3, 2]"
1,"[0.017255783081054688, 0.1770310401916504, 0.8...","[0.0032346994521889414, 0.004224368800240688, ...","[0.003977060317993164, 0.013983647028605143, 0...","[0.0001095952922899814, 0.00024226912515219913...","[2, 2, 2, 4, 4, 4, 6, 6, 6]","[10, 100, 500, 10, 100, 500, 10, 100, 500]","[{'max_features': 2, 'n_estimators': 10}, {'ma...","[0.9605263157894737, 0.9736842105263158, 0.986...","[0.9407894736842105, 0.9013157894736842, 0.907...","[0.9602649006622517, 0.9602649006622517, 0.973...","[0.953860230045312, 0.9450883002207506, 0.9560...","[0.009243036598998567, 0.03143293381294502, 0....","[2, 5, 1, 4, 3, 7, 6, 7, 9]"
2,"[0.01980288823445638, 0.16540996233622232, 0.8...","[0.00046406641071439534, 0.0015246453655303606...","[0.0037862459818522134, 0.014478762944539389, ...","[6.331239173720057e-05, 0.0008596563400933898,...","[2, 2, 2, 4, 4, 4, 6, 6, 6]","[10, 100, 500, 10, 100, 500, 10, 100, 500]","[{'max_features': 2, 'n_estimators': 10}, {'ma...","[0.9736842105263158, 0.9671052631578947, 0.967...","[0.9342105263157895, 0.9407894736842105, 0.947...","[0.9602649006622517, 0.9735099337748344, 0.966...","[0.9560532125014524, 0.9604682235389799, 0.960...","[0.016387936094598203, 0.01415850388312766, 0....","[6, 1, 2, 5, 4, 6, 3, 9, 6]"
3,"[0.022556066513061523, 0.19011346499125162, 0....","[0.0077725830997223, 0.009183756811449388, 0.0...","[0.0042905012766520185, 0.014285723368326822, ...","[0.00024578896010848554, 0.0005468779078437436...","[2, 2, 2, 4, 4, 4, 6, 6, 6]","[10, 100, 500, 10, 100, 500, 10, 100, 500]","[{'max_features': 2, 'n_estimators': 10}, {'ma...","[0.9407894736842105, 0.9407894736842105, 0.940...","[0.9736842105263158, 0.9605263157894737, 0.953...","[0.9668874172185431, 0.9602649006622517, 0.973...","[0.9604537004763566, 0.953860230045312, 0.9560...","[0.014178868344990478, 0.009243036598998567, 0...","[2, 4, 3, 5, 1, 6, 6, 9, 6]"
4,"[0.017019033432006836, 0.18534978230794272, 0....","[0.0013657203920068237, 0.0043436023085365715,...","[0.003883520762125651, 0.013503472010294596, 0...","[1.2169991200060489e-05, 0.0003276446353841858...","[2, 2, 2, 4, 4, 4, 6, 6, 6]","[10, 100, 500, 10, 100, 500, 10, 100, 500]","[{'max_features': 2, 'n_estimators': 10}, {'ma...","[0.9407894736842105, 0.9736842105263158, 0.967...","[0.9342105263157895, 0.9605263157894737, 0.967...","[0.9473684210526315, 0.9342105263157895, 0.940...","[0.9407894736842105, 0.956140350877193, 0.9583...","[0.005371688032419222, 0.01641077801216641, 0....","[8, 4, 2, 8, 2, 6, 4, 1, 6]"


In [15]:
# Show the aggregated scores: a 'mean' and 'std' entry for each requested metric
# (presumably aggregated across the outer folds - confirm against the function's docs)
scores

{'test_precision': {'mean': 0.9611462146749723, 'std': 0.021914632642793398},
 'test_recall': {'mean': 0.9724064009020804, 'std': 0.024089991459525565},
 'test_accuracy': {'mean': 0.9578481602235677, 'std': 0.016966367544203876}}