In [1]:
from nested_cv import NestedCV

import pandas as pd
import numpy as np
from sklearn.datasets import load_boston, load_iris, load_breast_cancer
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier

# When using Random Search, we get a user warning with this little number of hyperparameters
# Suppress it
import warnings
warnings.simplefilter(action='ignore', category=UserWarning)

# Regression Example

In [2]:
# Boston housing data: feature matrix X and regression target y.
# NOTE(review): load_boston was removed in scikit-learn 1.2, so this cell only
# runs on older releases -- confirm the pinned version before re-running.
boston = load_boston()
X, y = boston.data, boston.target

# Hyperparameter candidates handed to NestedCV for the random forest
param_grid = dict(max_depth=[3, None], n_estimators=[10])

# 5 outer folds and 5 inner folds; the remaining behaviour is driven by the
# cv_options dictionary, which is passed to NestedCV unchanged.
NCV = NestedCV(
    model=RandomForestRegressor(),
    params_grid=param_grid,
    outer_kfolds=5,
    inner_kfolds=5,
    cv_options={
        'sqrt_of_score': True,
        'randomized_search_iter': 30,
        'recursive_feature_elimination': True,
        'rfe_n_features': 2,
    },
)
NCV.fit(X=X, y=y)

# One score per outer fold
NCV.outer_scores

[3.5150734873141296,
 3.8943682022945953,
 5.53512644066625,
 3.6314436719081504,
 4.489799527237382]

# Classification Example

## Breast Cancer (2 Classes)

In [4]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_auc_score

# Binary classification: breast cancer features X and class labels y
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target

# Hyperparameter candidates handed to NestedCV for the random forest
param_grid = dict(max_depth=[3, None], n_estimators=[10, 20])

# Score with ROC AUC on predicted probabilities ('predict_proba': True).
# 'metric_score_indicator_lower': False presumably tells NestedCV that a higher
# metric value is better -- confirm against the NestedCV documentation.
NCV = NestedCV(
    model=RandomForestClassifier(),
    params_grid=param_grid,
    outer_kfolds=5,
    inner_kfolds=5,
    cv_options={
        'metric': roc_auc_score,
        'metric_score_indicator_lower': False,
        'randomized_search_iter': 30,
        'predict_proba': True,
    },
)
NCV.fit(X=X, y=y)

# One ROC AUC value per outer fold
NCV.outer_scores

binary
[0 0 0 0 0 0 1 0 1 1 0 0 1 0 0 0 0 1 0 1 1 0 1 1 1 1 1 0 0 0 1 1 1 0 1 1 1
 1 1 0 0 0 1 1 1 0 0 0 0 1 0 1 1 1 1 0 1 1 0 1 1 1 0 1 1 1 0 0 1 0 1 1 1 1
 1 0 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1]
binary
[0 0 0 0 0 0 1 0 1 1 0 0 1 0 0 0 0 1 0 1 1 0 1 1 1 1 1 0 0 0 1 1 1 0 1 1 1
 1 1 0 0 0 1 1 1 0 0 0 0 1 0 1 1 1 1 0 1 1 0 1 1 1 0 1 1 1 0 0 1 0 1 1 1 1
 1 0 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1]
binary
[0 0 0 0 0 0 1 0 1 1 0 0 1 0 0 0 0 1 0 1 1 0 1 1 1 1 1 0 0 0 1 1 1 0 1 1 1
 1 1 0 0 0 1 1 1 0 0 0 0 1 0 1 1 1 1 0 1 1 0 1 1 1 0 1 1 1 0 0 1 0 1 1 1 1
 1 0 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1]
binary
[0 0 0 0 0 0 1 0 1 1 0 0 1 0 0 0 0 1 0 1 1 0 1 1 1 1 1 0 0 0 1 1 1 0 1 1 1
 1 1 0 0 0 1 1 1 0 0 0 0 1 0 1 1 1 1 0 1 1 0 1 1 1 0 1 1 1 0 0 1 0 1 1 1 1
 1 0 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1]
binary
[0 0 1 0 0 0 0 0 0 1 1 0 1 1 0 1 1 0 1 0 0 1 1 1 1 0 0 1 0 1 0 1 1 1 0 1 1
 1 0 1 1 1 0 1 0 0 0 1 1 1 0 1 0 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 0 1 1 1
 1 0 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1]
binary
[0 0 1 0 0 0 0 0 0 1 1 0 1 1

binary
[0 0 0 0 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 0 1 1 1 1 0 1 0 0 0
 1 0 1 0 1 1 0 0 1 1 0 1 1 1 1 1 1 0 0 1 0 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 1
 0 1 0 1 0 1 0 1 1 1 1 1 1 1 1 0 0]
binary
[0 0 0 0 0 0 1 1 1 1 1 0 1 0 0 1 1 0 1 0 0 1 0 1 1 1 1 1 1 0 1 0 1 1 0 0 1
 0 0 1 1 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 0 0 0 1
 1 0 1 1 1 0 0 1 0 1 0 1 1 1 1 1 1]
binary
[0 0 0 0 0 0 1 1 1 1 1 0 1 0 0 1 1 0 1 0 0 1 0 1 1 1 1 1 1 0 1 0 1 1 0 0 1
 0 0 1 1 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 0 0 0 1
 1 0 1 1 1 0 0 1 0 1 0 1 1 1 1 1 1]
binary
[0 0 0 0 0 0 1 1 1 1 1 0 1 0 0 1 1 0 1 0 0 1 0 1 1 1 1 1 1 0 1 0 1 1 0 0 1
 0 0 1 1 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 0 0 0 1
 1 0 1 1 1 0 0 1 0 1 0 1 1 1 1 1 1]
binary
[0 0 0 0 0 0 1 1 1 1 1 0 1 0 0 1 1 0 1 0 0 1 0 1 1 1 1 1 1 0 1 0 1 1 0 0 1
 0 0 1 1 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 0 0 0 1
 1 0 1 1 1 0 0 1 0 1 0 1 1 1 1 1 1]
binary
[0 0 0 1 0 0 0 0 0 1 1 0 1 1

binary
[0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 0 0 1 1 1 1 0 1 1 0 1 1 0 0 0 1 0
 1 0 1 1 0 1 0 1 1 1 1 0 0 0 0 1 1 1 1 1 1 1 1 0 1 1 1 1 0 1 0 1 1 0 1 0 0
 0 1 1 0 1 1 1 1 0 1 0 1 1 0 1 1 1]
binary
[0 1 0 1 1 1 0 1 1 0 1 0 0 1 1 1 0 0 1 0 0 0 1 1 1 0 1 1 1 1 0 0 0 0 0 0 0
 1 0 0 0 0 1 0 0 1 1 1 1 0 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1
 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 0]
binary
[0 1 0 1 1 1 0 1 1 0 1 0 0 1 1 1 0 0 1 0 0 0 1 1 1 0 1 1 1 1 0 0 0 0 0 0 0
 1 0 0 0 0 1 0 0 1 1 1 1 0 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1
 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 0]
binary
[0 1 0 1 1 1 0 1 1 0 1 0 0 1 1 1 0 0 1 0 0 0 1 1 1 0 1 1 1 1 0 0 0 0 0 0 0
 1 0 0 0 0 1 0 0 1 1 1 1 0 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1
 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 0]
binary
[0 1 0 1 1 1 0 1 1 0 1 0 0 1 1 1 0 0 1 0 0 0 1 1 1 0 1 1 1 1 0 0 0 0 0 0 0
 1 0 0 0 0 1 0 0 1 1 1 1 0 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1
 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 0]
binary
[0 0 0 0 1 1 0 0 0 1 0 0 1 1

[0.9969155844155844,
 0.9737851662404092,
 0.9945652173913043,
 0.992226972405752,
 0.9825163398692811]

## Iris (3 Classes)

In [3]:
from sklearn.metrics import roc_auc_score, recall_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import label_binarize

# Multiclass classification: iris features X and integer class labels y
iris = load_iris()
X = iris.data
y = iris.target

# Hyperparameter candidates handed to NestedCV for the random forest
param_grid = {
     'max_depth': [3, None],
     'n_estimators': [10, 20]
}

# One-hot encode the three classes so roc_auc_score receives an indicator matrix.
# `classes` must be passed by keyword: it is keyword-only in current scikit-learn,
# and the keyword form also works on older releases.
y = label_binarize(y, classes=[0, 1, 2])

# NOTE(review): the recorded run of this cell ends in
#   ValueError: Found input variables with inconsistent numbers of samples: [24, 3]
# Presumably the multilabel-indicator target makes predict_proba return one array
# per label (a list of 3) instead of a single (n_samples, 3) array, which
# roc_auc_score cannot align with y -- confirm against NestedCV's scoring code.
NCV = NestedCV(model=RandomForestClassifier(), params_grid=param_grid, outer_kfolds=5, inner_kfolds=5,
               cv_options={'metric':roc_auc_score, 'metric_score_indicator_lower':False,
                           'randomized_search_iter':30, 'predict_proba':True})
NCV.fit(X=X,y=y)

NCV.outer_scores

multilabel-indicator
[[1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]]


ValueError: Found input variables with inconsistent numbers of samples: [24, 3]