# DT model optimization notebook
Author: Ida Thrane (idth@itu.dk)

In this notebook the best parameters of the DT model are found.

In [1]:
#Import libraries
import pickle
import pandas as pd

from numpy import unique, bincount, array 
from numpy import mean as mean_np 
from numpy import std as std_np
from sklearn.metrics import accuracy_score, make_scorer
from imblearn.metrics import geometric_mean_score

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV

from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler

from models import create_dt_model

2023-08-31 14:34:57.075460: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Load data

In [4]:
with open('data/X_train.pickle', 'rb') as file:
    #X_train = pickle.load(file)
    X_train = pd.read_pickle(file)
    X_train = X_train.values
    
with open('data/y_train.pickle', 'rb') as file:
    #y_train = pickle.load(file)
    y_train = pd.read_pickle(file)
    y_train = y_train.values.ravel()

In [5]:
#Instantiate variables

#Define random state
random_state = 42

# Define inverse class weights
weight_minor_class, weight_major_class = len(y_train) / (len(unique(y_train)) * bincount(y_train.ravel().astype('int64')))

#Define list class weights to try in grid search
weights = [{0: 1, 1: 1}, 
           {0: weight_minor_class, 1: weight_major_class}, 
           {0: weight_major_class, 1: weight_minor_class}, 
           {0:1, 1:10},
           {0:1, 1:50}]

#Define list of tree depths to try in grid search
tree_depths = [2, 3, 10, 50, 100]

#Define dictionary of grid parameters
grid_param = dict(class_weight=weights, 
                  max_depth=tree_depths) 

#Define number of splits
n_splits = 9

#Define metrics to optimise for
metrics = {'accuracy': make_scorer(accuracy_score),
           'g-mean': make_scorer(geometric_mean_score)}

#Define refitting metric
refitting_metric = 'g-mean'

In [7]:
def grid_search(model, 
                X, 
                y, 
                grid_param, 
                n_splits, 
                metrics, 
                refitting_metric, 
                random_state):
    
    #Define stratified cross validation
    cv = StratifiedKFold(n_splits=n_splits, 
                         shuffle=True,
                         random_state=random_state)
    
    #Define grid search
    grid = GridSearchCV(estimator=model, 
                        param_grid=grid_param,
                        cv=StratifiedKFold(n_splits=n_splits,
                                           shuffle=True,
                                           random_state=random_state), 
                        scoring=metrics,
                        refit=refitting_metric,
                        verbose=2)
    
    #Fit grid search to input 
    grid_search = grid.fit(X, y)
    print(grid_search)

    #Means
    mean_accuracy = array(grid_search.cv_results_['mean_test_accuracy'])
    mean_geometric_mean = array(grid_search.cv_results_['mean_test_g-mean'])
    #Standard deviations
    std_accuracy = array(grid_search.cv_results_['std_test_accuracy'])
    std_geometric_mean = array(grid_search.cv_results_['std_test_g-mean'])

    params = grid_search.cv_results_['params']
    #Print results
    for mean_acc, std_acc, mean_g_mean, std_g_mean, param in zip(mean_accuracy, std_accuracy, mean_geometric_mean, std_geometric_mean, params):
        print("Parameters: ", param)
        print("Average accuracy: ", mean_acc,  "Stand. deviation: ", std_acc)
        print("\nAverage geometric mean: ", mean_g_mean, "Stand. deviation: ", std_g_mean)
        
    print("\nBest geometric mean: %f \nParameters: %s" % (grid_search.best_score_, grid_search.best_params_))


### Test original data

In [8]:
#Instantiate model to optimise
initial_dt_model = DecisionTreeClassifier(random_state=random_state)

#Run optimization function
grid_search_optimise = grid_search(initial_dt_model, 
            X_train, 
            y_train, 
            grid_param = grid_param, 
            n_splits = n_splits, 
            metrics = metrics, 
            refitting_metric = refitting_metric, 
            random_state = random_state)

Fitting 9 folds for each of 25 candidates, totalling 225 fits
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.1s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.1s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.1s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=3; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=3; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}

[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=3; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=3; total tim

[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.1s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.3s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.2s
[CV] END ............class_w

### Grid search with oversampled data (sampling strategy 0.10, 0.30 and 0.50)

In [68]:
###Sampling strategy 0.10

oversample10 = RandomOverSampler(sampling_strategy=0.10)
over_X_10, over_y_10 = oversample10.fit_resample(X_train, y_train)

In [74]:
#Instantiate model to optimise
initial_dt_model_over_10 = DecisionTreeClassifier(random_state=random_state)

#Run optimization function
grid_search_optimise = grid_search(initial_dt_model_over_10, 
            over_X_10, 
            over_y_10, 
            grid_param = grid_param, 
            n_splits = n_splits, 
            metrics = metrics, 
            refitting_metric = refitting_metric, 
            random_state = random_state)

Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.3s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.3s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.3s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.3s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.3s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=3; total time=   0.3s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=3; total time=   0.4s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=3; total time=   0.4s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=3; total time=   0.4s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=3; total time=   0.4s
[CV] END ............class_weight={0: 1, 1: 1}, max_depth=10; total time=   1.0s
[CV] END ............class_weight={0: 1, 1: 1},

[CV] END ...........class_weight={0: 1, 1: 10}, max_depth=10; total time=   0.9s
[CV] END ...........class_weight={0: 1, 1: 10}, max_depth=50; total time=   1.4s
[CV] END ...........class_weight={0: 1, 1: 10}, max_depth=50; total time=   1.4s
[CV] END ...........class_weight={0: 1, 1: 10}, max_depth=50; total time=   1.4s
[CV] END ...........class_weight={0: 1, 1: 10}, max_depth=50; total time=   1.6s
[CV] END ...........class_weight={0: 1, 1: 10}, max_depth=50; total time=   1.5s
[CV] END ..........class_weight={0: 1, 1: 10}, max_depth=100; total time=   1.4s
[CV] END ..........class_weight={0: 1, 1: 10}, max_depth=100; total time=   1.5s
[CV] END ..........class_weight={0: 1, 1: 10}, max_depth=100; total time=   1.4s
[CV] END ..........class_weight={0: 1, 1: 10}, max_depth=100; total time=   1.4s
[CV] END ..........class_weight={0: 1, 1: 10}, max_depth=100; total time=   1.3s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.3s
[CV] END ............class_w

In [9]:
###Sampling strategy 0.30

oversample30 = RandomOverSampler(sampling_strategy=0.30)
over_X_30, over_y_30 = oversample30.fit_resample(X_train, y_train)

In [10]:
#Instantiate model to optimise
initial_dt_model_over_30 = DecisionTreeClassifier(random_state=random_state)

#Run optimization function
grid_search_optimise = grid_search(initial_dt_model_over_30, 
            over_X_30, 
            over_y_30, 
            grid_param = grid_param, 
            n_splits = n_splits, 
            metrics = metrics, 
            refitting_metric = refitting_metric, 
            random_state = random_state)

Fitting 9 folds for each of 25 candidates, totalling 225 fits
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=3; total time=   0.3s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=3; total time=   0.3s
[CV] END .............class_weight={0: 1, 1: 1}

[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=3; total time=   0.3s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=3; total tim

[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.3s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.2s
[CV] END ............class_w

In [11]:
###Sampling strategy 0.50

oversample50 = RandomOverSampler(sampling_strategy=0.50)
over_X_50, over_y_50 = oversample50.fit_resample(X_train, y_train)

In [12]:
#Instantiate model to optimise
initial_dt_model_over_50 = DecisionTreeClassifier(random_state=random_state)

#Run optimization function
grid_search_optimise = grid_search(initial_dt_model_over_50, 
            over_X_50, 
            over_y_50, 
            grid_param = grid_param, 
            n_splits = n_splits, 
            metrics = metrics, 
            refitting_metric = refitting_metric, 
            random_state = random_state)

Fitting 9 folds for each of 25 candidates, totalling 225 fits
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.3s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=3; total time=   0.3s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=3; total time=   0.3s
[CV] END .............class_weight={0: 1, 1: 1}

[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.2s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=3; total tim

[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.2s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.3s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.3s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.3s
[CV] END ............class_w

### Grid search with undersampled data

In [13]:
###Sampling strategy 0.10

undersample10 = RandomUnderSampler(sampling_strategy=0.10)
under_X_10, under_y_10 = undersample10.fit_resample(X_train, y_train)

In [14]:
#Instantiate model to optimise
initial_dt_model_under_10 = DecisionTreeClassifier(random_state=random_state)

#Run optimization function
grid_search_optimise = grid_search(initial_dt_model_under_10, 
            under_X_10, 
            under_y_10, 
            grid_param = grid_param, 
            n_splits = n_splits, 
            metrics = metrics, 
            refitting_metric = refitting_metric, 
            random_state = random_state)

Fitting 9 folds for each of 25 candidates, totalling 225 fits
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.1s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.1s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.1s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.1s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.1s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.1s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.1s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.1s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.1s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=3; total time=   0.2s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=3; total time=   0.1s
[CV] END .............class_weight={0: 1, 1: 1}

[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.1s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.1s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.1s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.1s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.1s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.1s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.1s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=3; total time=   0.1s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=3; total time=   0.1s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=3; total tim

[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.1s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.1s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.1s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=2; total time=   0.1s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.1s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.1s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.1s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.1s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.1s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.1s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.1s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.1s
[CV] END ............class_w

In [15]:
###Sampling strategy 0.30

undersample30 = RandomUnderSampler(sampling_strategy=0.30)
under_X_30, under_y_30 = undersample30.fit_resample(X_train, y_train)

In [16]:
#Instantiate model to optimise
initial_dt_model_under_30 = DecisionTreeClassifier(random_state=random_state)

#Run optimization function
grid_search_optimise = grid_search(initial_dt_model_under_30, 
            under_X_30, 
            under_y_30, 
            grid_param = grid_param, 
            n_splits = n_splits, 
            metrics = metrics, 
            refitting_metric = refitting_metric, 
            random_state = random_state)

Fitting 9 folds for each of 25 candidates, totalling 225 fits
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.0s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.0s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.0s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.0s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.0s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.0s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.0s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.0s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.0s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=3; total time=   0.0s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=3; total time=   0.0s
[CV] END .............class_weight={0: 1, 1: 1}

[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.0s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.0s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.0s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=3; total time=   0.0s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=3; total time=   0.0s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=3; total time=   0.0s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=3; total time=   0.0s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=3; total time=   0.0s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=3; total time=   0.0s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=3; total tim

[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.0s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.0s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.0s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.0s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.0s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.0s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.0s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.0s
[CV] END ...........class_weight={0: 1, 1: 50}, max_depth=10; total time=   0.1s
[CV] END ...........class_weight={0: 1, 1: 50}, max_depth=10; total time=   0.1s
[CV] END ...........class_weight={0: 1, 1: 50}, max_depth=10; total time=   0.1s
[CV] END ...........class_weight={0: 1, 1: 50}, max_depth=10; total time=   0.1s
[CV] END ...........class_we

In [17]:
###Sampling strategy 0.30

undersample50 = RandomUnderSampler(sampling_strategy=0.50)
under_X_50, under_y_50 = undersample50.fit_resample(X_train, y_train)

In [18]:
#Instantiate model to optimise
initial_dt_model_under_50 = DecisionTreeClassifier(random_state=random_state)

#Run optimization function
grid_search_optimise = grid_search(initial_dt_model_under_50, 
            under_X_50, 
            under_y_50, 
            grid_param = grid_param, 
            n_splits = n_splits, 
            metrics = metrics, 
            refitting_metric = refitting_metric, 
            random_state = random_state)

Fitting 9 folds for each of 25 candidates, totalling 225 fits
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.0s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.0s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.0s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.0s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.0s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.0s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.0s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.0s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=2; total time=   0.0s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=3; total time=   0.0s
[CV] END .............class_weight={0: 1, 1: 1}, max_depth=3; total time=   0.0s
[CV] END .............class_weight={0: 1, 1: 1}

[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=2; total time=   0.0s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=3; total time=   0.0s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=3; total time=   0.0s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=3; total time=   0.0s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=3; total time=   0.0s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=3; total time=   0.0s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=3; total time=   0.0s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=3; total time=   0.0s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=3; total time=   0.0s
[CV] END class_weight={0: 7.981345826235094, 1: 0.5334164474957589}, max_depth=3; total tim

[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.0s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.0s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.0s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.0s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.0s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.0s
[CV] END ............class_weight={0: 1, 1: 50}, max_depth=3; total time=   0.0s
[CV] END ...........class_weight={0: 1, 1: 50}, max_depth=10; total time=   0.1s
[CV] END ...........class_weight={0: 1, 1: 50}, max_depth=10; total time=   0.1s
[CV] END ...........class_weight={0: 1, 1: 50}, max_depth=10; total time=   0.1s
[CV] END ...........class_weight={0: 1, 1: 50}, max_depth=10; total time=   0.1s
[CV] END ...........class_weight={0: 1, 1: 50}, max_depth=10; total time=   0.1s
[CV] END ...........class_we

The best geometric mean (0.977274) is obtained using the parameters: 'class_weight': {0: 1, 1: 50} and 'max_depth': 100. This gives a standard deviation of 0.00053 in the grid search, and an accuracy of  0.9702919181566261 with a standard deviation of 0.0006272604506923014. This is obtained using an oversampling strategy of 0.5. 