### AdaBoost Hyperparameters Grid Search with default Decision Tree Learner

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the cleaned diabetes CSV into a DataFrame, then print its schema
# summary (column names, non-null counts, dtypes, memory usage).
csv_path = '../data/clean_Diabetes.csv'
df = pd.read_csv(csv_path)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 532 entries, 0 to 531
Data columns (total 10 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Unnamed: 0                532 non-null    int64  
 1   Pregnancies               532 non-null    int64  
 2   Glucose                   532 non-null    int64  
 3   BloodPressure             532 non-null    int64  
 4   SkinThickness             532 non-null    int64  
 5   Insulin                   532 non-null    int64  
 6   BMI                       532 non-null    float64
 7   DiabetesPedigreeFunction  532 non-null    float64
 8   Age                       532 non-null    int64  
 9   Outcome                   532 non-null    int64  
dtypes: float64(2), int64(8)
memory usage: 41.7 KB


In [3]:
# Preview the first few rows to sanity-check the loaded values
df.head()

Unnamed: 0.1,Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,0,6,148,72,35,0,33.6,0.627,50,1
1,1,1,85,66,29,0,26.6,0.351,31,0
2,3,1,89,66,23,94,28.1,0.167,21,0
3,4,0,137,40,35,168,43.1,2.288,33,1
4,6,3,78,50,32,88,31.0,0.248,26,1


In [4]:
# List the column names; note the extra 'Unnamed: 0' column alongside the features
df.columns

Index(['Unnamed: 0', 'Pregnancies', 'Glucose', 'BloodPressure',
       'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age',
       'Outcome'],
      dtype='object')

In [5]:
# Remove the 'Unnamed: 0' column (it looks like an index that was saved into
# the CSV; it is not a feature). Rebinding `df` instead of using inplace=True,
# together with errors='ignore', keeps this cell idempotent: re-running it
# after the column is already gone no longer raises KeyError.
df = df.drop(columns='Unnamed: 0', errors='ignore')

In [6]:
# Re-check the columns to confirm 'Unnamed: 0' has been removed
df.columns

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

In [7]:
# Split the frame into the feature matrix X and the target vector y

feature_columns = [
    'Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',
    'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age',
]
X = df[feature_columns]
y = df['Outcome']

In [8]:
from sklearn.datasets import make_classification
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import AdaBoostClassifier

In [9]:
# Grid search over AdaBoost's n_estimators and learning_rate, scored by
# accuracy under repeated stratified k-fold cross-validation.

# One seed shared by the model and the CV splitter so that a re-run of this
# cell reproduces the same scores.
SEED = 101

# Define the model: default hyperparameters, but with a fixed random_state.
# (It was previously unseeded, so the fitted ensemble was not guaranteed to be
# identical from one run to the next.)
model = AdaBoostClassifier(random_state=SEED)

# Define grid values to search
n_estimators = [10, 50, 100, 500]
learning_rate = [0.001, 0.01, 0.1, 0.25, 0.5, 1]

grid = {'n_estimators': n_estimators, 'learning_rate': learning_rate}

# Define evaluation procedure: 10 stratified folds, repeated 3 times
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=SEED)

# Build the grid-search object that evaluates every combination in `grid`
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, cv=cv, scoring='accuracy')

# Execute grid search
grid_result = grid_search.fit(X, y)

# Summarize the best score and configuration
print(f"Best: {grid_result.best_score_} using {grid_result.best_params_}")
print("\n\n")

# Summarize all the scores that were evaluated
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

# One line per parameter combination: mean accuracy (std across folds)
for mean, stddev, param in zip(means, stds, params):
    print(f"{mean} ({stddev}) with : {param}")

Best: 0.7869322152341022 using {'learning_rate': 0.1, 'n_estimators': 100}



0.7337293268110879 (0.04074309135563268) with : {'learning_rate': 0.001, 'n_estimators': 10}
0.7324714651758678 (0.038867533924231513) with : {'learning_rate': 0.001, 'n_estimators': 50}
0.7312136035406476 (0.03749230505721391) with : {'learning_rate': 0.001, 'n_estimators': 100}
0.7443745632424879 (0.046650923474466206) with : {'learning_rate': 0.001, 'n_estimators': 500}
0.7324598183088751 (0.037100178455760024) with : {'learning_rate': 0.01, 'n_estimators': 10}
0.7450034940600979 (0.04640390694893394) with : {'learning_rate': 0.01, 'n_estimators': 50}
0.7593873747961799 (0.049529086212512875) with : {'learning_rate': 0.01, 'n_estimators': 100}
0.7800722105753551 (0.037432097376760495) with : {'learning_rate': 0.01, 'n_estimators': 500}
0.7600046587467971 (0.047197042750231746) with : {'learning_rate': 0.1, 'n_estimators': 10}
0.7787910552061496 (0.03868881830700726) with : {'learning_rate': 0.1, 'n_estimat