## AdaBoost Hyperparameter Tuning (F1-Score)

In [6]:
# Load Libraries

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import f1_score

from sklearn.ensemble import AdaBoostClassifier

import optuna

In [8]:
# Load Data

df = pd.read_csv('bank_4.csv', index_col=0)

In [10]:
# Train / Test Split

X = df.drop(columns=['churn', 'complain', 'umap_1', 'umap_2'])
y = df['churn']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

In [12]:
# Hyperparameter tuning

def objective(trial):

    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 1.0)
    }

    model = AdaBoostClassifier(
        **params,
        random_state=42)
    
    threshold = 0.5
    
    skf = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)
    scores = []
    
    for tr, te in skf.split(X_train, y_train):
        
        X_tr, X_te = X_train.iloc[tr], X_train.iloc[te]
        y_tr, y_te = y_train.iloc[tr], y_train.iloc[te]
        
        model.fit(X_tr, y_tr)
        prob = model.predict_proba(X_te)[:, 1]
        y_pred = np.where(prob < threshold, 0, 1)
        
        scores.append(f1_score(y_te, y_pred))
        
    return np.mean(scores)

In [14]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100, show_progress_bar=True)

[I 2024-07-10 13:16:51,504] A new study created in memory with name: no-name-c1c1be00-8f6d-43ae-86de-38ec383b82cb


  0%|          | 0/100 [00:00<?, ?it/s]

[I 2024-07-10 13:17:11,997] Trial 0 finished with value: 0.5480569389626847 and parameters: {'n_estimators': 1000, 'learning_rate': 0.41275573238648755}. Best is trial 0 with value: 0.5480569389626847.
[I 2024-07-10 13:17:15,207] Trial 1 finished with value: 0.541737835131191 and parameters: {'n_estimators': 139, 'learning_rate': 0.1977383117205113}. Best is trial 0 with value: 0.5480569389626847.
[I 2024-07-10 13:17:21,025] Trial 2 finished with value: 0.5478098840093144 and parameters: {'n_estimators': 285, 'learning_rate': 0.472714180822971}. Best is trial 0 with value: 0.5480569389626847.
[I 2024-07-10 13:17:41,440] Trial 3 finished with value: 0.5485441736464868 and parameters: {'n_estimators': 974, 'learning_rate': 0.23475213670350714}. Best is trial 3 with value: 0.5485441736464868.
[I 2024-07-10 13:17:56,836] Trial 4 finished with value: 0.5483635958733972 and parameters: {'n_estimators': 755, 'learning_rate': 0.8851851871692377}. Best is trial 3 with value: 0.5485441736464868.

In [16]:
print("Best trial:", study.best_trial)
print("Best hyperparameters:", study.best_params)

Best trial: FrozenTrial(number=68, state=1, values=[0.5518392752384444], datetime_start=datetime.datetime(2024, 7, 10, 13, 24, 13, 600598), datetime_complete=datetime.datetime(2024, 7, 10, 13, 24, 15, 953288), params={'n_estimators': 109, 'learning_rate': 0.846886313092084}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'n_estimators': IntDistribution(high=1000, log=False, low=50, step=1), 'learning_rate': FloatDistribution(high=1.0, log=False, low=0.001, step=None)}, trial_id=68, value=None)
Best hyperparameters: {'n_estimators': 109, 'learning_rate': 0.846886313092084}
