## Hist Gradient Boosting Hyperparameter Tuning (F1-Score)

In [2]:
# Load Libraries

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import f1_score

from sklearn.ensemble import HistGradientBoostingClassifier

import optuna

In [3]:
# Load Data

df = pd.read_csv('bank_4.csv', index_col=0)

In [4]:
# Train / Test Split

X = df.drop(columns=['churn', 'complain', 'umap_1', 'umap_2'])
y = df['churn']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

In [5]:
# Hyperparameter tuning

def objective(trial):
    
    params = {
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 1.0),
        'max_iter': trial.suggest_int('max_iter', 100, 2000),
        'max_depth':trial.suggest_int('max_depth', 2, 50),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 32),
        'max_leaf_nodes': trial.suggest_int('max_leaf_nodes', 10, 50),
        'l2_regularization': trial.suggest_float('l2_regularization', 0.0, 0.1)
    }

    model = HistGradientBoostingClassifier(
        **params,
        random_state=42)
    
    threshold = 0.31
    
    skf = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)
    scores = []
    
    for tr, te in skf.split(X_train, y_train):
        
        X_tr, X_te = X_train.iloc[tr], X_train.iloc[te]
        y_tr, y_te = y_train.iloc[tr], y_train.iloc[te]
        
        model.fit(X_tr, y_tr)
        prob = model.predict_proba(X_te)[:, 1]
        y_pred = np.where(prob < threshold, 0, 1)
        
        scores.append(f1_score(y_te, y_pred))
        
    return np.mean(scores)

In [6]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100, show_progress_bar=True)

[I 2024-07-10 12:56:27,352] A new study created in memory with name: no-name-f2212109-5f0a-44ca-baff-94d407c9ea95


  0%|          | 0/100 [00:00<?, ?it/s]

[I 2024-07-10 12:56:47,837] Trial 0 finished with value: 0.5355727324851103 and parameters: {'learning_rate': 0.9303368771357591, 'max_iter': 1189, 'max_depth': 7, 'min_samples_leaf': 23, 'max_leaf_nodes': 49, 'l2_regularization': 0.02576623974108551}. Best is trial 0 with value: 0.5355727324851103.
[I 2024-07-10 12:56:51,867] Trial 1 finished with value: 0.5455820489302622 and parameters: {'learning_rate': 0.4224671881164837, 'max_iter': 739, 'max_depth': 42, 'min_samples_leaf': 18, 'max_leaf_nodes': 13, 'l2_regularization': 0.027413875476656736}. Best is trial 1 with value: 0.5455820489302622.
[I 2024-07-10 12:57:15,391] Trial 2 finished with value: 0.5478776165961077 and parameters: {'learning_rate': 0.9507839628099041, 'max_iter': 1496, 'max_depth': 16, 'min_samples_leaf': 31, 'max_leaf_nodes': 43, 'l2_regularization': 0.030954467043114543}. Best is trial 2 with value: 0.5478776165961077.
[I 2024-07-10 12:57:20,657] Trial 3 finished with value: 0.5633360706897017 and parameters: {'

In [12]:
print("Best trial:", study.best_trial)
print("Best hyperparameters:", study.best_params)

Best trial: FrozenTrial(number=23, state=1, values=[0.6180353184129596], datetime_start=datetime.datetime(2024, 7, 10, 12, 59, 51, 519177), datetime_complete=datetime.datetime(2024, 7, 10, 13, 0, 1, 2024), params={'learning_rate': 0.00853199483099034, 'max_iter': 897, 'max_depth': 37, 'min_samples_leaf': 9, 'max_leaf_nodes': 26, 'l2_regularization': 0.04861419258358948}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'learning_rate': FloatDistribution(high=1.0, log=False, low=0.001, step=None), 'max_iter': IntDistribution(high=2000, log=False, low=100, step=1), 'max_depth': IntDistribution(high=50, log=False, low=2, step=1), 'min_samples_leaf': IntDistribution(high=32, log=False, low=1, step=1), 'max_leaf_nodes': IntDistribution(high=50, log=False, low=10, step=1), 'l2_regularization': FloatDistribution(high=0.1, log=False, low=0.0, step=None)}, trial_id=23, value=None)
Best hyperparameters: {'learning_rate': 0.00853199483099034, 'max_iter': 897, 'max_depth': 37