In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from lightgbm import LGBMClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import optuna


In [None]:
from lightgbm import early_stopping
from optuna.integration import LightGBMPruningCallback
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import RepeatedStratifiedKFold


def objective(trial, X, y):
    """Optuna objective: mean cross-validated ROC AUC of a LightGBM classifier.

    Samples one hyperparameter configuration from ``trial``, fits an
    ``LGBMClassifier`` on every split of a repeated stratified 10-fold
    cross-validation (5 repeats, i.e. 50 fits) and averages the ROC AUC
    obtained on the held-out folds.

    Args:
        trial: Optuna trial used to sample hyperparameters; it is also
            handed to the pruning callback.
        X: Feature table; rows are selected positionally through ``.iloc``.
        y: Binary target aligned with ``X``; selected through ``.iloc``.

    Returns:
        The mean ROC AUC over all cross-validation splits.

    Raises:
        optuna.TrialPruned: Propagated from ``LightGBMPruningCallback`` when
            the study's pruner decides to stop the trial.

    Note:
        The held-out fold of each split serves both as the early-stopping
        evaluation set and as the scoring set, so the returned AUC is an
        optimistic estimate.
    """
    # The boosting-round budget is tuned once, through ``n_estimators``.
    # ``num_iterations`` is a LightGBM alias of the same parameter; sampling
    # both wastes a search dimension because only one of them takes effect.
    param_grid = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 1000),
        "learning_rate": trial.suggest_float("learning_rate", 0.001, 0.1),
        "num_leaves": trial.suggest_int("num_leaves", 10, 100),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "max_bin": trial.suggest_int("max_bin", 100, 1000),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.1, 1, step=0.1),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 10),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.1, 1, step=0.1),
        "scale_pos_weight": trial.suggest_float("scale_pos_weight", 1, 9, step=0.5),
        "lambda_l1": trial.suggest_float("lambda_l1", 0, 10, step=0.1),
        "lambda_l2": trial.suggest_float("lambda_l2", 0, 10, step=0.1),
        "verbose": -1,
    }

    # Fixed seed so every trial is evaluated on the same sequence of splits.
    cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=5, random_state=42)

    cv_scores = []
    for train_idx, test_idx in cv.split(X, y):
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

        model = LGBMClassifier(objective="binary", **param_grid)
        model.fit(
            X_train,
            y_train,
            eval_set=[(X_test, y_test)],
            eval_metric="auc",
            callbacks=[
                # Early stopping is passed as a callback: the
                # ``early_stopping_rounds`` keyword of ``fit`` is no longer
                # accepted by LightGBM >= 4.0.
                early_stopping(stopping_rounds=500, verbose=False),
                # NOTE(review): the callback reports at LightGBM iteration
                # numbers, which restart in every fold; Optuna ignores a
                # step that was already reported, so pruning is presumably
                # driven mostly by the first fold -- confirm this is intended.
                LightGBMPruningCallback(trial, "auc"),
            ],
        )

        # Score the positive-class probabilities on the held-out fold.
        pred = model.predict_proba(X_test)[:, 1]
        cv_scores.append(roc_auc_score(y_test, pred))

    return np.mean(cv_scores)

# Create a study that maximizes the value returned by `objective`.
study = optuna.create_study(direction="maximize")

# Run 200 trials; the lambda binds the module-level X and y so that Optuna
# only has to supply the trial argument.
study.optimize(
    func=lambda t: objective(t, X, y),
    n_trials=200,
)