In [18]:
import os
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder

from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

from app.utils import CustomImputer, CyclicalFeatureTransformer

In [19]:
# 2. LOAD & PREPARE DATA
# The CSV location can be overridden with the BANK_DATA_PATH environment
# variable so the notebook runs on other machines; the default is the path
# the notebook was originally written against.
DATA_PATH = os.environ.get(
    "BANK_DATA_PATH",
    "C:/Users/ASUS/PROGRAMMING LANGUAGE/ASAH_Capstone Project Fix/bank-additional-full.csv",
)

# Read the semicolon-separated file and rename the target column 'y' -> 'deposit'
# without mutating a frame in place (keeps the cell idempotent on re-run).
df = pd.read_csv(DATA_PATH, sep=';').rename(columns={'y': 'deposit'})

# Binary target: 'yes' -> 1, 'no' -> 0.
y = df['deposit'].map({'yes': 1, 'no': 0})
# .map() silently turns any other label into NaN; fail here with a clear
# message instead of deep inside the stratified split or a model fit.
if y.isna().any():
    raise ValueError("Column 'deposit' contains labels other than 'yes'/'no'.")
X = df.drop('deposit', axis=1)

# Final feature set (10 deployment features)
final_features = [
    'euribor3m', 'nr.employed', 'age', 'cons.conf.idx', 'campaign',
    'poutcome', 'previous', 'job', 'education', 'month'
]

X_filtered = X[final_features].copy()
# 80/20 split, stratified on the target so both parts keep the class ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X_filtered, y, test_size=0.2, random_state=42, stratify=y
)

In [20]:
# 3. CUSTOM TRANSFORMER DEFINITIONS
# NOTE(review): the import cell also imports CustomImputer from app.utils; this
# definition shadows that import for the rest of the notebook — confirm which
# one the deployed/pickled pipeline is expected to reference.
class CustomImputer(BaseEstimator, TransformerMixin):
    """Rule-based replacement of 'unknown' values in the 'job' and 'education' columns.

    Stateless: ``fit`` learns nothing. ``transform`` works on a copy of the
    input DataFrame and applies the rules in a fixed order, each rule only
    running when the columns it needs are present.
    """

    def fit(self, X, y=None):
        """No-op fit; returns ``self`` so the transformer can sit in a Pipeline."""
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the imputation rules applied."""
        frame = X.copy()
        has_job = 'job' in frame.columns

        # Rule 1: unknown job and age above 60 -> 'retired'.
        if has_job and 'age' in frame.columns:
            senior_unknown_job = (frame['age'] > 60) & (frame['job'] == 'unknown')
            frame.loc[senior_unknown_job, 'job'] = 'retired'

        if 'education' in frame.columns and has_job:
            # Rule 2: unknown education is filled from the job.
            education_by_job = (
                ('management', 'university.degree'),
                ('services', 'high.school'),
                ('housemaid', 'basic.4y'),
            )
            for job_name, education_level in education_by_job:
                unknown_edu_for_job = (frame['education'] == 'unknown') & (frame['job'] == job_name)
                frame.loc[unknown_edu_for_job, 'education'] = education_level

            # Rule 3: unknown job is filled from the education. The masks are
            # recomputed each time because the previous assignment changes 'job'.
            unknown_job_prof = (frame['job'] == 'unknown') & (frame['education'] == 'professional.course')
            frame.loc[unknown_job_prof, 'job'] = 'technician'

            basic_levels = ['basic.4y', 'basic.6y', 'basic.9y']
            unknown_job_basic = (frame['job'] == 'unknown') & (frame['education'].isin(basic_levels))
            frame.loc[unknown_job_basic, 'job'] = 'blue-collar'

        return frame

# NOTE(review): the import cell also imports CyclicalFeatureTransformer from
# app.utils; this definition shadows it — confirm which one deployment uses.
class CyclicalFeatureTransformer(BaseEstimator, TransformerMixin):
    """Replace a 'month' abbreviation column with sine/cosine cyclical features.

    Stateless: ``fit`` learns nothing. ``transform`` works on a copy of the
    input DataFrame; when a 'month' column is present it is replaced by
    'month_sin' and 'month_cos', otherwise the copy is returned unchanged.
    """

    # Calendar number of each lowercase three-letter month abbreviation.
    # 'jan' and 'feb' are included so that they are no longer treated as
    # unmapped (which encoded them exactly like December).
    _MONTH_NUMBERS = {
        'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
        'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12,
    }

    def fit(self, X, y=None):
        """No-op fit; returns ``self`` so the transformer can sit in a Pipeline."""
        return self

    def transform(self, X):
        """Return a copy of ``X`` with 'month' replaced by 'month_sin'/'month_cos'."""
        X_copy = X.copy()
        if 'month' in X_copy.columns:
            # Values not found in the mapping become 0, i.e. sin = 0 and cos = 1.
            month_num = X_copy['month'].map(self._MONTH_NUMBERS).fillna(0)
            angle = 2 * np.pi * month_num / 12
            X_copy['month_sin'] = np.sin(angle)
            X_copy['month_cos'] = np.cos(angle)
            X_copy = X_copy.drop(columns=['month'])
        return X_copy

    def get_feature_names_out(self, input_features=None):
        """Return the names of the two generated columns."""
        return ['month_sin', 'month_cos']

# 4. PREPROCESSOR (same as your code)

# Explicit category orders for the ordinal encoder; 'unknown' education sits
# last in its list, so it receives the largest ordinal code.
education_order = [
    'illiterate', 'basic.4y', 'basic.6y', 'basic.9y',
    'high.school', 'professional.course', 'university.degree', 'unknown',
]
poutcome_order = ['nonexistent', 'failure', 'success']

# Column groups, one per encoding strategy.
sel_num_features = ['euribor3m', 'nr.employed', 'age', 'cons.conf.idx', 'campaign', 'previous']
sel_cat_ordinal  = ['education', 'poutcome']
sel_cat_onehot   = ['job']
sel_cyclic       = ['month']

# Categories outside the declared orders are encoded as -1.
ordinal_encoder = OrdinalEncoder(
    categories=[education_order, poutcome_order],
    handle_unknown='use_encoded_value',
    unknown_value=-1,
)
# Categories unseen during fit are ignored rather than raising; dense output.
onehot_encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)

# Columns not listed in any group are dropped; output names carry no
# transformer prefix.
preprocessor_deploy = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), sel_num_features),
        ('ord', ordinal_encoder, sel_cat_ordinal),
        ('onehot', onehot_encoder, sel_cat_onehot),
        ('cyclic', CyclicalFeatureTransformer(), sel_cyclic),
    ],
    remainder='drop',
    verbose_feature_names_out=False,
)

In [21]:
# 5. MODEL DEFINITIONS

# Compute the class weight for XGBoost (it has no automatic 'balanced' option):
# ratio of negative to positive samples in the training target.
neg_count = (y_train == 0).sum()
pos_count = (y_train == 1).sum()
scale_pos_weight_val = neg_count / pos_count


def build_deploy_pipeline(classifier):
    """Wrap ``classifier`` in the shared imputer -> preprocessor -> classifier pipeline.

    Each pipeline receives its own clone of ``preprocessor_deploy``. Sharing one
    ColumnTransformer instance across pipelines means that fitting any pipeline
    refits the transformer inside all the others.

    Parameters
    ----------
    classifier : estimator
        The final estimator placed in the 'classifier' step.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Unfitted pipeline with steps 'imputer', 'preprocessor', 'classifier'.
    """
    return Pipeline(steps=[
        ('imputer', CustomImputer()),
        ('preprocessor', clone(preprocessor_deploy)),
        ('classifier', classifier),
    ])


# Insertion order is kept identical to the original cell, since the results
# table is indexed by this order.
models_deploy = {
    # --- A. Classical models ---
    "Logistic Regression": build_deploy_pipeline(
        LogisticRegression(random_state=42, solver='liblinear', C=1.0, class_weight='balanced')
    ),
    "Naive Bayes": build_deploy_pipeline(GaussianNB()),
    "Random Forest": build_deploy_pipeline(
        RandomForestClassifier(random_state=42, n_estimators=200, class_weight='balanced')
    ),
    # --- B. Boosting models ---
    "XGBoost": build_deploy_pipeline(
        XGBClassifier(
            random_state=42,
            scale_pos_weight=scale_pos_weight_val,  # handle class imbalance
            use_label_encoder=False,
            eval_metric='logloss',
        )
    ),
    "LightGBM": build_deploy_pipeline(
        LGBMClassifier(
            random_state=42,
            class_weight='balanced',  # handle class imbalance
            verbose=-1,
        )
    ),
    "CatBoost": build_deploy_pipeline(
        CatBoostClassifier(
            random_state=42,
            auto_class_weights='Balanced',  # handle class imbalance
            verbose=0,
        )
    ),
}

In [22]:
# 6. AUTOMATED TRAINING & EVALUATION
# NOTE(review): the "best" model is chosen by its AUC on X_test, so the winning
# test AUC is an optimistic estimate — consider selecting on a validation split
# or cross-validation instead.

# Running best (by test-set ROC AUC) and one summary row per model.
best_auc, best_model_name, best_model_pipeline = 0, "", None
results_summary = []

for name, model in models_deploy.items():
    print(f"\nTraining {name}...")
    model.fit(X_train, y_train)

    # Hard predictions feed the classification report; the positive-class
    # probability (column 1) feeds the AUC.
    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)[:, 1]
    auc = roc_auc_score(y_test, y_proba)

    print(f"--- Laporan: {name} ---")
    print(classification_report(y_test, y_pred, target_names=['No Deposit', 'Deposit']))
    print(f"AUC Score: {auc:.4f}")

    results_summary.append(dict(Model=name, AUC=auc))

    # Strict comparison: on a tie the earlier model is kept.
    if auc > best_auc:
        best_auc, best_model_name, best_model_pipeline = auc, name, model


Training Logistic Regression...
--- Laporan: Logistic Regression ---
              precision    recall  f1-score   support

  No Deposit       0.95      0.76      0.85      7310
     Deposit       0.28      0.72      0.40       928

    accuracy                           0.76      8238
   macro avg       0.62      0.74      0.62      8238
weighted avg       0.88      0.76      0.80      8238

AUC Score: 0.7797

Training Naive Bayes...
--- Laporan: Naive Bayes ---
              precision    recall  f1-score   support

  No Deposit       0.93      0.86      0.89      7310
     Deposit       0.30      0.47      0.36       928

    accuracy                           0.81      8238
   macro avg       0.61      0.66      0.63      8238
weighted avg       0.86      0.81      0.83      8238

AUC Score: 0.7500

Training Random Forest...
--- Laporan: Random Forest ---
              precision    recall  f1-score   support

  No Deposit       0.92      0.95      0.93      7310
     Deposit       

In [23]:
# 7. RESULTS SUMMARY
# Report the best model found by the training loop, then every model's AUC
# sorted from highest to lowest.

print(f" MODEL TERBAIK: {best_model_name}")
print(f" AUC Score: {best_auc:.4f}")
# "\H" was an invalid escape sequence that printed a literal backslash; a
# newline was intended.
print("\nHasil Keseluruhan Model:")
print(pd.DataFrame(results_summary).sort_values(by='AUC', ascending=False))

 MODEL TERBAIK: LightGBM
 AUC Score: 0.8095
\Hasil Keseluruhan Model:
                 Model       AUC
4             LightGBM  0.809540
5             CatBoost  0.801865
3              XGBoost  0.787214
0  Logistic Regression  0.779726
2        Random Forest  0.768094
1          Naive Bayes  0.750037


In [24]:
import optuna
from optuna.samplers import TPESampler
from sklearn.model_selection import cross_val_score
from lightgbm import LGBMClassifier

# TUNING CONSTANTS
N_TRIALS = 50
RANDOM_STATE = 42
SCORING_METRIC = 'roc_auc' # Metric being optimised


def objective(trial):
    """Optuna objective: mean cross-validated score of a LightGBM pipeline.

    Samples one LightGBM hyper-parameter set from ``trial``, wraps the
    classifier in the imputer -> preprocessor -> classifier pipeline and scores
    it with 3-fold cross-validation on ``X_train`` / ``y_train`` using
    ``SCORING_METRIC``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the hyper-parameters.

    Returns
    -------
    float
        Mean of the per-fold scores.
    """

    # 1. Parameter space for LightGBM (leaf-wise growth control).
    # suggest_float(..., log=True) / suggest_float(...) replace the deprecated
    # suggest_loguniform / suggest_uniform and sample the same distributions.
    lgbm_params = {
        'n_estimators': trial.suggest_int('n_estimators', 300, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.1, log=True),
        'num_leaves': trial.suggest_int('num_leaves', 20, 100),       # key leaf-wise control
        'max_depth': trial.suggest_int('max_depth', 4, 10),           # explicit depth limit
        'min_child_samples': trial.suggest_int('min_child_samples', 20, 200),  # same as min_data_in_leaf
        # NOTE(review): LightGBM only applies subsample (bagging_fraction) when
        # subsample_freq (bagging_freq) > 0, and none is set here — confirm
        # whether this dimension is meant to have an effect.
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),  # guards against overfitting
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-3, 10.0, log=True),    # L1 regularization
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-3, 10.0, log=True),  # L2 regularization
        'random_state': RANDOM_STATE,
        'class_weight': 'balanced',
        'n_jobs': -1,
        'verbose': -1
    }

    # 2. Model pipeline
    model = Pipeline(steps=[
        ('imputer', CustomImputer()),
        ('preprocessor', preprocessor_deploy),
        ('classifier', LGBMClassifier(**lgbm_params))
    ])

    # 3. Evaluate with cross-validation on the training data only.
    score = cross_val_score(
        model,
        X_train,
        y_train,
        cv=3,
        scoring=SCORING_METRIC
    )

    # The study's `direction` decides whether this mean is maximised or minimised.
    return score.mean()

In [25]:
# RUN THE OPTUNA STUDY

# The TPE (Tree-structured Parzen Estimator) sampler is Optuna's efficient
# default; it is seeded here so the search is repeatable.
sampler = TPESampler(seed=RANDOM_STATE)
study = optuna.create_study(direction="maximize", sampler=sampler)

print(f"Memulai Optuna dengan {N_TRIALS} percobaan")
study.optimize(objective, n_trials=N_TRIALS, show_progress_bar=True)


# FINAL RESULTS

print("\n HASIL TUNING LIGHTGBM DENGAN OPTUNA")
print(f"AUC Score Terbaik (CV): {study.best_value:.4f}")
print(f"Kombinasi Parameter Terbaik:\n{study.best_params}")

# Tuned parameters plus the fixed settings that the objective also used,
# built as a fresh dict rather than by mutating the study's result.
best_params = {
    **study.best_params,
    'random_state': RANDOM_STATE,
    'class_weight': 'balanced',
    'n_jobs': -1,
    'verbose': -1,
}

# Final pipeline. The preprocessor is cloned so that fitting this pipeline
# does not refit the transformer instance held by previously fitted pipelines.
final_lgbm_pipeline = Pipeline(steps=[
    ('imputer', CustomImputer()),
    ('preprocessor', clone(preprocessor_deploy)),
    ('classifier', LGBMClassifier(**best_params))
])

# Train the final model on the full training set.
final_lgbm_pipeline.fit(X_train, y_train)

# Evaluate on the test set (final validation).
y_proba_final = final_lgbm_pipeline.predict_proba(X_test)[:, 1]
final_auc = roc_auc_score(y_test, y_proba_final)

print(f"\nAUC Score Final pada Data Test: {final_auc:.4f}")

[I 2025-12-08 21:13:18,226] A new study created in memory with name: no-name-b862e902-158f-45f4-8f3c-a59d371d854a


Memulai Optuna dengan 50 percobaan


Best trial: 0. Best value: 0.773251:   2%|▏         | 1/50 [00:03<02:35,  3.18s/it]

[I 2025-12-08 21:13:21,409] Trial 0 finished with value: 0.7732512568416866 and parameters: {'n_estimators': 562, 'learning_rate': 0.07969454818643935, 'num_leaves': 79, 'max_depth': 8, 'min_child_samples': 48, 'subsample': 0.662397808134481, 'colsample_bytree': 0.6232334448672797, 'reg_alpha': 2.9154431891537547, 'reg_lambda': 0.2537815508265665}. Best is trial 0 with value: 0.7732512568416866.


Best trial: 1. Best value: 0.790959:   4%|▍         | 2/50 [00:09<03:50,  4.81s/it]

[I 2025-12-08 21:13:27,359] Trial 1 finished with value: 0.7909587512676178 and parameters: {'n_estimators': 796, 'learning_rate': 0.0010994335574766201, 'num_leaves': 98, 'max_depth': 9, 'min_child_samples': 58, 'subsample': 0.6727299868828402, 'colsample_bytree': 0.6733618039413735, 'reg_alpha': 0.016480446427978974, 'reg_lambda': 0.12561043700013558}. Best is trial 1 with value: 0.7909587512676178.


Best trial: 1. Best value: 0.790959:   6%|▌         | 3/50 [00:10<02:32,  3.25s/it]

[I 2025-12-08 21:13:28,766] Trial 2 finished with value: 0.7903197883611761 and parameters: {'n_estimators': 602, 'learning_rate': 0.0038234752246751854, 'num_leaves': 69, 'max_depth': 4, 'min_child_samples': 72, 'subsample': 0.7465447373174767, 'colsample_bytree': 0.7824279936868144, 'reg_alpha': 1.382623217936987, 'reg_lambda': 0.006290644294586149}. Best is trial 1 with value: 0.7909587512676178.


Best trial: 3. Best value: 0.791603:   8%|▊         | 4/50 [00:12<02:14,  2.92s/it]

[I 2025-12-08 21:13:31,166] Trial 3 finished with value: 0.791602750058994 and parameters: {'n_estimators': 660, 'learning_rate': 0.015304852121831466, 'num_leaves': 23, 'max_depth': 8, 'min_child_samples': 50, 'subsample': 0.6260206371941118, 'colsample_bytree': 0.9795542149013333, 'reg_alpha': 7.2866537374910445, 'reg_lambda': 1.7123375973163988}. Best is trial 3 with value: 0.791602750058994.


Best trial: 3. Best value: 0.791603:  10%|█         | 5/50 [00:14<01:57,  2.61s/it]

[I 2025-12-08 21:13:33,234] Trial 4 finished with value: 0.7909875944463733 and parameters: {'n_estimators': 513, 'learning_rate': 0.0015679933916723015, 'num_leaves': 75, 'max_depth': 7, 'min_child_samples': 42, 'subsample': 0.798070764044508, 'colsample_bytree': 0.6137554084460873, 'reg_alpha': 4.337920697490942, 'reg_lambda': 0.010842262717330166}. Best is trial 3 with value: 0.791602750058994.


Best trial: 5. Best value: 0.791737:  12%|█▏        | 6/50 [00:18<02:01,  2.77s/it]

[I 2025-12-08 21:13:36,318] Trial 5 finished with value: 0.7917369942605156 and parameters: {'n_estimators': 764, 'learning_rate': 0.004201672054372531, 'num_leaves': 62, 'max_depth': 7, 'min_child_samples': 53, 'subsample': 0.9878338511058234, 'colsample_bytree': 0.9100531293444458, 'reg_alpha': 5.727904470799623, 'reg_lambda': 3.7958531426706403}. Best is trial 5 with value: 0.7917369942605156.


Best trial: 5. Best value: 0.791737:  14%|█▍        | 7/50 [00:20<01:55,  2.69s/it]

[I 2025-12-08 21:13:38,854] Trial 6 finished with value: 0.7743736347299149 and parameters: {'n_estimators': 719, 'learning_rate': 0.06978281265126034, 'num_leaves': 27, 'max_depth': 5, 'min_child_samples': 28, 'subsample': 0.7301321323053057, 'colsample_bytree': 0.7554709158757928, 'reg_alpha': 0.01217295809836997, 'reg_lambda': 2.0651425578959257}. Best is trial 5 with value: 0.7917369942605156.


Best trial: 5. Best value: 0.791737:  16%|█▌        | 8/50 [00:22<01:35,  2.29s/it]

[I 2025-12-08 21:13:40,264] Trial 7 finished with value: 0.7892304738181073 and parameters: {'n_estimators': 550, 'learning_rate': 0.0036464395589807202, 'num_leaves': 63, 'max_depth': 4, 'min_child_samples': 165, 'subsample': 0.6298202574719083, 'colsample_bytree': 0.9947547746402069, 'reg_alpha': 1.2273800987852967, 'reg_lambda': 0.0062353771356731605}. Best is trial 5 with value: 0.7917369942605156.


Best trial: 5. Best value: 0.791737:  18%|█▊        | 9/50 [00:23<01:25,  2.08s/it]

[I 2025-12-08 21:13:41,882] Trial 8 finished with value: 0.7867547356726652 and parameters: {'n_estimators': 303, 'learning_rate': 0.04274869455295218, 'num_leaves': 77, 'max_depth': 9, 'min_child_samples': 159, 'subsample': 0.6296178606936361, 'colsample_bytree': 0.7433862914177091, 'reg_alpha': 0.0029072088906598446, 'reg_lambda': 2.8340904295147746}. Best is trial 5 with value: 0.7917369942605156.


Best trial: 9. Best value: 0.792288:  20%|██        | 10/50 [00:25<01:24,  2.12s/it]

[I 2025-12-08 21:13:44,112] Trial 9 finished with value: 0.7922877239997265 and parameters: {'n_estimators': 736, 'learning_rate': 0.004589824181495649, 'num_leaves': 25, 'max_depth': 6, 'min_child_samples': 78, 'subsample': 0.8918424713352255, 'colsample_bytree': 0.8550229885420852, 'reg_alpha': 3.53875886477924, 'reg_lambda': 0.0774211647399625}. Best is trial 9 with value: 0.7922877239997265.


Best trial: 9. Best value: 0.792288:  22%|██▏       | 11/50 [00:28<01:28,  2.27s/it]

[I 2025-12-08 21:13:46,724] Trial 10 finished with value: 0.7884898696446832 and parameters: {'n_estimators': 971, 'learning_rate': 0.015186260710028772, 'num_leaves': 40, 'max_depth': 6, 'min_child_samples': 110, 'subsample': 0.9407901565064135, 'colsample_bytree': 0.8617506091083013, 'reg_alpha': 0.29845037463029467, 'reg_lambda': 0.030762193887012002}. Best is trial 9 with value: 0.7922877239997265.


Best trial: 11. Best value: 0.793052:  24%|██▍       | 12/50 [00:31<01:40,  2.64s/it]

[I 2025-12-08 21:13:50,209] Trial 11 finished with value: 0.7930518472943676 and parameters: {'n_estimators': 891, 'learning_rate': 0.004810459427172161, 'num_leaves': 48, 'max_depth': 6, 'min_child_samples': 98, 'subsample': 0.975925571746833, 'colsample_bytree': 0.8743188681917087, 'reg_alpha': 0.26837193414400196, 'reg_lambda': 0.3565483620389866}. Best is trial 11 with value: 0.7930518472943676.


Best trial: 11. Best value: 0.793052:  26%|██▌       | 13/50 [00:34<01:37,  2.65s/it]

[I 2025-12-08 21:13:52,871] Trial 12 finished with value: 0.7921366580043996 and parameters: {'n_estimators': 921, 'learning_rate': 0.006836608214214818, 'num_leaves': 44, 'max_depth': 6, 'min_child_samples': 99, 'subsample': 0.8886964891340507, 'colsample_bytree': 0.8510571781023799, 'reg_alpha': 0.17625374295088747, 'reg_lambda': 0.4190389366601127}. Best is trial 11 with value: 0.7930518472943676.


Best trial: 11. Best value: 0.793052:  28%|██▊       | 14/50 [00:37<01:37,  2.71s/it]

[I 2025-12-08 21:13:55,725] Trial 13 finished with value: 0.7918948469183249 and parameters: {'n_estimators': 891, 'learning_rate': 0.0021999246042774014, 'num_leaves': 46, 'max_depth': 6, 'min_child_samples': 124, 'subsample': 0.8779507786146067, 'colsample_bytree': 0.9079962782069043, 'reg_alpha': 0.5661324486824431, 'reg_lambda': 0.07008407697515183}. Best is trial 11 with value: 0.7930518472943676.


Best trial: 14. Best value: 0.793343:  30%|███       | 15/50 [00:39<01:29,  2.56s/it]

[I 2025-12-08 21:13:57,928] Trial 14 finished with value: 0.7933431729016999 and parameters: {'n_estimators': 846, 'learning_rate': 0.008452534614636504, 'num_leaves': 33, 'max_depth': 5, 'min_child_samples': 86, 'subsample': 0.9006453699263156, 'colsample_bytree': 0.8369699955560163, 'reg_alpha': 0.05335668109210608, 'reg_lambda': 0.6602498791787774}. Best is trial 14 with value: 0.7933431729016999.


Best trial: 14. Best value: 0.793343:  32%|███▏      | 16/50 [00:41<01:22,  2.42s/it]

[I 2025-12-08 21:14:00,027] Trial 15 finished with value: 0.791643565488411 and parameters: {'n_estimators': 852, 'learning_rate': 0.014425540650734167, 'num_leaves': 36, 'max_depth': 5, 'min_child_samples': 136, 'subsample': 0.9932813171758177, 'colsample_bytree': 0.9286105839017185, 'reg_alpha': 0.026656530844523643, 'reg_lambda': 0.6206514782474425}. Best is trial 14 with value: 0.7933431729016999.


Best trial: 14. Best value: 0.793343:  34%|███▍      | 17/50 [00:44<01:21,  2.46s/it]

[I 2025-12-08 21:14:02,566] Trial 16 finished with value: 0.7922401435269144 and parameters: {'n_estimators': 987, 'learning_rate': 0.009034810518835333, 'num_leaves': 53, 'max_depth': 5, 'min_child_samples': 90, 'subsample': 0.9378871950705064, 'colsample_bytree': 0.8144090954172367, 'reg_alpha': 0.0472748163384166, 'reg_lambda': 0.001129059573387677}. Best is trial 14 with value: 0.7933431729016999.


Best trial: 14. Best value: 0.793343:  36%|███▌      | 18/50 [00:47<01:22,  2.57s/it]

[I 2025-12-08 21:14:05,392] Trial 17 finished with value: 0.7849798626471628 and parameters: {'n_estimators': 836, 'learning_rate': 0.02389520940938864, 'num_leaves': 32, 'max_depth': 10, 'min_child_samples': 144, 'subsample': 0.8509321290367471, 'colsample_bytree': 0.7113349365124932, 'reg_alpha': 0.08534681293838127, 'reg_lambda': 9.371621681350312}. Best is trial 14 with value: 0.7933431729016999.


Best trial: 14. Best value: 0.793343:  38%|███▊      | 19/50 [00:48<01:05,  2.13s/it]

[I 2025-12-08 21:14:06,494] Trial 18 finished with value: 0.7912542996655719 and parameters: {'n_estimators': 443, 'learning_rate': 0.00732656917249991, 'num_leaves': 52, 'max_depth': 4, 'min_child_samples': 119, 'subsample': 0.9357555842324451, 'colsample_bytree': 0.8255898796085731, 'reg_alpha': 0.005119740996841137, 'reg_lambda': 0.6840553669268351}. Best is trial 14 with value: 0.7933431729016999.


Best trial: 14. Best value: 0.793343:  40%|████      | 20/50 [00:50<01:06,  2.22s/it]

[I 2025-12-08 21:14:08,939] Trial 19 finished with value: 0.7914110713801931 and parameters: {'n_estimators': 903, 'learning_rate': 0.002546089300100258, 'num_leaves': 52, 'max_depth': 5, 'min_child_samples': 90, 'subsample': 0.8122387609410768, 'colsample_bytree': 0.9417276066816138, 'reg_alpha': 0.10318131126011343, 'reg_lambda': 0.2584575236620367}. Best is trial 14 with value: 0.7933431729016999.


Best trial: 14. Best value: 0.793343:  42%|████▏     | 21/50 [00:52<01:02,  2.15s/it]

[I 2025-12-08 21:14:10,926] Trial 20 finished with value: 0.7872945433396378 and parameters: {'n_estimators': 667, 'learning_rate': 0.03081929803347151, 'num_leaves': 89, 'max_depth': 7, 'min_child_samples': 194, 'subsample': 0.9614035607563236, 'colsample_bytree': 0.8845851510523767, 'reg_alpha': 0.0011129709953152695, 'reg_lambda': 0.026154910812411912}. Best is trial 14 with value: 0.7933431729016999.


Best trial: 14. Best value: 0.793343:  44%|████▍     | 22/50 [00:54<00:59,  2.13s/it]

[I 2025-12-08 21:14:12,995] Trial 21 finished with value: 0.7923626987927844 and parameters: {'n_estimators': 741, 'learning_rate': 0.005387034728193539, 'num_leaves': 21, 'max_depth': 6, 'min_child_samples': 75, 'subsample': 0.8912230190030175, 'colsample_bytree': 0.8466939141943974, 'reg_alpha': 0.3599685531113075, 'reg_lambda': 0.08859440675285886}. Best is trial 14 with value: 0.7933431729016999.


Best trial: 14. Best value: 0.793343:  46%|████▌     | 23/50 [00:57<01:00,  2.23s/it]

[I 2025-12-08 21:14:15,475] Trial 22 finished with value: 0.7932311352547025 and parameters: {'n_estimators': 815, 'learning_rate': 0.005905246280927669, 'num_leaves': 32, 'max_depth': 6, 'min_child_samples': 71, 'subsample': 0.8436964947113367, 'colsample_bytree': 0.7931685524858315, 'reg_alpha': 0.4796683001752543, 'reg_lambda': 1.1026920564504459}. Best is trial 14 with value: 0.7933431729016999.


Best trial: 14. Best value: 0.793343:  48%|████▊     | 24/50 [00:59<00:59,  2.28s/it]

[I 2025-12-08 21:14:17,865] Trial 23 finished with value: 0.7918647834336712 and parameters: {'n_estimators': 810, 'learning_rate': 0.0026392203006626747, 'num_leaves': 33, 'max_depth': 5, 'min_child_samples': 106, 'subsample': 0.8385107447599782, 'colsample_bytree': 0.7721442367060215, 'reg_alpha': 1.0696927801824256, 'reg_lambda': 1.010073562094813}. Best is trial 14 with value: 0.7933431729016999.


Best trial: 14. Best value: 0.793343:  50%|█████     | 25/50 [01:02<00:58,  2.34s/it]

[I 2025-12-08 21:14:20,340] Trial 24 finished with value: 0.7910570376230228 and parameters: {'n_estimators': 870, 'learning_rate': 0.011723945433261098, 'num_leaves': 42, 'max_depth': 6, 'min_child_samples': 70, 'subsample': 0.7667894451802009, 'colsample_bytree': 0.806251035477022, 'reg_alpha': 0.1596973748411324, 'reg_lambda': 6.212109730041276}. Best is trial 14 with value: 0.7933431729016999.


Best trial: 14. Best value: 0.793343:  52%|█████▏    | 26/50 [01:05<01:00,  2.51s/it]

[I 2025-12-08 21:14:23,263] Trial 25 finished with value: 0.7916407486866318 and parameters: {'n_estimators': 946, 'learning_rate': 0.006556138867281358, 'num_leaves': 31, 'max_depth': 7, 'min_child_samples': 91, 'subsample': 0.9225159463794992, 'colsample_bytree': 0.7170854336427618, 'reg_alpha': 0.04766916451540558, 'reg_lambda': 1.0447634388812208}. Best is trial 14 with value: 0.7933431729016999.


Best trial: 14. Best value: 0.793343:  54%|█████▍    | 27/50 [01:07<00:58,  2.56s/it]

[I 2025-12-08 21:14:25,928] Trial 26 finished with value: 0.7898282567857101 and parameters: {'n_estimators': 796, 'learning_rate': 0.009695675206420215, 'num_leaves': 38, 'max_depth': 8, 'min_child_samples': 125, 'subsample': 0.8594411341448177, 'colsample_bytree': 0.8816383367503096, 'reg_alpha': 0.5213570240351966, 'reg_lambda': 0.244943562699294}. Best is trial 14 with value: 0.7933431729016999.


Best trial: 14. Best value: 0.793343:  56%|█████▌    | 28/50 [01:10<00:55,  2.53s/it]

[I 2025-12-08 21:14:28,392] Trial 27 finished with value: 0.7839548763231047 and parameters: {'n_estimators': 996, 'learning_rate': 0.02327748277184866, 'num_leaves': 49, 'max_depth': 5, 'min_child_samples': 20, 'subsample': 0.9693110338855696, 'colsample_bytree': 0.7889135762773009, 'reg_alpha': 0.2006993688111294, 'reg_lambda': 0.3914026502632381}. Best is trial 14 with value: 0.7933431729016999.


Best trial: 14. Best value: 0.793343:  58%|█████▊    | 29/50 [01:11<00:48,  2.30s/it]

[I 2025-12-08 21:14:30,150] Trial 28 finished with value: 0.78927665875428 and parameters: {'n_estimators': 695, 'learning_rate': 0.0030089668225756886, 'num_leaves': 59, 'max_depth': 4, 'min_child_samples': 61, 'subsample': 0.9135600300881614, 'colsample_bytree': 0.9515786651330977, 'reg_alpha': 0.06567172518114968, 'reg_lambda': 1.216259947883864}. Best is trial 14 with value: 0.7933431729016999.


Best trial: 14. Best value: 0.793343:  60%|██████    | 30/50 [01:14<00:48,  2.43s/it]

[I 2025-12-08 21:14:32,888] Trial 29 finished with value: 0.7922539238859053 and parameters: {'n_estimators': 862, 'learning_rate': 0.0017742671913951403, 'num_leaves': 36, 'max_depth': 6, 'min_child_samples': 39, 'subsample': 0.8217026057287844, 'colsample_bytree': 0.6781090679577798, 'reg_alpha': 2.287461665842602, 'reg_lambda': 0.153894539420218}. Best is trial 14 with value: 0.7933431729016999.


Best trial: 14. Best value: 0.793343:  62%|██████▏   | 31/50 [01:19<01:00,  3.18s/it]

[I 2025-12-08 21:14:37,825] Trial 30 finished with value: 0.7919346256700811 and parameters: {'n_estimators': 938, 'learning_rate': 0.00548908670605779, 'num_leaves': 56, 'max_depth': 7, 'min_child_samples': 103, 'subsample': 0.7854289132795064, 'colsample_bytree': 0.8253006399424533, 'reg_alpha': 0.6516833044575154, 'reg_lambda': 3.67988594185217}. Best is trial 14 with value: 0.7933431729016999.


Best trial: 14. Best value: 0.793343:  64%|██████▍   | 32/50 [01:23<01:01,  3.43s/it]

[I 2025-12-08 21:14:41,823] Trial 31 finished with value: 0.7923362522841377 and parameters: {'n_estimators': 756, 'learning_rate': 0.005931060083020878, 'num_leaves': 21, 'max_depth': 6, 'min_child_samples': 76, 'subsample': 0.8989312133179788, 'colsample_bytree': 0.8421590249382996, 'reg_alpha': 0.30612368672103946, 'reg_lambda': 0.04888543382223385}. Best is trial 14 with value: 0.7933431729016999.


Best trial: 32. Best value: 0.793604:  66%|██████▌   | 33/50 [01:25<00:52,  3.07s/it]

[I 2025-12-08 21:14:44,059] Trial 32 finished with value: 0.793604173473625 and parameters: {'n_estimators': 794, 'learning_rate': 0.008881822912096228, 'num_leaves': 29, 'max_depth': 5, 'min_child_samples': 84, 'subsample': 0.8677638295483068, 'colsample_bytree': 0.8766480738100595, 'reg_alpha': 0.43053528985758927, 'reg_lambda': 0.25327579930230437}. Best is trial 32 with value: 0.793604173473625.


Best trial: 32. Best value: 0.793604:  68%|██████▊   | 34/50 [01:28<00:45,  2.82s/it]

[I 2025-12-08 21:14:46,309] Trial 33 finished with value: 0.7925890801376255 and parameters: {'n_estimators': 815, 'learning_rate': 0.008060636064085724, 'num_leaves': 29, 'max_depth': 5, 'min_child_samples': 63, 'subsample': 0.8646594035293843, 'colsample_bytree': 0.8938163874306501, 'reg_alpha': 0.02250165660513912, 'reg_lambda': 0.19469424970543187}. Best is trial 32 with value: 0.793604173473625.


Best trial: 32. Best value: 0.793604:  70%|███████   | 35/50 [01:30<00:38,  2.56s/it]

[I 2025-12-08 21:14:48,264] Trial 34 finished with value: 0.7927217856430571 and parameters: {'n_estimators': 796, 'learning_rate': 0.012038715346042388, 'num_leaves': 28, 'max_depth': 4, 'min_child_samples': 84, 'subsample': 0.8357808148933928, 'colsample_bytree': 0.8682456569636883, 'reg_alpha': 2.100218920703062, 'reg_lambda': 0.4471189444988687}. Best is trial 32 with value: 0.793604173473625.


Best trial: 32. Best value: 0.793604:  72%|███████▏  | 36/50 [01:32<00:35,  2.52s/it]

[I 2025-12-08 21:14:50,686] Trial 35 finished with value: 0.7894361197225331 and parameters: {'n_estimators': 891, 'learning_rate': 0.01862600937354202, 'num_leaves': 46, 'max_depth': 5, 'min_child_samples': 97, 'subsample': 0.7124756259501557, 'colsample_bytree': 0.7710047978294572, 'reg_alpha': 0.1212544624010962, 'reg_lambda': 0.8705276219665496}. Best is trial 32 with value: 0.793604173473625.


Best trial: 32. Best value: 0.793604:  74%|███████▍  | 37/50 [01:34<00:29,  2.26s/it]

[I 2025-12-08 21:14:52,339] Trial 36 finished with value: 0.790361836904907 and parameters: {'n_estimators': 690, 'learning_rate': 0.0034152210721694, 'num_leaves': 99, 'max_depth': 4, 'min_child_samples': 65, 'subsample': 0.9542994194818036, 'colsample_bytree': 0.801297994494541, 'reg_alpha': 0.009803149188709743, 'reg_lambda': 1.5159613160223306}. Best is trial 32 with value: 0.793604173473625.


Best trial: 32. Best value: 0.793604:  76%|███████▌  | 38/50 [01:36<00:27,  2.28s/it]

[I 2025-12-08 21:14:54,676] Trial 37 finished with value: 0.791935432008429 and parameters: {'n_estimators': 616, 'learning_rate': 0.0011360826146721123, 'num_leaves': 35, 'max_depth': 8, 'min_child_samples': 112, 'subsample': 0.9149420772119949, 'colsample_bytree': 0.6426255301412166, 'reg_alpha': 1.1169761684559187, 'reg_lambda': 0.3212546656295636}. Best is trial 32 with value: 0.793604173473625.


Best trial: 32. Best value: 0.793604:  78%|███████▊  | 39/50 [01:39<00:27,  2.49s/it]

[I 2025-12-08 21:14:57,644] Trial 38 finished with value: 0.7869224739483897 and parameters: {'n_estimators': 766, 'learning_rate': 0.01091607390390834, 'num_leaves': 69, 'max_depth': 7, 'min_child_samples': 48, 'subsample': 0.7823079008005829, 'colsample_bytree': 0.9645374376534528, 'reg_alpha': 0.04030248855649807, 'reg_lambda': 0.13597746278926545}. Best is trial 32 with value: 0.793604173473625.


Best trial: 32. Best value: 0.793604:  80%|████████  | 40/50 [01:41<00:22,  2.25s/it]

[I 2025-12-08 21:14:59,339] Trial 39 finished with value: 0.7915506668194485 and parameters: {'n_estimators': 611, 'learning_rate': 0.003967141953927089, 'num_leaves': 40, 'max_depth': 5, 'min_child_samples': 82, 'subsample': 0.9788011653584434, 'colsample_bytree': 0.9122295461788753, 'reg_alpha': 0.8739629549460575, 'reg_lambda': 2.4518351412796604}. Best is trial 32 with value: 0.793604173473625.


Best trial: 32. Best value: 0.793604:  82%|████████▏ | 41/50 [01:43<00:21,  2.33s/it]

[I 2025-12-08 21:15:01,871] Trial 40 finished with value: 0.7924835599418752 and parameters: {'n_estimators': 824, 'learning_rate': 0.005077885216531882, 'num_leaves': 25, 'max_depth': 7, 'min_child_samples': 35, 'subsample': 0.8672113500722914, 'colsample_bytree': 0.7356999408165396, 'reg_alpha': 0.22119174850569476, 'reg_lambda': 0.581496732610803}. Best is trial 32 with value: 0.793604173473625.


Best trial: 32. Best value: 0.793604:  84%|████████▍ | 42/50 [01:45<00:17,  2.17s/it]

[I 2025-12-08 21:15:03,666] Trial 41 finished with value: 0.7927976298338558 and parameters: {'n_estimators': 777, 'learning_rate': 0.01162711824588847, 'num_leaves': 28, 'max_depth': 4, 'min_child_samples': 85, 'subsample': 0.834791846895888, 'colsample_bytree': 0.8748842492495719, 'reg_alpha': 8.010446123099014, 'reg_lambda': 0.4239873969501243}. Best is trial 32 with value: 0.793604173473625.


Best trial: 32. Best value: 0.793604:  86%|████████▌ | 43/50 [01:47<00:14,  2.03s/it]

[I 2025-12-08 21:15:05,370] Trial 42 finished with value: 0.7929273948977702 and parameters: {'n_estimators': 769, 'learning_rate': 0.00965994698103269, 'num_leaves': 26, 'max_depth': 4, 'min_child_samples': 53, 'subsample': 0.823158541795512, 'colsample_bytree': 0.8771395026430179, 'reg_alpha': 2.0317538462774913, 'reg_lambda': 1.7116239207494957}. Best is trial 32 with value: 0.793604173473625.


Best trial: 32. Best value: 0.793604:  88%|████████▊ | 44/50 [01:49<00:11,  2.00s/it]

[I 2025-12-08 21:15:07,291] Trial 43 finished with value: 0.7928096734632385 and parameters: {'n_estimators': 852, 'learning_rate': 0.007826097736143076, 'num_leaves': 20, 'max_depth': 4, 'min_child_samples': 56, 'subsample': 0.8090979904615181, 'colsample_bytree': 0.8356301140092558, 'reg_alpha': 2.597185636625333, 'reg_lambda': 1.6565319123462836}. Best is trial 32 with value: 0.793604173473625.


Best trial: 32. Best value: 0.793604:  90%|█████████ | 45/50 [01:51<00:10,  2.02s/it]

[I 2025-12-08 21:15:09,370] Trial 44 finished with value: 0.7924344508790027 and parameters: {'n_estimators': 725, 'learning_rate': 0.004627904709839804, 'num_leaves': 25, 'max_depth': 5, 'min_child_samples': 70, 'subsample': 0.7511227917885789, 'colsample_bytree': 0.9030177773099609, 'reg_alpha': 0.45543051183088634, 'reg_lambda': 4.9083999436455095}. Best is trial 32 with value: 0.793604173473625.


Best trial: 32. Best value: 0.793604:  92%|█████████▏| 46/50 [01:55<00:10,  2.63s/it]

[I 2025-12-08 21:15:13,403] Trial 45 finished with value: 0.7877030520818663 and parameters: {'n_estimators': 908, 'learning_rate': 0.017552666857500246, 'num_leaves': 33, 'max_depth': 6, 'min_child_samples': 51, 'subsample': 0.8472688642873125, 'colsample_bytree': 0.9336168559743598, 'reg_alpha': 4.4264428568875696, 'reg_lambda': 2.3498904779123504}. Best is trial 32 with value: 0.793604173473625.


Best trial: 32. Best value: 0.793604:  94%|█████████▍| 47/50 [01:58<00:08,  2.79s/it]

[I 2025-12-08 21:15:16,588] Trial 46 finished with value: 0.7925254033722838 and parameters: {'n_estimators': 957, 'learning_rate': 0.009114261731464865, 'num_leaves': 39, 'max_depth': 5, 'min_child_samples': 98, 'subsample': 0.7929984646014201, 'colsample_bytree': 0.818779854698421, 'reg_alpha': 1.3603194217225292, 'reg_lambda': 0.8125546182390857}. Best is trial 32 with value: 0.793604173473625.


Best trial: 32. Best value: 0.793604:  96%|█████████▌| 48/50 [02:02<00:06,  3.18s/it]

[I 2025-12-08 21:15:20,664] Trial 47 finished with value: 0.7923811065967667 and parameters: {'n_estimators': 875, 'learning_rate': 0.004124613891169097, 'num_leaves': 44, 'max_depth': 6, 'min_child_samples': 45, 'subsample': 0.9995336830201922, 'colsample_bytree': 0.863467294054376, 'reg_alpha': 1.6868919208719138, 'reg_lambda': 0.1930737250440037}. Best is trial 32 with value: 0.793604173473625.


Best trial: 32. Best value: 0.793604:  98%|█████████▊| 49/50 [02:04<00:02,  2.95s/it]

[I 2025-12-08 21:15:23,092] Trial 48 finished with value: 0.7872186105731164 and parameters: {'n_estimators': 698, 'learning_rate': 0.05571612526848566, 'num_leaves': 25, 'max_depth': 4, 'min_child_samples': 113, 'subsample': 0.8763802424773005, 'colsample_bytree': 0.9206747679419996, 'reg_alpha': 0.8581072966250312, 'reg_lambda': 1.3984543772264393}. Best is trial 32 with value: 0.793604173473625.


Best trial: 32. Best value: 0.793604: 100%|██████████| 50/50 [02:06<00:00,  2.53s/it]


[I 2025-12-08 21:15:24,819] Trial 49 finished with value: 0.7917507590528704 and parameters: {'n_estimators': 483, 'learning_rate': 0.006360801274153528, 'num_leaves': 84, 'max_depth': 5, 'min_child_samples': 30, 'subsample': 0.6923545999675519, 'colsample_bytree': 0.8952021171061302, 'reg_alpha': 0.273373862889253, 'reg_lambda': 0.570069243932888}. Best is trial 32 with value: 0.793604173473625.

 HASIL TUNING LIGHTGBM DENGAN OPTUNA
AUC Score Terbaik (CV): 0.7936
Kombinasi Parameter Terbaik:
{'n_estimators': 794, 'learning_rate': 0.008881822912096228, 'num_leaves': 29, 'max_depth': 5, 'min_child_samples': 84, 'subsample': 0.8677638295483068, 'colsample_bytree': 0.8766480738100595, 'reg_alpha': 0.43053528985758927, 'reg_lambda': 0.25327579930230437}

AUC Score Final pada Data Test: 0.8121


In [15]:
import joblib

# Serialize the fitted LightGBM pipeline to the notebook's working directory.
# joblib.dump returns the list of file names it wrote, which the cell echoes.
joblib.dump(
    value=final_lgbm_pipeline,
    filename='model_lead_scoring_final_deployment.joblib',
)

['model_lead_scoring_final_deployment.joblib']

In [14]:
pip freeze > requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [26]:
# Persist the fitted pipeline under model/ for deployment.
# joblib.dump does not create missing parent directories, so make sure the
# target folder exists first; exist_ok=True keeps the cell safe to re-run.
import os

os.makedirs('model', exist_ok=True)
joblib.dump(final_lgbm_pipeline, 'model/model_lead_scoring_final_deployment.joblib')

['model/model_lead_scoring_final_deployment.joblib']

In [27]:
# Write a copy of the fitted pipeline into the app package directory.
# NOTE(review): the file is produced by joblib despite the .pkl extension, so
# it should presumably be read back with joblib.load — confirm in the app code.
# NOTE(review): if the pipeline holds transformer classes defined in notebook
# cells instead of the ones imported from app.utils, the saved object will
# reference __main__ and may fail to load inside the app — verify.
joblib.dump(
    value=final_lgbm_pipeline,
    filename="app/model.pkl",
)

['app/model.pkl']