In [235]:
import pandas as pd
from sklearn.model_selection import train_test_split
from lightgbm import LGBMClassifier
from sklearn.metrics import classification_report, roc_auc_score

# Load the processed dataset
df = pd.read_csv('Dataset/train_processed.csv')
test_df = pd.read_csv('Dataset/test_processed.csv')

bool_cols = df.select_dtypes(include='bool').columns
df[bool_cols] = df[bool_cols].astype(int)
bool_cols_test = test_df.select_dtypes(include='bool').columns
test_df[bool_cols_test] = test_df[bool_cols_test].astype(int)

In [236]:
# Compute pair frequency (A → B)
account_pairs = df.groupby(['From Account', 'To Account']).size().reset_index(name='pair_frequency')

# Compute reverse pair frequency (B → A)
reverse_pairs = account_pairs.copy()
reverse_pairs.columns = ['To Account', 'From Account', 'reverse_pair_frequency']  # flip column names

# Merge both into the original dataframe
df = df.merge(account_pairs, on=['From Account', 'To Account'], how='left')
df = df.merge(reverse_pairs, on=['From Account', 'To Account'], how='left')  # correct merge

# Fill missing frequencies
df['pair_frequency'].fillna(1, inplace=True)
df['reverse_pair_frequency'].fillna(0, inplace=True)

# Flag circular transactions
df['is_circular'] = (df['reverse_pair_frequency'] > 0).astype(int)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['pair_frequency'].fillna(1, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['reverse_pair_frequency'].fillna(0, inplace=True)


In [237]:
# Compute pair frequency (A → B)
account_pairs = test_df.groupby(['From Account', 'To Account']).size().reset_index(name='pair_frequency')

# Compute reverse pair frequency (B → A)
reverse_pairs = account_pairs.copy()
reverse_pairs.columns = ['To Account', 'From Account', 'reverse_pair_frequency']  # flip column names

# Merge both into the original dataframe
test_df = test_df.merge(account_pairs, on=['From Account', 'To Account'], how='left')
test_df = test_df.merge(reverse_pairs, on=['From Account', 'To Account'], how='left')  # correct merge

# Fill missing frequencies
test_df['pair_frequency'].fillna(1, inplace=True)
test_df['reverse_pair_frequency'].fillna(0, inplace=True)

# Flag circular transactions
test_df['is_circular'] = (test_df['reverse_pair_frequency'] > 0).astype(int)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_df['pair_frequency'].fillna(1, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_df['reverse_pair_frequency'].fillna(0, inplace=True)


In [238]:
import  networkx as nx
# Initialize a directed graph
G = nx.DiGraph()

# Add edges with attributes
for idx, row in df.iterrows():
    G.add_edge(
        row['From Account'],
        row['To Account'],
        amount_log=row['Amount Paid_log'],
        pair_frequency=row['pair_frequency'],
        reverse_pair_frequency=row['reverse_pair_frequency'],
        is_circular=row['is_circular']
    )


In [239]:
import  networkx as nx
# Initialize a directed graph
G_test = nx.DiGraph()

# Add edges with attributes
for idx, row in test_df.iterrows():
    G_test.add_edge(
        row['From Account'],
        row['To Account'],
        amount_log=row['Amount Paid_log'],
        pair_frequency=row['pair_frequency'],
        reverse_pair_frequency=row['reverse_pair_frequency'],
        is_circular=row['is_circular']
    )

In [None]:
#in_degree_centrality = nx.in_degree_centrality(G)
#out_degree_centrality = nx.out_degree_centrality(G)
betweenness_centrality = nx.betweenness_centrality(G, k=min(1000, len(G.nodes())))

In [None]:
#in_degree_centrality_test = nx.in_degree_centrality(G_test)
#out_degree_centrality_test = nx.out_degree_centrality(G_test)
betweenness_centrality_test = nx.betweenness_centrality(G_test, k=min(1000, len(G_test.nodes())))

In [242]:
#df['from_out_degree_centrality'] = df['From Account'].map(out_degree_centrality).fillna(0)
df['from_betweenness_centrality'] = df['From Account'].map(betweenness_centrality).fillna(0)
#df['to_in_degree_centrality'] = df['To Account'].map(in_degree_centrality).fillna(0)
df['to_betweenness_centrality'] = df['To Account'].map(betweenness_centrality).fillna(0)

In [243]:
#test_df['from_out_degree_centrality'] = test_df['From Account'].map(out_degree_centrality_test).fillna(0)
test_df['from_betweenness_centrality'] = test_df['From Account'].map(betweenness_centrality_test).fillna(0)
#test_df['to_in_degree_centrality'] = test_df['To Account'].map(in_degree_centrality_test).fillna(0)
test_df['to_betweenness_centrality'] = test_df['To Account'].map(betweenness_centrality_test).fillna(0)

In [None]:
df = df.drop(columns=['From Account', 'To Account'], errors='ignore')

# Separate features and target
X = df.drop(columns=['Is Laundering'])
y = df['Is Laundering']

# Train-test split
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [245]:
#!pip install optuna

In [246]:
test_df = test_df.drop(columns=['From Account', 'To Account'], errors='ignore')

In [247]:
from sklearn.metrics import roc_auc_score, confusion_matrix

def calculate_balanced_accuracy(y_true, y_pred):
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    tpr = tp / (tp + fn) if (tp + fn) > 0 else 0
    tnr = tn / (tn + fp) if (tn + fp) > 0 else 0
    return (tpr + tnr) / 2

def calculate_fraud_capture_rate(y_true, y_prob, N=485):
    sorted_indices = np.argsort(y_prob)[::-1]
    top_N_indices = sorted_indices[:N]
    frauds_in_top_N = np.sum(y_true.iloc[top_N_indices] if hasattr(y_true, 'iloc') else y_true[top_N_indices])
    total_frauds = np.sum(y_true)
    return frauds_in_top_N / total_frauds if total_frauds > 0 else 0

def calculate_composite_score(y_true, y_pred, y_prob, N=485):
    auc_score = roc_auc_score(y_true, y_prob)
    balanced_acc = calculate_balanced_accuracy(y_true, y_pred)
    fraud_capture = calculate_fraud_capture_rate(y_true, y_prob, N)
    return (auc_score + balanced_acc + fraud_capture) / 3


### LGBM

In [250]:
import lightgbm as lgb
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score, confusion_matrix
import numpy as np

def objective(trial):
    scale_pos_weight = len(y_train[y_train == 0]) / len(y_train[y_train == 1])
    
    # 🔧 Threshold being tuned by Optuna
    threshold = trial.suggest_float("threshold", 0.05, 0.5)

    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 300),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
        'max_depth': trial.suggest_int('max_depth', 5, 15),
        'num_leaves': trial.suggest_int('num_leaves', 20, 40),
        'min_child_samples': trial.suggest_int('min_child_samples', 10, 50),
        'subsample': trial.suggest_float('subsample', 0.7, 1.0),
        'subsample_freq': trial.suggest_int('subsample_freq', 1, 5),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 1.0),
        'max_bin': trial.suggest_int('max_bin', 200, 300),
        'scale_pos_weight': scale_pos_weight,
        'force_col_wise': True,
        'verbosity': -1
    }

    kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    auc_scores = [] 
    balanced_accuracies = []
    fraud_capture_rates = []
    scores = []

    for train_idx, val_idx in kf.split(X_train, y_train):
        X_fold_train, X_fold_val = X_train.iloc[train_idx], X_train.iloc[val_idx]
        y_fold_train, y_fold_val = y_train.iloc[train_idx], y_train.iloc[val_idx]

        model = lgb.LGBMClassifier(**params, random_state=42)
        model.fit(X_fold_train, y_fold_train)

        y_prob = model.predict_proba(X_fold_val)[:, 1]
        y_pred = (y_prob >= threshold).astype(int)

        auc_score = roc_auc_score(y_fold_val, y_prob)
        balanced_acc = calculate_balanced_accuracy(y_fold_val, y_pred)
        fraud_capture = calculate_fraud_capture_rate(y_fold_val, y_prob, N=485)
        score = calculate_composite_score(y_fold_val, y_pred, y_prob, N=485)

        auc_scores.append(auc_score)
        balanced_accuracies.append(balanced_acc)    
        fraud_capture_rates.append(fraud_capture)
        scores.append(score)

    print(f"AUC: {np.mean(auc_scores)}, Balanced Accuracy: {np.mean(balanced_accuracies)}, Fraud Capture Rate: {np.mean(fraud_capture_rates)}, Composite Score: {np.mean(scores)}")

    return np.mean(scores)


In [251]:
import optuna

study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=50)

print("Best Hyperparameters:", study.best_params)
print("Best Composite Score:", round(study.best_value, 4))


[I 2025-06-16 04:46:00,389] A new study created in memory with name: no-name-5c4881e8-083c-4d74-b7f8-2040968c0fa0
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:02,163] Trial 0 finished with value: 0.9973394788306251 and parameters: {'threshold': 0.21854305348131314, 'n_estimators': 291, 'learning_rate': 0.0483437145318464, 'max_depth': 11, 'num_leaves': 23, 'min_child_samples': 16, 'subsample': 0.7174250836504598, 'subsample_freq': 5, 'colsample_bytree': 0.8404460046972835, 'reg_alpha': 0.7080725777960455, 'reg_lambda': 0.020584494295802447, 'max_bin': 297}. Best is trial 0 with value: 0.9973394788306251.


AUC: 0.9984435575137898, Balanced Accuracy: 0.997244603748728, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9973394788306251


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:03,414] Trial 1 finished with value: 0.8306976890419859 and parameters: {'threshold': 0.4245991883601898, 'n_estimators': 142, 'learning_rate': 0.002620503255096255, 'max_depth': 7, 'num_leaves': 26, 'min_child_samples': 31, 'subsample': 0.8295835055926347, 'subsample_freq': 2, 'colsample_bytree': 0.8447411578889518, 'reg_alpha': 0.13949386065204183, 'reg_lambda': 0.29214464853521815, 'max_bin': 237}. Best is trial 0 with value: 0.9973394788306251.


AUC: 0.997580973714782, Balanced Accuracy: 0.5, Fraud Capture Rate: 0.994512093411176, Composite Score: 0.8306976890419859


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:05,577] Trial 2 finished with value: 0.9948533773499028 and parameters: {'threshold': 0.25523149289766617, 'n_estimators': 257, 'learning_rate': 0.0028804169778805498, 'max_depth': 10, 'num_leaves': 32, 'min_child_samples': 11, 'subsample': 0.8822634555704315, 'subsample_freq': 1, 'colsample_bytree': 0.6260206371941118, 'reg_alpha': 0.9488855372533332, 'reg_lambda': 0.9656320330745594, 'max_bin': 281}. Best is trial 0 with value: 0.9973394788306251.


AUC: 0.9975205285511084, Balanced Accuracy: 0.992527510087424, Fraud Capture Rate: 0.994512093411176, Composite Score: 0.9948533773499028


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:06,561] Trial 3 finished with value: 0.9971897720764205 and parameters: {'threshold': 0.1870761961280168, 'n_estimators': 119, 'learning_rate': 0.037535371452331254, 'max_depth': 9, 'num_leaves': 22, 'min_child_samples': 30, 'subsample': 0.7103165563345655, 'subsample_freq': 5, 'colsample_bytree': 0.7035119926400067, 'reg_alpha': 0.662522284353982, 'reg_lambda': 0.31171107608941095, 'max_bin': 252}. Best is trial 0 with value: 0.9973394788306251.


AUC: 0.9982232898420627, Balanced Accuracy: 0.9970157511578408, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9971897720764205


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:07,513] Trial 4 finished with value: 0.9973086226542248 and parameters: {'threshold': 0.2960196257044759, 'n_estimators': 137, 'learning_rate': 0.17023282716867383, 'max_depth': 13, 'num_leaves': 39, 'min_child_samples': 46, 'subsample': 0.8793699936433255, 'subsample_freq': 5, 'colsample_bytree': 0.6353970008207678, 'reg_alpha': 0.1959828624191452, 'reg_lambda': 0.045227288910538066, 'max_bin': 232}. Best is trial 0 with value: 0.9973394788306251.


AUC: 0.9983395460277285, Balanced Accuracy: 0.997256046705588, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9973086226542248


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:08,598] Trial 5 finished with value: 0.9973174811510714 and parameters: {'threshold': 0.2249047803602669, 'n_estimators': 154, 'learning_rate': 0.08071418522169696, 'max_depth': 8, 'num_leaves': 25, 'min_child_samples': 32, 'subsample': 0.7422772674924287, 'subsample_freq': 5, 'colsample_bytree': 0.6298202574719083, 'reg_alpha': 0.9868869366005173, 'reg_lambda': 0.7722447692966574, 'max_bin': 220}. Best is trial 0 with value: 0.9973394788306251.


AUC: 0.9983890074319886, Balanced Accuracy: 0.997233160791868, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9973174811510714


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:10,209] Trial 6 finished with value: 0.9974191057760695 and parameters: {'threshold': 0.052484952705621084, 'n_estimators': 263, 'learning_rate': 0.04231554618260076, 'max_depth': 13, 'num_leaves': 36, 'min_child_samples': 13, 'subsample': 0.8075397185632818, 'subsample_freq': 1, 'colsample_bytree': 0.9452413703502374, 'reg_alpha': 0.6232981268275579, 'reg_lambda': 0.3308980248526492, 'max_bin': 206}. Best is trial 6 with value: 0.9974191057760695.


AUC: 0.9987396531344231, Balanced Accuracy: 0.9971873889644277, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9974191057760695


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:11,464] Trial 7 finished with value: 0.9971962595096224 and parameters: {'threshold': 0.189942044772048, 'n_estimators': 165, 'learning_rate': 0.04773596444669527, 'max_depth': 12, 'num_leaves': 38, 'min_child_samples': 29, 'subsample': 0.7358782737814905, 'subsample_freq': 4, 'colsample_bytree': 0.9043140194467589, 'reg_alpha': 0.5612771975694962, 'reg_lambda': 0.770967179954561, 'max_bin': 249}. Best is trial 6 with value: 0.9974191057760695.


AUC: 0.9980138995507815, Balanced Accuracy: 0.997244603748728, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9971962595096224


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:12,588] Trial 8 finished with value: 0.8306843029742463 and parameters: {'threshold': 0.28522977322189735, 'n_estimators': 185, 'learning_rate': 0.0011441689901040717, 'max_depth': 6, 'num_leaves': 20, 'min_child_samples': 36, 'subsample': 0.794306794322898, 'subsample_freq': 3, 'colsample_bytree': 0.9630265895704372, 'reg_alpha': 0.24929222914887494, 'reg_lambda': 0.41038292303562973, 'max_bin': 276}. Best is trial 6 with value: 0.9974191057760695.


AUC: 0.9975408155115634, Balanced Accuracy: 0.5, Fraud Capture Rate: 0.994512093411176, Composite Score: 0.8306843029742463


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:13,516] Trial 9 finished with value: 0.9961264716872862 and parameters: {'threshold': 0.1529591744712301, 'n_estimators': 115, 'learning_rate': 0.0046422313960206695, 'max_depth': 6, 'num_leaves': 39, 'min_child_samples': 43, 'subsample': 0.8900211269531271, 'subsample_freq': 5, 'colsample_bytree': 0.9214688307596458, 'reg_alpha': 0.18657005888603584, 'reg_lambda': 0.8925589984899778, 'max_bin': 254}. Best is trial 6 with value: 0.9974191057760695.


AUC: 0.9968973410110189, Balanced Accuracy: 0.9969699806396634, Fraud Capture Rate: 0.994512093411176, Composite Score: 0.9961264716872862


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:15,449] Trial 10 finished with value: 0.9969877810927634 and parameters: {'threshold': 0.059251935237602735, 'n_estimators': 238, 'learning_rate': 0.013945622920790912, 'max_depth': 15, 'num_leaves': 33, 'min_child_samples': 21, 'subsample': 0.9879312237836261, 'subsample_freq': 1, 'colsample_bytree': 0.7590929544765699, 'reg_alpha': 0.41979011751915785, 'reg_lambda': 0.5904746895937297, 'max_bin': 200}. Best is trial 6 with value: 0.9974191057760695.


AUC: 0.9978004028915898, Balanced Accuracy: 0.9968326651573427, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9969877810927634


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:17,361] Trial 11 finished with value: 0.9971929695283986 and parameters: {'threshold': 0.07061152014203331, 'n_estimators': 300, 'learning_rate': 0.019694366952795934, 'max_depth': 12, 'num_leaves': 29, 'min_child_samples': 10, 'subsample': 0.7807997694361396, 'subsample_freq': 3, 'colsample_bytree': 0.8436612596493299, 'reg_alpha': 0.7399515059400725, 'reg_lambda': 0.060528481248404487, 'max_bin': 292}. Best is trial 6 with value: 0.9974191057760695.


AUC: 0.9982672110685776, Balanced Accuracy: 0.9969814222872607, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9971929695283986


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:19,255] Trial 12 finished with value: 0.9968345157553562 and parameters: {'threshold': 0.397365805571032, 'n_estimators': 298, 'learning_rate': 0.01280734943297432, 'max_depth': 14, 'num_leaves': 34, 'min_child_samples': 18, 'subsample': 0.7926062467845699, 'subsample_freq': 2, 'colsample_bytree': 0.9727316464879765, 'reg_alpha': 0.7758924049222458, 'reg_lambda': 0.18451614386025356, 'max_bin': 203}. Best is trial 6 with value: 0.9974191057760695.


AUC: 0.99714607792201, Balanced Accuracy: 0.997027194114701, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9968345157553562


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:20,552] Trial 13 finished with value: 0.9974151678324242 and parameters: {'threshold': 0.12088407857092512, 'n_estimators': 252, 'learning_rate': 0.18360492379851223, 'max_depth': 11, 'num_leaves': 36, 'min_child_samples': 19, 'subsample': 0.9450972662302015, 'subsample_freq': 4, 'colsample_bytree': 0.7977779145544659, 'reg_alpha': 0.42558946436382894, 'reg_lambda': 0.5255424110297373, 'max_bin': 267}. Best is trial 6 with value: 0.9974191057760695.


AUC: 0.9987278393034874, Balanced Accuracy: 0.9971873889644277, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9974151678324242


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:21,758] Trial 14 finished with value: 0.9974021051562361 and parameters: {'threshold': 0.11288875880659965, 'n_estimators': 228, 'learning_rate': 0.18246951725321595, 'max_depth': 10, 'num_leaves': 36, 'min_child_samples': 22, 'subsample': 0.9794475667206225, 'subsample_freq': 4, 'colsample_bytree': 0.7609185737563394, 'reg_alpha': 0.41221353992215093, 'reg_lambda': 0.5625128570253415, 'max_bin': 269}. Best is trial 6 with value: 0.9974191057760695.


AUC: 0.9986886512749221, Balanced Accuracy: 0.9971873889644277, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9974021051562361


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:23,577] Trial 15 finished with value: 0.9973658673830432 and parameters: {'threshold': 0.11349972760389912, 'n_estimators': 262, 'learning_rate': 0.09656280354921278, 'max_depth': 15, 'num_leaves': 36, 'min_child_samples': 14, 'subsample': 0.9416157882466093, 'subsample_freq': 2, 'colsample_bytree': 0.7874055053145005, 'reg_alpha': 0.530352595787787, 'reg_lambda': 0.39187638617313014, 'max_bin': 217}. Best is trial 6 with value: 0.9974191057760695.


AUC: 0.998522723171044, Balanced Accuracy: 0.997244603748728, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9973658673830432


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:25,436] Trial 16 finished with value: 0.9972492558710246 and parameters: {'threshold': 0.4867481870538959, 'n_estimators': 214, 'learning_rate': 0.024134668193677336, 'max_depth': 13, 'num_leaves': 29, 'min_child_samples': 24, 'subsample': 0.9250143694637856, 'subsample_freq': 4, 'colsample_bytree': 0.7079499309917164, 'reg_alpha': 0.015273881357550367, 'reg_lambda': 0.48696095538965, 'max_bin': 268}. Best is trial 6 with value: 0.9974191057760695.


AUC: 0.9982529880237461, Balanced Accuracy: 0.9971645043599704, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9972492558710246


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:27,240] Trial 17 finished with value: 0.997356485732882 and parameters: {'threshold': 0.11793873887773039, 'n_estimators': 265, 'learning_rate': 0.09157250691122128, 'max_depth': 11, 'num_leaves': 36, 'min_child_samples': 26, 'subsample': 0.8332420058027389, 'subsample_freq': 3, 'colsample_bytree': 0.9000451830589946, 'reg_alpha': 0.38290924532454923, 'reg_lambda': 0.6628126665727366, 'max_bin': 240}. Best is trial 6 with value: 0.9974191057760695.


AUC: 0.9984945782205603, Balanced Accuracy: 0.997244603748728, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.997356485732882


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:29,018] Trial 18 finished with value: 0.9969365632016247 and parameters: {'threshold': 0.05769719929317504, 'n_estimators': 190, 'learning_rate': 0.008684266142771367, 'max_depth': 13, 'num_leaves': 40, 'min_child_samples': 18, 'subsample': 0.9411704743828452, 'subsample_freq': 1, 'colsample_bytree': 0.7065916059622793, 'reg_alpha': 0.6108831317924108, 'reg_lambda': 0.21289083638344053, 'max_bin': 260}. Best is trial 6 with value: 0.9974191057760695.


AUC: 0.9978984916505691, Balanced Accuracy: 0.9965809227249472, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9969365632016247


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:30,048] Trial 19 finished with value: 0.9975816398743913 and parameters: {'threshold': 0.346455398480797, 'n_estimators': 242, 'learning_rate': 0.19868642303605738, 'max_depth': 9, 'num_leaves': 31, 'min_child_samples': 13, 'subsample': 0.9121522207270025, 'subsample_freq': 4, 'colsample_bytree': 0.8698489311347629, 'reg_alpha': 0.8362933585743446, 'reg_lambda': 0.4520321131575954, 'max_bin': 214}. Best is trial 19 with value: 0.9975816398743913.


AUC: 0.9992272541201259, Balanced Accuracy: 0.9971873902736904, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9975816398743913


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:31,773] Trial 20 finished with value: 0.9969880162355613 and parameters: {'threshold': 0.33562199529871534, 'n_estimators': 277, 'learning_rate': 0.06795448716496058, 'max_depth': 8, 'num_leaves': 30, 'min_child_samples': 38, 'subsample': 0.8531780048163369, 'subsample_freq': 2, 'colsample_bytree': 0.9315036931038344, 'reg_alpha': 0.8597022329559757, 'reg_lambda': 0.1854086060323945, 'max_bin': 214}. Best is trial 19 with value: 0.9975816398743913.


AUC: 0.9973891697285981, Balanced Accuracy: 0.997244603748728, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9969880162355613


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:32,772] Trial 21 finished with value: 0.9973026006018879 and parameters: {'threshold': 0.3592985829338455, 'n_estimators': 239, 'learning_rate': 0.19187838199505386, 'max_depth': 9, 'num_leaves': 32, 'min_child_samples': 13, 'subsample': 0.9087318598366706, 'subsample_freq': 4, 'colsample_bytree': 0.9998487723511209, 'reg_alpha': 0.8771249194554073, 'reg_lambda': 0.45088522178407364, 'max_bin': 227}. Best is trial 19 with value: 0.9975816398743913.


AUC: 0.999287784254846, Balanced Accuracy: 0.9962897423214596, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9973026006018879


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:34,307] Trial 22 finished with value: 0.9974705121038667 and parameters: {'threshold': 0.3299828209260594, 'n_estimators': 210, 'learning_rate': 0.11471039918958553, 'max_depth': 11, 'num_leaves': 35, 'min_child_samples': 18, 'subsample': 0.962637454973575, 'subsample_freq': 3, 'colsample_bytree': 0.873608684642096, 'reg_alpha': 0.32003111976649706, 'reg_lambda': 0.32839260981839824, 'max_bin': 212}. Best is trial 19 with value: 0.9975816398743913.


AUC: 0.9988252143766541, Balanced Accuracy: 0.997256046705588, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9974705121038667


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:35,910] Trial 23 finished with value: 0.9975937895368769 and parameters: {'threshold': 0.3237673426889575, 'n_estimators': 218, 'learning_rate': 0.12276242922034046, 'max_depth': 12, 'num_leaves': 34, 'min_child_samples': 14, 'subsample': 0.995562324271082, 'subsample_freq': 3, 'colsample_bytree': 0.8718103609302557, 'reg_alpha': 0.28582792426213083, 'reg_lambda': 0.3213291097403035, 'max_bin': 208}. Best is trial 23 with value: 0.9975937895368769.


AUC: 0.9992179325894052, Balanced Accuracy: 0.997233160791868, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9975937895368769


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:37,333] Trial 24 finished with value: 0.9975787098283291 and parameters: {'threshold': 0.33383473970036814, 'n_estimators': 207, 'learning_rate': 0.12600547530741904, 'max_depth': 9, 'num_leaves': 27, 'min_child_samples': 16, 'subsample': 0.9666696827503615, 'subsample_freq': 3, 'colsample_bytree': 0.8853348979757798, 'reg_alpha': 0.3071963529816846, 'reg_lambda': 0.2363178923970578, 'max_bin': 211}. Best is trial 23 with value: 0.9975937895368769.


AUC: 0.9991498075500417, Balanced Accuracy: 0.997256046705588, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9975787098283291


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:38,561] Trial 25 finished with value: 0.9975909870240038 and parameters: {'threshold': 0.3965322059792872, 'n_estimators': 178, 'learning_rate': 0.1273551088201357, 'max_depth': 9, 'num_leaves': 27, 'min_child_samples': 10, 'subsample': 0.9724857970121328, 'subsample_freq': 3, 'colsample_bytree': 0.8736425269994006, 'reg_alpha': 0.30442548876378184, 'reg_lambda': 0.11401544663262564, 'max_bin': 224}. Best is trial 23 with value: 0.9975937895368769.


AUC: 0.9991980820939255, Balanced Accuracy: 0.9972446037487279, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9975909870240038


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:40,086] Trial 26 finished with value: 0.997147498250478 and parameters: {'threshold': 0.4380976949391759, 'n_estimators': 179, 'learning_rate': 0.06240269584702076, 'max_depth': 5, 'num_leaves': 28, 'min_child_samples': 10, 'subsample': 0.9991121733869606, 'subsample_freq': 3, 'colsample_bytree': 0.8648283325149888, 'reg_alpha': 0.5079780680239647, 'reg_lambda': 0.14551223135059366, 'max_bin': 222}. Best is trial 23 with value: 0.9975937895368769.


AUC: 0.9978561728164888, Balanced Accuracy: 0.997256046705588, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.997147498250478


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:41,635] Trial 27 finished with value: 0.9976169154919379 and parameters: {'threshold': 0.3896435594791264, 'n_estimators': 222, 'learning_rate': 0.11697854766813805, 'max_depth': 8, 'num_leaves': 32, 'min_child_samples': 50, 'subsample': 0.9145673069030011, 'subsample_freq': 4, 'colsample_bytree': 0.8151732911561955, 'reg_alpha': 0.09067792462375793, 'reg_lambda': 0.1321187744850242, 'max_bin': 226}. Best is trial 27 with value: 0.9976169154919379.


AUC: 0.9992644245408677, Balanced Accuracy: 0.997256046705588, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9976169154919379


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:43,347] Trial 28 finished with value: 0.9971783308845353 and parameters: {'threshold': 0.3863432565883258, 'n_estimators': 223, 'learning_rate': 0.028164710934873778, 'max_depth': 8, 'num_leaves': 24, 'min_child_samples': 50, 'subsample': 0.9628160833474195, 'subsample_freq': 3, 'colsample_bytree': 0.8361784486713681, 'reg_alpha': 0.029687704021263084, 'reg_lambda': 0.11499750113525839, 'max_bin': 226}. Best is trial 27 with value: 0.9976169154919379.


AUC: 0.9979715566323806, Balanced Accuracy: 0.997233160791868, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9971783308845353


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:45,052] Trial 29 finished with value: 0.9970685794061872 and parameters: {'threshold': 0.4742047936159329, 'n_estimators': 197, 'learning_rate': 0.05985347322029144, 'max_depth': 7, 'num_leaves': 31, 'min_child_samples': 42, 'subsample': 0.9946352176365593, 'subsample_freq': 2, 'colsample_bytree': 0.8146373469692526, 'reg_alpha': 0.10256314131573546, 'reg_lambda': 0.01458869701420426, 'max_bin': 242}. Best is trial 27 with value: 0.9976169154919379.


AUC: 0.9976194162836158, Balanced Accuracy: 0.997256046705588, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9970685794061872


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:46,211] Trial 30 finished with value: 0.9976719880988112 and parameters: {'threshold': 0.44286465058062224, 'n_estimators': 171, 'learning_rate': 0.12792961140536083, 'max_depth': 10, 'num_leaves': 34, 'min_child_samples': 50, 'subsample': 0.9208777758688815, 'subsample_freq': 4, 'colsample_bytree': 0.7677410569512204, 'reg_alpha': 0.28067157957490324, 'reg_lambda': 0.0859139708073734, 'max_bin': 232}. Best is trial 30 with value: 0.9976719880988112.


AUC: 0.9994296423614877, Balanced Accuracy: 0.997256046705588, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9976719880988112


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:47,411] Trial 31 finished with value: 0.9975443819361471 and parameters: {'threshold': 0.4467532246016623, 'n_estimators': 166, 'learning_rate': 0.1128884452540147, 'max_depth': 10, 'num_leaves': 34, 'min_child_samples': 50, 'subsample': 0.9319146027600906, 'subsample_freq': 4, 'colsample_bytree': 0.7630971793597435, 'reg_alpha': 0.2868097990682057, 'reg_lambda': 0.09319118605499564, 'max_bin': 231}. Best is trial 30 with value: 0.9976719880988112.


AUC: 0.999058266830356, Balanced Accuracy: 0.997244603748728, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9975443819361471


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:48,738] Trial 32 finished with value: 0.9975716318473553 and parameters: {'threshold': 0.39477418437970596, 'n_estimators': 175, 'learning_rate': 0.12317975726731079, 'max_depth': 12, 'num_leaves': 33, 'min_child_samples': 47, 'subsample': 0.9747419808800235, 'subsample_freq': 4, 'colsample_bytree': 0.7343331573102305, 'reg_alpha': 0.0982789216451238, 'reg_lambda': 0.2695727769998643, 'max_bin': 234}. Best is trial 30 with value: 0.9976719880988112.


AUC: 0.9991285736071205, Balanced Accuracy: 0.997256046705588, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9975716318473553


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:49,833] Trial 33 finished with value: 0.9975975780978956 and parameters: {'threshold': 0.42201412229989704, 'n_estimators': 202, 'learning_rate': 0.136700786813147, 'max_depth': 7, 'num_leaves': 27, 'min_child_samples': 42, 'subsample': 0.8987582216224096, 'subsample_freq': 3, 'colsample_bytree': 0.8178715018834065, 'reg_alpha': 0.21365892122265845, 'reg_lambda': 0.014800630642531232, 'max_bin': 224}. Best is trial 30 with value: 0.9976719880988112.


AUC: 0.9992064123587407, Balanced Accuracy: 0.997256046705588, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9975975780978956


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:51,267] Trial 34 finished with value: 0.9972298259863196 and parameters: {'threshold': 0.45411747321086454, 'n_estimators': 198, 'learning_rate': 0.07792154922987968, 'max_depth': 7, 'num_leaves': 31, 'min_child_samples': 46, 'subsample': 0.8652761080992885, 'subsample_freq': 3, 'colsample_bytree': 0.8166213191371319, 'reg_alpha': 0.2251784376153858, 'reg_lambda': 0.03368716773181173, 'max_bin': 244}. Best is trial 30 with value: 0.9976719880988112.


AUC: 0.9981031560240133, Balanced Accuracy: 0.997256046705588, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9972298259863196


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:52,860] Trial 35 finished with value: 0.9972586986190246 and parameters: {'threshold': 0.37275084925146534, 'n_estimators': 223, 'learning_rate': 0.050201477915616945, 'max_depth': 6, 'num_leaves': 33, 'min_child_samples': 42, 'subsample': 0.8996499099047116, 'subsample_freq': 5, 'colsample_bytree': 0.6711503482770811, 'reg_alpha': 0.10784195553229381, 'reg_lambda': 0.15289723554501589, 'max_bin': 208}. Best is trial 30 with value: 0.9976719880988112.


AUC: 0.9981897739221278, Balanced Accuracy: 0.997256046705588, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9972586986190246


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:53,953] Trial 36 finished with value: 0.9970610599768861 and parameters: {'threshold': 0.41992533191647563, 'n_estimators': 138, 'learning_rate': 0.033615850528525504, 'max_depth': 8, 'num_leaves': 25, 'min_child_samples': 48, 'subsample': 0.8756760912506724, 'subsample_freq': 4, 'colsample_bytree': 0.7871714631374699, 'reg_alpha': 0.16284404327680377, 'reg_lambda': 0.018791379471438982, 'max_bin': 236}. Best is trial 30 with value: 0.9976719880988112.


AUC: 0.9977341708595077, Balanced Accuracy: 0.9971187338417928, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9970610599768861


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:55,241] Trial 37 finished with value: 0.9974202827236123 and parameters: {'threshold': 0.24813727166856508, 'n_estimators': 150, 'learning_rate': 0.14723419771596746, 'max_depth': 10, 'num_leaves': 38, 'min_child_samples': 38, 'subsample': 0.9176609107151217, 'subsample_freq': 2, 'colsample_bytree': 0.8242652381619606, 'reg_alpha': 0.048012452992819976, 'reg_lambda': 0.2650358381016363, 'max_bin': 219}. Best is trial 30 with value: 0.9976719880988112.


AUC: 0.998685969192751, Balanced Accuracy: 0.997244603748728, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9974202827236123


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:56,398] Trial 38 finished with value: 0.9973494683077696 and parameters: {'threshold': 0.3034088933851865, 'n_estimators': 127, 'learning_rate': 0.07957492037647722, 'max_depth': 7, 'num_leaves': 22, 'min_child_samples': 44, 'subsample': 0.8390083012831814, 'subsample_freq': 5, 'colsample_bytree': 0.729148768101647, 'reg_alpha': 0.36924857194788074, 'reg_lambda': 0.09654402608795007, 'max_bin': 229}. Best is trial 30 with value: 0.9976719880988112.


AUC: 0.998462082988363, Balanced Accuracy: 0.997256046705588, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9973494683077696


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:57,554] Trial 39 finished with value: 0.8306522892755848 and parameters: {'threshold': 0.42139691609300645, 'n_estimators': 164, 'learning_rate': 0.0011802990775971042, 'max_depth': 5, 'num_leaves': 29, 'min_child_samples': 33, 'subsample': 0.8935926120098184, 'subsample_freq': 4, 'colsample_bytree': 0.8547355535015182, 'reg_alpha': 0.2200680846010277, 'reg_lambda': 0.34805886543346853, 'max_bin': 207}. Best is trial 30 with value: 0.9976719880988112.


AUC: 0.997444774415578, Balanced Accuracy: 0.5, Fraud Capture Rate: 0.994512093411176, Composite Score: 0.8306522892755848


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:46:59,221] Trial 40 finished with value: 0.9970099496567736 and parameters: {'threshold': 0.3142671484290257, 'n_estimators': 215, 'learning_rate': 0.005758982247354381, 'max_depth': 10, 'num_leaves': 34, 'min_child_samples': 45, 'subsample': 0.864140092718535, 'subsample_freq': 3, 'colsample_bytree': 0.7688910199256646, 'reg_alpha': 0.14151855444507044, 'reg_lambda': 0.06424114926434732, 'max_bin': 221}. Best is trial 30 with value: 0.9976719880988112.


AUC: 0.9976838225831222, Balanced Accuracy: 0.9970157511578408, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9970099496567736


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:47:00,440] Trial 41 finished with value: 0.9973924495396492 and parameters: {'threshold': 0.411551352082249, 'n_estimators': 190, 'learning_rate': 0.1384602364426375, 'max_depth': 9, 'num_leaves': 27, 'min_child_samples': 40, 'subsample': 0.9525074424023628, 'subsample_freq': 3, 'colsample_bytree': 0.8938547005687834, 'reg_alpha': 0.2876968043128164, 'reg_lambda': 0.1389796445826854, 'max_bin': 227}. Best is trial 30 with value: 0.9976719880988112.


AUC: 0.9985910266840017, Balanced Accuracy: 0.997256046705588, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9973924495396492


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:47:01,685] Trial 42 finished with value: 0.9973653523968788 and parameters: {'threshold': 0.4685075378481394, 'n_estimators': 175, 'learning_rate': 0.09869026868343728, 'max_depth': 8, 'num_leaves': 26, 'min_child_samples': 48, 'subsample': 0.9291812557248362, 'subsample_freq': 3, 'colsample_bytree': 0.8370217762774174, 'reg_alpha': 0.4642289801332714, 'reg_lambda': 0.006577175319307328, 'max_bin': 223}. Best is trial 30 with value: 0.9976719880988112.


AUC: 0.9985097352556904, Balanced Accuracy: 0.997256046705588, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9973653523968788


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:47:02,848] Trial 43 finished with value: 0.9972600164846661 and parameters: {'threshold': 0.4990441623213355, 'n_estimators': 202, 'learning_rate': 0.15134940390021936, 'max_depth': 7, 'num_leaves': 27, 'min_child_samples': 50, 'subsample': 0.9537738309457362, 'subsample_freq': 3, 'colsample_bytree': 0.8071024585957662, 'reg_alpha': 0.24479816105745528, 'reg_lambda': 0.21197556614222293, 'max_bin': 246}. Best is trial 30 with value: 0.9976719880988112.


AUC: 0.9981937275190529, Balanced Accuracy: 0.997256046705588, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9972600164846661


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:47:04,202] Trial 44 finished with value: 0.9971645862487712 and parameters: {'threshold': 0.37610926012141777, 'n_estimators': 154, 'learning_rate': 0.05820670812076163, 'max_depth': 9, 'num_leaves': 32, 'min_child_samples': 28, 'subsample': 0.9720633192122913, 'subsample_freq': 4, 'colsample_bytree': 0.8516092904751165, 'reg_alpha': 0.3529244853007401, 'reg_lambda': 0.07858855633985609, 'max_bin': 238}. Best is trial 30 with value: 0.9976719880988112.


AUC: 0.9979074368113678, Balanced Accuracy: 0.997256046705588, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9971645862487712


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:47:06,109] Trial 45 finished with value: 0.9972732470071077 and parameters: {'threshold': 0.36116242849150393, 'n_estimators': 230, 'learning_rate': 0.03895812003154235, 'max_depth': 12, 'num_leaves': 30, 'min_child_samples': 34, 'subsample': 0.9854567195041155, 'subsample_freq': 5, 'colsample_bytree': 0.6056470054581536, 'reg_alpha': 0.26792641940620515, 'reg_lambda': 0.16545952467952413, 'max_bin': 232}. Best is trial 30 with value: 0.9976719880988112.


AUC: 0.998256303690835, Balanced Accuracy: 0.9972331621011307, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9972732470071077


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:47:07,078] Trial 46 finished with value: 0.9973598302991172 and parameters: {'threshold': 0.40418695755838596, 'n_estimators': 106, 'learning_rate': 0.09650631242328617, 'max_depth': 8, 'num_leaves': 25, 'min_child_samples': 40, 'subsample': 0.8782408734674049, 'subsample_freq': 2, 'colsample_bytree': 0.9074765367751397, 'reg_alpha': 0.18079522466684783, 'reg_lambda': 0.2886968717685934, 'max_bin': 217}. Best is trial 30 with value: 0.9976719880988112.


AUC: 0.9984931689624059, Balanced Accuracy: 0.997256046705588, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9973598302991172


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:47:08,269] Trial 47 finished with value: 0.9975224431723275 and parameters: {'threshold': 0.43530776370427304, 'n_estimators': 188, 'learning_rate': 0.1478488839159886, 'max_depth': 11, 'num_leaves': 28, 'min_child_samples': 15, 'subsample': 0.8193766042245468, 'subsample_freq': 4, 'colsample_bytree': 0.7839280227801567, 'reg_alpha': 0.34032150924898885, 'reg_lambda': 0.9985364655754747, 'max_bin': 202}. Best is trial 30 with value: 0.9976719880988112.


AUC: 0.9989810075820366, Balanced Accuracy: 0.997256046705588, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9975224431723275


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:47:09,610] Trial 48 finished with value: 0.9971472250513382 and parameters: {'threshold': 0.25561118921191106, 'n_estimators': 181, 'learning_rate': 0.07329352634033742, 'max_depth': 6, 'num_leaves': 23, 'min_child_samples': 12, 'subsample': 0.9017038661852846, 'subsample_freq': 3, 'colsample_bytree': 0.8262736334632728, 'reg_alpha': 0.07456053925982306, 'reg_lambda': 0.11817962630623305, 'max_bin': 297}. Best is trial 30 with value: 0.9976719880988112.


AUC: 0.9978553532190688, Balanced Accuracy: 0.997256046705588, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9971472250513382


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:47:11,456] Trial 49 finished with value: 0.9971386138038755 and parameters: {'threshold': 0.46044152747864175, 'n_estimators': 248, 'learning_rate': 0.05274715848573031, 'max_depth': 14, 'num_leaves': 37, 'min_child_samples': 35, 'subsample': 0.9235697025414098, 'subsample_freq': 3, 'colsample_bytree': 0.7276805459164446, 'reg_alpha': 0.18866808989245326, 'reg_lambda': 0.24021786628672528, 'max_bin': 225}. Best is trial 30 with value: 0.9976719880988112.


AUC: 0.9978295194766804, Balanced Accuracy: 0.997256046705588, Fraud Capture Rate: 0.9963302752293579, Composite Score: 0.9971386138038755
Best Hyperparameters: {'threshold': 0.44286465058062224, 'n_estimators': 171, 'learning_rate': 0.12792961140536083, 'max_depth': 10, 'num_leaves': 34, 'min_child_samples': 50, 'subsample': 0.9208777758688815, 'subsample_freq': 4, 'colsample_bytree': 0.7677410569512204, 'reg_alpha': 0.28067157957490324, 'reg_lambda': 0.0859139708073734, 'max_bin': 232}
Best Composite Score: 0.9977


In [252]:
import lightgbm as lgb
import pandas as pd
import numpy as np

# ✅ Best hyperparameters from Optuna
best_params_lgb = {
    'n_estimators': 171,
    'learning_rate': 0.12792961140536083,
    'max_depth': 10,
    'num_leaves': 34,
    'min_child_samples': 50,
    'subsample': 0.9208777758688815,
    'subsample_freq': 4,
    'colsample_bytree': 0.7677410569512204,
    'reg_alpha': 0.28067157957490324,
    'reg_lambda': 0.0859139708073734,
    'max_bin': 232,
    'scale_pos_weight': len(y_train[y_train == 0]) / len(y_train[y_train == 1]),  # Handling class imbalance
    'force_col_wise': True,
    'verbosity': -1
}

# 🔁 Train final model with the best parameters
final_lgb = lgb.LGBMClassifier(**best_params_lgb, random_state=42)
final_lgb.fit(X_train, y_train)

# 📊 Predict probabilities on test set
test_probs_lgb = final_lgb.predict_proba(test_df)[:, 1]

# 🔎 Apply the optimized threshold
best_threshold = 0.44286465058062224
test_preds = (test_probs_lgb >= best_threshold).astype(int)

# 💾 Save predictions in the required format
submission = pd.DataFrame({
    0: test_probs_lgb,
    1: test_preds
})
submission.to_csv("submission.txt", sep=" ", header=False, index=False)


In [253]:
with open("submission_lightgbm.txt", "w") as f:
    for prob, pred in zip(test_probs_lgb, test_preds):
        f.write(f"{prob} {pred}\n")


### XGB

In [None]:
import xgboost as xgb

def objective(trial):
    scale_pos_weight = len(y_train[y_train == 0]) / len(y_train[y_train == 1])
    
    # Tune threshold as part of objective
    threshold = trial.suggest_float("threshold", 0.05, 0.5)

    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 300),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
        'max_depth': trial.suggest_int('max_depth', 5, 15),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_float('gamma', 0.0, 5.0),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 1.0),
        'scale_pos_weight': scale_pos_weight,
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'use_label_encoder': False,
        'verbosity': 0
    }

    kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    
    auc_scores = [] 
    balanced_accuracies = []
    fraud_capture_rates = []
    scores = []

    for train_idx, val_idx in kf.split(X_train, y_train):
        X_fold_train, X_fold_val = X_train.iloc[train_idx], X_train.iloc[val_idx]
        y_fold_train, y_fold_val = y_train.iloc[train_idx], y_train.iloc[val_idx]

        model = xgb.XGBClassifier(**params)
        model.fit(X_fold_train, y_fold_train)

        y_prob = model.predict_proba(X_fold_val)[:, 1]
        y_pred = (y_prob >= threshold).astype(int)

        auc_score = roc_auc_score(y_fold_val, y_prob)
        balanced_acc = calculate_balanced_accuracy(y_fold_val, y_pred)
        fraud_capture = calculate_fraud_capture_rate(y_fold_val, y_prob, N=485)
        score = calculate_composite_score(y_fold_val, y_pred, y_prob, N=485)

        auc_scores.append(auc_score)
        balanced_accuracies.append(balanced_acc)    
        fraud_capture_rates.append(fraud_capture)
        scores.append(score)

    print(f"AUC: {np.mean(auc_scores)}, Balanced Accuracy: {np.mean(balanced_accuracies)}, Fraud Capture Rate: {np.mean(fraud_capture_rates)}, Composite Score: {np.mean(scores)}")

    return np.mean(scores)


In [None]:
import optuna

study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=50)

print("✅ Best XGBoost Parameters:", study.best_params)
print("🎯 Best Composite Score:", round(study.best_value, 4))


[I 2025-06-16 04:37:49,424] A new study created in memory with name: no-name-0304bfa1-33db-4425-ba5b-4d41c55aadac
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:37:52,087] Trial 0 finished with value: 0.9972811542537358 and parameters: {'threshold': 0.21854305348131314, 'n_estimators': 291, 'learning_rate': 0.0483437145318464, 'max_depth': 11, 'min_child_weight': 2, 'gamma': 0.7799726016810132, 'subsample': 0.6232334448672797, 'colsample_bytree': 0.9464704583099741, 'reg_alpha': 0.6011150117432088, 'reg_lambda': 0.7080725777960455}. Best is trial 0 with value: 0.9972811542537358.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.2),
[I 2025-06-16 04:37:54,143] Trial 1 finished with value: 0.9971633408035252 and parameters: {'threshold': 0.0592630224331111, 'n_estimators': 294, 'learning_rate': 0.0823143373099555, 'max_depth': 7, 'min_child_weight': 2, 'gamma': 0.9170225492671691, 'subsample': 0.7216968971838151, 'colsample

✅ Best XGBoost Parameters: {'threshold': 0.3595320880199634, 'n_estimators': 286, 'learning_rate': 0.03441730150512816, 'max_depth': 12, 'min_child_weight': 4, 'gamma': 0.06655413271247834, 'subsample': 0.6364784353029371, 'colsample_bytree': 0.960836926077814, 'reg_alpha': 0.8384948295032638, 'reg_lambda': 0.629887207892041}
🎯 Best Composite Score: 0.9975


In [254]:
import xgboost as xgb
import pandas as pd
import numpy as np

# ✅ Best XGBoost hyperparameters from Optuna
best_params_xgb = {
    'n_estimators': 286,
    'learning_rate': 0.03441730150512816,
    'max_depth': 12,
    'min_child_weight': 4,
    'gamma': 0.06655413271247834,
    'subsample': 0.6364784353029371,
    'colsample_bytree': 0.960836926077814,
    'reg_alpha': 0.8384948295032638,
    'reg_lambda': 0.629887207892041,
    'scale_pos_weight': len(y_train[y_train == 0]) / len(y_train[y_train == 1]),  # Handling class imbalance
    'objective': 'binary:logistic',
    'eval_metric': 'logloss',
    'use_label_encoder': False,
    'verbosity': 0
}

# 🔁 Train the final XGBoost model on the full training set
final_xgb = xgb.XGBClassifier(**best_params_xgb, random_state=42)
final_xgb.fit(X_train, y_train)

# 📊 Predict probabilities on test set
test_probs_xgb = final_xgb.predict_proba(test_df)[:, 1]

# 🔎 Apply optimized classification threshold
best_threshold = 0.3595320880199634
test_preds = (test_probs_xgb >= best_threshold).astype(int)

# 💾 Save predictions to submission file
submission = pd.DataFrame({
    0: test_probs_xgb,
    1: test_preds
})
submission.to_csv("submission.txt", sep=" ", header=False, index=False)



In [255]:
with open("submission_xgb.txt", "w") as f:
    for prob, pred in zip(test_probs_xgb, test_preds):
        f.write(f"{prob} {pred}\n")


In [256]:
# Simple average ensemble
blended_probs = 0.4 * test_probs_lgb + 0.6 * test_probs_xgb

In [257]:
with open("submission_ens.txt", "w") as f:
    for prob, pred in zip(blended_probs, test_preds):
        f.write(f"{prob} {pred}\n")


In [261]:
# check if 0 and 1 in lgb and xgb predictions match
lgb_preds = (test_probs_lgb >= best_threshold).astype(int)
xgb_preds = (test_probs_xgb >= best_threshold).astype(int)
# show when they don't match
mismatch_indices = np.where(lgb_preds != xgb_preds)[0]
print("Indices where LGBM and XGBoost predictions mismatch:", mismatch_indices)

Indices where LGBM and XGBoost predictions mismatch: [11286 14461 15892 16898 17488]
