In [1]:
features = [
    'lead_speed_diff',
    'hp_advantage_seen','mons_revealed_diff','team_status_diff','end_boost_diff',
    'total_damage_dealt','total_healing_done','status_turns',
    'first_faint_turn','total_stats_diff','damage_diff_turn10',
    'damage_diff_turn20','damage_diff_turn25','damage_diff_turn30',
    'hp_trend_diff','feat_switch_diff','feat_aggression_diff','hp_diff_std',
    'hp_diff_range','momentum_shift_turn','comeback_score','early_sustain',
    'status_balance','boost_volatility','boost_trend','move_power_diff',
    'move_diversity_diff','stall_ratio','aggression_index',
    'stats_speed_interaction',
    'hp_vs_stats_ratio','damage_ratio_turn25_30','damage_ratio_turn20_25',
    'damage_ratio_turn10_20','damage_ratio_turn10_30',
    'atk_def_ratio_p1','atk_def_ratio_p2','hp_speed_interaction_lead','hp_def_ratio_p1',
    'hp_def_ratio_p2','p1_hp_mean','p2_hp_mean','hp_diff_mean','hp_diff_last',
    'p1_boost_mean','p2_boost_mean','boost_diff_mean','p1_status_total',
    'p2_status_total','momentum_flips','p1_aggression','p2_aggression',
    'aggression_diff','feat_team_emb_sim',
    'lead_type_adv','meta_diff','feat_status_diff_inflicted','status_setup_diff',
    
]


In [2]:
from main import load_data
from Features.features_olya import create_advanced_features_gen2
import os
import pandas as pd
from utils.load_json import load_jsonl
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

test_size=0.2
random_state=42
train_df, test_df = load_data()



fraction_to_use = 1.0
train_df_subset = train_df.sample(frac=fraction_to_use, random_state=42).reset_index(drop=True)
test_df_subset = test_df.sample(frac=fraction_to_use, random_state=42).reset_index(drop=True)

# Feature engineering
X_train_features = create_advanced_features_gen2(train_df_subset)
X_test_features = create_advanced_features_gen2(test_df_subset)

# Target
y_train = train_df_subset.set_index('battle_id')['player_won']
y_test = test_df_subset.set_index('battle_id')

# Train/val split
X_train_split, X_val_split, y_train_split, y_val_split = train_test_split(
    X_train_features,
    y_train,
    test_size=test_size,
    random_state=random_state,
    stratify=y_train
)

print("Shapes:")
print(X_train_split.shape, X_val_split.shape, y_train_split.shape, y_val_split.shape)

✓ Local environment detected. Loading data from: Data
✓ train.jsonl loaded successfully. Shape: (10000, 5)
✓ test.jsonl loaded successfully. Shape: (5000, 4)


Generating advanced features:   0%|          | 0/10000 [00:00<?, ?it/s]

Generating advanced features:   0%|          | 0/5000 [00:00<?, ?it/s]

Shapes:
(8000, 84) (2000, 84) (8000,) (2000,)


In [14]:
from Models.pipeline import get_pipeline

"""
Available models and recommended scaler usage:

1. Logistic Regression ('logistic')
   - Recommended scaler: RobustScaler (default 'auto')
   - Key parameters: C, penalty ('l1', 'l2'), class_weight

2. Random Forest ('random_forest')
   - Recommended scaler: RobustScaler (default 'auto')
   - Key parameters: n_estimators, max_depth, min_samples_split, min_samples_leaf, max_features

3. XGBoost ('xgboost')
   - Recommended scaler: RobustScaler (default 'auto')
   - Key parameters: n_estimators, max_depth, learning_rate, subsample, colsample_bytree, gamma

4. LightGBM ('lightgbm')
   - Recommended scaler: StandardScaler (default 'auto')
   - Key parameters: n_estimators, num_leaves, learning_rate, max_depth, feature_fraction, bagging_fraction, min_child_samples, lambda_l1, lambda_l2

5. CatBoost ('catboost')
   - Recommended scaler: StandardScaler (default 'auto')
   - Key parameters: depth, learning_rate, iterations, l2_leaf_reg, random_seed, task_type

6. Gradient Boosting ('gradient_boost')
   - Recommended scaler: RobustScaler (default 'auto')
   - Key parameters: n_estimators, max_depth, learning_rate, min_samples_split, min_samples_leaf, subsample
"""

from Models.pipeline import get_pipeline

"""
Pipelines for all supported models.
Each pipeline is created with the recommended scaler unless you override it.
"""

# 1. Logistic Regression
pipeline_logistic = get_pipeline(
    model_name='logistic',
    numerical_features=features,
    scaler='standard' 
)

# 2. Random Forest
pipeline_random_forest = get_pipeline(
    model_name='random_forest',
    numerical_features=features,
    scaler='robust' 
)

# 3. XGBoost
pipeline_xgb = get_pipeline(
    model_name='xgboost',
    numerical_features=features,
    scaler='robust' 
)

# 4. LightGBM
pipeline_lgbm = get_pipeline(
    model_name='lightgbm',
    numerical_features=features,
    scaler='standard'
)

# 5. CatBoost
pipeline_catboost = get_pipeline(
    model_name='catboost',
    numerical_features=features,
    scaler='standard'
)

# 6. Gradient Boosting
pipeline_gradient_boost = get_pipeline(
    model_name='gradient_boost',
    numerical_features=features,
    scaler='robust' 
)

# If you want all of them in one dict:
all_pipelines = {
    "logistic": pipeline_logistic,
    "random_forest": pipeline_random_forest,
    "xgboost": pipeline_xgb,
    "lightgbm": pipeline_lgbm,
    "catboost": pipeline_catboost,
    "gradient_boost": pipeline_gradient_boost
}

In [15]:
from sklearn.metrics import accuracy_score

# Prepare data
X_train_clean = X_train_split
X_val_clean = X_val_split

results = {}

for name, pipeline in all_pipelines.items():
    print(f"\nTraining {name}...")

    # Fit
    pipeline.fit(X_train_clean, y_train_split)

    # Predict
    preds = pipeline.predict(X_val_clean)

    # Evaluate
    acc = accuracy_score(y_val_split, preds)
    results[name] = acc

    print(f"{name} validation accuracy: {acc:.4f}")

print("\nSummary of results:")
for name, acc in results.items():
    print(f"{name}: {acc:.4f}")



Training logistic...
logistic validation accuracy: 0.7260

Training random_forest...
random_forest validation accuracy: 0.7975

Training xgboost...
xgboost validation accuracy: 0.8210

Training lightgbm...
[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000405 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
lightgbm validation accuracy: 0.8240

Training catboost...
catboost validation accuracy: 0.8255

Training gradient_boost...
gradient_boost validation accuracy: 0.8290

Summary of results:
logistic: 0.7260
random_forest: 0.7975
xgboost: 0.8210
lightgbm: 0.8240
catboost: 0.8255
gradient_boost: 0.8290


In [5]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

X_train_clean = X_train_split
X_val_clean   = X_val_split

results = {}

for name, pipeline in all_pipelines.items():
    print(f"\n=== {name.upper()} ===")

    # Fit
    pipeline.fit(X_train_clean, y_train_split)

    # Predict labels
    preds = pipeline.predict(X_val_clean)

    # Predict probabilities (if available)
    try:
        probs = pipeline.predict_proba(X_val_clean)[:, 1]
    except:
        probs = None

    # Accuracy
    acc = accuracy_score(y_val_split, preds)
    results[name] = acc
    print(f"Validation Accuracy: {acc:.4f}")

    # Classification report
    print("\nClassification Report:")
    print(classification_report(y_val_split, preds))

    # Confusion matrix
    print("Confusion Matrix:")
    print(confusion_matrix(y_val_split, preds))

    # Store predictions & probs if you want to use later
    results[name] = {
        "accuracy": acc,
        "preds": preds,
        "probs": probs
    }

print("\n=== SUMMARY ===")
for name, info in results.items():
    print(f"{name}: {info['accuracy']:.4f}")



=== LOGISTIC ===
Validation Accuracy: 0.7260

Classification Report:
              precision    recall  f1-score   support

       False       0.72      0.75      0.73      1000
        True       0.74      0.70      0.72      1000

    accuracy                           0.73      2000
   macro avg       0.73      0.73      0.73      2000
weighted avg       0.73      0.73      0.73      2000

Confusion Matrix:
[[750 250]
 [298 702]]

=== RANDOM_FOREST ===
Validation Accuracy: 0.7975

Classification Report:
              precision    recall  f1-score   support

       False       0.79      0.81      0.80      1000
        True       0.81      0.79      0.79      1000

    accuracy                           0.80      2000
   macro avg       0.80      0.80      0.80      2000
weighted avg       0.80      0.80      0.80      2000

Confusion Matrix:
[[810 190]
 [215 785]]

=== XGBOOST ===
Validation Accuracy: 0.8210

Classification Report:
              precision    recall  f1-score   supp

In [6]:
from paramethers.cat_grid import param_grid as catboost_param_grid
from paramethers.gb_grid import param_grid_optuna as gradientboost_param_grid
from paramethers.lgb_grid import param_grid as lightgbm_param_grid
from paramethers.log_grid import param_grid as logistic_param_grid
from paramethers.rf_grid import param_grid as randomforest_param_grid
from paramethers.xgb_grid import param_grid_optuna as xgboost_param_grid


from optimisers.gridsearch_optimizer import run_grid_search
from optimisers.optuna_optimizer import optimize_optuna
from optimisers.randomsearch_optimizer import run_random_search


In [7]:
xgboost_param_grid
catboost_param_grid
gradientboost_param_grid


{'classifier__n_estimators': (200, 500),
 'classifier__learning_rate': (0.01, 0.2),
 'classifier__max_depth': (3, 6),
 'classifier__min_samples_split': (2, 10),
 'classifier__min_samples_leaf': (1, 4),
 'classifier__subsample': (0.6, 1.0),
 'classifier__max_features': ['sqrt', 0.5, 0.7, None],
 'classifier__random_state': 42}

In [16]:

# --- XGBOOST ---
best_model, best_params_xgb, best_score_xgb = optimize_optuna(
    lambda: pipeline_xgb,
    X_train_split,
    y_train_split,
    X_val_split,
    y_val_split,
    xgboost_param_grid,
    n_trials=100
)
print("\nBest XGBoost Accuracy:", best_score_xgb)
print("Best XGBoost Params:", best_params_xgb)


# --- LIGHTGBM ---
best_lgbm_model, best_params_lgb, best_score_lgb = optimize_optuna(
    lambda: pipeline_lgbm,
    X_train_split,
    y_train_split,
    X_val_split,
    y_val_split,
    lightgbm_param_grid,
    n_trials=50
)
print("\nBest LightGBM Accuracy:", best_score_lgb)
print("Best LightGBM Params:", best_params_lgb)


# --- GRADIENT BOOSTING ---
best_gb_model, best_params_gb, best_score_gb = optimize_optuna(
    lambda: pipeline_gradient_boost,
    X_train_split,
    y_train_split,
    X_val_split,
    y_val_split,
    gradientboost_param_grid,
    n_trials=50
)
print("\nBest Gradient Boosting Accuracy:", best_score_gb)
print("Best Gradient Boosting Params:", best_params_gb)

'''
# --- CATBOOST ---
best_cat_model, best_params_cat, best_score_cat = optimize_optuna(
    lambda: pipeline_catboost,
    X_train_split,
    y_train_split,
    X_val_split,
    y_val_split,
    catboost_param_grid,
    n_trials=1
)
print("\nBest CatBoost Accuracy:", best_score_cat)
print("Best CatBoost Params:", best_params_cat)
'''

[16:15:50] INFO: Starting Optuna optimization for 100 trials...
[I 2025-11-15 16:15:50,830] A new study created in memory with name: no-name-1ca8f64d-9e14-4cb4-a47c-21622c1afe93
[16:15:51] INFO: Trial 1/100 - Accuracy: 0.8275 - Params: {'classifier__max_depth': 4, 'classifier__learning_rate': 0.10293888592588714, 'classifier__subsample': 0.7305420595595353, 'classifier__colsample_bytree': 0.9055449349390458, 'classifier__min_child_weight': 5, 'classifier__gamma': 1.3362506902312425, 'classifier__reg_lambda': 5.3982143514167955, 'classifier__reg_alpha': 7.113893087618578, 'classifier__n_estimators': 300, 'classifier__random_state': 42, 'classifier__eval_metric': 'logloss', 'classifier__use_label_encoder': False, 'classifier__tree_method': 'hist'}
[I 2025-11-15 16:15:51,133] Trial 0 finished with value: 0.8275 and parameters: {'classifier__max_depth': 4, 'classifier__learning_rate': 0.10293888592588714, 'classifier__subsample': 0.7305420595595353, 'classifier__colsample_bytree': 0.905544


Best XGBoost Accuracy: 0.8325
Best XGBoost Params: {'classifier__max_depth': 6, 'classifier__learning_rate': 0.2034921562174774, 'classifier__subsample': 0.7445451487011843, 'classifier__colsample_bytree': 0.7359287172133291, 'classifier__min_child_weight': 9, 'classifier__gamma': 4.0847542779141435, 'classifier__reg_lambda': 1.6156388865306388, 'classifier__reg_alpha': 2.532868840519198, 'classifier__n_estimators': 300, 'classifier__random_state': 42, 'classifier__eval_metric': 'logloss', 'classifier__use_label_encoder': False, 'classifier__tree_method': 'hist'}
[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000398 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.

[16:16:34] INFO: Trial 1/50 - Accuracy: 0.8210 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 800, 'classifier__learning_rate': 0.01, 'classifier__num_leaves': 127, 'classifier__max_depth': 12, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 20, 'classifier__lambda_l1': 0.01, 'classifier__lambda_l2': 0.01}
[I 2025-11-15 16:16:34,454] Trial 0 finished with value: 0.821 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 800, 'classifier__learning_rate': 0.01, 'classifier__num_leaves': 127, 'classifier__max_depth': 12, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 20, 'classifier__lambda_l1': 0.01, 'classifier__lambda_l2': 0.01}. Best is trial 0 with value: 0.821.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000391 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:16:44] INFO: Trial 2/50 - Accuracy: 0.8260 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.08, 'classifier__num_leaves': 31, 'classifier__max_depth': 8, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.7, 'classifier__min_child_samples': 50, 'classifier__lambda_l1': 5.0, 'classifier__lambda_l2': 0.001}
[I 2025-11-15 16:16:44,227] Trial 1 finished with value: 0.826 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.08, 'classifier__num_leaves': 31, 'classifier__max_depth': 8, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.7, 'classifier__min_child_samples': 50, 'classifier__lambda_l1': 5.0, 'classifier__lambda_l2': 0.001}. Best is trial 1 with value: 0.826.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000396 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:16:46] INFO: Trial 3/50 - Accuracy: 0.8250 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 500, 'classifier__learning_rate': 0.05, 'classifier__num_leaves': 127, 'classifier__max_depth': 5, 'classifier__feature_fraction': 1.0, 'classifier__bagging_fraction': 0.9, 'classifier__min_child_samples': 5, 'classifier__lambda_l1': 0.1, 'classifier__lambda_l2': 0.01}
[I 2025-11-15 16:16:46,748] Trial 2 finished with value: 0.825 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 500, 'classifier__learning_rate': 0.05, 'classifier__num_leaves': 127, 'classifier__max_depth': 5, 'classifier__feature_fraction': 1.0, 'classifier__bagging_fraction': 0.9, 'classifier__min_child_samples': 5, 'classifier__lambda_l1': 0.1, 'classifier__lambda_l2': 0.01}. Best is trial 1 with value: 0.826.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000376 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:16:51] INFO: Trial 4/50 - Accuracy: 0.8270 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 800, 'classifier__learning_rate': 0.05, 'classifier__num_leaves': 31, 'classifier__max_depth': 10, 'classifier__feature_fraction': 0.7, 'classifier__bagging_fraction': 0.7, 'classifier__min_child_samples': 20, 'classifier__lambda_l1': 5.0, 'classifier__lambda_l2': 10.0}
[I 2025-11-15 16:16:51,431] Trial 3 finished with value: 0.827 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 800, 'classifier__learning_rate': 0.05, 'classifier__num_leaves': 31, 'classifier__max_depth': 10, 'classifier__feature_fraction': 0.7, 'classifier__bagging_fraction': 0.7, 'classifier__min_child_samples': 20, 'classifier__lambda_l1': 5.0, 'classifier__lambda_l2': 10.0}. Best is trial 3 with value: 0.827.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000416 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:16:53] INFO: Trial 5/50 - Accuracy: 0.8255 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 500, 'classifier__learning_rate': 0.08, 'classifier__num_leaves': 127, 'classifier__max_depth': 5, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 10, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 0.01}
[I 2025-11-15 16:16:53,647] Trial 4 finished with value: 0.8255 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 500, 'classifier__learning_rate': 0.08, 'classifier__num_leaves': 127, 'classifier__max_depth': 5, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 10, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 0.01}. Best is trial 3 with value: 0.827.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000387 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:17:02] INFO: Trial 6/50 - Accuracy: 0.8240 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.05, 'classifier__num_leaves': 31, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.7, 'classifier__min_child_samples': 50, 'classifier__lambda_l1': 0.01, 'classifier__lambda_l2': 0.001}
[I 2025-11-15 16:17:02,887] Trial 5 finished with value: 0.824 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.05, 'classifier__num_leaves': 31, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.7, 'classifier__min_child_samples': 50, 'classifier__lambda_l1': 0.01, 'classifier__lambda_l2': 0.001}. Best is trial 3 with value: 0.827.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000375 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:17:09] INFO: Trial 7/50 - Accuracy: 0.8170 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 800, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 63, 'classifier__max_depth': 10, 'classifier__feature_fraction': 0.7, 'classifier__bagging_fraction': 0.9, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 0.1, 'classifier__lambda_l2': 5.0}
[I 2025-11-15 16:17:09,386] Trial 6 finished with value: 0.817 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 800, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 63, 'classifier__max_depth': 10, 'classifier__feature_fraction': 0.7, 'classifier__bagging_fraction': 0.9, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 0.1, 'classifier__lambda_l2': 5.0}. Best is trial 3 with value: 0.827.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000367 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:17:10] INFO: Trial 8/50 - Accuracy: 0.8245 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.05, 'classifier__num_leaves': 31, 'classifier__max_depth': 3, 'classifier__feature_fraction': 0.7, 'classifier__bagging_fraction': 0.7, 'classifier__min_child_samples': 5, 'classifier__lambda_l1': 0.01, 'classifier__lambda_l2': 0.1}
[I 2025-11-15 16:17:10,358] Trial 7 finished with value: 0.8245 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.05, 'classifier__num_leaves': 31, 'classifier__max_depth': 3, 'classifier__feature_fraction': 0.7, 'classifier__bagging_fraction': 0.7, 'classifier__min_child_samples': 5, 'classifier__lambda_l1': 0.01, 'classifier__lambda_l2': 0.1}. Best is trial 3 with value: 0.827.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000440 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:17:20] INFO: Trial 9/50 - Accuracy: 0.8245 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 800, 'classifier__learning_rate': 0.08, 'classifier__num_leaves': 31, 'classifier__max_depth': 5, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 5, 'classifier__lambda_l1': 0.1, 'classifier__lambda_l2': 0.01}
[I 2025-11-15 16:17:20,616] Trial 8 finished with value: 0.8245 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 800, 'classifier__learning_rate': 0.08, 'classifier__num_leaves': 31, 'classifier__max_depth': 5, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 5, 'classifier__lambda_l1': 0.1, 'classifier__lambda_l2': 0.01}. Best is trial 3 with value: 0.827.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000391 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:17:24] INFO: Trial 10/50 - Accuracy: 0.8260 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 600, 'classifier__learning_rate': 0.15, 'classifier__num_leaves': 31, 'classifier__max_depth': 12, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 0.9, 'classifier__min_child_samples': 50, 'classifier__lambda_l1': 5.0, 'classifier__lambda_l2': 0.1}
[I 2025-11-15 16:17:24,191] Trial 9 finished with value: 0.826 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 600, 'classifier__learning_rate': 0.15, 'classifier__num_leaves': 31, 'classifier__max_depth': 12, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 0.9, 'classifier__min_child_samples': 50, 'classifier__lambda_l1': 5.0, 'classifier__lambda_l2': 0.1}. Best is trial 3 with value: 0.827.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000376 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:17:29] INFO: Trial 11/50 - Accuracy: 0.8225 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.1, 'classifier__num_leaves': 255, 'classifier__max_depth': 10, 'classifier__feature_fraction': 0.7, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 20, 'classifier__lambda_l1': 0.001, 'classifier__lambda_l2': 10.0}
[I 2025-11-15 16:17:29,370] Trial 10 finished with value: 0.8225 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.1, 'classifier__num_leaves': 255, 'classifier__max_depth': 10, 'classifier__feature_fraction': 0.7, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 20, 'classifier__lambda_l1': 0.001, 'classifier__lambda_l2': 10.0}. Best is trial 3 with value: 0.827.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006928 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:17:36] INFO: Trial 12/50 - Accuracy: 0.8275 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.03, 'classifier__num_leaves': 31, 'classifier__max_depth': 8, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.7, 'classifier__min_child_samples': 20, 'classifier__lambda_l1': 5.0, 'classifier__lambda_l2': 0.001}
[I 2025-11-15 16:17:36,796] Trial 11 finished with value: 0.8275 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.03, 'classifier__num_leaves': 31, 'classifier__max_depth': 8, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.7, 'classifier__min_child_samples': 20, 'classifier__lambda_l1': 5.0, 'classifier__lambda_l2': 0.001}. Best is trial 11 with value: 0.8275.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000076 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:17:41] INFO: Trial 13/50 - Accuracy: 0.8250 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.03, 'classifier__num_leaves': 63, 'classifier__max_depth': 8, 'classifier__feature_fraction': 1.0, 'classifier__bagging_fraction': 0.7, 'classifier__min_child_samples': 20, 'classifier__lambda_l1': 5.0, 'classifier__lambda_l2': 10.0}
[I 2025-11-15 16:17:41,529] Trial 12 finished with value: 0.825 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.03, 'classifier__num_leaves': 63, 'classifier__max_depth': 8, 'classifier__feature_fraction': 1.0, 'classifier__bagging_fraction': 0.7, 'classifier__min_child_samples': 20, 'classifier__lambda_l1': 5.0, 'classifier__lambda_l2': 10.0}. Best is trial 11 with value: 0.8275.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003121 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:17:52] INFO: Trial 14/50 - Accuracy: 0.8215 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 600, 'classifier__learning_rate': 0.03, 'classifier__num_leaves': 255, 'classifier__max_depth': 8, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.7, 'classifier__min_child_samples': 20, 'classifier__lambda_l1': 1.0, 'classifier__lambda_l2': 1.0}




[I 2025-11-15 16:17:52,409] Trial 13 finished with value: 0.8215 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 600, 'classifier__learning_rate': 0.03, 'classifier__num_leaves': 255, 'classifier__max_depth': 8, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.7, 'classifier__min_child_samples': 20, 'classifier__lambda_l1': 1.0, 'classifier__lambda_l2': 1.0}. Best is trial 11 with value: 0.8275.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000443 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:17:56] INFO: Trial 15/50 - Accuracy: 0.8270 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.03, 'classifier__num_leaves': 31, 'classifier__max_depth': 10, 'classifier__feature_fraction': 0.7, 'classifier__bagging_fraction': 0.7, 'classifier__min_child_samples': 20, 'classifier__lambda_l1': 5.0, 'classifier__lambda_l2': 0.001}
[I 2025-11-15 16:17:56,540] Trial 14 finished with value: 0.827 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.03, 'classifier__num_leaves': 31, 'classifier__max_depth': 10, 'classifier__feature_fraction': 0.7, 'classifier__bagging_fraction': 0.7, 'classifier__min_child_samples': 20, 'classifier__lambda_l1': 5.0, 'classifier__lambda_l2': 0.001}. Best is trial 11 with value: 0.8275.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000383 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:18:06] INFO: Trial 16/50 - Accuracy: 0.8275 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 800, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 31, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.7, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 5.0, 'classifier__lambda_l2': 10.0}
[I 2025-11-15 16:18:06,931] Trial 15 finished with value: 0.8275 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 800, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 31, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.7, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 5.0, 'classifier__lambda_l2': 10.0}. Best is trial 11 with value: 0.8275.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000504 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:18:11] INFO: Trial 17/50 - Accuracy: 0.8180 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 800, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 31, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.7, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 0.001, 'classifier__lambda_l2': 1.0}
[I 2025-11-15 16:18:11,643] Trial 16 finished with value: 0.818 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 800, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 31, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.7, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 0.001, 'classifier__lambda_l2': 1.0}. Best is trial 11 with value: 0.8275.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000402 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:18:15] INFO: Trial 18/50 - Accuracy: 0.8285 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 255, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 5.0}




[I 2025-11-15 16:18:15,287] Trial 17 finished with value: 0.8285 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 255, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 5.0}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000543 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:18:23] INFO: Trial 19/50 - Accuracy: 0.8275 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.15, 'classifier__num_leaves': 255, 'classifier__max_depth': 3, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 10, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 5.0}
[I 2025-11-15 16:18:23,219] Trial 18 finished with value: 0.8275 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.15, 'classifier__num_leaves': 255, 'classifier__max_depth': 3, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 10, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 5.0}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000309 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:18:27] INFO: Trial 20/50 - Accuracy: 0.8255 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.01, 'classifier__num_leaves': 255, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 5.0}
[I 2025-11-15 16:18:27,181] Trial 19 finished with value: 0.8255 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.01, 'classifier__num_leaves': 255, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 5.0}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000147 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:18:38] INFO: Trial 21/50 - Accuracy: 0.8235 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.1, 'classifier__num_leaves': 255, 'classifier__max_depth': 8, 'classifier__feature_fraction': 1.0, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 1.0, 'classifier__lambda_l2': 5.0}
[I 2025-11-15 16:18:38,855] Trial 20 finished with value: 0.8235 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.1, 'classifier__num_leaves': 255, 'classifier__max_depth': 8, 'classifier__feature_fraction': 1.0, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 1.0, 'classifier__lambda_l2': 5.0}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000409 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:18:42] INFO: Trial 22/50 - Accuracy: 0.8280 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 63, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 0.001}
[I 2025-11-15 16:18:42,477] Trial 21 finished with value: 0.828 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 63, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 0.001}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000398 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:18:46] INFO: Trial 23/50 - Accuracy: 0.8280 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 63, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 0.001}




[I 2025-11-15 16:18:46,086] Trial 22 finished with value: 0.828 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 63, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 0.001}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000385 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:18:49] INFO: Trial 24/50 - Accuracy: 0.8280 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 63, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 0.001}
[I 2025-11-15 16:18:49,819] Trial 23 finished with value: 0.828 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 63, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 0.001}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000388 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:18:59] INFO: Trial 25/50 - Accuracy: 0.8280 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 63, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 0.001}
[I 2025-11-15 16:18:59,016] Trial 24 finished with value: 0.828 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 63, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 0.001}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000388 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:19:00] INFO: Trial 26/50 - Accuracy: 0.8285 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 63, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 0.001}
[I 2025-11-15 16:19:00,594] Trial 25 finished with value: 0.8285 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 63, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 0.001}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000456 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000

[16:19:02] INFO: Trial 27/50 - Accuracy: 0.8280 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 63, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 5.0}
[I 2025-11-15 16:19:02,215] Trial 26 finished with value: 0.828 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 63, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 5.0}. Best is trial 17 with value: 0.8285.



[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001053 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:19:03] INFO: Trial 28/50 - Accuracy: 0.8285 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 63, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 0.001}
[I 2025-11-15 16:19:03,863] Trial 27 finished with value: 0.8285 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 63, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 0.001}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000370 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:19:05] INFO: Trial 29/50 - Accuracy: 0.8270 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 63, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 10, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 0.1}
[I 2025-11-15 16:19:05,491] Trial 28 finished with value: 0.827 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 63, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 10, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 0.1}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000482 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:19:07] INFO: Trial 30/50 - Accuracy: 0.8260 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.01, 'classifier__num_leaves': 127, 'classifier__max_depth': 12, 'classifier__feature_fraction': 1.0, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 1.0}
[I 2025-11-15 16:19:07,152] Trial 29 finished with value: 0.826 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.01, 'classifier__num_leaves': 127, 'classifier__max_depth': 12, 'classifier__feature_fraction': 1.0, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 1.0}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000435 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:19:14] INFO: Trial 31/50 - Accuracy: 0.8285 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 255, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 5.0}
[I 2025-11-15 16:19:14,726] Trial 30 finished with value: 0.8285 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 255, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 5.0}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000610 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:19:16] INFO: Trial 32/50 - Accuracy: 0.8285 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 255, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 5.0}
[I 2025-11-15 16:19:16,263] Trial 31 finished with value: 0.8285 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 255, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 5.0}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000608 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:19:17] INFO: Trial 33/50 - Accuracy: 0.8285 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 255, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 5.0}
[I 2025-11-15 16:19:17,876] Trial 32 finished with value: 0.8285 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 255, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 5.0}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000540 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:19:19] INFO: Trial 34/50 - Accuracy: 0.8285 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 255, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 5.0}
[I 2025-11-15 16:19:19,437] Trial 33 finished with value: 0.8285 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 255, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 5.0}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000357 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:19:20] INFO: Trial 35/50 - Accuracy: 0.8235 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 255, 'classifier__max_depth': 3, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 50, 'classifier__lambda_l1': 1.0, 'classifier__lambda_l2': 0.001}
[I 2025-11-15 16:19:20,395] Trial 34 finished with value: 0.8235 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 255, 'classifier__max_depth': 3, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 50, 'classifier__lambda_l1': 1.0, 'classifier__lambda_l2': 0.001}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000078 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:19:31] INFO: Trial 36/50 - Accuracy: 0.8190 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 500, 'classifier__learning_rate': 0.1, 'classifier__num_leaves': 127, 'classifier__max_depth': 12, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 0.001, 'classifier__lambda_l2': 0.01}
[I 2025-11-15 16:19:31,291] Trial 35 finished with value: 0.819 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 500, 'classifier__learning_rate': 0.1, 'classifier__num_leaves': 127, 'classifier__max_depth': 12, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 0.001, 'classifier__lambda_l2': 0.01}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000396 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:19:33] INFO: Trial 37/50 - Accuracy: 0.8205 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.08, 'classifier__num_leaves': 63, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.9, 'classifier__min_child_samples': 5, 'classifier__lambda_l1': 0.01, 'classifier__lambda_l2': 5.0}
[I 2025-11-15 16:19:33,848] Trial 36 finished with value: 0.8205 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.08, 'classifier__num_leaves': 63, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.9, 'classifier__min_child_samples': 5, 'classifier__lambda_l1': 0.01, 'classifier__lambda_l2': 5.0}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000415 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:19:36] INFO: Trial 38/50 - Accuracy: 0.8260 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 500, 'classifier__learning_rate': 0.15, 'classifier__num_leaves': 63, 'classifier__max_depth': 5, 'classifier__feature_fraction': 1.0, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 10, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 0.001}
[I 2025-11-15 16:19:36,016] Trial 37 finished with value: 0.826 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 500, 'classifier__learning_rate': 0.15, 'classifier__num_leaves': 63, 'classifier__max_depth': 5, 'classifier__feature_fraction': 1.0, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 10, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 0.001}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000384 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:19:40] INFO: Trial 39/50 - Accuracy: 0.8255 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 600, 'classifier__learning_rate': 0.01, 'classifier__num_leaves': 255, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 0.1, 'classifier__lambda_l2': 5.0}
[I 2025-11-15 16:19:40,374] Trial 38 finished with value: 0.8255 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 600, 'classifier__learning_rate': 0.01, 'classifier__num_leaves': 255, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 0.1, 'classifier__lambda_l2': 5.0}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000539 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:19:48] INFO: Trial 40/50 - Accuracy: 0.8280 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.05, 'classifier__num_leaves': 127, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.9, 'classifier__min_child_samples': 50, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 0.001}
[I 2025-11-15 16:19:48,138] Trial 39 finished with value: 0.828 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.05, 'classifier__num_leaves': 127, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.9, 'classifier__min_child_samples': 50, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 0.001}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000495 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:19:49] INFO: Trial 41/50 - Accuracy: 0.8220 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 63, 'classifier__max_depth': 5, 'classifier__feature_fraction': 0.7, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 5, 'classifier__lambda_l1': 0.01, 'classifier__lambda_l2': 0.01}
[I 2025-11-15 16:19:49,793] Trial 40 finished with value: 0.822 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 63, 'classifier__max_depth': 5, 'classifier__feature_fraction': 0.7, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 5, 'classifier__lambda_l1': 0.01, 'classifier__lambda_l2': 0.01}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000383 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:19:51] INFO: Trial 42/50 - Accuracy: 0.8285 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 255, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 5.0}
[I 2025-11-15 16:19:51,341] Trial 41 finished with value: 0.8285 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 255, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 5.0}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000378 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:19:52] INFO: Trial 43/50 - Accuracy: 0.8285 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 255, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 5.0}
[I 2025-11-15 16:19:52,975] Trial 42 finished with value: 0.8285 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 255, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 5.0}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000169 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:19:54] INFO: Trial 44/50 - Accuracy: 0.8285 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 255, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 5.0}




[I 2025-11-15 16:19:54,584] Trial 43 finished with value: 0.8285 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 255, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 5.0}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000576 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:20:04] INFO: Trial 45/50 - Accuracy: 0.8125 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.08, 'classifier__num_leaves': 255, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 0.1, 'classifier__lambda_l2': 0.1}
[I 2025-11-15 16:20:04,974] Trial 44 finished with value: 0.8125 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.08, 'classifier__num_leaves': 255, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 0.1, 'classifier__lambda_l2': 0.1}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000696 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:20:06] INFO: Trial 46/50 - Accuracy: 0.8275 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 255, 'classifier__max_depth': 10, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 5.0}
[I 2025-11-15 16:20:06,567] Trial 45 finished with value: 0.8275 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 255, 'classifier__max_depth': 10, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 5.0}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000393 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:20:08] INFO: Trial 47/50 - Accuracy: 0.8245 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 500, 'classifier__learning_rate': 0.05, 'classifier__num_leaves': 255, 'classifier__max_depth': 3, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 10.0}
[I 2025-11-15 16:20:08,025] Trial 46 finished with value: 0.8245 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 500, 'classifier__learning_rate': 0.05, 'classifier__num_leaves': 255, 'classifier__max_depth': 3, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 10.0}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000390 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:20:13] INFO: Trial 48/50 - Accuracy: 0.8160 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 600, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 63, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.7, 'classifier__bagging_fraction': 0.9, 'classifier__min_child_samples': 50, 'classifier__lambda_l1': 0.001, 'classifier__lambda_l2': 0.001}
[I 2025-11-15 16:20:13,852] Trial 47 finished with value: 0.816 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 600, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 63, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.7, 'classifier__bagging_fraction': 0.9, 'classifier__min_child_samples': 50, 'classifier__lambda_l1': 0.001, 'classifier__lambda_l2': 0.001}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000406 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:20:21] INFO: Trial 49/50 - Accuracy: 0.8265 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 127, 'classifier__max_depth': 12, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 5, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 1.0}
[I 2025-11-15 16:20:21,062] Trial 48 finished with value: 0.8265 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 400, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 127, 'classifier__max_depth': 12, 'classifier__feature_fraction': 0.8, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 5, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 1.0}. Best is trial 17 with value: 0.8285.


[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000488 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:20:32] INFO: Trial 50/50 - Accuracy: 0.8005 - Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 800, 'classifier__learning_rate': 0.15, 'classifier__num_leaves': 255, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 0.01, 'classifier__lambda_l2': 5.0}
[I 2025-11-15 16:20:32,906] Trial 49 finished with value: 0.8005 and parameters: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 800, 'classifier__learning_rate': 0.15, 'classifier__num_leaves': 255, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 0.8, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 0.01, 'classifier__lambda_l2': 5.0}. Best is trial 17 with value: 0.8285.
[16:20:32] INFO: Optimization finished. Best Accuracy: 0.8285
[16:20:32] INFO: Best Parameters: {'classifier__boosting_type': 'dart', 'classifier__

[LightGBM] [Info] Number of positive: 4000, number of negative: 4000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003954 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1590
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[16:20:40] INFO: Starting Optuna optimization for 50 trials...
[I 2025-11-15 16:20:40,561] A new study created in memory with name: no-name-2ff839d0-fb6c-4c31-a1ff-dee692d7ec52



Best LightGBM Accuracy: 0.8285
Best LightGBM Params: {'classifier__boosting_type': 'dart', 'classifier__n_estimators': 700, 'classifier__learning_rate': 0.2, 'classifier__num_leaves': 255, 'classifier__max_depth': -1, 'classifier__feature_fraction': 0.9, 'classifier__bagging_fraction': 1.0, 'classifier__min_child_samples': 30, 'classifier__lambda_l1': 10.0, 'classifier__lambda_l2': 5.0}


[16:20:44] INFO: Trial 1/50 - Accuracy: 0.8195 - Params: {'classifier__n_estimators': 222, 'classifier__learning_rate': 0.05482866945254448, 'classifier__max_depth': 6, 'classifier__min_samples_split': 7, 'classifier__min_samples_leaf': 1, 'classifier__subsample': 0.7194799429047927, 'classifier__max_features': 0.7, 'classifier__random_state': 42}
[I 2025-11-15 16:20:44,868] Trial 0 finished with value: 0.8195 and parameters: {'classifier__n_estimators': 222, 'classifier__learning_rate': 0.05482866945254448, 'classifier__max_depth': 6, 'classifier__min_samples_split': 7, 'classifier__min_samples_leaf': 1, 'classifier__subsample': 0.7194799429047927, 'classifier__max_features': 0.7}. Best is trial 0 with value: 0.8195.
[16:20:48] INFO: Trial 2/50 - Accuracy: 0.8170 - Params: {'classifier__n_estimators': 309, 'classifier__learning_rate': 0.1614882541394541, 'classifier__max_depth': 3, 'classifier__min_samples_split': 3, 'classifier__min_samples_leaf': 2, 'classifier__subsample': 0.670981


Best Gradient Boosting Accuracy: 0.832
Best Gradient Boosting Params: {'classifier__n_estimators': 328, 'classifier__learning_rate': 0.023093275921250805, 'classifier__max_depth': 3, 'classifier__min_samples_split': 3, 'classifier__min_samples_leaf': 2, 'classifier__subsample': 0.6681042240479405, 'classifier__max_features': 0.5}


'\n# --- CATBOOST ---\nbest_cat_model, best_params_cat, best_score_cat = optimize_optuna(\n    lambda: pipeline_catboost,\n    X_train_split,\n    y_train_split,\n    X_val_split,\n    y_val_split,\n    catboost_param_grid,\n    n_trials=1\n)\nprint("\nBest CatBoost Accuracy:", best_score_cat)\nprint("Best CatBoost Params:", best_params_cat)\n'

In [9]:
import optuna
from catboost import CatBoostClassifier, Pool
from sklearn.metrics import accuracy_score

# --- Clean data ---
X_train_cat = X_train_split[features].fillna(0).copy()
X_val_cat = X_val_split[features].fillna(0).copy()

# Ensure unique names
X_train_cat.columns = [
    f"{c}_{i}" if list(X_train_cat.columns).count(c) > 1 else c
    for i, c in enumerate(X_train_cat.columns)
]
X_val_cat.columns = X_train_cat.columns


# --- Optuna objective ---
def objective(trial):
    params = {
        'depth': trial.suggest_int('depth', 4, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.2, log=True),
        'iterations': trial.suggest_int('iterations', 200, 1200),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1, 12),
        'random_seed': 42,
        'task_type': 'CPU',
        'verbose': 0,
    }

    train_pool = Pool(X_train_cat, y_train_split)
    val_pool = Pool(X_val_cat, y_val_split)

    model_cat = CatBoostClassifier(**params)
    model_cat.fit(
        train_pool,
        eval_set=val_pool,
        early_stopping_rounds=30
    )

    preds = model_cat.predict(X_val_cat)
    return accuracy_score(y_val_split, preds)


# --- Run search ---
study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=50)

print("Best Accuracy:", study.best_value)
print("Best Params:", study.best_params)


# --- Build final model using best params ---
cat_best = CatBoostClassifier(
    depth=study.best_params['depth'],
    learning_rate=study.best_params['learning_rate'],
    iterations=study.best_params['iterations'],
    l2_leaf_reg=study.best_params['l2_leaf_reg'],
    random_seed=42,
    task_type='CPU',
    verbose=0
)

train_pool = Pool(X_train_cat, y_train_split)
cat_best.fit(train_pool)
best_cat_model = cat_best


[I 2025-11-15 15:57:32,589] A new study created in memory with name: no-name-2c1a5aa2-d7f3-47b4-af59-ec624a804ea8
[I 2025-11-15 15:57:33,525] Trial 0 finished with value: 0.8315 and parameters: {'depth': 6, 'learning_rate': 0.17254716573280354, 'iterations': 932, 'l2_leaf_reg': 7.585243326167403}. Best is trial 0 with value: 0.8315.
[I 2025-11-15 15:57:35,061] Trial 1 finished with value: 0.827 and parameters: {'depth': 5, 'learning_rate': 0.015957084694148364, 'iterations': 258, 'l2_leaf_reg': 10.527937603524286}. Best is trial 0 with value: 0.8315.
[I 2025-11-15 15:57:37,687] Trial 2 finished with value: 0.8375 and parameters: {'depth': 8, 'learning_rate': 0.08341106432362087, 'iterations': 220, 'l2_leaf_reg': 11.669008373781937}. Best is trial 2 with value: 0.8375.
[I 2025-11-15 15:57:53,208] Trial 3 finished with value: 0.831 and parameters: {'depth': 9, 'learning_rate': 0.018891200276189388, 'iterations': 382, 'l2_leaf_reg': 3.017449608387772}. Best is trial 2 with value: 0.8375.


Best Accuracy: 0.84
Best Params: {'depth': 8, 'learning_rate': 0.03354527866527389, 'iterations': 511, 'l2_leaf_reg': 10.186780593162137}


In [10]:
import optuna
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
from sklearn.model_selection import StratifiedKFold, cross_val_score

# --- 2. Scale numeric features ---
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_split[features])
X_val_scaled = scaler.transform(X_val_split[features])

# --- 3. Define Optuna objective for XGBoost ---
def xgb_objective(trial):
    params = {
        'max_depth': trial.suggest_int('max_depth', 3, 8),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_float('gamma', 0, 5),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-3, 10.0, log=True),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-3, 10.0, log=True),
        'n_estimators': 300,
        'random_state': 42,
        'eval_metric': 'logloss',
        'use_label_encoder': False,
        'tree_method': 'hist',  # faster for tabular data
    }

    model = XGBClassifier(**params)

    # Early stopping
    model.fit(
        X_train_scaled, y_train_split,
        eval_set=[(X_val_scaled, y_val_split)],
        verbose=False
    )

    val_pred = model.predict(X_val_scaled)
    return accuracy_score(y_val_split, val_pred)

# --- 4. Run Optuna study ---
pruner = optuna.pruners.MedianPruner(n_warmup_steps=10)
sampler = optuna.samplers.TPESampler(seed=42)
study = optuna.create_study(direction='maximize', sampler=sampler, pruner=pruner)
study.optimize(xgb_objective, n_trials=80)

# --- 5. Best XGBoost model ---
best_xgb_params = study.best_params
best_xgb = XGBClassifier(
    **best_xgb_params,
    n_estimators=300,
    random_state=42,
    use_label_encoder=False,
    eval_metric='logloss',
    tree_method='hist'
)

best_xgb.fit(
    X_train_scaled, y_train_split,
    eval_set=[(X_val_scaled, y_val_split)],
    verbose=False
)

# --- 6. Evaluate ---
val_pred = best_xgb.predict(X_val_scaled)
val_acc = accuracy_score(y_val_split, val_pred)

print("\nBest XGBoost Accuracy:", round(val_acc, 4))
print("\nBest Parameters:")
for k, v in sorted(best_xgb_params.items()):
    print(f"  {k}: {v}")

# Optional: view trial history
#print(study.trials_dataframe(attrs=("number", "value", "params")))


[I 2025-11-15 16:08:41,038] A new study created in memory with name: no-name-35895c9b-736b-4803-90d0-d66b521d2e2f
[I 2025-11-15 16:08:41,712] Trial 0 finished with value: 0.818 and parameters: {'max_depth': 5, 'learning_rate': 0.2536999076681772, 'subsample': 0.892797576724562, 'colsample_bytree': 0.8394633936788146, 'min_child_weight': 2, 'gamma': 0.7799726016810132, 'reg_lambda': 0.0017073967431528124, 'reg_alpha': 2.9154431891537547}. Best is trial 0 with value: 0.818.
[I 2025-11-15 16:08:42,618] Trial 1 finished with value: 0.82 and parameters: {'max_depth': 6, 'learning_rate': 0.11114989443094977, 'subsample': 0.608233797718321, 'colsample_bytree': 0.9879639408647978, 'min_child_weight': 9, 'gamma': 1.0616955533913808, 'reg_lambda': 0.005337032762603957, 'reg_alpha': 0.00541524411940254}. Best is trial 1 with value: 0.82.
[I 2025-11-15 16:08:43,344] Trial 2 finished with value: 0.8285 and parameters: {'max_depth': 4, 'learning_rate': 0.05958389350068958, 'subsample': 0.77277800745


Best XGBoost Accuracy: 0.842

Best Parameters:
  colsample_bytree: 0.7113594634696728
  gamma: 3.685451015111078
  learning_rate: 0.03449309051213638
  max_depth: 7
  min_child_weight: 9
  reg_alpha: 0.01716572534089344
  reg_lambda: 5.698950693580417
  subsample: 0.6427843406338178


In [None]:
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# --- Base models (already fitted) ---
#xgb_clf = best_model.named_steps["classifier"]
lgbm_clf = best_lgbm_model.named_steps["classifier"]
#gb_clf = best_gb_model.named_steps["classifier"]
#cat_clf = best_cat_model.named_steps["classifier"]


# --- Final estimator with scaling to help convergence ---
final_estimator = make_pipeline(
    StandardScaler(),
    LogisticRegression(max_iter=5000, solver="saga", random_state=42)
)

# --- Stacking classifier ---
stacking_model = StackingClassifier(
    estimators=[
        ("xgb", best_xgb),
        #("lgbm", lgbm_clf),
        #("gb", gb_clf),
        ("cat", best_cat_model)
    ],
    final_estimator=final_estimator,
    passthrough=False,  # only use base model predictions, easier to converge
    n_jobs=-1,
    cv=2  # cross-validated predictions for the final estimator
)

# --- Fit stacking model with preprocessed features ---
X_train_preprocessed = best_model.named_steps["preprocessor"].transform(X_train_split)
X_val_preprocessed = best_model.named_steps["preprocessor"].transform(X_val_split)

stacking_model.fit(X_train_preprocessed, y_train_split)

# --- Predict and evaluate ---
stack_preds = stacking_model.predict(X_val_preprocessed)
from sklearn.metrics import accuracy_score
stack_acc = accuracy_score(y_val_split, stack_preds)
print(f"Stacking Ensemble Validation Accuracy: {stack_acc:.4f}")


Stacking Ensemble Validation Accuracy: 0.8385


In [19]:
final_model = stacking_model

from Submission.submit import save_submission
import pandas as pd

X_test_for_submission = X_test_features[features] 

# Agora o 'final_model' receberá as 58 features que ele espera.
save_submission(X_test_for_submission, final_model, name = 'olya_model')

[INFO] Submission created: submissions\olya_model.csv


'submissions\\olya_model.csv'