<a href="https://colab.research.google.com/github/aymuos/masters-practise-repo/blob/main/TERM2/ML_Lab/Project/industrial-ai-project/Ensemble_methods.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [35]:
%pip install xgboost lightgbm catboost optuna

Note: you may need to restart the kernel to use updated packages.


In [36]:
import pandas as pd
import optuna
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import KNNImputer ,SimpleImputer
from sklearn.metrics import roc_auc_score, log_loss
import xgboost as xgb
import lightgbm as lgb
import catboost as cb
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
import warnings
warnings.filterwarnings('ignore')

In [37]:
# Load the data
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

In [38]:
print(f"Train shape: {train_df.shape}")
print(f"Test shape: {test_df.shape}")

Train shape: (15000, 20)
Test shape: (10000, 19)


In [39]:
# Check for the target variable distribution
print("\nTarget variable distribution:")
print(train_df['Status'].value_counts(normalize=True) * 100)


Target variable distribution:
Status
C     67.340000
D     30.246667
CL     2.413333
Name: proportion, dtype: float64


In [40]:
# Exploratory Data Analysis
def analyze_missing_values(df, name):
    missing = df.isnull().sum()
    missing_pct = missing / len(df) * 100
    print(f"\nMissing values in {name} dataset:")
    for col, pct in zip(missing.index, missing_pct):
        if pct > 0:
            print(f"{col}: {pct:.2f}%")

In [41]:
analyze_missing_values(train_df, 'train')
analyze_missing_values(test_df, 'test')


Missing values in train dataset:
Drug: 43.67%
Ascites: 43.61%
Hepatomegaly: 43.67%
Spiders: 43.69%
Cholesterol: 55.58%
Copper: 44.27%
Alk_Phos: 43.70%
SGOT: 43.71%
Tryglicerides: 55.90%
Platelets: 3.85%
Prothrombin: 0.12%

Missing values in test dataset:
Drug: 42.84%
Ascites: 42.82%
Hepatomegaly: 42.87%
Spiders: 42.89%
Cholesterol: 55.47%
Copper: 43.58%
Alk_Phos: 42.91%
SGOT: 42.92%
Tryglicerides: 55.81%
Platelets: 3.63%
Prothrombin: 0.16%


In [42]:
# Separate features and target
X_train = train_df.drop('Status', axis=1).copy()
y_train = train_df['Status'].copy()
X_test = test_df.copy()

In [43]:
# Preprocess the data
def preprocess_data(X_train, y_train, X_test):
    # Encode categorical target
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y_train)
    print(f"\nEncoded target classes: {label_encoder.classes_}")

    # Keep track of original indices
    X_train['original_index'] = X_train.index
    X_test['original_index'] = X_test.index

    # Identify data types
    categorical_cols = X_train.select_dtypes(include=['object']).columns.tolist()
    numeric_cols = X_train.select_dtypes(include=['number']).columns.tolist()
    numeric_cols.remove('original_index')  # Remove the index column we added

    print(f"\nCategorical columns: {categorical_cols}")
    print(f"Numeric columns: {numeric_cols}")

    # Strategy for handling missing values:
    # 1. For categorical: impute with most frequent value
    # 2. For numerical: use KNN imputation

    # Create pipeline for categorical features
    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),  # Use most frequent value for categorical imputation
        ('encoder', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
    ])

    # Create pipeline for numerical features
    numeric_transformer = Pipeline(steps=[
        ('imputer', KNNImputer(n_neighbors=5)),
        ('scaler', StandardScaler())
    ])

    # Combine transformers
    preprocessor = ColumnTransformer(
        transformers=[
            ('cat', categorical_transformer, categorical_cols),
            ('num', numeric_transformer, numeric_cols)
        ])

    # Apply preprocessing
    print("\nPreprocessing data...")
    X_train_processed = preprocessor.fit_transform(X_train)
    X_test_processed = preprocessor.transform(X_test)

    # Get feature names after preprocessing
    categorical_feature_names = []
    if categorical_cols:
        ohe = preprocessor.named_transformers_['cat'].named_steps['encoder']
        categorical_feature_names = ohe.get_feature_names_out(categorical_cols).tolist()

    numeric_feature_names = numeric_cols
    all_feature_names = categorical_feature_names + numeric_feature_names

    print(f"Processed feature count: {len(all_feature_names)}")

    return X_train_processed, y_encoded, X_test_processed, label_encoder, all_feature_names, X_train['original_index'], X_test['original_index']


In [44]:
# Apply preprocessing
X_train_processed, y_encoded, X_test_processed, label_encoder, feature_names, train_indices, test_indices = preprocess_data(X_train, y_train, X_test)



Encoded target classes: ['C' 'CL' 'D']

Categorical columns: ['Drug', 'Sex', 'Ascites', 'Hepatomegaly', 'Spiders', 'Edema']
Numeric columns: ['id', 'N_Days', 'Age', 'Bilirubin', 'Cholesterol', 'Albumin', 'Copper', 'Alk_Phos', 'SGOT', 'Tryglicerides', 'Platelets', 'Prothrombin', 'Stage']

Preprocessing data...
Processed feature count: 27


In [45]:
# Create validation set
X_train_final, X_val, y_train_final, y_val = train_test_split(
    X_train_processed, y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded
)

print(f"\nTraining data shape: {X_train_final.shape}")
print(f"Validation data shape: {X_val.shape}")
print(f"Test data shape: {X_test_processed.shape}")


Training data shape: (12000, 27)
Validation data shape: (3000, 27)
Test data shape: (10000, 27)


In [46]:
# Create and train models
def train_xgboost(X_train, y_train, X_val, y_val):
    print("\nTraining XGBoost model...")
    num_classes = len(np.unique(y_train))

    if num_classes == 2:
        objective = 'binary:logistic'
        eval_metric = 'logloss'
    else:
        objective = 'multi:softprob'
        eval_metric = 'mlogloss'

    model = xgb.XGBClassifier(
        n_estimators=500,
        learning_rate=0.05,
        max_depth=6,
        min_child_weight=1,
        gamma=0,
        subsample=0.8,
        colsample_bytree=0.8,
        objective=objective,
        eval_metric=eval_metric,
        random_state=42,
        early_stopping_rounds=20,
        use_label_encoder=False
    )

    model.fit(
        X_train, y_train,
        eval_set=[(X_val, y_val)],
        verbose=False
    )


    # Get validation score
    if num_classes == 2:
        y_pred_val = model.predict_proba(X_val)[:, 1]
        auc = roc_auc_score(y_val, y_pred_val)
        y_pred_val_tensor = np.array(y_pred_val).reshape(-1, 1)
        y_pred_val_complement = 1 - y_pred_val_tensor
        y_pred_val_probs = np.hstack((y_pred_val_complement, y_pred_val_tensor))
        loss = log_loss(y_val, y_pred_val_probs)
    else:
        y_pred_val = model.predict_proba(X_val)
        auc = roc_auc_score(y_val, y_pred_val, multi_class='ovr')
        loss = log_loss(y_val, y_pred_val)

    print(f"XGBoost - Validation AUC: {auc:.4f}, Log Loss: {loss:.4f}")
    return model, auc, loss


In [47]:
# LBGM
def train_lightgbm(X_train, y_train, X_val, y_val):
    print("\nTraining LightGBM model...")
    num_classes = len(np.unique(y_train))

    if num_classes == 2:
        objective = 'binary'
        metric = 'binary_logloss'
    else:
        objective = 'multiclass'
        metric = 'multi_logloss'

    model = lgb.LGBMClassifier(
        boosting_type='gbdt',
        num_leaves=31,
        max_depth=-1,
        learning_rate=0.05,
        n_estimators=500,
        subsample=0.8,
        colsample_bytree=0.8,
        objective=objective,
        random_state=42,
        metric=metric,
        num_class=num_classes if num_classes > 2 else 1
    )

    model.fit(
        X_train, y_train,
        eval_set=[(X_val, y_val)],
        callbacks=[lgb.early_stopping(20, verbose=False)]
    )

    # Get validation score
    if num_classes == 2:
        y_pred_val = model.predict_proba(X_val)[:, 1]
        auc = roc_auc_score(y_val, y_pred_val)
        loss = log_loss(y_val, model.predict_proba(X_val))
    else:
        y_pred_val = model.predict_proba(X_val)
        auc = roc_auc_score(y_val, y_pred_val, multi_class='ovr')
        loss = log_loss(y_val, y_pred_val)

    print(f"LightGBM - Validation AUC: {auc:.4f}, Log Loss: {loss:.4f}")
    return model, auc, loss

In [48]:
def train_catboost(X_train, y_train, X_val, y_val):
    print("\nTraining CatBoost model...")
    num_classes = len(np.unique(y_train))

    if num_classes == 2:
        loss_function = 'Logloss'
    else:
        loss_function = 'MultiClass'

    model = cb.CatBoostClassifier(
        iterations=500,
        learning_rate=0.05,
        depth=6,
        l2_leaf_reg=3,
        loss_function=loss_function,
        eval_metric='AUC',
        random_seed=42,
        verbose=False
    )

    model.fit(
        X_train, y_train,
        eval_set=[(X_val, y_val)],
        early_stopping_rounds=20,
        verbose=False
    )

    # Get validation score
    if num_classes == 2:
        y_pred_val = model.predict_proba(X_val)[:, 1]
        auc = roc_auc_score(y_val, y_pred_val)
        loss = log_loss(y_val, model.predict_proba(X_val))
    else:
        y_pred_val = model.predict_proba(X_val)
        auc = roc_auc_score(y_val, y_pred_val, multi_class='ovr')
        loss = log_loss(y_val, y_pred_val)

    print(f"CatBoost - Validation AUC: {auc:.4f}, Log Loss: {loss:.4f}")
    return model, auc, loss

In [49]:
# using optuna for tuning
def objective(trial):
  # XGB
  xgb_params = {
      "n_estimators": trial.suggest_int("xgb_n_estimators", 100, 1000),
      "learning_rate": trial.suggest_float("xgb_learning_rate", 1e-3, 1e-1, log=True),
      "max_depth": trial.suggest_int("xgb_max_depth", 3, 10)
      }

  lgb_params = {
        "n_estimators": trial.suggest_int("lgb_n_estimators", 100, 1000),
        "learning_rate": trial.suggest_float("lgb_learning_rate", 1e-3, 1e-1, log=True),
        "num_leaves": trial.suggest_int("lgb_num_leaves", 20, 50),
        # ... other LightGBM hyperparameters ...
    }

    # CatBoost hyperparameters
  cb_params = {
        "iterations": trial.suggest_int("cb_iterations", 100, 1000),
        "learning_rate": trial.suggest_float("cb_learning_rate", 1e-3, 1e-1, log=True),
        "depth": trial.suggest_int("cb_depth", 4, 10),
        # ... other CatBoost hyperparameters ...
    }
# Train models with the suggested hyperparameters
  xgb_model = xgb.XGBClassifier(**xgb_params)
  xgb_model.fit(X_train_final, y_train_final, eval_set=[(X_val, y_val)], verbose=False)

  lgb_model = lgb.LGBMClassifier(**lgb_params)
  lgb_model.fit(X_train_final, y_train_final, eval_set=[(X_val, y_val)])

  cb_model = cb.CatBoostClassifier(**cb_params, verbose=False)
  cb_model.fit(X_train_final, y_train_final, eval_set=[(X_val, y_val)], verbose=False)

  # Ensemble predictions (weighted based on inverse log loss)
  num_classes = len(label_encoder.classes_)
  if num_classes == 2:
      val_pred_xgb = xgb_model.predict_proba(X_val)[:, 1]
      val_pred_lgb = lgb_model.predict_proba(X_val)[:, 1]
      val_pred_cb = cb_model.predict_proba(X_val)[:, 1]

  else:
      val_pred_xgb = xgb_model.predict_proba(X_val)
      val_pred_lgb = lgb_model.predict_proba(X_val)
      val_pred_cb = cb_model.predict_proba(X_val)

  # Calculate individual model losses
  xgb_loss = log_loss(y_val, xgb_model.predict_proba(X_val))
  lgb_loss = log_loss(y_val, lgb_model.predict_proba(X_val))
  cb_loss = log_loss(y_val, cb_model.predict_proba(X_val))

  # Calculate ensemble weights (inverse of log loss)
  weights = np.array([1/xgb_loss, 1/lgb_loss, 1/cb_loss])
  weights = weights / weights.sum()  # Normalize to sum to 1

  # Ensemble predictions (weighted)
  val_pred_ensemble = (
      weights[0] * val_pred_xgb +
      weights[1] * val_pred_lgb +
      weights[2] * val_pred_cb
  )

  # Evaluate ensemble
  ensemble_loss = log_loss(y_val, val_pred_ensemble)

  return ensemble_loss


In [50]:
# Create an Optuna study and optimize
study = optuna.create_study(direction="minimize")  # Minimize log loss
study.optimize(objective, n_trials=100)  # Number of trials to run

# Get best hyperparameters
best_params = study.best_params

[I 2025-04-22 20:07:43,338] A new study created in memory with name: no-name-184d53d3-989e-4156-a526-53b3727a85ca


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003932 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:08:15,366] Trial 0 finished with value: 0.3800836225777625 and parameters: {'xgb_n_estimators': 899, 'xgb_learning_rate': 0.010667433273686164, 'xgb_max_depth': 6, 'lgb_n_estimators': 796, 'lgb_learning_rate': 0.016481869106326565, 'lgb_num_leaves': 22, 'cb_iterations': 893, 'cb_learning_rate': 0.01265644945797267, 'cb_depth': 6}. Best is trial 0 with value: 0.3800836225777625.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002533 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:08:39,888] Trial 1 finished with value: 0.3824620837085683 and parameters: {'xgb_n_estimators': 158, 'xgb_learning_rate': 0.022662172227542233, 'xgb_max_depth': 10, 'lgb_n_estimators': 956, 'lgb_learning_rate': 0.09156664215233488, 'lgb_num_leaves': 34, 'cb_iterations': 710, 'cb_learning_rate': 0.09313185535222032, 'cb_depth': 6}. Best is trial 0 with value: 0.3800836225777625.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002826 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:08:51,866] Trial 2 finished with value: 0.4523020125579404 and parameters: {'xgb_n_estimators': 814, 'xgb_learning_rate': 0.0028918668815296126, 'xgb_max_depth': 5, 'lgb_n_estimators': 361, 'lgb_learning_rate': 0.060241494058718426, 'lgb_num_leaves': 39, 'cb_iterations': 122, 'cb_learning_rate': 0.0020869382690618097, 'cb_depth': 5}. Best is trial 0 with value: 0.3800836225777625.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001010 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:09:24,265] Trial 3 finished with value: 0.4433398958329664 and parameters: {'xgb_n_estimators': 218, 'xgb_learning_rate': 0.002615266030147284, 'xgb_max_depth': 4, 'lgb_n_estimators': 551, 'lgb_learning_rate': 0.0032718187371747055, 'lgb_num_leaves': 31, 'cb_iterations': 228, 'cb_learning_rate': 0.03481891399678998, 'cb_depth': 10}. Best is trial 0 with value: 0.3800836225777625.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000940 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:09:46,626] Trial 4 finished with value: 0.3894588529503927 and parameters: {'xgb_n_estimators': 512, 'xgb_learning_rate': 0.005580722257917343, 'xgb_max_depth': 4, 'lgb_n_estimators': 394, 'lgb_learning_rate': 0.09557999023851248, 'lgb_num_leaves': 35, 'cb_iterations': 429, 'cb_learning_rate': 0.023326511248477105, 'cb_depth': 8}. Best is trial 0 with value: 0.3800836225777625.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000954 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:10:07,159] Trial 5 finished with value: 0.3847903639306557 and parameters: {'xgb_n_estimators': 474, 'xgb_learning_rate': 0.09241484463561098, 'xgb_max_depth': 4, 'lgb_n_estimators': 425, 'lgb_learning_rate': 0.012470454286765313, 'lgb_num_leaves': 46, 'cb_iterations': 895, 'cb_learning_rate': 0.0026368597761481085, 'cb_depth': 6}. Best is trial 0 with value: 0.3800836225777625.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001942 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:10:25,586] Trial 6 finished with value: 0.38747200482285354 and parameters: {'xgb_n_estimators': 430, 'xgb_learning_rate': 0.0531610635696708, 'xgb_max_depth': 8, 'lgb_n_estimators': 691, 'lgb_learning_rate': 0.0024174278873179333, 'lgb_num_leaves': 38, 'cb_iterations': 529, 'cb_learning_rate': 0.02129916265103684, 'cb_depth': 4}. Best is trial 0 with value: 0.3800836225777625.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000592 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:11:03,233] Trial 7 finished with value: 0.37915350892587546 and parameters: {'xgb_n_estimators': 435, 'xgb_learning_rate': 0.05934365720879677, 'xgb_max_depth': 8, 'lgb_n_estimators': 705, 'lgb_learning_rate': 0.007460143754668359, 'lgb_num_leaves': 26, 'cb_iterations': 694, 'cb_learning_rate': 0.024073352535568658, 'cb_depth': 8}. Best is trial 7 with value: 0.37915350892587546.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000706 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:11:30,254] Trial 8 finished with value: 0.48262033895935186 and parameters: {'xgb_n_estimators': 929, 'xgb_learning_rate': 0.006116157668488155, 'xgb_max_depth': 9, 'lgb_n_estimators': 123, 'lgb_learning_rate': 0.0016177212182502433, 'lgb_num_leaves': 42, 'cb_iterations': 213, 'cb_learning_rate': 0.002964191606411913, 'cb_depth': 5}. Best is trial 7 with value: 0.37915350892587546.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002265 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:11:45,985] Trial 9 finished with value: 0.4157435399600022 and parameters: {'xgb_n_estimators': 430, 'xgb_learning_rate': 0.0309127468746602, 'xgb_max_depth': 10, 'lgb_n_estimators': 203, 'lgb_learning_rate': 0.042256022642450854, 'lgb_num_leaves': 33, 'cb_iterations': 129, 'cb_learning_rate': 0.0029241543258071083, 'cb_depth': 7}. Best is trial 7 with value: 0.37915350892587546.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000855 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:12:54,303] Trial 10 finished with value: 0.41886747675930824 and parameters: {'xgb_n_estimators': 690, 'xgb_learning_rate': 0.0010702900304102466, 'xgb_max_depth': 7, 'lgb_n_estimators': 952, 'lgb_learning_rate': 0.005391198031511336, 'lgb_num_leaves': 21, 'cb_iterations': 721, 'cb_learning_rate': 0.07511042582432988, 'cb_depth': 9}. Best is trial 7 with value: 0.37915350892587546.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002684 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:13:46,557] Trial 11 finished with value: 0.37991590214138843 and parameters: {'xgb_n_estimators': 706, 'xgb_learning_rate': 0.01712301229503968, 'xgb_max_depth': 6, 'lgb_n_estimators': 753, 'lgb_learning_rate': 0.017528543133045204, 'lgb_num_leaves': 22, 'cb_iterations': 980, 'cb_learning_rate': 0.007242628635876029, 'cb_depth': 8}. Best is trial 7 with value: 0.37915350892587546.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000932 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:14:36,772] Trial 12 finished with value: 0.3801406310699139 and parameters: {'xgb_n_estimators': 695, 'xgb_learning_rate': 0.019261012244478782, 'xgb_max_depth': 7, 'lgb_n_estimators': 726, 'lgb_learning_rate': 0.02556044111598193, 'lgb_num_leaves': 27, 'cb_iterations': 990, 'cb_learning_rate': 0.005908096404869326, 'cb_depth': 8}. Best is trial 7 with value: 0.37915350892587546.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000991 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:15:09,472] Trial 13 finished with value: 0.38169594578131294 and parameters: {'xgb_n_estimators': 306, 'xgb_learning_rate': 0.09825703226449611, 'xgb_max_depth': 6, 'lgb_n_estimators': 594, 'lgb_learning_rate': 0.007124584881833311, 'lgb_num_leaves': 26, 'cb_iterations': 706, 'cb_learning_rate': 0.006493160605813367, 'cb_depth': 8}. Best is trial 7 with value: 0.37915350892587546.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000580 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:16:55,552] Trial 14 finished with value: 0.4050991907325124 and parameters: {'xgb_n_estimators': 642, 'xgb_learning_rate': 0.042168288433319184, 'xgb_max_depth': 8, 'lgb_n_estimators': 832, 'lgb_learning_rate': 0.019095749070997176, 'lgb_num_leaves': 27, 'cb_iterations': 810, 'cb_learning_rate': 0.0010332943063330177, 'cb_depth': 10}. Best is trial 7 with value: 0.37915350892587546.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000611 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:17:40,916] Trial 15 finished with value: 0.3859657909762081 and parameters: {'xgb_n_estimators': 329, 'xgb_learning_rate': 0.013668697759183059, 'xgb_max_depth': 8, 'lgb_n_estimators': 653, 'lgb_learning_rate': 0.007905231812625396, 'lgb_num_leaves': 20, 'cb_iterations': 559, 'cb_learning_rate': 0.010822033919292161, 'cb_depth': 9}. Best is trial 7 with value: 0.37915350892587546.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002447 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:18:12,004] Trial 16 finished with value: 0.377341681850683 and parameters: {'xgb_n_estimators': 596, 'xgb_learning_rate': 0.059211400680816414, 'xgb_max_depth': 3, 'lgb_n_estimators': 814, 'lgb_learning_rate': 0.0316271954261077, 'lgb_num_leaves': 30, 'cb_iterations': 998, 'cb_learning_rate': 0.04454872560864225, 'cb_depth': 7}. Best is trial 16 with value: 0.377341681850683.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000942 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:18:34,066] Trial 17 finished with value: 0.37819484199525355 and parameters: {'xgb_n_estimators': 567, 'xgb_learning_rate': 0.05697905922338553, 'xgb_max_depth': 3, 'lgb_n_estimators': 894, 'lgb_learning_rate': 0.03309314397915251, 'lgb_num_leaves': 30, 'cb_iterations': 583, 'cb_learning_rate': 0.0410133015055108, 'cb_depth': 7}. Best is trial 16 with value: 0.377341681850683.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002075 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:18:52,373] Trial 18 finished with value: 0.37879963667326577 and parameters: {'xgb_n_estimators': 596, 'xgb_learning_rate': 0.03278760816439692, 'xgb_max_depth': 3, 'lgb_n_estimators': 873, 'lgb_learning_rate': 0.032309320661599826, 'lgb_num_leaves': 30, 'cb_iterations': 401, 'cb_learning_rate': 0.06698964031031811, 'cb_depth': 7}. Best is trial 16 with value: 0.377341681850683.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001058 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:19:19,655] Trial 19 finished with value: 0.3761819338771623 and parameters: {'xgb_n_estimators': 804, 'xgb_learning_rate': 0.06576998504264364, 'xgb_max_depth': 3, 'lgb_n_estimators': 994, 'lgb_learning_rate': 0.04523350859140388, 'lgb_num_leaves': 48, 'cb_iterations': 576, 'cb_learning_rate': 0.04150659125973083, 'cb_depth': 7}. Best is trial 19 with value: 0.3761819338771623.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000659 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:19:38,425] Trial 20 finished with value: 0.37621794983557477 and parameters: {'xgb_n_estimators': 797, 'xgb_learning_rate': 0.07187631403461314, 'xgb_max_depth': 3, 'lgb_n_estimators': 1000, 'lgb_learning_rate': 0.0550490621944579, 'lgb_num_leaves': 50, 'cb_iterations': 365, 'cb_learning_rate': 0.049673174916656876, 'cb_depth': 5}. Best is trial 19 with value: 0.3761819338771623.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002103 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:19:56,707] Trial 21 finished with value: 0.3765622366126065 and parameters: {'xgb_n_estimators': 842, 'xgb_learning_rate': 0.08111742205081816, 'xgb_max_depth': 3, 'lgb_n_estimators': 978, 'lgb_learning_rate': 0.05656179776422306, 'lgb_num_leaves': 50, 'cb_iterations': 363, 'cb_learning_rate': 0.04673962605466098, 'cb_depth': 4}. Best is trial 19 with value: 0.3761819338771623.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001085 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:20:17,998] Trial 22 finished with value: 0.3873350859267463 and parameters: {'xgb_n_estimators': 996, 'xgb_learning_rate': 0.08108619794356807, 'xgb_max_depth': 5, 'lgb_n_estimators': 999, 'lgb_learning_rate': 0.06834765951771098, 'lgb_num_leaves': 50, 'cb_iterations': 358, 'cb_learning_rate': 0.05496326616242389, 'cb_depth': 4}. Best is trial 19 with value: 0.3761819338771623.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000591 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:20:36,238] Trial 23 finished with value: 0.37892885695144757 and parameters: {'xgb_n_estimators': 800, 'xgb_learning_rate': 0.0346097544048203, 'xgb_max_depth': 3, 'lgb_n_estimators': 1000, 'lgb_learning_rate': 0.052698318378366985, 'lgb_num_leaves': 50, 'cb_iterations': 296, 'cb_learning_rate': 0.030865805489060344, 'cb_depth': 5}. Best is trial 19 with value: 0.3761819338771623.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000954 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:20:54,085] Trial 24 finished with value: 0.3807720460566202 and parameters: {'xgb_n_estimators': 795, 'xgb_learning_rate': 0.06902022947203504, 'xgb_max_depth': 4, 'lgb_n_estimators': 911, 'lgb_learning_rate': 0.048199554609820894, 'lgb_num_leaves': 47, 'cb_iterations': 458, 'cb_learning_rate': 0.01594411940213591, 'cb_depth': 4}. Best is trial 19 with value: 0.3761819338771623.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000725 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:21:14,562] Trial 25 finished with value: 0.3792464428919343 and parameters: {'xgb_n_estimators': 867, 'xgb_learning_rate': 0.025773099540419727, 'xgb_max_depth': 5, 'lgb_n_estimators': 889, 'lgb_learning_rate': 0.07609267926666005, 'lgb_num_leaves': 45, 'cb_iterations': 491, 'cb_learning_rate': 0.05661322024068951, 'cb_depth': 5}. Best is trial 19 with value: 0.3761819338771623.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002018 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:21:33,064] Trial 26 finished with value: 0.3777745090371966 and parameters: {'xgb_n_estimators': 981, 'xgb_learning_rate': 0.04205908409175379, 'xgb_max_depth': 3, 'lgb_n_estimators': 930, 'lgb_learning_rate': 0.04328686010158381, 'lgb_num_leaves': 43, 'cb_iterations': 613, 'cb_learning_rate': 0.015699937089567816, 'cb_depth': 4}. Best is trial 19 with value: 0.3761819338771623.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001123 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:21:51,162] Trial 27 finished with value: 0.381078859318258 and parameters: {'xgb_n_estimators': 766, 'xgb_learning_rate': 0.07866040667270428, 'xgb_max_depth': 4, 'lgb_n_estimators': 830, 'lgb_learning_rate': 0.024429330285318008, 'lgb_num_leaves': 49, 'cb_iterations': 306, 'cb_learning_rate': 0.08445643581809541, 'cb_depth': 6}. Best is trial 19 with value: 0.3761819338771623.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001351 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:22:12,345] Trial 28 finished with value: 0.38095687297388336 and parameters: {'xgb_n_estimators': 848, 'xgb_learning_rate': 0.04578035711084073, 'xgb_max_depth': 5, 'lgb_n_estimators': 995, 'lgb_learning_rate': 0.01305635925934628, 'lgb_num_leaves': 48, 'cb_iterations': 364, 'cb_learning_rate': 0.03334917651401262, 'cb_depth': 5}. Best is trial 19 with value: 0.3761819338771623.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002276 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:22:30,186] Trial 29 finished with value: 0.3821010105788899 and parameters: {'xgb_n_estimators': 920, 'xgb_learning_rate': 0.007645786064833529, 'xgb_max_depth': 3, 'lgb_n_estimators': 806, 'lgb_learning_rate': 0.06736583859295656, 'lgb_num_leaves': 44, 'cb_iterations': 642, 'cb_learning_rate': 0.05219107746881533, 'cb_depth': 4}. Best is trial 19 with value: 0.3761819338771623.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001124 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:22:49,526] Trial 30 finished with value: 0.378378467820336 and parameters: {'xgb_n_estimators': 745, 'xgb_learning_rate': 0.009761869150135976, 'xgb_max_depth': 4, 'lgb_n_estimators': 754, 'lgb_learning_rate': 0.02118744336986677, 'lgb_num_leaves': 40, 'cb_iterations': 496, 'cb_learning_rate': 0.09934283183763311, 'cb_depth': 6}. Best is trial 19 with value: 0.3761819338771623.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000970 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:23:18,881] Trial 31 finished with value: 0.37669947319885416 and parameters: {'xgb_n_estimators': 613, 'xgb_learning_rate': 0.06781522641862052, 'xgb_max_depth': 3, 'lgb_n_estimators': 862, 'lgb_learning_rate': 0.033050047534902934, 'lgb_num_leaves': 47, 'cb_iterations': 811, 'cb_learning_rate': 0.045828827411131506, 'cb_depth': 7}. Best is trial 19 with value: 0.3761819338771623.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001009 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:23:44,431] Trial 32 finished with value: 0.3766898383265211 and parameters: {'xgb_n_estimators': 880, 'xgb_learning_rate': 0.09804636076035975, 'xgb_max_depth': 3, 'lgb_n_estimators': 951, 'lgb_learning_rate': 0.09801929818989855, 'lgb_num_leaves': 47, 'cb_iterations': 833, 'cb_learning_rate': 0.028156105294108936, 'cb_depth': 6}. Best is trial 19 with value: 0.3761819338771623.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000641 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:24:02,178] Trial 33 finished with value: 0.384207662205218 and parameters: {'xgb_n_estimators': 883, 'xgb_learning_rate': 0.09583137254395489, 'xgb_max_depth': 4, 'lgb_n_estimators': 945, 'lgb_learning_rate': 0.09343791899943979, 'lgb_num_leaves': 48, 'cb_iterations': 262, 'cb_learning_rate': 0.028694736289444855, 'cb_depth': 5}. Best is trial 19 with value: 0.3761819338771623.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001112 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:24:18,008] Trial 34 finished with value: 0.3818025812404899 and parameters: {'xgb_n_estimators': 940, 'xgb_learning_rate': 0.09999892098376449, 'xgb_max_depth': 3, 'lgb_n_estimators': 950, 'lgb_learning_rate': 0.06126655452714324, 'lgb_num_leaves': 41, 'cb_iterations': 186, 'cb_learning_rate': 0.016144270831254064, 'cb_depth': 6}. Best is trial 19 with value: 0.3761819338771623.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000913 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:24:40,740] Trial 35 finished with value: 0.3845714929384371 and parameters: {'xgb_n_estimators': 852, 'xgb_learning_rate': 0.044845015941136206, 'xgb_max_depth': 5, 'lgb_n_estimators': 507, 'lgb_learning_rate': 0.08032343544120686, 'lgb_num_leaves': 50, 'cb_iterations': 803, 'cb_learning_rate': 0.06790247821714672, 'cb_depth': 6}. Best is trial 19 with value: 0.3761819338771623.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000989 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:25:00,298] Trial 36 finished with value: 0.3952821498111809 and parameters: {'xgb_n_estimators': 752, 'xgb_learning_rate': 0.002774115080882771, 'xgb_max_depth': 4, 'lgb_n_estimators': 998, 'lgb_learning_rate': 0.05303309755334399, 'lgb_num_leaves': 45, 'cb_iterations': 400, 'cb_learning_rate': 0.03830071025692879, 'cb_depth': 6}. Best is trial 19 with value: 0.3761819338771623.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002396 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:25:20,814] Trial 37 finished with value: 0.37503036442735355 and parameters: {'xgb_n_estimators': 833, 'xgb_learning_rate': 0.07211154924495612, 'xgb_max_depth': 3, 'lgb_n_estimators': 887, 'lgb_learning_rate': 0.09385065548162906, 'lgb_num_leaves': 37, 'cb_iterations': 920, 'cb_learning_rate': 0.021606466822068078, 'cb_depth': 5}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000929 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:25:42,291] Trial 38 finished with value: 0.37813161104410664 and parameters: {'xgb_n_estimators': 811, 'xgb_learning_rate': 0.0257777837233561, 'xgb_max_depth': 4, 'lgb_n_estimators': 862, 'lgb_learning_rate': 0.0404715462697839, 'lgb_num_leaves': 37, 'cb_iterations': 934, 'cb_learning_rate': 0.022929030668370393, 'cb_depth': 5}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000996 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:25:52,809] Trial 39 finished with value: 0.3985361028354106 and parameters: {'xgb_n_estimators': 663, 'xgb_learning_rate': 0.0039111819963425325, 'xgb_max_depth': 5, 'lgb_n_estimators': 287, 'lgb_learning_rate': 0.014176270220722537, 'lgb_num_leaves': 36, 'cb_iterations': 311, 'cb_learning_rate': 0.019990191950648672, 'cb_depth': 4}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001155 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:26:07,336] Trial 40 finished with value: 0.38072316539065 and parameters: {'xgb_n_estimators': 523, 'xgb_learning_rate': 0.07236627706785319, 'xgb_max_depth': 4, 'lgb_n_estimators': 766, 'lgb_learning_rate': 0.07610138437717492, 'lgb_num_leaves': 43, 'cb_iterations': 439, 'cb_learning_rate': 0.012853948870045956, 'cb_depth': 5}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000971 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:26:32,202] Trial 41 finished with value: 0.3759706209810631 and parameters: {'xgb_n_estimators': 893, 'xgb_learning_rate': 0.05839983919900566, 'xgb_max_depth': 3, 'lgb_n_estimators': 939, 'lgb_learning_rate': 0.09543066854764302, 'lgb_num_leaves': 47, 'cb_iterations': 884, 'cb_learning_rate': 0.02618891736689533, 'cb_depth': 6}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000880 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:26:54,533] Trial 42 finished with value: 0.3752386056809724 and parameters: {'xgb_n_estimators': 945, 'xgb_learning_rate': 0.05172518249022579, 'xgb_max_depth': 3, 'lgb_n_estimators': 922, 'lgb_learning_rate': 0.05422091662938704, 'lgb_num_leaves': 46, 'cb_iterations': 894, 'cb_learning_rate': 0.03689463096839152, 'cb_depth': 5}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001020 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:27:15,929] Trial 43 finished with value: 0.37581362498990756 and parameters: {'xgb_n_estimators': 932, 'xgb_learning_rate': 0.05111857896569451, 'xgb_max_depth': 3, 'lgb_n_estimators': 911, 'lgb_learning_rate': 0.0831535693672642, 'lgb_num_leaves': 45, 'cb_iterations': 884, 'cb_learning_rate': 0.03529536470845522, 'cb_depth': 5}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001081 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:27:44,185] Trial 44 finished with value: 0.3786164718557073 and parameters: {'xgb_n_estimators': 931, 'xgb_learning_rate': 0.05050878541460734, 'xgb_max_depth': 4, 'lgb_n_estimators': 909, 'lgb_learning_rate': 0.09673208293664332, 'lgb_num_leaves': 45, 'cb_iterations': 868, 'cb_learning_rate': 0.01959819189781241, 'cb_depth': 6}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000881 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:28:02,083] Trial 45 finished with value: 0.37616561861969466 and parameters: {'xgb_n_estimators': 974, 'xgb_learning_rate': 0.037450542557901735, 'xgb_max_depth': 3, 'lgb_n_estimators': 465, 'lgb_learning_rate': 0.06890957521135531, 'lgb_num_leaves': 40, 'cb_iterations': 912, 'cb_learning_rate': 0.0258188513043142, 'cb_depth': 5}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001010 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:28:21,500] Trial 46 finished with value: 0.3779749814348498 and parameters: {'xgb_n_estimators': 967, 'xgb_learning_rate': 0.03324346791891216, 'xgb_max_depth': 4, 'lgb_n_estimators': 461, 'lgb_learning_rate': 0.0781850156841615, 'lgb_num_leaves': 39, 'cb_iterations': 927, 'cb_learning_rate': 0.02593275749321477, 'cb_depth': 5}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002612 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 20:28:39,769] Trial 47 finished with value: 0.38543693098518916 and parameters: {'xgb_n_estimators': 956, 'xgb_learning_rate': 0.02151419195990064, 'xgb_max_depth': 3, 'lgb_n_estimators': 650, 'lgb_learning_rate': 0.004486990677831667, 'lgb_num_leaves': 33, 'cb_iterations': 760, 'cb_learning_rate': 0.009948704132000748, 'cb_depth': 5}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000770 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:11:48,061] Trial 48 finished with value: 0.3986228511629221 and parameters: {'xgb_n_estimators': 903, 'xgb_learning_rate': 0.001593094967480972, 'xgb_max_depth': 10, 'lgb_n_estimators': 388, 'lgb_learning_rate': 0.06472914439717317, 'lgb_num_leaves': 41, 'cb_iterations': 881, 'cb_learning_rate': 0.033519610342168514, 'cb_depth': 5}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001171 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:12:04,765] Trial 49 finished with value: 0.44593304235906195 and parameters: {'xgb_n_estimators': 100, 'xgb_learning_rate': 0.016307268181975546, 'xgb_max_depth': 4, 'lgb_n_estimators': 334, 'lgb_learning_rate': 0.0016723517699881703, 'lgb_num_leaves': 39, 'cb_iterations': 958, 'cb_learning_rate': 0.008518451908633888, 'cb_depth': 6}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001200 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:12:42,373] Trial 50 finished with value: 0.3831172761201607 and parameters: {'xgb_n_estimators': 995, 'xgb_learning_rate': 0.037968667866155964, 'xgb_max_depth': 9, 'lgb_n_estimators': 470, 'lgb_learning_rate': 0.03772312097215089, 'lgb_num_leaves': 34, 'cb_iterations': 758, 'cb_learning_rate': 0.018102382282198203, 'cb_depth': 5}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001762 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:13:18,350] Trial 51 finished with value: 0.37770314845111774 and parameters: {'xgb_n_estimators': 906, 'xgb_learning_rate': 0.06104849519864132, 'xgb_max_depth': 3, 'lgb_n_estimators': 863, 'lgb_learning_rate': 0.08675789580054097, 'lgb_num_leaves': 43, 'cb_iterations': 909, 'cb_learning_rate': 0.036040778365571985, 'cb_depth': 7}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000563 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:13:41,884] Trial 52 finished with value: 0.3763709909762264 and parameters: {'xgb_n_estimators': 952, 'xgb_learning_rate': 0.05442612874284371, 'xgb_max_depth': 3, 'lgb_n_estimators': 588, 'lgb_learning_rate': 0.04531281408189673, 'lgb_num_leaves': 46, 'cb_iterations': 857, 'cb_learning_rate': 0.023280664443483246, 'cb_depth': 6}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002101 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:14:13,514] Trial 53 finished with value: 0.3772169422615355 and parameters: {'xgb_n_estimators': 828, 'xgb_learning_rate': 0.027856712992703904, 'xgb_max_depth': 3, 'lgb_n_estimators': 919, 'lgb_learning_rate': 0.07243818806574961, 'lgb_num_leaves': 37, 'cb_iterations': 954, 'cb_learning_rate': 0.03914074556251568, 'cb_depth': 7}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000918 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:14:33,473] Trial 54 finished with value: 0.3765711803909281 and parameters: {'xgb_n_estimators': 899, 'xgb_learning_rate': 0.04930083620894466, 'xgb_max_depth': 3, 'lgb_n_estimators': 836, 'lgb_learning_rate': 0.061123522147630456, 'lgb_num_leaves': 42, 'cb_iterations': 770, 'cb_learning_rate': 0.014077997901347292, 'cb_depth': 5}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001923 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:14:59,044] Trial 55 finished with value: 0.3752795230433386 and parameters: {'xgb_n_estimators': 998, 'xgb_learning_rate': 0.03783606006561589, 'xgb_max_depth': 3, 'lgb_n_estimators': 778, 'lgb_learning_rate': 0.026383161762085047, 'lgb_num_leaves': 46, 'cb_iterations': 907, 'cb_learning_rate': 0.06177656167974053, 'cb_depth': 6}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000929 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:15:25,231] Trial 56 finished with value: 0.37700303538855723 and parameters: {'xgb_n_estimators': 963, 'xgb_learning_rate': 0.03635109483300885, 'xgb_max_depth': 4, 'lgb_n_estimators': 781, 'lgb_learning_rate': 0.027879277482446327, 'lgb_num_leaves': 44, 'cb_iterations': 900, 'cb_learning_rate': 0.06360493623965043, 'cb_depth': 6}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000582 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:15:49,934] Trial 57 finished with value: 0.38605721172481344 and parameters: {'xgb_n_estimators': 998, 'xgb_learning_rate': 0.02161845333234651, 'xgb_max_depth': 7, 'lgb_n_estimators': 564, 'lgb_learning_rate': 0.09934584915316833, 'lgb_num_leaves': 46, 'cb_iterations': 849, 'cb_learning_rate': 0.0038810591823688705, 'cb_depth': 4}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002242 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:16:14,039] Trial 58 finished with value: 0.3770207624999957 and parameters: {'xgb_n_estimators': 921, 'xgb_learning_rate': 0.029411456347606658, 'xgb_max_depth': 3, 'lgb_n_estimators': 700, 'lgb_learning_rate': 0.03732469508911112, 'lgb_num_leaves': 38, 'cb_iterations': 952, 'cb_learning_rate': 0.07910153231481866, 'cb_depth': 6}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002931 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:16:35,061] Trial 59 finished with value: 0.4008711176052191 and parameters: {'xgb_n_estimators': 875, 'xgb_learning_rate': 0.03944476677152172, 'xgb_max_depth': 3, 'lgb_n_estimators': 661, 'lgb_learning_rate': 0.08174275720271972, 'lgb_num_leaves': 41, 'cb_iterations': 984, 'cb_learning_rate': 0.0011317462078747516, 'cb_depth': 5}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000633 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:16:50,722] Trial 60 finished with value: 0.39626241093549364 and parameters: {'xgb_n_estimators': 371, 'xgb_learning_rate': 0.056817458336509676, 'xgb_max_depth': 3, 'lgb_n_estimators': 797, 'lgb_learning_rate': 0.001137308895494795, 'lgb_num_leaves': 42, 'cb_iterations': 675, 'cb_learning_rate': 0.02664315045243817, 'cb_depth': 4}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001029 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:17:37,926] Trial 61 finished with value: 0.3771505935854672 and parameters: {'xgb_n_estimators': 949, 'xgb_learning_rate': 0.06759131033568629, 'xgb_max_depth': 3, 'lgb_n_estimators': 894, 'lgb_learning_rate': 0.04835595346994447, 'lgb_num_leaves': 48, 'cb_iterations': 896, 'cb_learning_rate': 0.04145266328129061, 'cb_depth': 8}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000732 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:18:10,188] Trial 62 finished with value: 0.37596478151309265 and parameters: {'xgb_n_estimators': 721, 'xgb_learning_rate': 0.07970192288032496, 'xgb_max_depth': 3, 'lgb_n_estimators': 927, 'lgb_learning_rate': 0.010118113063809853, 'lgb_num_leaves': 49, 'cb_iterations': 824, 'cb_learning_rate': 0.03179792698055542, 'cb_depth': 7}. Best is trial 37 with value: 0.37503036442735355.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001271 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:18:32,272] Trial 63 finished with value: 0.37491112734076737 and parameters: {'xgb_n_estimators': 770, 'xgb_learning_rate': 0.08622142930331655, 'xgb_max_depth': 3, 'lgb_n_estimators': 840, 'lgb_learning_rate': 0.008843919937978402, 'lgb_num_leaves': 49, 'cb_iterations': 825, 'cb_learning_rate': 0.06250641008765537, 'cb_depth': 5}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001161 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:19:37,854] Trial 64 finished with value: 0.37878886306100157 and parameters: {'xgb_n_estimators': 749, 'xgb_learning_rate': 0.08388661769605253, 'xgb_max_depth': 4, 'lgb_n_estimators': 727, 'lgb_learning_rate': 0.00973708787687048, 'lgb_num_leaves': 49, 'cb_iterations': 825, 'cb_learning_rate': 0.06316767094290132, 'cb_depth': 9}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002927 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:20:09,324] Trial 65 finished with value: 0.37963521154158497 and parameters: {'xgb_n_estimators': 710, 'xgb_learning_rate': 0.08419970601138631, 'xgb_max_depth': 4, 'lgb_n_estimators': 842, 'lgb_learning_rate': 0.008575936285662045, 'lgb_num_leaves': 49, 'cb_iterations': 789, 'cb_learning_rate': 0.08478255779566131, 'cb_depth': 7}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002214 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:20:35,111] Trial 66 finished with value: 0.3757017686200603 and parameters: {'xgb_n_estimators': 830, 'xgb_learning_rate': 0.060081121709497184, 'xgb_max_depth': 3, 'lgb_n_estimators': 967, 'lgb_learning_rate': 0.0063841551197576524, 'lgb_num_leaves': 46, 'cb_iterations': 742, 'cb_learning_rate': 0.05609796237557317, 'cb_depth': 6}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002201 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:20:58,115] Trial 67 finished with value: 0.37657318344605567 and parameters: {'xgb_n_estimators': 720, 'xgb_learning_rate': 0.04727460036439969, 'xgb_max_depth': 3, 'lgb_n_estimators': 971, 'lgb_learning_rate': 0.0062878179042972285, 'lgb_num_leaves': 44, 'cb_iterations': 850, 'cb_learning_rate': 0.05490409178810527, 'cb_depth': 5}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001917 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:21:26,445] Trial 68 finished with value: 0.378180379393953 and parameters: {'xgb_n_estimators': 789, 'xgb_learning_rate': 0.07424767994725946, 'xgb_max_depth': 6, 'lgb_n_estimators': 885, 'lgb_learning_rate': 0.0038079992461993377, 'lgb_num_leaves': 45, 'cb_iterations': 722, 'cb_learning_rate': 0.07329454390937866, 'cb_depth': 6}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001765 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:22:07,495] Trial 69 finished with value: 0.37907557014753307 and parameters: {'xgb_n_estimators': 825, 'xgb_learning_rate': 0.0863133930060306, 'xgb_max_depth': 9, 'lgb_n_estimators': 912, 'lgb_learning_rate': 0.005582792335800621, 'lgb_num_leaves': 46, 'cb_iterations': 746, 'cb_learning_rate': 0.04787231666599831, 'cb_depth': 7}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007129 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:22:31,038] Trial 70 finished with value: 0.3770870818621645 and parameters: {'xgb_n_estimators': 681, 'xgb_learning_rate': 0.06268552554264191, 'xgb_max_depth': 3, 'lgb_n_estimators': 823, 'lgb_learning_rate': 0.011393350146141575, 'lgb_num_leaves': 49, 'cb_iterations': 784, 'cb_learning_rate': 0.0322543182877665, 'cb_depth': 5}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001611 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:23:02,863] Trial 71 finished with value: 0.37676686154180367 and parameters: {'xgb_n_estimators': 779, 'xgb_learning_rate': 0.05442414823111858, 'xgb_max_depth': 3, 'lgb_n_estimators': 965, 'lgb_learning_rate': 0.015774692388901876, 'lgb_num_leaves': 47, 'cb_iterations': 877, 'cb_learning_rate': 0.05630085554833696, 'cb_depth': 6}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001273 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:23:29,588] Trial 72 finished with value: 0.37797730072426505 and parameters: {'xgb_n_estimators': 864, 'xgb_learning_rate': 0.06417412598047174, 'xgb_max_depth': 3, 'lgb_n_estimators': 931, 'lgb_learning_rate': 0.002708946756804798, 'lgb_num_leaves': 47, 'cb_iterations': 835, 'cb_learning_rate': 0.029959639920344807, 'cb_depth': 5}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003050 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:24:01,051] Trial 73 finished with value: 0.3764174867393167 and parameters: {'xgb_n_estimators': 830, 'xgb_learning_rate': 0.04343245952192286, 'xgb_max_depth': 3, 'lgb_n_estimators': 936, 'lgb_learning_rate': 0.010444703053375623, 'lgb_num_leaves': 48, 'cb_iterations': 935, 'cb_learning_rate': 0.045516657308376916, 'cb_depth': 6}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002249 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:24:33,284] Trial 74 finished with value: 0.3764288973256739 and parameters: {'xgb_n_estimators': 731, 'xgb_learning_rate': 0.07849932903666657, 'xgb_max_depth': 4, 'lgb_n_estimators': 854, 'lgb_learning_rate': 0.006635822814992977, 'lgb_num_leaves': 46, 'cb_iterations': 1000, 'cb_learning_rate': 0.036441690784807655, 'cb_depth': 6}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002415 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:25:09,098] Trial 75 finished with value: 0.37597800400816656 and parameters: {'xgb_n_estimators': 850, 'xgb_learning_rate': 0.05281784899144417, 'xgb_max_depth': 3, 'lgb_n_estimators': 970, 'lgb_learning_rate': 0.008871391683817769, 'lgb_num_leaves': 49, 'cb_iterations': 819, 'cb_learning_rate': 0.09103173668186645, 'cb_depth': 7}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008032 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:26:15,213] Trial 76 finished with value: 0.3784759658293698 and parameters: {'xgb_n_estimators': 889, 'xgb_learning_rate': 0.08968670655428301, 'xgb_max_depth': 3, 'lgb_n_estimators': 887, 'lgb_learning_rate': 0.020666159535486794, 'lgb_num_leaves': 28, 'cb_iterations': 886, 'cb_learning_rate': 0.05937593779468438, 'cb_depth': 8}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005835 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:26:51,834] Trial 77 finished with value: 0.3775023746977892 and parameters: {'xgb_n_estimators': 924, 'xgb_learning_rate': 0.07560953650178075, 'xgb_max_depth': 4, 'lgb_n_estimators': 900, 'lgb_learning_rate': 0.004631468879169027, 'lgb_num_leaves': 44, 'cb_iterations': 965, 'cb_learning_rate': 0.021509183742752142, 'cb_depth': 5}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002335 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:27:20,813] Trial 78 finished with value: 0.3767525915494698 and parameters: {'xgb_n_estimators': 771, 'xgb_learning_rate': 0.06044386406574868, 'xgb_max_depth': 4, 'lgb_n_estimators': 811, 'lgb_learning_rate': 0.007548589632622354, 'lgb_num_leaves': 48, 'cb_iterations': 727, 'cb_learning_rate': 0.07012426039544989, 'cb_depth': 6}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002141 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:27:44,166] Trial 79 finished with value: 0.37705397052373324 and parameters: {'xgb_n_estimators': 634, 'xgb_learning_rate': 0.04750924757584206, 'xgb_max_depth': 3, 'lgb_n_estimators': 948, 'lgb_learning_rate': 0.08649876234961112, 'lgb_num_leaves': 45, 'cb_iterations': 796, 'cb_learning_rate': 0.04272403784356902, 'cb_depth': 4}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001514 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:28:11,093] Trial 80 finished with value: 0.3821363096408673 and parameters: {'xgb_n_estimators': 862, 'xgb_learning_rate': 0.004785381664810317, 'xgb_max_depth': 3, 'lgb_n_estimators': 778, 'lgb_learning_rate': 0.016617682712844393, 'lgb_num_leaves': 47, 'cb_iterations': 921, 'cb_learning_rate': 0.05256772191443948, 'cb_depth': 5}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000979 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:29:04,722] Trial 81 finished with value: 0.3763831922575658 and parameters: {'xgb_n_estimators': 845, 'xgb_learning_rate': 0.05248031746192329, 'xgb_max_depth': 3, 'lgb_n_estimators': 967, 'lgb_learning_rate': 0.009325070056427232, 'lgb_num_leaves': 50, 'cb_iterations': 820, 'cb_learning_rate': 0.09188065521882437, 'cb_depth': 7}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002582 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:29:49,596] Trial 82 finished with value: 0.37772483729619155 and parameters: {'xgb_n_estimators': 811, 'xgb_learning_rate': 0.04147249142350577, 'xgb_max_depth': 3, 'lgb_n_estimators': 935, 'lgb_learning_rate': 0.01364151329717722, 'lgb_num_leaves': 49, 'cb_iterations': 682, 'cb_learning_rate': 0.07862351798931301, 'cb_depth': 7}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000601 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:30:28,909] Trial 83 finished with value: 0.37518186814192417 and parameters: {'xgb_n_estimators': 909, 'xgb_learning_rate': 0.06855182566014234, 'xgb_max_depth': 3, 'lgb_n_estimators': 877, 'lgb_learning_rate': 0.008510741029440787, 'lgb_num_leaves': 49, 'cb_iterations': 862, 'cb_learning_rate': 0.09699351864185216, 'cb_depth': 7}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000836 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:31:16,820] Trial 84 finished with value: 0.37630377814331145 and parameters: {'xgb_n_estimators': 931, 'xgb_learning_rate': 0.068937084302895, 'xgb_max_depth': 3, 'lgb_n_estimators': 873, 'lgb_learning_rate': 0.010740118635109, 'lgb_num_leaves': 47, 'cb_iterations': 865, 'cb_learning_rate': 0.048741000314577766, 'cb_depth': 8}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002429 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:31:50,906] Trial 85 finished with value: 0.3807202884631346 and parameters: {'xgb_n_estimators': 898, 'xgb_learning_rate': 0.011461044362962268, 'xgb_max_depth': 3, 'lgb_n_estimators': 843, 'lgb_learning_rate': 0.005052189240642348, 'lgb_num_leaves': 48, 'cb_iterations': 936, 'cb_learning_rate': 0.030914407456700634, 'cb_depth': 6}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001258 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:32:31,024] Trial 86 finished with value: 0.3825652498614824 and parameters: {'xgb_n_estimators': 978, 'xgb_learning_rate': 0.09322413753173121, 'xgb_max_depth': 4, 'lgb_n_estimators': 914, 'lgb_learning_rate': 0.011457671177681678, 'lgb_num_leaves': 46, 'cb_iterations': 851, 'cb_learning_rate': 0.062153866155614336, 'cb_depth': 7}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000817 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:32:58,708] Trial 87 finished with value: 0.37653127123190855 and parameters: {'xgb_n_estimators': 916, 'xgb_learning_rate': 0.06107961058961677, 'xgb_max_depth': 3, 'lgb_n_estimators': 728, 'lgb_learning_rate': 0.006227784074637752, 'lgb_num_leaves': 50, 'cb_iterations': 882, 'cb_learning_rate': 0.01803375571388659, 'cb_depth': 5}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001644 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:33:46,527] Trial 88 finished with value: 0.3783945534364694 and parameters: {'xgb_n_estimators': 230, 'xgb_learning_rate': 0.07420003343191452, 'xgb_max_depth': 3, 'lgb_n_estimators': 877, 'lgb_learning_rate': 0.007844428076130531, 'lgb_num_leaves': 32, 'cb_iterations': 906, 'cb_learning_rate': 0.0365011581192611, 'cb_depth': 8}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001018 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:34:16,844] Trial 89 finished with value: 0.378539713537048 and parameters: {'xgb_n_estimators': 946, 'xgb_learning_rate': 0.03116075262827834, 'xgb_max_depth': 4, 'lgb_n_estimators': 982, 'lgb_learning_rate': 0.003958368068438039, 'lgb_num_leaves': 25, 'cb_iterations': 974, 'cb_learning_rate': 0.02509962927207881, 'cb_depth': 6}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001662 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:34:49,238] Trial 90 finished with value: 0.37510850390614764 and parameters: {'xgb_n_estimators': 881, 'xgb_learning_rate': 0.057196198053176155, 'xgb_max_depth': 3, 'lgb_n_estimators': 923, 'lgb_learning_rate': 0.023739673247691422, 'lgb_num_leaves': 46, 'cb_iterations': 779, 'cb_learning_rate': 0.09728024056585118, 'cb_depth': 6}. Best is trial 63 with value: 0.37491112734076737.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.015091 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:35:25,320] Trial 91 finished with value: 0.3739273764215139 and parameters: {'xgb_n_estimators': 880, 'xgb_learning_rate': 0.057117949047589385, 'xgb_max_depth': 3, 'lgb_n_estimators': 911, 'lgb_learning_rate': 0.024799134630891156, 'lgb_num_leaves': 46, 'cb_iterations': 841, 'cb_learning_rate': 0.09910833424921396, 'cb_depth': 6}. Best is trial 91 with value: 0.3739273764215139.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001843 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:36:04,349] Trial 92 finished with value: 0.3758702356451311 and parameters: {'xgb_n_estimators': 886, 'xgb_learning_rate': 0.06663392702779994, 'xgb_max_depth': 3, 'lgb_n_estimators': 912, 'lgb_learning_rate': 0.02560940083241782, 'lgb_num_leaves': 45, 'cb_iterations': 782, 'cb_learning_rate': 0.09317454097750565, 'cb_depth': 6}. Best is trial 91 with value: 0.3739273764215139.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000913 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:36:33,211] Trial 93 finished with value: 0.375867015650176 and parameters: {'xgb_n_estimators': 871, 'xgb_learning_rate': 0.04940656497752107, 'xgb_max_depth': 3, 'lgb_n_estimators': 856, 'lgb_learning_rate': 0.025266179361193842, 'lgb_num_leaves': 43, 'cb_iterations': 647, 'cb_learning_rate': 0.09540508899784175, 'cb_depth': 6}. Best is trial 91 with value: 0.3739273764215139.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000969 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:36:58,654] Trial 94 finished with value: 0.3762114036282257 and parameters: {'xgb_n_estimators': 876, 'xgb_learning_rate': 0.04521156075362466, 'xgb_max_depth': 3, 'lgb_n_estimators': 853, 'lgb_learning_rate': 0.023032922069069865, 'lgb_num_leaves': 43, 'cb_iterations': 695, 'cb_learning_rate': 0.0830916863668877, 'cb_depth': 5}. Best is trial 91 with value: 0.3739273764215139.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001037 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:37:25,487] Trial 95 finished with value: 0.3774365068758976 and parameters: {'xgb_n_estimators': 475, 'xgb_learning_rate': 0.03460800523592928, 'xgb_max_depth': 4, 'lgb_n_estimators': 813, 'lgb_learning_rate': 0.017543059372385938, 'lgb_num_leaves': 46, 'cb_iterations': 638, 'cb_learning_rate': 0.09968521760573962, 'cb_depth': 6}. Best is trial 91 with value: 0.3739273764215139.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000693 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:39:35,962] Trial 96 finished with value: 0.3777809985917512 and parameters: {'xgb_n_estimators': 984, 'xgb_learning_rate': 0.041316694695923986, 'xgb_max_depth': 3, 'lgb_n_estimators': 873, 'lgb_learning_rate': 0.02969362152227773, 'lgb_num_leaves': 44, 'cb_iterations': 743, 'cb_learning_rate': 0.07070959872694094, 'cb_depth': 10}. Best is trial 91 with value: 0.3739273764215139.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000925 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:40:06,970] Trial 97 finished with value: 0.376718469183842 and parameters: {'xgb_n_estimators': 825, 'xgb_learning_rate': 0.04901328652768913, 'xgb_max_depth': 3, 'lgb_n_estimators': 792, 'lgb_learning_rate': 0.020249654000064565, 'lgb_num_leaves': 42, 'cb_iterations': 850, 'cb_learning_rate': 0.0778674187662246, 'cb_depth': 6}. Best is trial 91 with value: 0.3739273764215139.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000988 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:40:29,828] Trial 98 finished with value: 0.3751506901635157 and parameters: {'xgb_n_estimators': 938, 'xgb_learning_rate': 0.056945272885402694, 'xgb_max_depth': 3, 'lgb_n_estimators': 751, 'lgb_learning_rate': 0.02929168632841534, 'lgb_num_leaves': 45, 'cb_iterations': 548, 'cb_learning_rate': 0.08799054558036408, 'cb_depth': 5}. Best is trial 91 with value: 0.3739273764215139.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001294 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


[I 2025-04-22 22:40:45,900] Trial 99 finished with value: 0.3748268804558323 and parameters: {'xgb_n_estimators': 959, 'xgb_learning_rate': 0.05826274152197808, 'xgb_max_depth': 3, 'lgb_n_estimators': 131, 'lgb_learning_rate': 0.035243298029715125, 'lgb_num_leaves': 45, 'cb_iterations': 541, 'cb_learning_rate': 0.08633704227797256, 'cb_depth': 5}. Best is trial 91 with value: 0.3739273764215139.


In [21]:
# Train all models
xgb_model, xgb_auc, xgb_loss = train_xgboost(X_train_final, y_train_final, X_val, y_val)
lgb_model, lgb_auc, lgb_loss = train_lightgbm(X_train_final, y_train_final, X_val, y_val)
cb_model, cb_auc, cb_loss = train_catboost(X_train_final, y_train_final, X_val, y_val)


Training XGBoost model...
XGBoost - Validation AUC: 0.9041, Log Loss: 0.3744

Training LightGBM model...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003169 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950
LightGBM - Validation AUC: 0.9005, Log Loss: 0.3802

Training CatBoost model...
CatBoost - Validation AUC: 0.8962, Log Loss: 0.3872


In [22]:
# Create ensemble weights based on validation performance
# We use inverse of log loss as weight (lower loss = higher weight)
weights = np.array([1/xgb_loss, 1/lgb_loss, 1/cb_loss])
weights = weights / weights.sum()  # Normalize to sum to 1
print(f"\nEnsemble weights: XGBoost={weights[0]:.3f}, LightGBM={weights[1]:.3f}, CatBoost={weights[2]:.3f}")


Ensemble weights: XGBoost=0.339, LightGBM=0.334, CatBoost=0.328


In [51]:
# best params

# Get best hyperparameters
best_params = study.best_params

# Extract hyperparameters for each model
best_xgb_params = {k: v for k, v in best_params.items() if k.startswith("xgb_")}
best_lgb_params = {k: v for k, v in best_params.items() if k.startswith("lgb_")}
best_cb_params = {k: v for k, v in best_params.items() if k.startswith("cb_")}

# Retrain models with best hyperparameters
xgb_model = xgb.XGBClassifier(**best_xgb_params)
xgb_model.fit(X_train_final, y_train_final, eval_set=[(X_val, y_val)], verbose=False)  # Train on full training data

lgb_model = lgb.LGBMClassifier(**best_lgb_params)
lgb_model.fit(X_train_final, y_train_final, eval_set=[(X_val, y_val)])  # Train on full training data

cb_model = cb.CatBoostClassifier(**best_cb_params, verbose=False)
cb_model.fit(X_train_final, y_train_final, eval_set=[(X_val, y_val)], verbose=False)  # Train on full training data

# Calculate individual model losses (for weights)
xgb_loss = log_loss(y_val, xgb_model.predict_proba(X_val))
lgb_loss = log_loss(y_val, lgb_model.predict_proba(X_val))
cb_loss = log_loss(y_val, cb_model.predict_proba(X_val))

# Calculate ensemble weights
weights = np.array([1/xgb_loss, 1/lgb_loss, 1/cb_loss])
weights = weights / weights.sum()  # Normalize

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001078 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2598
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 26
[LightGBM] [Info] Start training from score -0.395391
[LightGBM] [Info] Start training from score -3.722781
[LightGBM] [Info] Start training from score -1.195950


TypeError: CatBoostClassifier.__init__() got an unexpected keyword argument 'cb_iterations'

In [None]:
# Make predictions on test set
num_classes = len(label_encoder.classes_)
if num_classes == 2:
    test_pred_xgb = xgb_model.predict_proba(X_test_processed)[:, 1]
    test_pred_lgb = lgb_model.predict_proba(X_test_processed)[:, 1]
    test_pred_cb = cb_model.predict_proba(X_test_processed)[:, 1]

    test_pred_ensemble = (
        weights[0] * test_pred_xgb +
        weights[1] * test_pred_lgb +
        weights[2] * test_pred_cb
    )

    # Convert to class probabilities
    test_pred_probs = np.column_stack((1 - test_pred_ensemble, test_pred_ensemble))
else:
    test_pred_xgb = xgb_model.predict_proba(X_test_processed)
    test_pred_lgb = lgb_model.predict_proba(X_test_processed)
    test_pred_cb = cb_model.predict_proba(X_test_processed)

    test_pred_ensemble = (
        weights[0] * test_pred_xgb +
        weights[1] * test_pred_lgb +
        weights[2] * test_pred_cb
    )

    test_pred_probs = test_pred_ensemble

In [None]:
# Create the submission DataFrame matching the required format
results_df = pd.DataFrame({
    'id': test_indices  # Use the original indices from the test data
})

# Add probability columns for each class with the exact column names from the submission format
for i, class_name in enumerate(label_encoder.classes_):
    results_df[f'Status_{class_name}'] = test_pred_probs[:, i]

# Sort by original index to maintain original order
results_df = results_df.sort_values('id').reset_index(drop=True)

# Save predictions to CSV
results_df.to_csv('ensemble_predictions_v3.csv', index=False)
print("\nPredictions saved to 'ensemble_predictions_v3.csv'")

In [23]:
num_classes = len(label_encoder.classes_)
if num_classes == 2:
    val_pred_xgb = xgb_model.predict_proba(X_val)[:, 1]
    val_pred_lgb = lgb_model.predict_proba(X_val)[:, 1]
    val_pred_cb = cb_model.predict_proba(X_val)[:, 1]

    val_pred_ensemble = (
        weights[0] * val_pred_xgb +
        weights[1] * val_pred_lgb +
        weights[2] * val_pred_cb
    )

    # Evaluate ensemble
    ensemble_auc = roc_auc_score(y_val, val_pred_ensemble)
    # For log loss we need probabilities for both classes
    ensemble_probs = np.column_stack((1 - val_pred_ensemble, val_pred_ensemble))
    ensemble_loss = log_loss(y_val, ensemble_probs)
else:
    val_pred_xgb = xgb_model.predict_proba(X_val)
    val_pred_lgb = lgb_model.predict_proba(X_val)
    val_pred_cb = cb_model.predict_proba(X_val)

    val_pred_ensemble = (
        weights[0] * val_pred_xgb +
        weights[1] * val_pred_lgb +
        weights[2] * val_pred_cb
    )

    # Evaluate ensemble
    ensemble_auc = roc_auc_score(y_val, val_pred_ensemble, multi_class='ovr')
    ensemble_loss = log_loss(y_val, val_pred_ensemble)

print(f"\nEnsemble - Validation AUC: {ensemble_auc:.4f}, Log Loss: {ensemble_loss:.4f}")



Ensemble - Validation AUC: 0.9049, Log Loss: 0.3757


In [24]:
# Make predictions on test set
if num_classes == 2:
    test_pred_xgb = xgb_model.predict_proba(X_test_processed)[:, 1]
    test_pred_lgb = lgb_model.predict_proba(X_test_processed)[:, 1]
    test_pred_cb = cb_model.predict_proba(X_test_processed)[:, 1]

    test_pred_ensemble = (
        weights[0] * test_pred_xgb +
        weights[1] * test_pred_lgb +
        weights[2] * test_pred_cb
    )

    # Convert to class probabilities
    test_pred_probs = np.column_stack((1 - test_pred_ensemble, test_pred_ensemble))
else:
    test_pred_xgb = xgb_model.predict_proba(X_test_processed)
    test_pred_lgb = lgb_model.predict_proba(X_test_processed)
    test_pred_cb = cb_model.predict_proba(X_test_processed)

    test_pred_ensemble = (
        weights[0] * test_pred_xgb +
        weights[1] * test_pred_lgb +
        weights[2] * test_pred_cb
    )

    test_pred_probs = test_pred_ensemble

# Create the submission DataFrame matching the required format
results_df = pd.DataFrame({
    'id': test_indices
})

# Add probability columns for each class with the exact column names from the submission format
for i, class_name in enumerate(label_encoder.classes_):
    results_df[f'Status_{class_name}'] = test_pred_probs[:, i]

# Sort by original index to maintain original order
results_df = results_df.sort_values('id').reset_index(drop=True)


In [27]:
results_df = pd.DataFrame({
    'id': range(15000, 25000) # IDs from 15000 to 24999
})

# Add probability columns for each class with the exact column names from the submission format
for i, class_name in enumerate(label_encoder.classes_):
    results_df[f'Status_{class_name}'] = test_pred_probs[:, i]

In [28]:
print("\nFirst few predictions:")
print(results_df.head())


First few predictions:
      id  Status_C  Status_CL  Status_D
0  15000  0.834856   0.013206  0.151938
1  15001  0.672774   0.011871  0.315355
2  15002  0.945995   0.027247  0.026758
3  15003  0.352355   0.579074  0.068571
4  15004  0.204372   0.007669  0.787960


In [29]:
# Save predictions to CSV
results_df.to_csv('ensemble_predictions.csv', index=False)
print("\nPredictions saved to 'ensemble_predictions.csv' with columns:", results_df.columns.tolist())


Predictions saved to 'ensemble_predictions.csv' with columns: ['id', 'Status_C', 'Status_CL', 'Status_D']


In [None]:
# Create feature importance visualization for the base models
def print_feature_importance(model, model_name, feature_names):
    if hasattr(model, 'feature_importances_'):
        importances = model.feature_importances_
        indices = np.argsort(importances)[::-1]

        print(f"\nTop 10 features for {model_name}:")
        for i in range(min(10, len(feature_names))):
            idx = indices[i]
            if idx < len(feature_names):
                print(f"{feature_names[idx]}: {importances[idx]:.4f}")

# Print feature importances
print_feature_importance(xgb_model, "XGBoost", feature_names)
print_feature_importance(lgb_model, "LightGBM", feature_names)
print_feature_importance(cb_model, "CatBoost", feature_names)

In [None]:
# Cross-validation for more robust evaluation
def cross_validate_ensemble(X, y, n_splits=5):
    print("\nPerforming cross-validation...")
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    cv_aucs = []
    cv_losses = []

    for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
        print(f"Fold {fold+1}/{n_splits}")
        X_train_fold, X_val_fold = X[train_idx], X[val_idx]
        y_train_fold, y_val_fold = y[train_idx], y[val_idx]

        # Train models
        xgb_model, xgb_auc, xgb_loss = train_xgboost(X_train_fold, y_train_fold, X_val_fold, y_val_fold)
        lgb_model, lgb_auc, lgb_loss = train_lightgbm(X_train_fold, y_train_fold, X_val_fold, y_val_fold)
        cb_model, cb_auc, cb_loss = train_catboost(X_train_fold, y_train_fold, X_val_fold, y_val_fold)

        # Weight models
        weights = np.array([1/xgb_loss, 1/lgb_loss, 1/cb_loss])
        weights = weights / weights.sum()

        # Make ensemble prediction
        num_classes = len(np.unique(y))
        if num_classes == 2:
            val_pred_xgb = xgb_model.predict_proba(X_val_fold)[:, 1]
            val_pred_lgb = lgb_model.predict_proba(X_val_fold)[:, 1]
            val_pred_cb = cb_model.predict_proba(X_val_fold)[:, 1]

            val_pred_ensemble = (
                weights[0] * val_pred_xgb +
                weights[1] * val_pred_lgb +
                weights[2] * val_pred_cb
            )

            # Evaluate ensemble
            fold_auc = roc_auc_score(y_val_fold, val_pred_ensemble)
            # For log loss we need probabilities for both classes
            ensemble_probs = np.column_stack((1 - val_pred_ensemble, val_pred_ensemble))
            fold_loss = log_loss(y_val_fold, ensemble_probs)
        else:
            val_pred_xgb = xgb_model.predict_proba(X_val_fold)
            val_pred_lgb = lgb_model.predict_proba(X_val_fold)
            val_pred_cb = cb_model.predict_proba(X_val_fold)

            val_pred_ensemble = (
                weights[0] * val_pred_xgb +
                weights[1] * val_pred_lgb +
                weights[2] * val_pred_cb
            )

            # Evaluate ensemble
            fold_auc = roc_auc_score(y_val_fold, val_pred_ensemble, multi_class='ovr')
            fold_loss = log_loss(y_val_fold, val_pred_ensemble)

        cv_aucs.append(fold_auc)
        cv_losses.append(fold_loss)
        print(f"Fold {fold+1} - AUC: {fold_auc:.4f}, Log Loss: {fold_loss:.4f}")

    print(f"\nCross-validation results:")
    print(f"Mean AUC: {np.mean(cv_aucs):.4f} ± {np.std(cv_aucs):.4f}")
    print(f"Mean Log Loss: {np.mean(cv_losses):.4f} ± {np.std(cv_losses):.4f}")


In [None]:
# Perform cross-validation
cross_validate_ensemble(X_train_processed, y_encoded)

print("\nModel training and evaluation complete!")