NCR Ride Bookings Preprocessing

In [1]:
pip install mlflow optuna joblib lightgbm shap



In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import lightgbm as lgb
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error
import mlflow
import mlflow.lightgbm
import mlflow.xgboost
import optuna
import joblib
import logging
import shap

In [3]:
# ---------------------------- CONFIG ----------------------------------
CONFIG = {
    "ride_data_path": "ncr_ride_bookings.csv",
    "weather_data_path": "kaggel_weather_2013_to_2024.csv",
    "surge_threshold_quantile": 0.75,
    "random_state": 42,
    "test_size": 0.2
}

logging.basicConfig(level=logging.INFO, format='[%(levelname)s] %(message)s')
logger = logging.getLogger(__name__)

In [4]:
# ---------------------------- DATA PREPROCESSING -----------------------
def preprocess_ride_data(path):
    logger.info("Loading and preprocessing ride data...")
    df = pd.read_csv(path)
    df.drop(["Booking ID", "Customer ID"], axis=1, inplace=True)

    df['Original_Date'] = pd.to_datetime(df['Date'])
    df['Datetime'] = pd.to_datetime(df['Date'] + ' ' + df['Time'])
    df['Hour'] = df['Datetime'].dt.hour
    df['Day'] = df['Datetime'].dt.day
    df['Month'] = df['Datetime'].dt.month
    df['Weekday'] = df['Datetime'].dt.dayofweek
    df['Is_Weekend'] = df['Weekday'].isin([5, 6]).astype(int)
    df.drop(['Date', 'Time', 'Datetime'], axis=1, inplace=True)

    for col in ['Avg VTAT', 'Avg CTAT', 'Booking Value', 'Ride Distance']:
        df[col] = df.groupby('Vehicle Type')[col].transform(lambda x: x.fillna(x.median()))
    for col in ['Driver Ratings', 'Customer Rating']:
        df[col] = df.groupby('Vehicle Type')[col].transform(lambda x: x.fillna(x.mean()))

    df['Cancelled Rides by Customer'] = df['Cancelled Rides by Customer'].fillna(0)
    df['Cancelled Rides by Driver'] = df['Cancelled Rides by Driver'].fillna(0)
    df['Incomplete Rides'] = df['Incomplete Rides'].fillna(0)
    df['Reason for cancelling by Customer'] = df['Reason for cancelling by Customer'].fillna("No Cancellation")
    df['Driver Cancellation Reason'] = df['Driver Cancellation Reason'].fillna("No Cancellation")
    df['Incomplete Rides Reason'] = df['Incomplete Rides Reason'].fillna("Completed")
    df['Payment Method'] = df['Payment Method'].fillna("Unknown")

    fare_threshold = df['Booking Value'].quantile(CONFIG["surge_threshold_quantile"])
    df['Is_Surge'] = (df['Booking Value'] > fare_threshold).astype(int)

    le = LabelEncoder()
    string_cat_cols = ['Vehicle Type', 'Pickup Location', 'Drop Location', 'Payment Method']
    for col in string_cat_cols:
        df[col + '_Encoded'] = le.fit_transform(df[col])
        df.drop(col, axis=1, inplace=True)

    other_cat_cols = ['Reason for cancelling by Customer', 'Driver Cancellation Reason',
                      'Incomplete Rides Reason', 'Booking Status', 'Is_Surge']
    for col in other_cat_cols:
        df[col + '_Encoded'] = le.fit_transform(df[col].astype(str))
        df.drop(col, axis=1, inplace=True)

    logger.info(f"Ride data preprocessing complete. Shape: {df.shape}")
    return df

In [5]:
def preprocess_weather_data(path):
    logger.info("Loading and cleaning weather data...")
    weather_df = pd.read_csv(path)
    if 'Unnamed: 0' in weather_df.columns:
        weather_df.drop(columns=['Unnamed: 0'], inplace=True)
    weather_df['DATE'] = pd.to_datetime(weather_df['DATE'])
    weather_df.fillna(method='ffill', inplace=True)
    weather_df.columns = weather_df.columns.str.strip().str.lower().str.replace(' ', '_')
    logger.info(f"Weather data cleaned. Shape: {weather_df.shape}")
    return weather_df

In [6]:
def merge_and_scale(df, weather_df):
    logger.info("Merging ride and weather data...")
    weather_2024 = weather_df[weather_df['year'] == 2024].copy()
    merged_df = pd.merge(df, weather_2024, left_on='Original_Date', right_on='date', how='left')
    merged_df.drop(columns=['date'], inplace=True)

    weather_columns = [c for c in weather_df.columns if c not in ['date']]
    merged_df.sort_values(by='Original_Date', inplace=True)
    merged_df[weather_columns] = merged_df[weather_columns].ffill()

    num_cols = merged_df.select_dtypes(include=['int64', 'float64']).columns.tolist()
    exclude_from_scaling = ['year', 'month_y', 'day_y']
    num_cols = [col for col in num_cols if col not in exclude_from_scaling]

    scaler = StandardScaler()
    merged_df[num_cols] = scaler.fit_transform(merged_df[num_cols])

    joblib.dump(scaler, "scaler.pkl")
    logger.info("Scaling complete and scaler saved as scaler.pkl")

    merged_df.to_csv('preprocessed_ride_data.csv', index=False)
    logger.info(f"Merged preprocessed data saved. Shape: {merged_df.shape}")
    return merged_df


In [7]:
# ---------------------------- MODEL TRAINING ---------------------------
def train_lightgbm(df):
    logger.info("Training LightGBM model...")

    X = df.drop(['Booking Value', 'Original_Date'], axis=1)
    y = df['Booking Value']

    # Handle any object columns that might have come from weather (like 'conditions')
    object_cols = X.select_dtypes(include=['object']).columns
    if len(object_cols) > 0:
        logger.warning(f"Encoding object columns: {list(object_cols)}")
        for col in object_cols:
            X[col] = X[col].astype('category').cat.codes

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=CONFIG["test_size"], random_state=CONFIG["random_state"]
    )

    def objective(trial):
        params = {
            'objective': 'regression_l1',
            'metric': 'rmse',
            'n_estimators': trial.suggest_int('n_estimators', 500, 2000),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
            'num_leaves': trial.suggest_int('num_leaves', 20, 300),
            'max_depth': trial.suggest_int('max_depth', 3, 12),
            'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
            'subsample': trial.suggest_float('subsample', 0.6, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
            'random_state': CONFIG["random_state"],
            'n_jobs': -1,
        }
        model = lgb.LGBMRegressor(**params)
        model.fit(X_train, y_train, eval_set=[(X_test, y_test)],
                  callbacks=[lgb.early_stopping(100, verbose=False)])
        preds = model.predict(X_test)
        rmse = np.sqrt(mean_squared_error(y_test, preds))
        return rmse

    # --------------------- Hyperparameter Tuning ---------------------
    study = optuna.create_study(direction='minimize')
    study.optimize(objective, n_trials=50)
    best_params = study.best_params

    # --------------------- Model Training ---------------------
    mlflow.set_experiment("Ride Surge Prediction")
    with mlflow.start_run(run_name="Tuned LightGBM"):
        mlflow.log_params(best_params)

        model = lgb.LGBMRegressor(**best_params, random_state=CONFIG["random_state"])
        model.fit(X_train, y_train)
        preds = model.predict(X_test)
        rmse = np.sqrt(mean_squared_error(y_test, preds))
        mlflow.log_metric("rmse", rmse)
        mlflow.lightgbm.log_model(model, "lgbm-ride-surge-model")

        # --------------------- Explainable AI (SHAP) ---------------------
        logger.info("Generating SHAP explainability plots...")
        import shap
        import matplotlib.pyplot as plt

        # Create SHAP explainer
        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(X_test)

        # Global Feature Importance Plot
        shap.summary_plot(shap_values, X_test, plot_type="bar", show=False)
        plt.title("Feature Importance (SHAP)")
        plt.tight_layout()
        plt.savefig("shap_summary_bar.png", dpi=200)
        plt.close()

        # Detailed Beeswarm Plot
        shap.summary_plot(shap_values, X_test, show=False)
        plt.title("SHAP Feature Impact (Beeswarm)")
        plt.tight_layout()
        plt.savefig("shap_summary_beeswarm.png", dpi=200)
        plt.close()

        # Log SHAP plots in MLflow
        mlflow.log_artifact("shap_summary_bar.png")
        mlflow.log_artifact("shap_summary_beeswarm.png")

        logger.info("SHAP explainability plots generated and logged to MLflow.")

    # --------------------- Save Model ---------------------
    joblib.dump(model, "lgbm_model.pkl")
    logger.info(f"LightGBM training complete. RMSE: {rmse:.3f}. Model saved as lgbm_model.pkl")

    return model, rmse

In [8]:
def train_xgboost(df):
    logger.info("Training XGBoost model...")

    X = df.drop(['Booking Value', 'Original_Date'], axis=1)
    y = df['Booking Value']

    # Handle any object columns that might have come from weather (like 'conditions')
    object_cols = X.select_dtypes(include=['object']).columns
    if len(object_cols) > 0:
        logger.warning(f"Encoding object columns: {list(object_cols)}")
        for col in object_cols:
            X[col] = X[col].astype('category').cat.codes

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=CONFIG["test_size"], random_state=CONFIG["random_state"]
    )

    # --------------------- Hyperparameter Tuning ---------------------
    def objective_xgb(trial):
        params = {
            'objective': 'reg:squarederror',
            'eval_metric': 'rmse',
            'n_estimators': trial.suggest_int('n_estimators', 500, 2500),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
            'max_depth': trial.suggest_int('max_depth', 3, 12),
            'subsample': trial.suggest_float('subsample', 0.5, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
            'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
            'gamma': trial.suggest_float('gamma', 0, 0.5),
            'random_state': CONFIG["random_state"],
            'n_jobs': -1,
            'tree_method': 'hist',
            'enable_categorical': True
        }
        model = xgb.XGBRegressor(**params)
        model.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)
        preds = model.predict(X_test)
        rmse = np.sqrt(mean_squared_error(y_test, preds))
        return rmse

    study_xgb = optuna.create_study(direction='minimize')
    study_xgb.optimize(objective_xgb, n_trials=50)
    best_params = study_xgb.best_params

    # --------------------- Model Training ---------------------
    mlflow.set_experiment("Ride Surge Prediction")
    with mlflow.start_run(run_name="Tuned XGBoost"):
        mlflow.log_params(best_params)

        model = xgb.XGBRegressor(**best_params, random_state=CONFIG["random_state"],
                                 tree_method='hist', enable_categorical=True)
        model.fit(X_train, y_train)
        preds = model.predict(X_test)
        rmse = np.sqrt(mean_squared_error(y_test, preds))
        mlflow.log_metric("rmse", rmse)
        mlflow.xgboost.log_model(model, "xgb-ride-surge-model")

        # --------------------- Explainable AI (SHAP) ---------------------
        logger.info("Generating SHAP explainability plots for XGBoost...")
        import shap
        import matplotlib.pyplot as plt

        # Create SHAP explainer (TreeExplainer is best for XGBoost)
        explainer = shap.Explainer(model)
        shap_values = explainer(X_test)

        # Global Feature Importance (bar plot)
        shap.summary_plot(shap_values, X_test, plot_type="bar", show=False)
        plt.title("XGBoost Feature Importance (SHAP)")
        plt.tight_layout()
        plt.savefig("xgb_shap_summary_bar.png", dpi=200)
        plt.close()

        # Beeswarm (detailed impact)
        shap.summary_plot(shap_values, X_test, show=False)
        plt.title("XGBoost SHAP Feature Impact (Beeswarm)")
        plt.tight_layout()
        plt.savefig("xgb_shap_summary_beeswarm.png", dpi=200)
        plt.close()

        # Log plots in MLflow
        mlflow.log_artifact("xgb_shap_summary_bar.png")
        mlflow.log_artifact("xgb_shap_summary_beeswarm.png")

        logger.info("SHAP explainability plots for XGBoost saved and logged in MLflow.")

    # --------------------- Save Model ---------------------
    joblib.dump(model, "xgb_model.pkl")
    logger.info(f"XGBoost training complete. RMSE: {rmse:.3f}. Model saved as xgb_model.pkl")

    return model, rmse

In [9]:
# ---------------------------- MAIN ------------------------------------
if __name__ == "__main__":
    ride_df = preprocess_ride_data(CONFIG["ride_data_path"])
    weather_df = preprocess_weather_data(CONFIG["weather_data_path"])
    merged_df = merge_and_scale(ride_df, weather_df)

    lgb_model, lgb_rmse = train_lightgbm(merged_df)
    xgb_model, xgb_rmse = train_xgboost(merged_df)

    logger.info(f"✅ All training complete. LGBM RMSE: {lgb_rmse:.3f}, XGB RMSE: {xgb_rmse:.3f}")


  weather_df.fillna(method='ffill', inplace=True)
[I 2025-10-15 13:52:44,722] A new study created in memory with name: no-name-55b34eea-99aa-4e38-b121-bd50cd88b58c


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.060120 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:52:50,751] Trial 0 finished with value: 0.6789914229889819 and parameters: {'n_estimators': 1054, 'learning_rate': 0.22921357096402842, 'num_leaves': 245, 'max_depth': 4, 'min_child_samples': 5, 'subsample': 0.9622098639357802, 'colsample_bytree': 0.8619240745014032}. Best is trial 0 with value: 0.6789914229889819.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.058547 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:53:00,073] Trial 1 finished with value: 0.6822567608514288 and parameters: {'n_estimators': 1635, 'learning_rate': 0.15306142057089406, 'num_leaves': 288, 'max_depth': 8, 'min_child_samples': 21, 'subsample': 0.6174946026886398, 'colsample_bytree': 0.6308398378626963}. Best is trial 0 with value: 0.6789914229889819.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.060234 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:53:05,810] Trial 2 finished with value: 0.6829493442977449 and parameters: {'n_estimators': 1148, 'learning_rate': 0.20225256633109207, 'num_leaves': 199, 'max_depth': 10, 'min_child_samples': 59, 'subsample': 0.89892741514607, 'colsample_bytree': 0.851309759272661}. Best is trial 0 with value: 0.6789914229889819.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.067386 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:53:15,679] Trial 3 finished with value: 0.6810701491720852 and parameters: {'n_estimators': 1465, 'learning_rate': 0.27026555647616657, 'num_leaves': 295, 'max_depth': 6, 'min_child_samples': 11, 'subsample': 0.8898651311335135, 'colsample_bytree': 0.8302109911634186}. Best is trial 0 with value: 0.6789914229889819.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.070205 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:53:39,685] Trial 4 finished with value: 0.6825030326739552 and parameters: {'n_estimators': 664, 'learning_rate': 0.03358697785720331, 'num_leaves': 255, 'max_depth': 11, 'min_child_samples': 70, 'subsample': 0.6462998838258279, 'colsample_bytree': 0.7390892810498583}. Best is trial 0 with value: 0.6789914229889819.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.059498 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:54:02,829] Trial 5 finished with value: 0.6810355738664351 and parameters: {'n_estimators': 966, 'learning_rate': 0.025840286151553322, 'num_leaves': 20, 'max_depth': 3, 'min_child_samples': 46, 'subsample': 0.939209362203651, 'colsample_bytree': 0.8596242993183845}. Best is trial 0 with value: 0.6789914229889819.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.058077 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:54:10,309] Trial 6 finished with value: 0.681728172743899 and parameters: {'n_estimators': 1068, 'learning_rate': 0.11037303071929137, 'num_leaves': 84, 'max_depth': 11, 'min_child_samples': 44, 'subsample': 0.6789127289347221, 'colsample_bytree': 0.8209062467539743}. Best is trial 0 with value: 0.6789914229889819.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.066498 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:54:22,310] Trial 7 finished with value: 0.6812270550535339 and parameters: {'n_estimators': 1652, 'learning_rate': 0.1010553359952022, 'num_leaves': 31, 'max_depth': 3, 'min_child_samples': 68, 'subsample': 0.8760859660601273, 'colsample_bytree': 0.9366871059666615}. Best is trial 0 with value: 0.6789914229889819.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.036713 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:54:31,122] Trial 8 finished with value: 0.6833376422212157 and parameters: {'n_estimators': 1728, 'learning_rate': 0.17016139854434348, 'num_leaves': 157, 'max_depth': 12, 'min_child_samples': 85, 'subsample': 0.7309562775419846, 'colsample_bytree': 0.9575673144382612}. Best is trial 0 with value: 0.6789914229889819.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.057487 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:54:49,751] Trial 9 finished with value: 0.6818825793122617 and parameters: {'n_estimators': 1192, 'learning_rate': 0.03688325106382562, 'num_leaves': 219, 'max_depth': 10, 'min_child_samples': 79, 'subsample': 0.9025052340595133, 'colsample_bytree': 0.6773977692566235}. Best is trial 0 with value: 0.6789914229889819.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.057540 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:54:56,963] Trial 10 finished with value: 0.679465621227508 and parameters: {'n_estimators': 564, 'learning_rate': 0.2971953672275913, 'num_leaves': 143, 'max_depth': 5, 'min_child_samples': 100, 'subsample': 0.9939280324641151, 'colsample_bytree': 0.7304123862033373}. Best is trial 0 with value: 0.6789914229889819.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.061493 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:55:09,276] Trial 11 finished with value: 0.6792086979064754 and parameters: {'n_estimators': 577, 'learning_rate': 0.28614749223266306, 'num_leaves': 153, 'max_depth': 5, 'min_child_samples': 99, 'subsample': 0.9893235404808816, 'colsample_bytree': 0.7389295505583445}. Best is trial 0 with value: 0.6789914229889819.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.057651 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:55:17,121] Trial 12 finished with value: 0.6791365870213055 and parameters: {'n_estimators': 830, 'learning_rate': 0.24084245360453962, 'num_leaves': 117, 'max_depth': 5, 'min_child_samples': 27, 'subsample': 0.8205095173046055, 'colsample_bytree': 0.7498510724720003}. Best is trial 0 with value: 0.6789914229889819.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.058923 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:55:24,192] Trial 13 finished with value: 0.6807096968467686 and parameters: {'n_estimators': 819, 'learning_rate': 0.23260536672577972, 'num_leaves': 96, 'max_depth': 7, 'min_child_samples': 26, 'subsample': 0.790534598548681, 'colsample_bytree': 0.9027347401245102}. Best is trial 0 with value: 0.6789914229889819.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.060397 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:55:29,412] Trial 14 finished with value: 0.678231393199787 and parameters: {'n_estimators': 1386, 'learning_rate': 0.2348117164684247, 'num_leaves': 217, 'max_depth': 4, 'min_child_samples': 5, 'subsample': 0.8081669211909257, 'colsample_bytree': 0.9956816119714615}. Best is trial 14 with value: 0.678231393199787.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.059691 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:55:37,877] Trial 15 finished with value: 0.678869320078522 and parameters: {'n_estimators': 1421, 'learning_rate': 0.21593620966315594, 'num_leaves': 210, 'max_depth': 4, 'min_child_samples': 8, 'subsample': 0.7983745032697196, 'colsample_bytree': 0.9845387814124766}. Best is trial 14 with value: 0.678231393199787.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.061700 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:55:43,198] Trial 16 finished with value: 0.6814447982834151 and parameters: {'n_estimators': 1432, 'learning_rate': 0.1857349726753391, 'num_leaves': 197, 'max_depth': 8, 'min_child_samples': 36, 'subsample': 0.7897856968549342, 'colsample_bytree': 0.999519664418535}. Best is trial 14 with value: 0.678231393199787.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.059084 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:55:55,469] Trial 17 finished with value: 0.6785112627539949 and parameters: {'n_estimators': 1948, 'learning_rate': 0.13054885354974866, 'num_leaves': 196, 'max_depth': 4, 'min_child_samples': 11, 'subsample': 0.7306401543930342, 'colsample_bytree': 0.9995191190266283}. Best is trial 14 with value: 0.678231393199787.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.082318 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:56:04,156] Trial 18 finished with value: 0.6807540990594735 and parameters: {'n_estimators': 1974, 'learning_rate': 0.12299161904484333, 'num_leaves': 181, 'max_depth': 7, 'min_child_samples': 20, 'subsample': 0.7212118125542886, 'colsample_bytree': 0.9227257901223626}. Best is trial 14 with value: 0.678231393199787.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.060207 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:56:11,599] Trial 19 finished with value: 0.6803369711014796 and parameters: {'n_estimators': 1799, 'learning_rate': 0.07847964699032572, 'num_leaves': 244, 'max_depth': 6, 'min_child_samples': 34, 'subsample': 0.7388851137101318, 'colsample_bytree': 0.8942519434555957}. Best is trial 14 with value: 0.678231393199787.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.042412 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:56:18,964] Trial 20 finished with value: 0.6818253827302014 and parameters: {'n_estimators': 1941, 'learning_rate': 0.15637663813418895, 'num_leaves': 175, 'max_depth': 3, 'min_child_samples': 15, 'subsample': 0.8449516249941433, 'colsample_bytree': 0.9585166806691534}. Best is trial 14 with value: 0.678231393199787.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.061629 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:56:27,051] Trial 21 finished with value: 0.6784870050483597 and parameters: {'n_estimators': 1368, 'learning_rate': 0.20596046755034045, 'num_leaves': 216, 'max_depth': 4, 'min_child_samples': 5, 'subsample': 0.7636218035625637, 'colsample_bytree': 0.9944733562949873}. Best is trial 14 with value: 0.678231393199787.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.028136 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:56:33,213] Trial 22 finished with value: 0.6786437613206985 and parameters: {'n_estimators': 1528, 'learning_rate': 0.26069139614579173, 'num_leaves': 229, 'max_depth': 4, 'min_child_samples': 15, 'subsample': 0.7531035726579394, 'colsample_bytree': 0.9523796037058913}. Best is trial 14 with value: 0.678231393199787.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.059646 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:56:42,499] Trial 23 finished with value: 0.6801083008914313 and parameters: {'n_estimators': 1311, 'learning_rate': 0.13412018678920312, 'num_leaves': 264, 'max_depth': 6, 'min_child_samples': 5, 'subsample': 0.6981711057273501, 'colsample_bytree': 0.9941099839872426}. Best is trial 14 with value: 0.678231393199787.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.062459 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:56:47,835] Trial 24 finished with value: 0.6786413356533224 and parameters: {'n_estimators': 1315, 'learning_rate': 0.18640657429915775, 'num_leaves': 271, 'max_depth': 4, 'min_child_samples': 34, 'subsample': 0.7564130470471753, 'colsample_bytree': 0.9128220309988622}. Best is trial 14 with value: 0.678231393199787.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.059752 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:57:00,377] Trial 25 finished with value: 0.6809748697014687 and parameters: {'n_estimators': 1831, 'learning_rate': 0.08008485906033944, 'num_leaves': 227, 'max_depth': 5, 'min_child_samples': 18, 'subsample': 0.8467400460879674, 'colsample_bytree': 0.9684675548558038}. Best is trial 14 with value: 0.678231393199787.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.059764 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:57:07,023] Trial 26 finished with value: 0.6805974175807888 and parameters: {'n_estimators': 1576, 'learning_rate': 0.2029436232621164, 'num_leaves': 178, 'max_depth': 3, 'min_child_samples': 25, 'subsample': 0.670004480875444, 'colsample_bytree': 0.7884507664321087}. Best is trial 14 with value: 0.678231393199787.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.061392 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:57:13,285] Trial 27 finished with value: 0.6783569458291837 and parameters: {'n_estimators': 1281, 'learning_rate': 0.2544042049421726, 'num_leaves': 196, 'max_depth': 4, 'min_child_samples': 12, 'subsample': 0.7745741341789201, 'colsample_bytree': 0.9983971315424095}. Best is trial 14 with value: 0.678231393199787.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.060808 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:57:19,439] Trial 28 finished with value: 0.6807175741239102 and parameters: {'n_estimators': 1318, 'learning_rate': 0.2562970815694568, 'num_leaves': 131, 'max_depth': 6, 'min_child_samples': 5, 'subsample': 0.8230274347589581, 'colsample_bytree': 0.8811618657866149}. Best is trial 14 with value: 0.678231393199787.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.059260 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:57:25,386] Trial 29 finished with value: 0.6786876937715436 and parameters: {'n_estimators': 941, 'learning_rate': 0.24735390842983612, 'num_leaves': 241, 'max_depth': 4, 'min_child_samples': 14, 'subsample': 0.770620883000757, 'colsample_bytree': 0.9420078150253125}. Best is trial 14 with value: 0.678231393199787.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.060456 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:57:30,277] Trial 30 finished with value: 0.680441079490075 and parameters: {'n_estimators': 1204, 'learning_rate': 0.22098953891681278, 'num_leaves': 172, 'max_depth': 5, 'min_child_samples': 40, 'subsample': 0.8574497780103651, 'colsample_bytree': 0.972003254749006}. Best is trial 14 with value: 0.678231393199787.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.120241 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:57:36,512] Trial 31 finished with value: 0.6780988815978101 and parameters: {'n_estimators': 1370, 'learning_rate': 0.27314465805217136, 'num_leaves': 194, 'max_depth': 4, 'min_child_samples': 9, 'subsample': 0.7077017444804962, 'colsample_bytree': 0.9990625966337539}. Best is trial 31 with value: 0.6780988815978101.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.060180 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:57:47,942] Trial 32 finished with value: 0.6798166071352794 and parameters: {'n_estimators': 1395, 'learning_rate': 0.2828336910054046, 'num_leaves': 204, 'max_depth': 3, 'min_child_samples': 5, 'subsample': 0.6981399975730438, 'colsample_bytree': 0.9294211178768998}. Best is trial 31 with value: 0.6780988815978101.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.059969 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:57:54,030] Trial 33 finished with value: 0.6782690154657394 and parameters: {'n_estimators': 1109, 'learning_rate': 0.2729046370966431, 'num_leaves': 223, 'max_depth': 4, 'min_child_samples': 23, 'subsample': 0.6157660602168866, 'colsample_bytree': 0.9800033025995806}. Best is trial 31 with value: 0.6780988815978101.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.059469 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:58:01,748] Trial 34 finished with value: 0.6830565356162184 and parameters: {'n_estimators': 1088, 'learning_rate': 0.27087543833711264, 'num_leaves': 189, 'max_depth': 9, 'min_child_samples': 23, 'subsample': 0.6029027799599646, 'colsample_bytree': 0.9710952263238993}. Best is trial 31 with value: 0.6780988815978101.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.058727 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:58:06,619] Trial 35 finished with value: 0.6799379458798505 and parameters: {'n_estimators': 1242, 'learning_rate': 0.2999469005798253, 'num_leaves': 282, 'max_depth': 5, 'min_child_samples': 28, 'subsample': 0.64254557538043, 'colsample_bytree': 0.8787106025127555}. Best is trial 31 with value: 0.6780988815978101.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.059822 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:58:14,570] Trial 36 finished with value: 0.6782759953047188 and parameters: {'n_estimators': 1134, 'learning_rate': 0.2712159814415621, 'num_leaves': 224, 'max_depth': 4, 'min_child_samples': 52, 'subsample': 0.6354038187074638, 'colsample_bytree': 0.9750462200272288}. Best is trial 31 with value: 0.6780988815978101.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.055175 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:58:19,695] Trial 37 finished with value: 0.6806938520644693 and parameters: {'n_estimators': 977, 'learning_rate': 0.2758670139555859, 'num_leaves': 240, 'max_depth': 7, 'min_child_samples': 54, 'subsample': 0.6280941619282339, 'colsample_bytree': 0.6117848493460499}. Best is trial 31 with value: 0.6780988815978101.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.069349 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:58:27,949] Trial 38 finished with value: 0.6816807394742644 and parameters: {'n_estimators': 1139, 'learning_rate': 0.23061955663816502, 'num_leaves': 253, 'max_depth': 3, 'min_child_samples': 59, 'subsample': 0.662934813089251, 'colsample_bytree': 0.9320248368410395}. Best is trial 31 with value: 0.6780988815978101.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.058882 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:58:32,537] Trial 39 finished with value: 0.6803892895362313 and parameters: {'n_estimators': 1498, 'learning_rate': 0.264795953321366, 'num_leaves': 297, 'max_depth': 6, 'min_child_samples': 47, 'subsample': 0.618440059155379, 'colsample_bytree': 0.8309729486468281}. Best is trial 31 with value: 0.6780988815978101.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.071721 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:58:41,517] Trial 40 finished with value: 0.681701827597645 and parameters: {'n_estimators': 857, 'learning_rate': 0.28614076545337663, 'num_leaves': 229, 'max_depth': 3, 'min_child_samples': 62, 'subsample': 0.6373568950102768, 'colsample_bytree': 0.9721827598588599}. Best is trial 31 with value: 0.6780988815978101.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.059617 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:58:46,566] Trial 41 finished with value: 0.6788040811485517 and parameters: {'n_estimators': 1030, 'learning_rate': 0.24920030026152518, 'num_leaves': 206, 'max_depth': 4, 'min_child_samples': 13, 'subsample': 0.6001691319616986, 'colsample_bytree': 0.9519816107380387}. Best is trial 31 with value: 0.6780988815978101.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.059874 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:58:53,943] Trial 42 finished with value: 0.6785912327873928 and parameters: {'n_estimators': 1138, 'learning_rate': 0.25933620194120943, 'num_leaves': 169, 'max_depth': 4, 'min_child_samples': 19, 'subsample': 0.7049590142536463, 'colsample_bytree': 0.9780827175332274}. Best is trial 31 with value: 0.6780988815978101.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.060642 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:58:58,959] Trial 43 finished with value: 0.6797567970917809 and parameters: {'n_estimators': 1264, 'learning_rate': 0.23790686148918738, 'num_leaves': 226, 'max_depth': 5, 'min_child_samples': 10, 'subsample': 0.66119752841949, 'colsample_bytree': 0.944422001317744}. Best is trial 31 with value: 0.6780988815978101.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.060884 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:59:02,546] Trial 44 finished with value: 0.6801690377170418 and parameters: {'n_estimators': 1606, 'learning_rate': 0.2756067669442541, 'num_leaves': 190, 'max_depth': 3, 'min_child_samples': 30, 'subsample': 0.6893223079081425, 'colsample_bytree': 0.6903128822368929}. Best is trial 31 with value: 0.6780988815978101.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.176941 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:59:08,636] Trial 45 finished with value: 0.6798007859766068 and parameters: {'n_estimators': 1187, 'learning_rate': 0.2908417845688439, 'num_leaves': 260, 'max_depth': 5, 'min_child_samples': 72, 'subsample': 0.8260585733956697, 'colsample_bytree': 0.9827128546335292}. Best is trial 31 with value: 0.6780988815978101.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.062468 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:59:14,792] Trial 46 finished with value: 0.6806024824242781 and parameters: {'n_estimators': 1081, 'learning_rate': 0.21587506432034703, 'num_leaves': 52, 'max_depth': 4, 'min_child_samples': 88, 'subsample': 0.7157463207005664, 'colsample_bytree': 0.9139588135903846}. Best is trial 31 with value: 0.6780988815978101.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.061894 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:59:22,565] Trial 47 finished with value: 0.6798770596465814 and parameters: {'n_estimators': 1262, 'learning_rate': 0.2548957147182131, 'num_leaves': 158, 'max_depth': 5, 'min_child_samples': 17, 'subsample': 0.7783025431463705, 'colsample_bytree': 0.9630081906208156}. Best is trial 31 with value: 0.6780988815978101.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.059257 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:59:27,769] Trial 48 finished with value: 0.6818539749804945 and parameters: {'n_estimators': 750, 'learning_rate': 0.2401300512529066, 'num_leaves': 220, 'max_depth': 8, 'min_child_samples': 10, 'subsample': 0.9180405572211483, 'colsample_bytree': 0.8470291081800144}. Best is trial 31 with value: 0.6780988815978101.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.060002 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.194694


[I 2025-10-15 13:59:35,689] Trial 49 finished with value: 0.678369842163281 and parameters: {'n_estimators': 1662, 'learning_rate': 0.27293536649585676, 'num_leaves': 141, 'max_depth': 4, 'min_child_samples': 22, 'subsample': 0.651733808229694, 'colsample_bytree': 0.986094694015988}. Best is trial 31 with value: 0.6780988815978101.
2025/10/15 13:59:35 INFO mlflow.tracking.fluent: Experiment with name 'Ride Surge Prediction' does not exist. Creating a new experiment.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.069746 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5039
[LightGBM] [Info] Number of data points in the train set: 120000, number of used features: 47
[LightGBM] [Info] Start training from score -0.001115


[I 2025-10-15 14:02:00,410] A new study created in memory with name: no-name-be353b65-461d-4dd7-a6df-f1ae81bd9f94
[I 2025-10-15 14:02:34,677] Trial 0 finished with value: 0.7181726895498779 and parameters: {'n_estimators': 1636, 'learning_rate': 0.26267811919320977, 'max_depth': 7, 'subsample': 0.8313621531731299, 'colsample_bytree': 0.9190105445949637, 'min_child_weight': 1, 'gamma': 0.4455215672692948}. Best is trial 0 with value: 0.7181726895498779.
[I 2025-10-15 14:03:33,379] Trial 1 finished with value: 0.6662768253830902 and parameters: {'n_estimators': 2405, 'learning_rate': 0.04307019971696408, 'max_depth': 4, 'subsample': 0.8161834228348339, 'colsample_bytree': 0.6509755143876106, 'min_child_weight': 10, 'gamma': 0.3397211647025657}. Best is trial 1 with value: 0.6662768253830902.
[I 2025-10-15 14:04:48,750] Trial 2 finished with value: 0.7104135919016984 and parameters: {'n_estimators': 2287, 'learning_rate': 0.10353958388706028, 'max_depth': 9, 'subsample': 0.706292393404225

In [10]:
!zip -r ride_surge_project.zip /content
from google.colab import files
files.download("ride_surge_project.zip")

  adding: content/ (stored 0%)
  adding: content/.config/ (stored 0%)
  adding: content/.config/active_config (stored 0%)
  adding: content/.config/.last_update_check.json (deflated 22%)
  adding: content/.config/logs/ (stored 0%)
  adding: content/.config/logs/2025.10.13/ (stored 0%)
  adding: content/.config/logs/2025.10.13/13.43.26.183753.log (deflated 57%)
  adding: content/.config/logs/2025.10.13/13.43.04.370331.log (deflated 58%)
  adding: content/.config/logs/2025.10.13/13.43.17.820264.log (deflated 58%)
  adding: content/.config/logs/2025.10.13/13.42.34.331861.log (deflated 92%)
  adding: content/.config/logs/2025.10.13/13.43.26.889435.log (deflated 56%)
  adding: content/.config/logs/2025.10.13/13.43.12.780756.log (deflated 86%)
  adding: content/.config/config_sentinel (stored 0%)
  adding: content/.config/default_configs.db (deflated 98%)
  adding: content/.config/gce (stored 0%)
  adding: content/.config/configurations/ (stored 0%)
  adding: content/.config/configurations/c

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>