In [43]:
import pandas as pd
import numpy as np
import warnings

warnings.filterwarnings("ignore")

data = pd.read_csv("model_df.csv", index_col=0)
data['date'] = pd.to_datetime(data['Date'])
data = data.sort_values(by="Date", ascending=True)
data['home_1h_score'] = data['Home 1H Points']
data['away_1h_score'] = data['Away 1H Points']
data['home_score'] = data['Home Full Points']
data['away_score'] = data['Away Full Points']
data['total'] = data['home_score'] + data['away_score']
data['home_2h_score'] = data['home_score'] - data['home_1h_score']
data['away_2h_score'] = data['away_score'] - data['away_1h_score']
data['home_team'] = data['Team_home']
data['away_team'] = data['Team_away']
data['margin'] = data['home_score'] - data['away_score']

In [44]:
data["home_opp_score_rank"] = data["Rk_away"] - data['home_score']
data['away_opp_score_rank'] = data['Rk_home'] - data['away_score']
data['home_sos'] = data['Rk_away']
data['away_sos'] = data['Rk_home']

In [45]:
# Melt data to unify home and away records with opponent scores
home_df = data[['date', 'home_team', 'home_score', 'AdjT_Rk_away', 'AdjO_Rk_away', 'AdjD_Rk_away', 'away_score', 'home_sos', "home_opp_score_rank"]].rename(columns={
    'home_team': 'team', 'home_score': 'score', 'away_score': 'opponent_score', 'home_sos': 'sos', "home_opp_score_rank": "opp_score_rank", 'AdjT_Rk_away': "opp_tempo", 'AdjO_Rk_away': 'opp_off', 'AdjD_Rk_away':"opp_def",
})
away_df = data[['date', 'away_team', 'away_score', 'home_score','AdjT_Rk_home', 'AdjO_Rk_home', 'AdjD_Rk_home','away_sos', "away_opp_score_rank"]].rename(columns={
    'away_team': 'team', 'away_score': 'score', 'home_score': 'opponent_score','away_sos': 'sos', "away_opp_score_rank": "opp_score_rank",'AdjT_Rk_home': "opp_tempo", 'AdjO_Rk_home': 'opp_off', 'AdjD_Rk_home':"opp_def"
})
games = pd.concat([home_df, away_df])
games.sort_values(by=['team', 'date'], inplace=True)
games['score_diff'] = games['score'] - games['opponent_score']

In [46]:
rank_columns = ['opp_tempo', 'opp_off', 'opp_def']
bin_size = 121

for col in rank_columns:
    # Define bin edges and labels
    bin_edges = list(range(1, 363 + 1, bin_size))  # Ensure the upper edge includes the maximum value
    bin_labels = [f"{bin_edges[i]}-{bin_edges[i + 1] - 1}" for i in range(len(bin_edges) - 1)]

    # Bin the column into ranges
    games[f'{col}_bin'] = pd.cut(games[col], bins=bin_edges, labels=bin_labels, right=False)

    # Create binary columns for each bin
    for label in bin_labels:
        games[f'{col}_bin_{label}'] = (games[f'{col}_bin'] == label).astype(int)

In [47]:
# Define the rolling windows
rolling_windows = [1, 3, 7]

# Calculate rolling averages for each window and store as separate columns
for window in rolling_windows:
    games[f'rolling_avg_score_{window}'] = (
        games.groupby('team')['score']
        .shift()
        .rolling(window=window, min_periods=1)
        .mean()
    )
    games[f'rolling_avg_score_allowed_{window}'] = (
        games.groupby('team')['opponent_score']
        .shift()
        .rolling(window=window, min_periods=1)
        .mean()
    )
    games[f'rolling_sos_{window}'] = (
        games.groupby('team')['sos']
        .shift()
        .rolling(window=window, min_periods=1)
        .mean()
    )
    games[f'rolling_opp_score_rank_{window}'] = (
        games.groupby('team')['opp_score_rank']
        .shift()
        .rolling(window=window, min_periods=1)
        .mean()
    )
    games[f'rolling_score_diff_{window}'] = (
        games.groupby('team')['score_diff']
        .shift()
        .rolling(window=window, min_periods=1)
        .mean()
    )

# Avoid duplicate indices when calculating rolling average score differential for bins
for col in rank_columns:
    bin_labels = [f"{bin_edges[i]}-{bin_edges[i + 1] - 1}" for i in range(len(bin_edges) - 1)]

    for label in bin_labels:
        for window in rolling_windows:
            # Ensure group indices are unique before applying transformations
            rolling_diff = (
                games.groupby('team', group_keys=False)
                .apply(
                    lambda group: group.loc[group[f'{col}_bin_{label}'] == 1, 'score_diff']
                    .shift()
                    .rolling(window=window, min_periods=1)
                    .mean()
                )
            )
            # Assign the rolling results back to the DataFrame
            games[f'rolling_avg_score_diff_{col}_bin_{label}_{window}'] = rolling_diff.reset_index(drop=True)

# Calculate days since the last game
games['days_since_last_game'] = games.groupby('team')['date'].diff().dt.days

In [49]:
columns_to_keep = ['date', 'team', 'days_since_last_game'] + \
                  [f'rolling_avg_score_{w}' for w in rolling_windows] + \
                  [f'rolling_avg_score_allowed_{w}' for w in rolling_windows] + \
                  [f'rolling_sos_{w}' for w in rolling_windows] + \
                  [f'rolling_score_diff_{w}' for w in rolling_windows] + \
                  [f'rolling_opp_score_rank_{w}' for w in rolling_windows] + \
                  [f'rolling_avg_score_diff_{col}_bin_{label}_{w}'
                   for col in rank_columns
                   for label in bin_labels
                   for w in rolling_windows]
games = games[columns_to_keep]

# Merge rolling features back to the original data on home and away team sides
# First, join for home team
data = data.merge(
    games.rename(columns={
        'team': 'home_team',
        **{f'rolling_avg_score_{w}': f'home_rolling_avg_score_{w}' for w in rolling_windows},
        **{f'rolling_avg_score_allowed_{w}': f'home_rolling_avg_score_allowed_{w}' for w in rolling_windows},
        **{f'rolling_sos_{w}': f'home_rolling_sos_{w}' for w in rolling_windows},
        **{f'rolling_score_diff_{w}': f'home_rolling_score_diff_{w}' for w in rolling_windows},
        **{f'rolling_opp_score_rank_{w}': f'home_rolling_opp_score_rank_{w}' for w in rolling_windows},
        **{f'rolling_avg_score_diff_{col}_bin_{label}_{w}': f'home_rolling_avg_score_diff_{col}_bin_{label}_{w}'
           for col in rank_columns
           for label in bin_labels
           for w in rolling_windows},
        'days_since_last_game': 'home_days_since_last_game'
    }),
    on=['date', 'home_team'],
    how='left'
)

# Then, join for away team
data = data.merge(
    games.rename(columns={
        'team': 'away_team',
        **{f'rolling_avg_score_{w}': f'away_rolling_avg_score_{w}' for w in rolling_windows},
        **{f'rolling_avg_score_allowed_{w}': f'away_rolling_avg_score_allowed_{w}' for w in rolling_windows},
        **{f'rolling_sos_{w}': f'away_rolling_sos_{w}' for w in rolling_windows},
        **{f'rolling_score_diff_{w}': f'away_rolling_score_diff_{w}' for w in rolling_windows},
        **{f'rolling_opp_score_rank_{w}': f'away_rolling_opp_score_rank_{w}' for w in rolling_windows},
        **{f'rolling_avg_score_diff_{col}_bin_{label}_{w}': f'away_rolling_avg_score_diff_{col}_bin_{label}_{w}'
           for col in rank_columns
           for label in bin_labels
           for w in rolling_windows},
        'days_since_last_game': 'away_days_since_last_game'
    }),
    on=['date', 'away_team'],
    how='left'
)

In [52]:
df = data.dropna()

In [1]:
import pandas as pd
import numpy as np
df = pd.read_csv("train.csv")

  df = pd.read_csv("train.csv")


In [2]:
df['margin'] = df['home_score'] - df['away_score']
df['home_win'] = np.where(df['home_score'] > df['away_score'], 1, 0)
df['away_win'] = np.where(df['away_score'] > df['home_score'], 1, 0)

### LIGHTGBM NO OUTLIER REDUCTION

In [57]:
import optuna
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.preprocessing import LabelEncoder
import lightgbm as lgb
import joblib
import os
import pandas as pd

os.makedirs("lgb_models", exist_ok=True)

high_outlier_thresholds = {'home_score': 91, 'away_score': 78, 'total': 167}
low_outlier_thresholds = {'home_score': 56, 'away_score': 52, 'total': 118}

for target_column in ['home_score', 'away_score', 'total', 'margin']:
    categorical_columns = ['Conf_home', 'Conf_away']

    model_df = df[[target_column, 'Date', 
       'Rk_home', 'Conf_home', 'AdjEM_home', 'AdjO_home',
       'AdjO_Rk_home', 'AdjD_home', 'AdjD_Rk_home', 'AdjT_home',
       'AdjT_Rk_home','Rk_rolling_1_home',
       'AdjEM_rolling_1_home', 'AdjO_rolling_1_home',
       'AdjO_Rk_rolling_1_home', 'AdjD_rolling_1_home',
       'AdjD_Rk_rolling_1_home', 'AdjT_rolling_1_home',
       'AdjT_Rk_rolling_1_home', 'Rk_rolling_3_home',
       'AdjEM_rolling_3_home', 'AdjO_rolling_3_home',
       'AdjO_Rk_rolling_3_home', 'AdjD_rolling_3_home',
       'AdjD_Rk_rolling_3_home', 'AdjT_rolling_3_home',
       'AdjT_Rk_rolling_3_home', 'Rk_rolling_5_home',
       'AdjEM_rolling_5_home', 'AdjO_rolling_5_home',
       'AdjO_Rk_rolling_5_home', 'AdjD_rolling_5_home',
       'AdjD_Rk_rolling_5_home', 'AdjT_rolling_5_home',
       'AdjT_Rk_rolling_5_home', 'Rk_rolling_10_home',
       'AdjEM_rolling_10_home', 'AdjO_rolling_10_home',
       'AdjO_Rk_rolling_10_home', 'AdjD_rolling_10_home',
       'AdjD_Rk_rolling_10_home', 'AdjT_rolling_10_home',
       'AdjT_Rk_rolling_10_home', 'Rk_rolling_20_home',
       'AdjEM_rolling_20_home', 'AdjO_rolling_20_home',
       'AdjO_Rk_rolling_20_home', 'AdjD_rolling_20_home',
       'AdjD_Rk_rolling_20_home', 'AdjT_rolling_20_home',
       'AdjT_Rk_rolling_20_home', 'Rk_away', 'Conf_away',
       'AdjEM_away', 'AdjO_away', 'AdjO_Rk_away', 'AdjD_away',
       'AdjD_Rk_away', 'AdjT_away', 'AdjT_Rk_away',
       'Rk_rolling_1_away', 'AdjEM_rolling_1_away', 'AdjO_rolling_1_away',
       'AdjO_Rk_rolling_1_away', 'AdjD_rolling_1_away',
       'AdjD_Rk_rolling_1_away', 'AdjT_rolling_1_away',
       'AdjT_Rk_rolling_1_away', 'Rk_rolling_3_away',
       'AdjEM_rolling_3_away', 'AdjO_rolling_3_away',
       'AdjO_Rk_rolling_3_away', 'AdjD_rolling_3_away',
       'AdjD_Rk_rolling_3_away', 'AdjT_rolling_3_away',
       'AdjT_Rk_rolling_3_away', 'Rk_rolling_5_away',
       'AdjEM_rolling_5_away', 'AdjO_rolling_5_away',
       'AdjO_Rk_rolling_5_away', 'AdjD_rolling_5_away',
       'AdjD_Rk_rolling_5_away', 'AdjT_rolling_5_away',
       'AdjT_Rk_rolling_5_away', 'Rk_rolling_10_away',
       'AdjEM_rolling_10_away', 'AdjO_rolling_10_away',
       'AdjO_Rk_rolling_10_away', 'AdjD_rolling_10_away',
       'AdjD_Rk_rolling_10_away', 'AdjT_rolling_10_away',
       'AdjT_Rk_rolling_10_away', 'Rk_rolling_20_away',
       'AdjEM_rolling_20_away', 'AdjO_rolling_20_away',
       'AdjO_Rk_rolling_20_away', 'AdjD_rolling_20_away',
       'AdjD_Rk_rolling_20_away', 'AdjT_rolling_20_away',
       'AdjT_Rk_rolling_20_away', 'home_sos',
       'away_sos', 'home_days_since_last_game',
       'home_rolling_avg_score_1', 'home_rolling_avg_score_3',
       'home_rolling_avg_score_7', 'home_rolling_avg_score_allowed_1',
       'home_rolling_avg_score_allowed_3',
       'home_rolling_avg_score_allowed_7', 'home_rolling_sos_1',
       'home_rolling_sos_3', 'home_rolling_sos_7',
       'home_rolling_score_diff_1', 'home_rolling_score_diff_3',
       'home_rolling_score_diff_7', 'home_rolling_opp_score_rank_1',
       'home_rolling_opp_score_rank_3', 'home_rolling_opp_score_rank_7',
       'home_rolling_avg_score_diff_opp_tempo_bin_1-121_1',
       'home_rolling_avg_score_diff_opp_tempo_bin_1-121_3',
       'home_rolling_avg_score_diff_opp_tempo_bin_1-121_7',
       'home_rolling_avg_score_diff_opp_tempo_bin_122-242_1',
       'home_rolling_avg_score_diff_opp_tempo_bin_122-242_3',
       'home_rolling_avg_score_diff_opp_tempo_bin_122-242_7',
       'home_rolling_avg_score_diff_opp_off_bin_1-121_1',
       'home_rolling_avg_score_diff_opp_off_bin_1-121_3',
       'home_rolling_avg_score_diff_opp_off_bin_1-121_7',
       'home_rolling_avg_score_diff_opp_off_bin_122-242_1',
       'home_rolling_avg_score_diff_opp_off_bin_122-242_3',
       'home_rolling_avg_score_diff_opp_off_bin_122-242_7',
       'home_rolling_avg_score_diff_opp_def_bin_1-121_1',
       'home_rolling_avg_score_diff_opp_def_bin_1-121_3',
       'home_rolling_avg_score_diff_opp_def_bin_1-121_7',
       'home_rolling_avg_score_diff_opp_def_bin_122-242_1',
       'home_rolling_avg_score_diff_opp_def_bin_122-242_3',
       'home_rolling_avg_score_diff_opp_def_bin_122-242_7',
       'away_days_since_last_game', 'away_rolling_avg_score_1',
       'away_rolling_avg_score_3', 'away_rolling_avg_score_7',
       'away_rolling_avg_score_allowed_1',
       'away_rolling_avg_score_allowed_3',
       'away_rolling_avg_score_allowed_7', 'away_rolling_sos_1',
       'away_rolling_sos_3', 'away_rolling_sos_7',
       'away_rolling_score_diff_1', 'away_rolling_score_diff_3',
       'away_rolling_score_diff_7', 'away_rolling_opp_score_rank_1',
       'away_rolling_opp_score_rank_3', 'away_rolling_opp_score_rank_7',
       'away_rolling_avg_score_diff_opp_tempo_bin_1-121_1',
       'away_rolling_avg_score_diff_opp_tempo_bin_1-121_3',
       'away_rolling_avg_score_diff_opp_tempo_bin_1-121_7',
       'away_rolling_avg_score_diff_opp_tempo_bin_122-242_1',
       'away_rolling_avg_score_diff_opp_tempo_bin_122-242_3',
       'away_rolling_avg_score_diff_opp_tempo_bin_122-242_7',
       'away_rolling_avg_score_diff_opp_off_bin_1-121_1',
       'away_rolling_avg_score_diff_opp_off_bin_1-121_3',
       'away_rolling_avg_score_diff_opp_off_bin_1-121_7',
       'away_rolling_avg_score_diff_opp_off_bin_122-242_1',
       'away_rolling_avg_score_diff_opp_off_bin_122-242_3',
       'away_rolling_avg_score_diff_opp_off_bin_122-242_7',
       'away_rolling_avg_score_diff_opp_def_bin_1-121_1',
       'away_rolling_avg_score_diff_opp_def_bin_1-121_3',
       'away_rolling_avg_score_diff_opp_def_bin_1-121_7',
       'away_rolling_avg_score_diff_opp_def_bin_122-242_1',
       'away_rolling_avg_score_diff_opp_def_bin_122-242_3',
       'away_rolling_avg_score_diff_opp_def_bin_122-242_7']]
    
    for cat_col in categorical_columns:
        le = LabelEncoder()
        model_df[cat_col] = le.fit_transform(df[cat_col])

    iso_forest = IsolationForest(contamination=0.1, random_state=42)
    iso_forest.fit(model_df.drop([target_column, 'Date'], axis=1))
    
    model_df['outlier_score'] = iso_forest.decision_function(model_df.drop([target_column, 'Date'], axis=1))
    model_df['date'] = pd.to_datetime(df['Date'])
    model_df = model_df.sort_values(by='date')

    test_ratio = 0.2
    split_index = int(len(model_df) * (1 - test_ratio))

    train_data = model_df.iloc[:split_index]
    test_data = model_df.iloc[split_index:]

    X_train = train_data.drop(columns=['date', 'Date', target_column])
    y_train = train_data[target_column]

    X_test = test_data.drop(columns=['date', 'Date', target_column])
    y_test = test_data[target_column]

    def objective_lgb(trial):
        param = {
            'objective': 'regression',
            'metric': 'rmse',
            'boosting_type': 'gbdt',
            'max_depth': trial.suggest_int('max_depth', -1, 10),
            'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
            'n_estimators': trial.suggest_int('n_estimators', 100, 1500),
            'num_leaves': trial.suggest_int('num_leaves', 20, 150),
            'min_child_samples': trial.suggest_int('min_child_samples', 5, 50),
            'subsample': trial.suggest_float('subsample', 0.5, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
            'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 5.0),
            'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 5.0),
        }
        
        train_data = lgb.Dataset(X_train, label=y_train)
        valid_data = lgb.Dataset(X_test, label=y_test)
        
        model = lgb.train(
            params=param,
            train_set=train_data,
            valid_sets=[train_data, valid_data],
            valid_names=["train", "valid"]        )
        
        y_pred = model.predict(X_test, num_iteration=model.best_iteration)
        rmse = mean_squared_error(y_test, y_pred, squared=False)
        return rmse



    study_lgb = optuna.create_study(direction='minimize')
    study_lgb.optimize(objective_lgb, n_trials=50)
    print(f"Best LightGBM parameters for {target_column}: {study_lgb.best_params}")

    best_lgb = lgb.LGBMRegressor(**study_lgb.best_params)
    best_lgb.fit(X_train, y_train)
    y_pred_lgb = best_lgb.predict(X_test)

    best_model_filename = f"lgb_models/{target_column}_best_model.joblib"
    joblib.dump(best_lgb, best_model_filename)

    def display_metrics(y_true, y_pred, model_name):
        r2 = r2_score(y_true, y_pred)
        mae = mean_absolute_error(y_true, y_pred)
        rmse = mean_squared_error(y_true, y_pred, squared=False)
        print(f"{model_name} R^2: {r2:.4f}")
        print(f"{model_name} MAE: {mae:.4f}")
        print(f"{model_name} RMSE: {rmse:.4f}")

    display_metrics(y_test, y_pred_lgb, "LightGBM with Outlier Score Feature")

[I 2024-11-21 21:56:09,404] A new study created in memory with name: no-name-31e51ed2-a97c-4aff-aada-e26a5ebac4fd


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:56:11,071] Trial 0 finished with value: 10.242925244688543 and parameters: {'max_depth': 2, 'learning_rate': 0.213724231856149, 'n_estimators': 1166, 'num_leaves': 50, 'min_child_samples': 24, 'subsample': 0.9746548968257516, 'colsample_bytree': 0.7004021301236947, 'reg_alpha': 3.929451847336554, 'reg_lambda': 0.8476246903633039}. Best is trial 0 with value: 10.242925244688543.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:56:18,745] Trial 1 finished with value: 9.73995486958958 and parameters: {'max_depth': 8, 'learning_rate': 0.010077651416027785, 'n_estimators': 1015, 'num_leaves': 83, 'min_child_samples': 37, 'subsample': 0.7264194260179396, 'colsample_bytree': 0.6859685101030504, 'reg_alpha': 4.18477632229433, 'reg_lambda': 4.340750678031889}. Best is trial 1 with value: 9.73995486958958.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:56:23,271] Trial 2 finished with value: 10.295265989698844 and parameters: {'max_depth': 7, 'learning_rate': 0.14270793991950587, 'n_estimators': 712, 'num_leaves': 75, 'min_child_samples': 40, 'subsample': 0.500991945723636, 'colsample_bytree': 0.6952524737941062, 'reg_alpha': 4.575111080303611, 'reg_lambda': 2.5127909926176875}. Best is trial 1 with value: 9.73995486958958.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:56:24,315] Trial 3 finished with value: 10.636013925769245 and parameters: {'max_depth': 7, 'learning_rate': 0.2990000891498518, 'n_estimators': 281, 'num_leaves': 20, 'min_child_samples': 16, 'subsample': 0.5071622862076071, 'colsample_bytree': 0.6979807276383452, 'reg_alpha': 0.19601301798844784, 'reg_lambda': 0.24018271522445178}. Best is trial 1 with value: 9.73995486958958.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:56:25,123] Trial 4 finished with value: 9.690461188105767 and parameters: {'max_depth': 4, 'learning_rate': 0.027627112158896378, 'n_estimators': 293, 'num_leaves': 118, 'min_child_samples': 17, 'subsample': 0.5001016071555924, 'colsample_bytree': 0.5136554197225748, 'reg_alpha': 3.0163630141553273, 'reg_lambda': 0.03878884546170469}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:56:35,335] Trial 5 finished with value: 9.841588969690488 and parameters: {'max_depth': 0, 'learning_rate': 0.019738493385123495, 'n_estimators': 1222, 'num_leaves': 50, 'min_child_samples': 31, 'subsample': 0.669194462731636, 'colsample_bytree': 0.6528849670253735, 'reg_alpha': 3.6487541803429933, 'reg_lambda': 4.671417604750996}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:56:36,280] Trial 6 finished with value: 9.955502937792547 and parameters: {'max_depth': 1, 'learning_rate': 0.011837301564051718, 'n_estimators': 858, 'num_leaves': 86, 'min_child_samples': 30, 'subsample': 0.9896124217557809, 'colsample_bytree': 0.9723980023522736, 'reg_alpha': 4.026509483597178, 'reg_lambda': 0.23923184967241162}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:56:50,330] Trial 7 finished with value: 10.013794205116657 and parameters: {'max_depth': 10, 'learning_rate': 0.08176414105358078, 'n_estimators': 1353, 'num_leaves': 113, 'min_child_samples': 22, 'subsample': 0.5236073508836657, 'colsample_bytree': 0.5181152148947757, 'reg_alpha': 3.1724073941977533, 'reg_lambda': 1.3090622060180335}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:56:50,801] Trial 8 finished with value: 9.99231941905506 and parameters: {'max_depth': 1, 'learning_rate': 0.027955991645482976, 'n_estimators': 301, 'num_leaves': 140, 'min_child_samples': 32, 'subsample': 0.6496630386455682, 'colsample_bytree': 0.5836920190099231, 'reg_alpha': 2.0109619168120707, 'reg_lambda': 2.1579905902502787}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:56:59,191] Trial 9 finished with value: 9.749990036888862 and parameters: {'max_depth': 10, 'learning_rate': 0.010338037808892772, 'n_estimators': 939, 'num_leaves': 93, 'min_child_samples': 46, 'subsample': 0.6931889486512427, 'colsample_bytree': 0.5395614880529569, 'reg_alpha': 1.81244380369922, 'reg_lambda': 1.7763773715351105}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:57:00,787] Trial 10 finished with value: 9.776913345938857 and parameters: {'max_depth': 4, 'learning_rate': 0.045481608714757954, 'n_estimators': 517, 'num_leaves': 149, 'min_child_samples': 5, 'subsample': 0.8383263470251661, 'colsample_bytree': 0.8166340480554082, 'reg_alpha': 4.9613008717179525, 'reg_lambda': 0.07323863923339338}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:57:01,825] Trial 11 finished with value: 9.725843409453518 and parameters: {'max_depth': 6, 'learning_rate': 0.019804616212973213, 'n_estimators': 117, 'num_leaves': 108, 'min_child_samples': 12, 'subsample': 0.7876039485289154, 'colsample_bytree': 0.6067805436721563, 'reg_alpha': 2.90344383838687, 'reg_lambda': 3.272296995478087}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:57:02,361] Trial 12 finished with value: 9.699256909809579 and parameters: {'max_depth': 4, 'learning_rate': 0.028397497013096167, 'n_estimators': 168, 'num_leaves': 119, 'min_child_samples': 11, 'subsample': 0.8175053037955415, 'colsample_bytree': 0.5082818903269591, 'reg_alpha': 2.6894902821219473, 'reg_lambda': 3.0778847889686687}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:57:03,695] Trial 13 finished with value: 9.805744544226025 and parameters: {'max_depth': 4, 'learning_rate': 0.0456036068760678, 'n_estimators': 545, 'num_leaves': 126, 'min_child_samples': 13, 'subsample': 0.5864707926808207, 'colsample_bytree': 0.5105877552455902, 'reg_alpha': 2.6548824395104877, 'reg_lambda': 3.0031475704697317}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:57:03,995] Trial 14 finished with value: 9.782015482997394 and parameters: {'max_depth': 3, 'learning_rate': 0.03157893929085501, 'n_estimators': 124, 'num_leaves': 123, 'min_child_samples': 6, 'subsample': 0.8030776293762336, 'colsample_bytree': 0.5041907772198275, 'reg_alpha': 2.1755311650866735, 'reg_lambda': 1.2704236233734882}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:57:05,767] Trial 15 finished with value: 9.952711106570789 and parameters: {'max_depth': 5, 'learning_rate': 0.07218423509246259, 'n_estimators': 405, 'num_leaves': 133, 'min_child_samples': 19, 'subsample': 0.6160117747810112, 'colsample_bytree': 0.5934043316161985, 'reg_alpha': 3.2889281764269667, 'reg_lambda': 3.616352438285599}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:57:15,225] Trial 16 finished with value: 9.801592595524868 and parameters: {'max_depth': -1, 'learning_rate': 0.030579225692139934, 'n_estimators': 616, 'num_leaves': 104, 'min_child_samples': 13, 'subsample': 0.8782522880470278, 'colsample_bytree': 0.5751294558292827, 'reg_alpha': 1.3948218919453956, 'reg_lambda': 2.7070182342607465}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:57:15,916] Trial 17 finished with value: 9.752503850239206 and parameters: {'max_depth': 3, 'learning_rate': 0.016999010332262622, 'n_estimators': 307, 'num_leaves': 66, 'min_child_samples': 9, 'subsample': 0.7333530440044107, 'colsample_bytree': 0.6299307053910637, 'reg_alpha': 3.3962619672450547, 'reg_lambda': 2.0860719018767204}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:57:20,862] Trial 18 finished with value: 10.056170509487362 and parameters: {'max_depth': 5, 'learning_rate': 0.06088151223635941, 'n_estimators': 1500, 'num_leaves': 116, 'min_child_samples': 24, 'subsample': 0.5854666886519159, 'colsample_bytree': 0.5728708801698924, 'reg_alpha': 2.5779610166620106, 'reg_lambda': 3.980203747721764}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:57:21,646] Trial 19 finished with value: 9.740013725553851 and parameters: {'max_depth': 3, 'learning_rate': 0.034575461014903254, 'n_estimators': 442, 'num_leaves': 100, 'min_child_samples': 18, 'subsample': 0.7510042028677947, 'colsample_bytree': 0.5045937861634475, 'reg_alpha': 2.9250361746256726, 'reg_lambda': 3.262498925535655}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:57:26,315] Trial 20 finished with value: 10.091564691223578 and parameters: {'max_depth': 6, 'learning_rate': 0.10395816484206023, 'n_estimators': 726, 'num_leaves': 139, 'min_child_samples': 9, 'subsample': 0.560523915059942, 'colsample_bytree': 0.5541129078053377, 'reg_alpha': 2.293644242952506, 'reg_lambda': 4.923635522666357}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:57:27,231] Trial 21 finished with value: 9.712310643050781 and parameters: {'max_depth': 6, 'learning_rate': 0.020856644404607938, 'n_estimators': 118, 'num_leaves': 111, 'min_child_samples': 14, 'subsample': 0.7921722048329972, 'colsample_bytree': 0.6106290944305425, 'reg_alpha': 2.9079472265054247, 'reg_lambda': 3.5161445006372345}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:57:29,964] Trial 22 finished with value: 9.704185042069842 and parameters: {'max_depth': 8, 'learning_rate': 0.024097930549573182, 'n_estimators': 205, 'num_leaves': 121, 'min_child_samples': 16, 'subsample': 0.8702828757265731, 'colsample_bytree': 0.5520395047696204, 'reg_alpha': 3.5643841541319103, 'reg_lambda': 2.8567812935968235}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:57:32,621] Trial 23 finished with value: 9.775848086832848 and parameters: {'max_depth': 9, 'learning_rate': 0.04258539534247419, 'n_estimators': 224, 'num_leaves': 125, 'min_child_samples': 19, 'subsample': 0.8868822327023945, 'colsample_bytree': 0.5486210474680764, 'reg_alpha': 3.3972194883650904, 'reg_lambda': 2.7898904801108575}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:57:36,695] Trial 24 finished with value: 9.717830031012532 and parameters: {'max_depth': 8, 'learning_rate': 0.024963759896220912, 'n_estimators': 390, 'num_leaves': 96, 'min_child_samples': 9, 'subsample': 0.9106228915170337, 'colsample_bytree': 0.5502696726688832, 'reg_alpha': 3.6496073170626953, 'reg_lambda': 2.3426182536509725}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:57:37,579] Trial 25 finished with value: 9.726697903923197 and parameters: {'max_depth': 4, 'learning_rate': 0.014196763153662919, 'n_estimators': 240, 'num_leaves': 147, 'min_child_samples': 26, 'subsample': 0.8422746596670376, 'colsample_bytree': 0.5005846494381482, 'reg_alpha': 2.5343714075233965, 'reg_lambda': 2.912522938778815}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:57:37,954] Trial 26 finished with value: 9.925192245344613 and parameters: {'max_depth': 2, 'learning_rate': 0.015523087479315563, 'n_estimators': 226, 'num_leaves': 129, 'min_child_samples': 21, 'subsample': 0.9283658675800066, 'colsample_bytree': 0.5502831186922994, 'reg_alpha': 4.266074679644077, 'reg_lambda': 1.7070051881394201}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:57:39,570] Trial 27 finished with value: 9.723682952219171 and parameters: {'max_depth': 5, 'learning_rate': 0.023793029143855657, 'n_estimators': 389, 'num_leaves': 117, 'min_child_samples': 16, 'subsample': 0.6972727390619581, 'colsample_bytree': 0.6391748727493645, 'reg_alpha': 3.6571854860718576, 'reg_lambda': 2.414104786121361}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:57:44,030] Trial 28 finished with value: 9.829084433397488 and parameters: {'max_depth': 7, 'learning_rate': 0.039511968103826664, 'n_estimators': 596, 'num_leaves': 135, 'min_child_samples': 11, 'subsample': 0.641471056705305, 'colsample_bytree': 0.591660685237109, 'reg_alpha': 3.0681030677300383, 'reg_lambda': 3.8297947469621265}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:57:44,350] Trial 29 finished with value: 9.879420301094267 and parameters: {'max_depth': 2, 'learning_rate': 0.024832240016320804, 'n_estimators': 172, 'num_leaves': 88, 'min_child_samples': 27, 'subsample': 0.9625410550501985, 'colsample_bytree': 0.641052659814317, 'reg_alpha': 3.8292052353461137, 'reg_lambda': 0.6923799759692362}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:57:48,172] Trial 30 finished with value: 9.822871253772272 and parameters: {'max_depth': 9, 'learning_rate': 0.03171529106910182, 'n_estimators': 476, 'num_leaves': 66, 'min_child_samples': 22, 'subsample': 0.7642824696860797, 'colsample_bytree': 0.5344116096412458, 'reg_alpha': 3.3795115179638673, 'reg_lambda': 0.7478958061481185}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:57:49,131] Trial 31 finished with value: 9.701601935583493 and parameters: {'max_depth': 6, 'learning_rate': 0.021682188558100217, 'n_estimators': 129, 'num_leaves': 109, 'min_child_samples': 15, 'subsample': 0.8147821942286897, 'colsample_bytree': 0.611973919634504, 'reg_alpha': 2.793487668298657, 'reg_lambda': 3.4467042469124407}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:57:52,522] Trial 32 finished with value: 9.694983058079698 and parameters: {'max_depth': 8, 'learning_rate': 0.017625993958558205, 'n_estimators': 331, 'num_leaves': 118, 'min_child_samples': 17, 'subsample': 0.8477172347942495, 'colsample_bytree': 0.5668653266956563, 'reg_alpha': 2.7421110668040245, 'reg_lambda': 3.1198873398688005}. Best is trial 4 with value: 9.690461188105767.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:57:54,680] Trial 33 finished with value: 9.681774935924286 and parameters: {'max_depth': 6, 'learning_rate': 0.014476269172454164, 'n_estimators': 331, 'num_leaves': 103, 'min_child_samples': 8, 'subsample': 0.8272842914562182, 'colsample_bytree': 0.6670396463340189, 'reg_alpha': 2.740488480549128, 'reg_lambda': 4.123025374819715}. Best is trial 33 with value: 9.681774935924286.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:57:56,294] Trial 34 finished with value: 9.701424695208804 and parameters: {'max_depth': 7, 'learning_rate': 0.013414650181596025, 'n_estimators': 347, 'num_leaves': 22, 'min_child_samples': 6, 'subsample': 0.7686992964852797, 'colsample_bytree': 0.6651710830107969, 'reg_alpha': 2.3867436159591544, 'reg_lambda': 4.470595229929103}. Best is trial 33 with value: 9.681774935924286.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:58:02,961] Trial 35 finished with value: 9.745049904236438 and parameters: {'max_depth': 8, 'learning_rate': 0.016910996966373517, 'n_estimators': 709, 'num_leaves': 79, 'min_child_samples': 10, 'subsample': 0.8248012148048075, 'colsample_bytree': 0.7336777967507182, 'reg_alpha': 3.1066254138216722, 'reg_lambda': 4.115740257826845}. Best is trial 33 with value: 9.681774935924286.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:58:04,364] Trial 36 finished with value: 9.700368872839581 and parameters: {'max_depth': 5, 'learning_rate': 0.012727319698321036, 'n_estimators': 334, 'num_leaves': 99, 'min_child_samples': 35, 'subsample': 0.7247241981878124, 'colsample_bytree': 0.6663490908686007, 'reg_alpha': 2.545333088362291, 'reg_lambda': 4.287843642184841}. Best is trial 33 with value: 9.681774935924286.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:58:05,914] Trial 37 finished with value: 9.691289718431948 and parameters: {'max_depth': 4, 'learning_rate': 0.018208318465336985, 'n_estimators': 630, 'num_leaves': 103, 'min_child_samples': 7, 'subsample': 0.8516066177577387, 'colsample_bytree': 0.5263166297007096, 'reg_alpha': 2.7225979295007057, 'reg_lambda': 3.706115005553661}. Best is trial 33 with value: 9.681774935924286.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:58:15,425] Trial 38 finished with value: 9.765863467513181 and parameters: {'max_depth': 9, 'learning_rate': 0.018103085704319812, 'n_estimators': 821, 'num_leaves': 104, 'min_child_samples': 7, 'subsample': 0.8559177923837541, 'colsample_bytree': 0.5695735101284929, 'reg_alpha': 1.9310412528212588, 'reg_lambda': 4.388571448012585}. Best is trial 33 with value: 9.681774935924286.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:58:16,520] Trial 39 finished with value: 9.740351426367054 and parameters: {'max_depth': 3, 'learning_rate': 0.01074092332687561, 'n_estimators': 622, 'num_leaves': 90, 'min_child_samples': 8, 'subsample': 0.8383429256039981, 'colsample_bytree': 0.5333346284201235, 'reg_alpha': 3.065975969985818, 'reg_lambda': 3.776277036595165}. Best is trial 33 with value: 9.681774935924286.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:58:21,912] Trial 40 finished with value: 9.784145972578404 and parameters: {'max_depth': 7, 'learning_rate': 0.014696069267728644, 'n_estimators': 1029, 'num_leaves': 82, 'min_child_samples': 48, 'subsample': 0.7833631619177984, 'colsample_bytree': 0.6952208095726164, 'reg_alpha': 2.1031672265975967, 'reg_lambda': 3.9865741355795343}. Best is trial 33 with value: 9.681774935924286.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:58:22,715] Trial 41 finished with value: 9.698529242630908 and parameters: {'max_depth': 4, 'learning_rate': 0.01859629101637205, 'n_estimators': 283, 'num_leaves': 119, 'min_child_samples': 11, 'subsample': 0.8198876493823531, 'colsample_bytree': 0.5276049502585479, 'reg_alpha': 2.662112576084103, 'reg_lambda': 3.1340701840174128}. Best is trial 33 with value: 9.681774935924286.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:58:24,024] Trial 42 finished with value: 9.713452813927107 and parameters: {'max_depth': 4, 'learning_rate': 0.018727856670987458, 'n_estimators': 482, 'num_leaves': 105, 'min_child_samples': 5, 'subsample': 0.8575310013515859, 'colsample_bytree': 0.5315108467511471, 'reg_alpha': 2.809109935425243, 'reg_lambda': 3.3922833676714554}. Best is trial 33 with value: 9.681774935924286.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:58:25,267] Trial 43 finished with value: 9.718068029868238 and parameters: {'max_depth': 5, 'learning_rate': 0.01156880213556624, 'n_estimators': 262, 'num_leaves': 114, 'min_child_samples': 17, 'subsample': 0.8147385940727501, 'colsample_bytree': 0.5250889857180836, 'reg_alpha': 2.3626909678207144, 'reg_lambda': 3.162926305288219}. Best is trial 33 with value: 9.681774935924286.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:58:26,106] Trial 44 finished with value: 9.930256469055273 and parameters: {'max_depth': 1, 'learning_rate': 0.012468831343864345, 'n_estimators': 933, 'num_leaves': 96, 'min_child_samples': 12, 'subsample': 0.8951852825175367, 'colsample_bytree': 0.5740202544888671, 'reg_alpha': 1.67270485139537, 'reg_lambda': 3.687602084976368}. Best is trial 33 with value: 9.681774935924286.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:58:28,083] Trial 45 finished with value: 9.676483794717404 and parameters: {'max_depth': 6, 'learning_rate': 0.01618587306432535, 'n_estimators': 337, 'num_leaves': 72, 'min_child_samples': 14, 'subsample': 0.520126632158319, 'colsample_bytree': 0.6189020927690683, 'reg_alpha': 3.1366556482329577, 'reg_lambda': 2.6878904293131196}. Best is trial 45 with value: 9.676483794717404.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:58:30,866] Trial 46 finished with value: 9.704867048320168 and parameters: {'max_depth': 6, 'learning_rate': 0.015731726130489135, 'n_estimators': 565, 'num_leaves': 72, 'min_child_samples': 24, 'subsample': 0.5048827703468807, 'colsample_bytree': 0.6005936640311536, 'reg_alpha': 3.1466313648173, 'reg_lambda': 2.657173825195455}. Best is trial 45 with value: 9.676483794717404.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:58:35,767] Trial 47 finished with value: 9.705619177352473 and parameters: {'max_depth': 7, 'learning_rate': 0.010058600532571906, 'n_estimators': 699, 'num_leaves': 58, 'min_child_samples': 14, 'subsample': 0.5356342459634796, 'colsample_bytree': 0.6259988590719654, 'reg_alpha': 3.9728543308231794, 'reg_lambda': 0.07156861012030735}. Best is trial 45 with value: 9.676483794717404.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:58:38,016] Trial 48 finished with value: 9.679769212087711 and parameters: {'max_depth': 6, 'learning_rate': 0.014233185158566706, 'n_estimators': 440, 'num_leaves': 39, 'min_child_samples': 20, 'subsample': 0.5367760271014307, 'colsample_bytree': 0.6706342251428918, 'reg_alpha': 3.252022364481605, 'reg_lambda': 2.549224330691051}. Best is trial 45 with value: 9.676483794717404.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533


[I 2024-11-21 21:58:40,176] Trial 49 finished with value: 9.68518088042137 and parameters: {'max_depth': 6, 'learning_rate': 0.013958199914882298, 'n_estimators': 450, 'num_leaves': 29, 'min_child_samples': 20, 'subsample': 0.5293429866971587, 'colsample_bytree': 0.715437769865603, 'reg_alpha': 3.2230606904511356, 'reg_lambda': 2.0799837603354505}. Best is trial 45 with value: 9.676483794717404.


Best LightGBM parameters for home_score: {'max_depth': 6, 'learning_rate': 0.01618587306432535, 'n_estimators': 337, 'num_leaves': 72, 'min_child_samples': 14, 'subsample': 0.520126632158319, 'colsample_bytree': 0.6189020927690683, 'reg_alpha': 3.1366556482329577, 'reg_lambda': 2.6878904293131196}
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 75.011533
LightGBM with Outlier Score Feature R^2: 0.1720
LightGBM with Outlier Score Feature MAE: 7.6466
LightGBM with Outlier Score Feature RMSE: 9.6765


[I 2024-11-21 21:58:42,981] A new study created in memory with name: no-name-8dd5a4c6-55ca-4bf6-8e60-d08cbbc66642


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 21:58:44,829] Trial 0 finished with value: 10.515365015779647 and parameters: {'max_depth': -1, 'learning_rate': 0.2558507207867621, 'n_estimators': 264, 'num_leaves': 38, 'min_child_samples': 37, 'subsample': 0.9438979704926936, 'colsample_bytree': 0.9800863296479254, 'reg_alpha': 1.1237555616150425, 'reg_lambda': 0.48081346704159755}. Best is trial 0 with value: 10.515365015779647.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 21:58:50,208] Trial 1 finished with value: 10.028055173025422 and parameters: {'max_depth': 9, 'learning_rate': 0.0445040994349695, 'n_estimators': 571, 'num_leaves': 66, 'min_child_samples': 29, 'subsample': 0.8745666903527031, 'colsample_bytree': 0.893427529890915, 'reg_alpha': 3.2862971662771594, 'reg_lambda': 0.47843780116268164}. Best is trial 1 with value: 10.028055173025422.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 21:58:51,710] Trial 2 finished with value: 9.932534946355537 and parameters: {'max_depth': 3, 'learning_rate': 0.014809105401010356, 'n_estimators': 811, 'num_leaves': 86, 'min_child_samples': 9, 'subsample': 0.7857049824156692, 'colsample_bytree': 0.7365841011700374, 'reg_alpha': 1.72727916385135, 'reg_lambda': 2.4254591552339666}. Best is trial 2 with value: 9.932534946355537.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 21:58:56,848] Trial 3 finished with value: 10.05886767722082 and parameters: {'max_depth': 8, 'learning_rate': 0.0537688366893165, 'n_estimators': 730, 'num_leaves': 57, 'min_child_samples': 26, 'subsample': 0.8955731325627432, 'colsample_bytree': 0.640423986345809, 'reg_alpha': 4.880899426767078, 'reg_lambda': 4.354488699200907}. Best is trial 2 with value: 9.932534946355537.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 21:58:57,830] Trial 4 finished with value: 9.954585543228086 and parameters: {'max_depth': 2, 'learning_rate': 0.029886080323839624, 'n_estimators': 613, 'num_leaves': 65, 'min_child_samples': 31, 'subsample': 0.9167883846243968, 'colsample_bytree': 0.8675222621542008, 'reg_alpha': 2.6740978920267615, 'reg_lambda': 4.762566613927268}. Best is trial 2 with value: 9.932534946355537.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 21:59:06,300] Trial 5 finished with value: 10.211465962824422 and parameters: {'max_depth': 0, 'learning_rate': 0.15416164853105802, 'n_estimators': 1280, 'num_leaves': 140, 'min_child_samples': 40, 'subsample': 0.8141948381364841, 'colsample_bytree': 0.6628341815006429, 'reg_alpha': 4.557803234727325, 'reg_lambda': 2.276327873078774}. Best is trial 2 with value: 9.932534946355537.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 21:59:10,709] Trial 6 finished with value: 10.237700479397237 and parameters: {'max_depth': 9, 'learning_rate': 0.1362128425930863, 'n_estimators': 396, 'num_leaves': 83, 'min_child_samples': 14, 'subsample': 0.9631011590190555, 'colsample_bytree': 0.9644121126957328, 'reg_alpha': 0.35835771659218696, 'reg_lambda': 1.857689541212531}. Best is trial 2 with value: 9.932534946355537.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 21:59:19,452] Trial 7 finished with value: 9.900328788740445 and parameters: {'max_depth': 10, 'learning_rate': 0.013700792057298648, 'n_estimators': 866, 'num_leaves': 74, 'min_child_samples': 32, 'subsample': 0.7482223551499396, 'colsample_bytree': 0.9558524344801901, 'reg_alpha': 2.2306654767441265, 'reg_lambda': 0.2853719167883151}. Best is trial 7 with value: 9.900328788740445.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 21:59:20,809] Trial 8 finished with value: 9.966035973925338 and parameters: {'max_depth': 2, 'learning_rate': 0.02209638291218082, 'n_estimators': 1153, 'num_leaves': 55, 'min_child_samples': 18, 'subsample': 0.6698153121704506, 'colsample_bytree': 0.65463811274253, 'reg_alpha': 1.7810315799169936, 'reg_lambda': 4.034327936252831}. Best is trial 7 with value: 9.900328788740445.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 21:59:22,608] Trial 9 finished with value: 9.856688747105146 and parameters: {'max_depth': 8, 'learning_rate': 0.03921393064394329, 'n_estimators': 138, 'num_leaves': 147, 'min_child_samples': 12, 'subsample': 0.8606658898951257, 'colsample_bytree': 0.7161511318535067, 'reg_alpha': 2.4832330922067554, 'reg_lambda': 0.10228087159935284}. Best is trial 9 with value: 9.856688747105146.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 21:59:23,154] Trial 10 finished with value: 9.863500390988332 and parameters: {'max_depth': 6, 'learning_rate': 0.08135583203061596, 'n_estimators': 109, 'num_leaves': 145, 'min_child_samples': 50, 'subsample': 0.5737094892822887, 'colsample_bytree': 0.5207407711305693, 'reg_alpha': 3.4422032759234957, 'reg_lambda': 1.2006166923265034}. Best is trial 9 with value: 9.856688747105146.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 21:59:23,964] Trial 11 finished with value: 9.938120676074904 and parameters: {'max_depth': 7, 'learning_rate': 0.08413883622799785, 'n_estimators': 132, 'num_leaves': 150, 'min_child_samples': 50, 'subsample': 0.5180908428907204, 'colsample_bytree': 0.561624013264214, 'reg_alpha': 3.589562996727452, 'reg_lambda': 1.2676908425498736}. Best is trial 9 with value: 9.856688747105146.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 21:59:24,503] Trial 12 finished with value: 9.868304333798699 and parameters: {'max_depth': 6, 'learning_rate': 0.06028630830095531, 'n_estimators': 104, 'num_leaves': 122, 'min_child_samples': 50, 'subsample': 0.5905912123862428, 'colsample_bytree': 0.5423930008862986, 'reg_alpha': 3.2876098276072097, 'reg_lambda': 1.2404903604157667}. Best is trial 9 with value: 9.856688747105146.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 21:59:26,104] Trial 13 finished with value: 9.902776410113981 and parameters: {'max_depth': 5, 'learning_rate': 0.031005351694852388, 'n_estimators': 392, 'num_leaves': 110, 'min_child_samples': 5, 'subsample': 0.692479847131184, 'colsample_bytree': 0.5311723030676863, 'reg_alpha': 4.052259943192569, 'reg_lambda': 0.08217094228011645}. Best is trial 9 with value: 9.856688747105146.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 21:59:30,924] Trial 14 finished with value: 10.2618699961163 and parameters: {'max_depth': 5, 'learning_rate': 0.08462834444600667, 'n_estimators': 1487, 'num_leaves': 113, 'min_child_samples': 20, 'subsample': 0.503668380158685, 'colsample_bytree': 0.5003666637369135, 'reg_alpha': 2.7402961631751133, 'reg_lambda': 1.3090434266960358}. Best is trial 9 with value: 9.856688747105146.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 21:59:33,048] Trial 15 finished with value: 9.959563415821735 and parameters: {'max_depth': 7, 'learning_rate': 0.03983506597955541, 'n_estimators': 343, 'num_leaves': 132, 'min_child_samples': 45, 'subsample': 0.9882648769691915, 'colsample_bytree': 0.7760581365304384, 'reg_alpha': 3.9161346809595647, 'reg_lambda': 3.1763853277559657}. Best is trial 9 with value: 9.856688747105146.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 21:59:33,689] Trial 16 finished with value: 9.900948483727257 and parameters: {'max_depth': 4, 'learning_rate': 0.02125501436058302, 'n_estimators': 221, 'num_leaves': 101, 'min_child_samples': 24, 'subsample': 0.8409182415183903, 'colsample_bytree': 0.5929289251065035, 'reg_alpha': 2.9514874406945077, 'reg_lambda': 0.8496065369924579}. Best is trial 9 with value: 9.856688747105146.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 21:59:37,130] Trial 17 finished with value: 9.845841532685576 and parameters: {'max_depth': 6, 'learning_rate': 0.01095258481733028, 'n_estimators': 511, 'num_leaves': 148, 'min_child_samples': 13, 'subsample': 0.75326341757486, 'colsample_bytree': 0.6020557785610214, 'reg_alpha': 2.3086677139613974, 'reg_lambda': 0.8278793183175779}. Best is trial 17 with value: 9.845841532685576.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 21:59:45,333] Trial 18 finished with value: 9.842906948696935 and parameters: {'max_depth': 10, 'learning_rate': 0.010168739723822566, 'n_estimators': 468, 'num_leaves': 124, 'min_child_samples': 13, 'subsample': 0.8736680684128321, 'colsample_bytree': 0.7166954632190473, 'reg_alpha': 2.2952820245267396, 'reg_lambda': 0.17488725326165344}. Best is trial 18 with value: 9.842906948696935.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 21:59:57,825] Trial 19 finished with value: 9.872451990912785 and parameters: {'max_depth': 10, 'learning_rate': 0.01033088521784389, 'n_estimators': 956, 'num_leaves': 130, 'min_child_samples': 18, 'subsample': 0.7783141172685408, 'colsample_bytree': 0.6016066343489309, 'reg_alpha': 2.110747312878347, 'reg_lambda': 0.7321597167916648}. Best is trial 18 with value: 9.842906948696935.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 21:59:58,417] Trial 20 finished with value: 10.178587305367765 and parameters: {'max_depth': 1, 'learning_rate': 0.010693988517253027, 'n_estimators': 537, 'num_leaves': 101, 'min_child_samples': 5, 'subsample': 0.834652051985052, 'colsample_bytree': 0.7891757074711371, 'reg_alpha': 1.5517002053163336, 'reg_lambda': 0.026069626785813482}. Best is trial 18 with value: 9.842906948696935.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:00:03,530] Trial 21 finished with value: 9.853234370292107 and parameters: {'max_depth': 8, 'learning_rate': 0.016147927773660916, 'n_estimators': 443, 'num_leaves': 134, 'min_child_samples': 12, 'subsample': 0.8657615163970911, 'colsample_bytree': 0.7170989151543968, 'reg_alpha': 2.4214592110174378, 'reg_lambda': 0.016369878600557275}. Best is trial 18 with value: 9.842906948696935.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:00:08,960] Trial 22 finished with value: 9.839981862199208 and parameters: {'max_depth': 8, 'learning_rate': 0.015050561196532038, 'n_estimators': 469, 'num_leaves': 128, 'min_child_samples': 12, 'subsample': 0.9276717993618703, 'colsample_bytree': 0.7050057875274083, 'reg_alpha': 2.360011971876373, 'reg_lambda': 0.7106878924475639}. Best is trial 22 with value: 9.839981862199208.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:00:16,581] Trial 23 finished with value: 9.84496788931283 and parameters: {'max_depth': 9, 'learning_rate': 0.01023876307020376, 'n_estimators': 666, 'num_leaves': 122, 'min_child_samples': 21, 'subsample': 0.9160313154516673, 'colsample_bytree': 0.6552954160787829, 'reg_alpha': 2.8450383072781986, 'reg_lambda': 0.7945787574551101}. Best is trial 22 with value: 9.839981862199208.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:00:24,972] Trial 24 finished with value: 9.910529650455818 and parameters: {'max_depth': 10, 'learning_rate': 0.01944978147593291, 'n_estimators': 689, 'num_leaves': 124, 'min_child_samples': 23, 'subsample': 0.9130269330427192, 'colsample_bytree': 0.687427461489915, 'reg_alpha': 3.0511286797519777, 'reg_lambda': 1.8337780273475208}. Best is trial 22 with value: 9.839981862199208.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:00:36,000] Trial 25 finished with value: 9.900217675367928 and parameters: {'max_depth': 9, 'learning_rate': 0.013064435785814704, 'n_estimators': 988, 'num_leaves': 112, 'min_child_samples': 17, 'subsample': 0.991727681893636, 'colsample_bytree': 0.6937735292263874, 'reg_alpha': 2.9200715293195794, 'reg_lambda': 0.5614268157437889}. Best is trial 22 with value: 9.839981862199208.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:00:39,039] Trial 26 finished with value: 9.883961876009721 and parameters: {'max_depth': 9, 'learning_rate': 0.0175601789070751, 'n_estimators': 695, 'num_leaves': 20, 'min_child_samples': 9, 'subsample': 0.9407115529213353, 'colsample_bytree': 0.7558145692913912, 'reg_alpha': 2.0313777682680056, 'reg_lambda': 0.7448173493624625}. Best is trial 22 with value: 9.839981862199208.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:00:41,921] Trial 27 finished with value: 9.862645722387299 and parameters: {'max_depth': 7, 'learning_rate': 0.010335269452138418, 'n_estimators': 275, 'num_leaves': 96, 'min_child_samples': 21, 'subsample': 0.901189944761358, 'colsample_bytree': 0.7910560050245883, 'reg_alpha': 2.6888736618639064, 'reg_lambda': 1.0310392814310072}. Best is trial 22 with value: 9.839981862199208.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:00:51,474] Trial 28 finished with value: 9.859554471042868 and parameters: {'max_depth': 10, 'learning_rate': 0.013438053276936186, 'n_estimators': 628, 'num_leaves': 120, 'min_child_samples': 9, 'subsample': 0.9423937142629537, 'colsample_bytree': 0.648249967544957, 'reg_alpha': 1.2692721867780365, 'reg_lambda': 1.5909790714700227}. Best is trial 22 with value: 9.839981862199208.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:00:56,465] Trial 29 finished with value: 9.876710966787748 and parameters: {'max_depth': 8, 'learning_rate': 0.024758010094769373, 'n_estimators': 481, 'num_leaves': 137, 'min_child_samples': 16, 'subsample': 0.9545509395426444, 'colsample_bytree': 0.6894658516929062, 'reg_alpha': 0.8876381496044461, 'reg_lambda': 0.4295621566866592}. Best is trial 22 with value: 9.839981862199208.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:00:59,571] Trial 30 finished with value: 9.866291714534539 and parameters: {'max_depth': 9, 'learning_rate': 0.016082209563617993, 'n_estimators': 252, 'num_leaves': 93, 'min_child_samples': 34, 'subsample': 0.8845401781489719, 'colsample_bytree': 0.7501019892495979, 'reg_alpha': 1.985047434952179, 'reg_lambda': 0.5393947479565219}. Best is trial 22 with value: 9.839981862199208.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:01:02,733] Trial 31 finished with value: 9.852836426848278 and parameters: {'max_depth': 6, 'learning_rate': 0.012012611020524979, 'n_estimators': 499, 'num_leaves': 123, 'min_child_samples': 14, 'subsample': 0.9106064016464164, 'colsample_bytree': 0.623367523270413, 'reg_alpha': 2.3811235363710157, 'reg_lambda': 0.8393994861229173}. Best is trial 22 with value: 9.839981862199208.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:01:08,494] Trial 32 finished with value: 9.84860958759082 and parameters: {'max_depth': 7, 'learning_rate': 0.012450598119159469, 'n_estimators': 595, 'num_leaves': 139, 'min_child_samples': 11, 'subsample': 0.8653636038977519, 'colsample_bytree': 0.6205843868873386, 'reg_alpha': 2.3397912320114527, 'reg_lambda': 0.39461744011096206}. Best is trial 22 with value: 9.839981862199208.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:01:16,458] Trial 33 finished with value: 9.824367727740958 and parameters: {'max_depth': 8, 'learning_rate': 0.010155270894452326, 'n_estimators': 747, 'num_leaves': 130, 'min_child_samples': 15, 'subsample': 0.8013116168046219, 'colsample_bytree': 0.5737206372554986, 'reg_alpha': 3.0610329410967485, 'reg_lambda': 0.912806475450298}. Best is trial 33 with value: 9.824367727740958.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:01:23,043] Trial 34 finished with value: 9.83900622043422 and parameters: {'max_depth': 8, 'learning_rate': 0.010004149349316838, 'n_estimators': 791, 'num_leaves': 108, 'min_child_samples': 27, 'subsample': 0.8185699346751102, 'colsample_bytree': 0.5785125387702998, 'reg_alpha': 3.0963050473573426, 'reg_lambda': 0.3762575653109149}. Best is trial 33 with value: 9.824367727740958.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:01:28,723] Trial 35 finished with value: 9.910511718660292 and parameters: {'max_depth': 8, 'learning_rate': 0.015018485348517603, 'n_estimators': 815, 'num_leaves': 111, 'min_child_samples': 38, 'subsample': 0.8300098412361172, 'colsample_bytree': 0.5736737027748039, 'reg_alpha': 3.3030046735819605, 'reg_lambda': 0.3395675694862382}. Best is trial 33 with value: 9.824367727740958.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:01:30,922] Trial 36 finished with value: 9.922738129111261 and parameters: {'max_depth': 4, 'learning_rate': 0.017993668906558624, 'n_estimators': 943, 'num_leaves': 105, 'min_child_samples': 29, 'subsample': 0.802224059884141, 'colsample_bytree': 0.5613337285158957, 'reg_alpha': 3.0615106291730996, 'reg_lambda': 0.3781337567138542}. Best is trial 33 with value: 9.824367727740958.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:01:37,409] Trial 37 finished with value: 9.873012090423613 and parameters: {'max_depth': 8, 'learning_rate': 0.014581179926459777, 'n_estimators': 783, 'num_leaves': 129, 'min_child_samples': 27, 'subsample': 0.8841558388724614, 'colsample_bytree': 0.5744175836273785, 'reg_alpha': 2.565402754144736, 'reg_lambda': 1.082947630083787}. Best is trial 33 with value: 9.824367727740958.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:01:51,199] Trial 38 finished with value: 9.90215665093678 and parameters: {'max_depth': 10, 'learning_rate': 0.012189080898455048, 'n_estimators': 1097, 'num_leaves': 116, 'min_child_samples': 24, 'subsample': 0.8460910647454193, 'colsample_bytree': 0.627087333696687, 'reg_alpha': 2.656524681161937, 'reg_lambda': 0.2650072110950508}. Best is trial 33 with value: 9.824367727740958.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:02:00,208] Trial 39 finished with value: 9.906763866844333 and parameters: {'max_depth': 9, 'learning_rate': 0.016424631324175244, 'n_estimators': 782, 'num_leaves': 79, 'min_child_samples': 7, 'subsample': 0.813641399101072, 'colsample_bytree': 0.8211485961456672, 'reg_alpha': 1.8373482118029325, 'reg_lambda': 1.5147461375246154}. Best is trial 33 with value: 9.824367727740958.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:02:07,415] Trial 40 finished with value: 9.961740990841676 and parameters: {'max_depth': 7, 'learning_rate': 0.024428296690346456, 'n_estimators': 901, 'num_leaves': 91, 'min_child_samples': 10, 'subsample': 0.7911776108001636, 'colsample_bytree': 0.6718977101057699, 'reg_alpha': 3.1155652906129583, 'reg_lambda': 0.5548915344715774}. Best is trial 33 with value: 9.824367727740958.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:02:14,867] Trial 41 finished with value: 9.852637531570084 and parameters: {'max_depth': 9, 'learning_rate': 0.010194477117100571, 'n_estimators': 631, 'num_leaves': 127, 'min_child_samples': 21, 'subsample': 0.8872938447088234, 'colsample_bytree': 0.6563870325235522, 'reg_alpha': 2.7959185374460938, 'reg_lambda': 0.9914991586524975}. Best is trial 33 with value: 9.824367727740958.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:02:21,936] Trial 42 finished with value: 9.859666831998888 and parameters: {'max_depth': 8, 'learning_rate': 0.012296096887698447, 'n_estimators': 710, 'num_leaves': 117, 'min_child_samples': 15, 'subsample': 0.9315265470017855, 'colsample_bytree': 0.717378579274563, 'reg_alpha': 2.132920038533906, 'reg_lambda': 0.7520762159077029}. Best is trial 33 with value: 9.824367727740958.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:02:29,406] Trial 43 finished with value: 9.84595524767187 and parameters: {'max_depth': 9, 'learning_rate': 0.010091431687450296, 'n_estimators': 569, 'num_leaves': 141, 'min_child_samples': 20, 'subsample': 0.9209147600123471, 'colsample_bytree': 0.6690073177944755, 'reg_alpha': 2.842812161287352, 'reg_lambda': 0.6202518272507587}. Best is trial 33 with value: 9.824367727740958.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:02:38,141] Trial 44 finished with value: 9.915682674233741 and parameters: {'max_depth': 10, 'learning_rate': 0.013893731155707259, 'n_estimators': 861, 'num_leaves': 106, 'min_child_samples': 31, 'subsample': 0.8593587098229506, 'colsample_bytree': 0.6348620337373995, 'reg_alpha': 3.5696709434256304, 'reg_lambda': 0.2766503380472643}. Best is trial 33 with value: 9.824367727740958.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:02:41,920] Trial 45 finished with value: 9.84935120974748 and parameters: {'max_depth': 8, 'learning_rate': 0.011809745956993736, 'n_estimators': 344, 'num_leaves': 118, 'min_child_samples': 26, 'subsample': 0.9756518421461915, 'colsample_bytree': 0.7305450658157145, 'reg_alpha': 2.5403595873651756, 'reg_lambda': 1.0043369477221045}. Best is trial 33 with value: 9.824367727740958.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:02:54,274] Trial 46 finished with value: 9.900211042060684 and parameters: {'max_depth': 9, 'learning_rate': 0.014106581274462513, 'n_estimators': 1071, 'num_leaves': 135, 'min_child_samples': 18, 'subsample': 0.9671081064114434, 'colsample_bytree': 0.7001135098072087, 'reg_alpha': 3.2594656631608916, 'reg_lambda': 0.21920438006937604}. Best is trial 33 with value: 9.824367727740958.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:02:56,881] Trial 47 finished with value: 9.848010285646556 and parameters: {'max_depth': 7, 'learning_rate': 0.018604052189109616, 'n_estimators': 431, 'num_leaves': 143, 'min_child_samples': 34, 'subsample': 0.897596458246393, 'colsample_bytree': 0.5479521205758603, 'reg_alpha': 2.803921394571001, 'reg_lambda': 0.032378755870258624}. Best is trial 33 with value: 9.824367727740958.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:03:11,365] Trial 48 finished with value: 9.875179518542232 and parameters: {'max_depth': 0, 'learning_rate': 0.01170333371891739, 'n_estimators': 757, 'num_leaves': 125, 'min_child_samples': 7, 'subsample': 0.8193958504369021, 'colsample_bytree': 0.6497038193191684, 'reg_alpha': 2.2366871083117577, 'reg_lambda': 1.371116491239491}. Best is trial 33 with value: 9.824367727740958.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959


[I 2024-11-21 22:03:17,700] Trial 49 finished with value: 9.872880672224962 and parameters: {'max_depth': 10, 'learning_rate': 0.015194406710889721, 'n_estimators': 690, 'num_leaves': 66, 'min_child_samples': 15, 'subsample': 0.8474835184079109, 'colsample_bytree': 0.5860599922352759, 'reg_alpha': 2.5652688358734603, 'reg_lambda': 0.5798619080333429}. Best is trial 33 with value: 9.824367727740958.


Best LightGBM parameters for away_score: {'max_depth': 8, 'learning_rate': 0.010155270894452326, 'n_estimators': 747, 'num_leaves': 130, 'min_child_samples': 15, 'subsample': 0.8013116168046219, 'colsample_bytree': 0.5737206372554986, 'reg_alpha': 3.0610329410967485, 'reg_lambda': 0.912806475450298}
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 63.093959
LightGBM with Outlier Score Feature R^2: 0.1537
LightGBM with Outlier Score Feature MAE: 7.7007
LightGBM with Outlier Score Feature RMSE: 9.8244


[I 2024-11-21 22:03:25,740] A new study created in memory with name: no-name-f56cd30c-e9ba-417e-9e7c-f0bbe0e6cd69


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:03:26,383] Trial 0 finished with value: 18.157943879050908 and parameters: {'max_depth': 1, 'learning_rate': 0.22190891062702423, 'n_estimators': 599, 'num_leaves': 52, 'min_child_samples': 47, 'subsample': 0.9233609192560507, 'colsample_bytree': 0.5942315654720637, 'reg_alpha': 1.708917235640945, 'reg_lambda': 3.5090500051198403}. Best is trial 0 with value: 18.157943879050908.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:03:27,666] Trial 1 finished with value: 17.79220715660715 and parameters: {'max_depth': 7, 'learning_rate': 0.0510732391264313, 'n_estimators': 190, 'num_leaves': 105, 'min_child_samples': 28, 'subsample': 0.8858192301693752, 'colsample_bytree': 0.6463285763066935, 'reg_alpha': 2.3685784071170497, 'reg_lambda': 3.7104650512530304}. Best is trial 1 with value: 17.79220715660715.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:03:34,190] Trial 2 finished with value: 18.11814908980838 and parameters: {'max_depth': 7, 'learning_rate': 0.02404443232935073, 'n_estimators': 1444, 'num_leaves': 26, 'min_child_samples': 31, 'subsample': 0.7068616029088713, 'colsample_bytree': 0.943669224585375, 'reg_alpha': 3.1508563098378812, 'reg_lambda': 3.141777815904712}. Best is trial 1 with value: 17.79220715660715.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:03:36,897] Trial 3 finished with value: 18.42477869969776 and parameters: {'max_depth': 8, 'learning_rate': 0.11252238214966359, 'n_estimators': 363, 'num_leaves': 69, 'min_child_samples': 34, 'subsample': 0.8809998933832903, 'colsample_bytree': 0.8848897315576079, 'reg_alpha': 2.464926024583157, 'reg_lambda': 2.990336640263356}. Best is trial 1 with value: 17.79220715660715.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:03:43,836] Trial 4 finished with value: 17.904802201563474 and parameters: {'max_depth': 0, 'learning_rate': 0.025641396203400883, 'n_estimators': 741, 'num_leaves': 64, 'min_child_samples': 37, 'subsample': 0.7674607394932643, 'colsample_bytree': 0.6213708096127268, 'reg_alpha': 2.454812553583203, 'reg_lambda': 2.831606665124695}. Best is trial 1 with value: 17.79220715660715.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:03:45,452] Trial 5 finished with value: 17.969735423796628 and parameters: {'max_depth': 4, 'learning_rate': 0.036865048409719915, 'n_estimators': 789, 'num_leaves': 104, 'min_child_samples': 32, 'subsample': 0.8237801407772238, 'colsample_bytree': 0.5055828050777594, 'reg_alpha': 0.5429558365557252, 'reg_lambda': 0.9428408914887376}. Best is trial 1 with value: 17.79220715660715.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:03:45,787] Trial 6 finished with value: 17.99721053266549 and parameters: {'max_depth': 1, 'learning_rate': 0.17554818391784618, 'n_estimators': 322, 'num_leaves': 119, 'min_child_samples': 22, 'subsample': 0.8510761153373703, 'colsample_bytree': 0.5075906238205802, 'reg_alpha': 0.9351955857601035, 'reg_lambda': 3.2305928278937603}. Best is trial 1 with value: 17.79220715660715.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:03:50,287] Trial 7 finished with value: 17.852564958427852 and parameters: {'max_depth': 7, 'learning_rate': 0.022371329312556986, 'n_estimators': 715, 'num_leaves': 142, 'min_child_samples': 24, 'subsample': 0.7693481932990541, 'colsample_bytree': 0.7219974138534909, 'reg_alpha': 1.7887248146521029, 'reg_lambda': 3.7707923688523364}. Best is trial 1 with value: 17.79220715660715.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:00,189] Trial 8 finished with value: 17.8365988341497 and parameters: {'max_depth': 9, 'learning_rate': 0.010903077432334683, 'n_estimators': 1267, 'num_leaves': 75, 'min_child_samples': 27, 'subsample': 0.7207797650482999, 'colsample_bytree': 0.5216229254779257, 'reg_alpha': 4.294112806477203, 'reg_lambda': 0.4190741596462688}. Best is trial 1 with value: 17.79220715660715.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:09,466] Trial 9 finished with value: 18.372895719323473 and parameters: {'max_depth': -1, 'learning_rate': 0.0986838614380474, 'n_estimators': 1067, 'num_leaves': 51, 'min_child_samples': 21, 'subsample': 0.7011836391673986, 'colsample_bytree': 0.9841851268533474, 'reg_alpha': 2.994509600710529, 'reg_lambda': 1.7957728762343605}. Best is trial 1 with value: 17.79220715660715.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:10,022] Trial 10 finished with value: 17.721441458377864 and parameters: {'max_depth': 4, 'learning_rate': 0.06182030424486212, 'n_estimators': 172, 'num_leaves': 102, 'min_child_samples': 10, 'subsample': 0.9970428899304321, 'colsample_bytree': 0.7702452674548248, 'reg_alpha': 4.946503891190152, 'reg_lambda': 4.865555288955185}. Best is trial 10 with value: 17.721441458377864.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:10,457] Trial 11 finished with value: 17.710039361984173 and parameters: {'max_depth': 4, 'learning_rate': 0.06255870713716122, 'n_estimators': 112, 'num_leaves': 102, 'min_child_samples': 6, 'subsample': 0.9882533165121143, 'colsample_bytree': 0.7523267475784808, 'reg_alpha': 4.746595928591543, 'reg_lambda': 4.837855904009075}. Best is trial 11 with value: 17.710039361984173.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:10,839] Trial 12 finished with value: 17.766131275597928 and parameters: {'max_depth': 3, 'learning_rate': 0.07525991309717073, 'n_estimators': 161, 'num_leaves': 96, 'min_child_samples': 5, 'subsample': 0.9929347421130463, 'colsample_bytree': 0.8074617137536322, 'reg_alpha': 4.963754763718536, 'reg_lambda': 4.787032588706967}. Best is trial 11 with value: 17.710039361984173.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:12,709] Trial 13 finished with value: 17.93030984794818 and parameters: {'max_depth': 5, 'learning_rate': 0.06001590153060231, 'n_estimators': 457, 'num_leaves': 132, 'min_child_samples': 6, 'subsample': 0.9894826837559191, 'colsample_bytree': 0.7740506799877487, 'reg_alpha': 4.8838599211487415, 'reg_lambda': 4.990296726005447}. Best is trial 11 with value: 17.710039361984173.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:13,607] Trial 14 finished with value: 18.100310390357823 and parameters: {'max_depth': 3, 'learning_rate': 0.09595114430640073, 'n_estimators': 459, 'num_leaves': 89, 'min_child_samples': 12, 'subsample': 0.5860542349475542, 'colsample_bytree': 0.8438037443584975, 'reg_alpha': 4.029116918956152, 'reg_lambda': 4.285910939538911}. Best is trial 11 with value: 17.710039361984173.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:14,376] Trial 15 finished with value: 17.694991278972193 and parameters: {'max_depth': 5, 'learning_rate': 0.04492700149532717, 'n_estimators': 168, 'num_leaves': 115, 'min_child_samples': 14, 'subsample': 0.9419335942289515, 'colsample_bytree': 0.7192256447796824, 'reg_alpha': 4.035895768919745, 'reg_lambda': 4.366045999126408}. Best is trial 15 with value: 17.694991278972193.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:17,791] Trial 16 finished with value: 18.076847732057384 and parameters: {'max_depth': 5, 'learning_rate': 0.03930151221129028, 'n_estimators': 959, 'num_leaves': 125, 'min_child_samples': 17, 'subsample': 0.9399115242074773, 'colsample_bytree': 0.7118032495506487, 'reg_alpha': 3.8702218513993434, 'reg_lambda': 4.276327174369418}. Best is trial 15 with value: 17.694991278972193.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:19,379] Trial 17 finished with value: 18.392201798516044 and parameters: {'max_depth': 10, 'learning_rate': 0.15168787610925194, 'n_estimators': 120, 'num_leaves': 115, 'min_child_samples': 16, 'subsample': 0.9303872062862615, 'colsample_bytree': 0.7078062696702107, 'reg_alpha': 3.565265911853684, 'reg_lambda': 2.2953113592997063}. Best is trial 15 with value: 17.694991278972193.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:22,905] Trial 18 finished with value: 18.813532754062642 and parameters: {'max_depth': 6, 'learning_rate': 0.2595987176662798, 'n_estimators': 571, 'num_leaves': 149, 'min_child_samples': 11, 'subsample': 0.8207636237813225, 'colsample_bytree': 0.6733382000846142, 'reg_alpha': 4.344077437043738, 'reg_lambda': 4.244152311097966}. Best is trial 15 with value: 17.694991278972193.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:23,401] Trial 19 finished with value: 17.803338289649325 and parameters: {'max_depth': 2, 'learning_rate': 0.07660376613945628, 'n_estimators': 316, 'num_leaves': 83, 'min_child_samples': 16, 'subsample': 0.9438978812645302, 'colsample_bytree': 0.7474016642040496, 'reg_alpha': 4.466220073356432, 'reg_lambda': 4.138462408647218}. Best is trial 15 with value: 17.694991278972193.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:28,698] Trial 20 finished with value: 18.680389587523113 and parameters: {'max_depth': 6, 'learning_rate': 0.12714869876216314, 'n_estimators': 1064, 'num_leaves': 139, 'min_child_samples': 41, 'subsample': 0.8976040449574759, 'colsample_bytree': 0.8117010604052031, 'reg_alpha': 3.52729088708515, 'reg_lambda': 4.615731922869423}. Best is trial 15 with value: 17.694991278972193.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:29,715] Trial 21 finished with value: 17.82688921191489 and parameters: {'max_depth': 4, 'learning_rate': 0.05835901194245176, 'n_estimators': 254, 'num_leaves': 109, 'min_child_samples': 9, 'subsample': 0.9886132347206233, 'colsample_bytree': 0.7627465399252283, 'reg_alpha': 4.841419446095188, 'reg_lambda': 4.9988520673236625}. Best is trial 15 with value: 17.694991278972193.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:30,173] Trial 22 finished with value: 17.712480578240683 and parameters: {'max_depth': 3, 'learning_rate': 0.043266915659310505, 'n_estimators': 135, 'num_leaves': 96, 'min_child_samples': 12, 'subsample': 0.9963189814376578, 'colsample_bytree': 0.7628109717460294, 'reg_alpha': 4.616923009039157, 'reg_lambda': 4.585170465765287}. Best is trial 15 with value: 17.694991278972193.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:30,717] Trial 23 finished with value: 17.691404082792957 and parameters: {'max_depth': 3, 'learning_rate': 0.04147649768255862, 'n_estimators': 113, 'num_leaves': 90, 'min_child_samples': 14, 'subsample': 0.9579929212341004, 'colsample_bytree': 0.6840141956937931, 'reg_alpha': 4.454984223591596, 'reg_lambda': 3.9182593324379065}. Best is trial 23 with value: 17.691404082792957.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:32,666] Trial 24 finished with value: 17.759932294968035 and parameters: {'max_depth': 5, 'learning_rate': 0.0325655496221247, 'n_estimators': 421, 'num_leaves': 81, 'min_child_samples': 18, 'subsample': 0.9507747455152977, 'colsample_bytree': 0.6753119485626248, 'reg_alpha': 4.024271796080691, 'reg_lambda': 3.9153168927826902}. Best is trial 23 with value: 17.691404082792957.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:33,080] Trial 25 finished with value: 17.81379166789448 and parameters: {'max_depth': 2, 'learning_rate': 0.04902299734982302, 'n_estimators': 101, 'num_leaves': 125, 'min_child_samples': 5, 'subsample': 0.905867706725056, 'colsample_bytree': 0.6925040958466466, 'reg_alpha': 4.470562477817076, 'reg_lambda': 3.4916460312594717}. Best is trial 23 with value: 17.691404082792957.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:33,687] Trial 26 finished with value: 17.76574439751269 and parameters: {'max_depth': 2, 'learning_rate': 0.07731642407195585, 'n_estimators': 263, 'num_leaves': 91, 'min_child_samples': 14, 'subsample': 0.956439007091632, 'colsample_bytree': 0.7291457401404876, 'reg_alpha': 4.056374226806266, 'reg_lambda': 4.489142749130311}. Best is trial 23 with value: 17.691404082792957.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:36,795] Trial 27 finished with value: 17.866847286231888 and parameters: {'max_depth': 6, 'learning_rate': 0.03311442770097515, 'n_estimators': 566, 'num_leaves': 114, 'min_child_samples': 20, 'subsample': 0.8658425294616037, 'colsample_bytree': 0.6547017962531327, 'reg_alpha': 4.494180957064346, 'reg_lambda': 3.957516466854766}. Best is trial 23 with value: 17.691404082792957.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:37,512] Trial 28 finished with value: 17.763505305654252 and parameters: {'max_depth': 3, 'learning_rate': 0.05008650510011981, 'n_estimators': 268, 'num_leaves': 58, 'min_child_samples': 8, 'subsample': 0.9564443674759986, 'colsample_bytree': 0.6128273316767493, 'reg_alpha': 3.712561321160825, 'reg_lambda': 4.505357888608647}. Best is trial 23 with value: 17.691404082792957.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:38,199] Trial 29 finished with value: 18.12665731667232 and parameters: {'max_depth': 1, 'learning_rate': 0.1900705783748444, 'n_estimators': 635, 'num_leaves': 40, 'min_child_samples': 48, 'subsample': 0.8978102619728534, 'colsample_bytree': 0.5797626614653442, 'reg_alpha': 3.393788959495688, 'reg_lambda': 3.7709008816814737}. Best is trial 23 with value: 17.691404082792957.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:43,420] Trial 30 finished with value: 17.915050331795236 and parameters: {'max_depth': 0, 'learning_rate': 0.030161952797724045, 'n_estimators': 397, 'num_leaves': 79, 'min_child_samples': 43, 'subsample': 0.9249232998160086, 'colsample_bytree': 0.7312549657593039, 'reg_alpha': 4.096734923259045, 'reg_lambda': 3.480861477950418}. Best is trial 23 with value: 17.691404082792957.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:44,005] Trial 31 finished with value: 17.69947473872502 and parameters: {'max_depth': 3, 'learning_rate': 0.04701115929981116, 'n_estimators': 224, 'num_leaves': 96, 'min_child_samples': 13, 'subsample': 0.969558795814201, 'colsample_bytree': 0.6963379476705631, 'reg_alpha': 4.648608852920089, 'reg_lambda': 4.614735233273372}. Best is trial 23 with value: 17.691404082792957.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:44,705] Trial 32 finished with value: 17.700675646397595 and parameters: {'max_depth': 4, 'learning_rate': 0.04315553643439569, 'n_estimators': 202, 'num_leaves': 96, 'min_child_samples': 8, 'subsample': 0.9655375328355901, 'colsample_bytree': 0.6880757782122516, 'reg_alpha': 4.636975552748773, 'reg_lambda': 4.16960358823839}. Best is trial 23 with value: 17.691404082792957.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:45,078] Trial 33 finished with value: 17.763744575883784 and parameters: {'max_depth': 2, 'learning_rate': 0.044631174746846226, 'n_estimators': 213, 'num_leaves': 93, 'min_child_samples': 13, 'subsample': 0.9159130645245057, 'colsample_bytree': 0.6890620668118125, 'reg_alpha': 4.30089480499351, 'reg_lambda': 4.03932932701512}. Best is trial 23 with value: 17.691404082792957.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:46,524] Trial 34 finished with value: 17.783591661918503 and parameters: {'max_depth': 5, 'learning_rate': 0.04012841975222781, 'n_estimators': 349, 'num_leaves': 71, 'min_child_samples': 24, 'subsample': 0.9640163198347398, 'colsample_bytree': 0.6512537425972806, 'reg_alpha': 4.691964532157884, 'reg_lambda': 3.5363398861537005}. Best is trial 23 with value: 17.691404082792957.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:47,525] Trial 35 finished with value: 17.854703333217522 and parameters: {'max_depth': 3, 'learning_rate': 0.04825905850703377, 'n_estimators': 522, 'num_leaves': 110, 'min_child_samples': 8, 'subsample': 0.91842970950668, 'colsample_bytree': 0.6311281679605583, 'reg_alpha': 3.764304891057917, 'reg_lambda': 4.334399103428233}. Best is trial 23 with value: 17.691404082792957.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:49,549] Trial 36 finished with value: 17.669342920788456 and parameters: {'max_depth': 7, 'learning_rate': 0.02665124806918099, 'n_estimators': 231, 'num_leaves': 87, 'min_child_samples': 14, 'subsample': 0.8798943537969484, 'colsample_bytree': 0.6662704880851743, 'reg_alpha': 3.1683904630483823, 'reg_lambda': 4.022111976520268}. Best is trial 36 with value: 17.669342920788456.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:52,382] Trial 37 finished with value: 17.66266810194907 and parameters: {'max_depth': 8, 'learning_rate': 0.02126091673322758, 'n_estimators': 278, 'num_leaves': 86, 'min_child_samples': 19, 'subsample': 0.879204176438534, 'colsample_bytree': 0.5960916706810273, 'reg_alpha': 3.334802661275266, 'reg_lambda': 3.2928353693413945}. Best is trial 37 with value: 17.66266810194907.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:04:55,148] Trial 38 finished with value: 17.720576118557407 and parameters: {'max_depth': 8, 'learning_rate': 0.021723860780835855, 'n_estimators': 328, 'num_leaves': 65, 'min_child_samples': 19, 'subsample': 0.875778111201901, 'colsample_bytree': 0.5788568182050957, 'reg_alpha': 3.0165604069406298, 'reg_lambda': 3.220126163352524}. Best is trial 37 with value: 17.66266810194907.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:05:00,140] Trial 39 finished with value: 17.791009988596183 and parameters: {'max_depth': 8, 'learning_rate': 0.018658358585393674, 'n_estimators': 664, 'num_leaves': 74, 'min_child_samples': 28, 'subsample': 0.8509355575579854, 'colsample_bytree': 0.6064452393010111, 'reg_alpha': 2.794540004354982, 'reg_lambda': 2.942901109674506}. Best is trial 37 with value: 17.66266810194907.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:05:05,164] Trial 40 finished with value: 17.981833191666226 and parameters: {'max_depth': 7, 'learning_rate': 0.02685655808640066, 'n_estimators': 882, 'num_leaves': 49, 'min_child_samples': 23, 'subsample': 0.8901303961072926, 'colsample_bytree': 0.6382455261494361, 'reg_alpha': 3.274397528430123, 'reg_lambda': 3.7721276228517433}. Best is trial 37 with value: 17.66266810194907.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:05:21,241] Trial 41 finished with value: 18.072108366789738 and parameters: {'max_depth': 9, 'learning_rate': 0.03502000098912843, 'n_estimators': 1497, 'num_leaves': 87, 'min_child_samples': 15, 'subsample': 0.9191023268722677, 'colsample_bytree': 0.6610549399825758, 'reg_alpha': 3.268451633150131, 'reg_lambda': 3.948133994956058}. Best is trial 37 with value: 17.66266810194907.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:05:23,279] Trial 42 finished with value: 17.63683843249384 and parameters: {'max_depth': 7, 'learning_rate': 0.027289656568980185, 'n_estimators': 215, 'num_leaves': 102, 'min_child_samples': 18, 'subsample': 0.9367364018418163, 'colsample_bytree': 0.6309188031513474, 'reg_alpha': 2.763382304460895, 'reg_lambda': 3.6235761493080085}. Best is trial 42 with value: 17.63683843249384.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:05:25,426] Trial 43 finished with value: 17.706022990824454 and parameters: {'max_depth': 7, 'learning_rate': 0.028395420025504853, 'n_estimators': 277, 'num_leaves': 103, 'min_child_samples': 19, 'subsample': 0.844539503824455, 'colsample_bytree': 0.5831403572995397, 'reg_alpha': 2.7248621905457893, 'reg_lambda': 3.299241688018049}. Best is trial 42 with value: 17.63683843249384.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:05:30,544] Trial 44 finished with value: 17.92056786857118 and parameters: {'max_depth': 9, 'learning_rate': 0.019195507682306212, 'n_estimators': 1333, 'num_leaves': 20, 'min_child_samples': 28, 'subsample': 0.8891286002696428, 'colsample_bytree': 0.6345648456935582, 'reg_alpha': 3.497905885717631, 'reg_lambda': 3.619879491817764}. Best is trial 42 with value: 17.63683843249384.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:05:32,627] Trial 45 finished with value: 17.619335455107507 and parameters: {'max_depth': 8, 'learning_rate': 0.02526342516174182, 'n_estimators': 181, 'num_leaves': 124, 'min_child_samples': 26, 'subsample': 0.9289077180097298, 'colsample_bytree': 0.5497616982035004, 'reg_alpha': 2.15457379346088, 'reg_lambda': 2.7247687422626603}. Best is trial 45 with value: 17.619335455107507.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:05:36,636] Trial 46 finished with value: 17.82979638512399 and parameters: {'max_depth': 10, 'learning_rate': 0.024841560417395103, 'n_estimators': 400, 'num_leaves': 123, 'min_child_samples': 32, 'subsample': 0.8234534080955649, 'colsample_bytree': 0.5445784341527691, 'reg_alpha': 2.321539206436428, 'reg_lambda': 2.8163067142574216}. Best is trial 45 with value: 17.619335455107507.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:05:40,833] Trial 47 finished with value: 17.680242471668606 and parameters: {'max_depth': 8, 'learning_rate': 0.01700180210023734, 'n_estimators': 482, 'num_leaves': 135, 'min_child_samples': 25, 'subsample': 0.8700675103969264, 'colsample_bytree': 0.5534648179994561, 'reg_alpha': 2.3202016672171974, 'reg_lambda': 3.1026884269543986}. Best is trial 45 with value: 17.619335455107507.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:05:46,372] Trial 48 finished with value: 17.62539428407914 and parameters: {'max_depth': 8, 'learning_rate': 0.013694439258207663, 'n_estimators': 498, 'num_leaves': 140, 'min_child_samples': 26, 'subsample': 0.8686301244816224, 'colsample_bytree': 0.5484603526320896, 'reg_alpha': 2.210344704330178, 'reg_lambda': 2.6971007371208304}. Best is trial 45 with value: 17.619335455107507.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492


[I 2024-11-21 22:05:50,562] Trial 49 finished with value: 17.640502111524068 and parameters: {'max_depth': 9, 'learning_rate': 0.014518403252981682, 'n_estimators': 324, 'num_leaves': 145, 'min_child_samples': 26, 'subsample': 0.7940732957757326, 'colsample_bytree': 0.602422216865718, 'reg_alpha': 2.011418356490201, 'reg_lambda': 2.5805555537905143}. Best is trial 45 with value: 17.619335455107507.


Best LightGBM parameters for total: {'max_depth': 8, 'learning_rate': 0.02526342516174182, 'n_estimators': 181, 'num_leaves': 124, 'min_child_samples': 26, 'subsample': 0.9289077180097298, 'colsample_bytree': 0.5497616982035004, 'reg_alpha': 2.15457379346088, 'reg_lambda': 2.7247687422626603}
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 138.105492
LightGBM with Outlier Score Feature R^2: 0.1536
LightGBM with Outlier Score Feature MAE: 13.7907
LightGBM with Outlier Score Feature RMSE: 17.6193


[I 2024-11-21 22:05:53,285] A new study created in memory with name: no-name-390109ef-a01a-4710-9bf7-d96769cbf81d


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:06:00,043] Trial 0 finished with value: 8.487940033484245 and parameters: {'max_depth': 8, 'learning_rate': 0.04217033153860721, 'n_estimators': 1015, 'num_leaves': 53, 'min_child_samples': 44, 'subsample': 0.7430619577841289, 'colsample_bytree': 0.8947410176755506, 'reg_alpha': 2.909293211061896, 'reg_lambda': 2.874682190977143}. Best is trial 0 with value: 8.487940033484245.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:06:00,771] Trial 1 finished with value: 8.561161540083575 and parameters: {'max_depth': 1, 'learning_rate': 0.12091975860087684, 'n_estimators': 653, 'num_leaves': 137, 'min_child_samples': 31, 'subsample': 0.6279809267473585, 'colsample_bytree': 0.7933269562036294, 'reg_alpha': 1.9081035630990129, 'reg_lambda': 2.890527668776357}. Best is trial 0 with value: 8.487940033484245.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:06:06,484] Trial 2 finished with value: 8.319086062406036 and parameters: {'max_depth': 8, 'learning_rate': 0.012096292010979878, 'n_estimators': 1023, 'num_leaves': 39, 'min_child_samples': 38, 'subsample': 0.5098752612219889, 'colsample_bytree': 0.6645636054259476, 'reg_alpha': 1.9854791401089866, 'reg_lambda': 2.808590691582}. Best is trial 2 with value: 8.319086062406036.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:06:35,320] Trial 3 finished with value: 8.379225398439708 and parameters: {'max_depth': -1, 'learning_rate': 0.02386628556037765, 'n_estimators': 1435, 'num_leaves': 138, 'min_child_samples': 23, 'subsample': 0.5394164872810167, 'colsample_bytree': 0.7441968489571044, 'reg_alpha': 2.459708114124539, 'reg_lambda': 0.38893789211611574}. Best is trial 2 with value: 8.319086062406036.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:06:39,667] Trial 4 finished with value: 8.38980069769195 and parameters: {'max_depth': 0, 'learning_rate': 0.04768698727383958, 'n_estimators': 330, 'num_leaves': 83, 'min_child_samples': 25, 'subsample': 0.9334188131308172, 'colsample_bytree': 0.7930711417849918, 'reg_alpha': 0.3017865306722384, 'reg_lambda': 0.5664313510191837}. Best is trial 2 with value: 8.319086062406036.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:06:43,193] Trial 5 finished with value: 8.330605967522072 and parameters: {'max_depth': 8, 'learning_rate': 0.016003133265220747, 'n_estimators': 424, 'num_leaves': 58, 'min_child_samples': 25, 'subsample': 0.645020913505509, 'colsample_bytree': 0.8633456605739926, 'reg_alpha': 0.9554471801364739, 'reg_lambda': 2.982107613613688}. Best is trial 2 with value: 8.319086062406036.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:06:43,623] Trial 6 finished with value: 8.476497576308617 and parameters: {'max_depth': 3, 'learning_rate': 0.059952522334707756, 'n_estimators': 110, 'num_leaves': 98, 'min_child_samples': 35, 'subsample': 0.9204655291485393, 'colsample_bytree': 0.9592490579110202, 'reg_alpha': 4.066320013843535, 'reg_lambda': 1.005315860878016}. Best is trial 2 with value: 8.319086062406036.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:06:44,401] Trial 7 finished with value: 8.5695295012753 and parameters: {'max_depth': 3, 'learning_rate': 0.29271407936534377, 'n_estimators': 247, 'num_leaves': 38, 'min_child_samples': 30, 'subsample': 0.8091775189379399, 'colsample_bytree': 0.771315277948976, 'reg_alpha': 0.5948419921428355, 'reg_lambda': 4.576821504189212}. Best is trial 2 with value: 8.319086062406036.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:06:46,125] Trial 8 finished with value: 8.643413916200865 and parameters: {'max_depth': 7, 'learning_rate': 0.17072121508490418, 'n_estimators': 177, 'num_leaves': 147, 'min_child_samples': 14, 'subsample': 0.8577629840781955, 'colsample_bytree': 0.8390591841619475, 'reg_alpha': 2.0255460824739524, 'reg_lambda': 1.0863485249778653}. Best is trial 2 with value: 8.319086062406036.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:06:47,033] Trial 9 finished with value: 8.321711820317883 and parameters: {'max_depth': 5, 'learning_rate': 0.04603315538710618, 'n_estimators': 179, 'num_leaves': 116, 'min_child_samples': 41, 'subsample': 0.7468158937705951, 'colsample_bytree': 0.5855185249658421, 'reg_alpha': 1.4889386357643897, 'reg_lambda': 2.917804823985401}. Best is trial 2 with value: 8.319086062406036.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:06:51,050] Trial 10 finished with value: 8.309230114987216 and parameters: {'max_depth': 10, 'learning_rate': 0.010618763730782497, 'n_estimators': 1087, 'num_leaves': 20, 'min_child_samples': 49, 'subsample': 0.5474342135023149, 'colsample_bytree': 0.5062839630770222, 'reg_alpha': 4.832416720593607, 'reg_lambda': 1.8432316219712603}. Best is trial 10 with value: 8.309230114987216.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:06:55,642] Trial 11 finished with value: 8.301215703200558 and parameters: {'max_depth': 10, 'learning_rate': 0.010043675317692765, 'n_estimators': 1114, 'num_leaves': 25, 'min_child_samples': 50, 'subsample': 0.5067141909968221, 'colsample_bytree': 0.5012526845003622, 'reg_alpha': 4.917835122185874, 'reg_lambda': 1.8618569371088083}. Best is trial 11 with value: 8.301215703200558.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:07:00,487] Trial 12 finished with value: 8.299000750783888 and parameters: {'max_depth': 10, 'learning_rate': 0.011478578576947404, 'n_estimators': 1314, 'num_leaves': 20, 'min_child_samples': 49, 'subsample': 0.5003355932324931, 'colsample_bytree': 0.5112915336507222, 'reg_alpha': 4.844419659594827, 'reg_lambda': 1.504409495115685}. Best is trial 12 with value: 8.299000750783888.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:07:05,606] Trial 13 finished with value: 8.345982086338656 and parameters: {'max_depth': 10, 'learning_rate': 0.018951986000005343, 'n_estimators': 1451, 'num_leaves': 21, 'min_child_samples': 49, 'subsample': 0.5034645059377368, 'colsample_bytree': 0.5088475439980475, 'reg_alpha': 4.971961557089427, 'reg_lambda': 1.7650470389225403}. Best is trial 12 with value: 8.299000750783888.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:07:10,765] Trial 14 finished with value: 8.326790913807226 and parameters: {'max_depth': 6, 'learning_rate': 0.011201383880786661, 'n_estimators': 1209, 'num_leaves': 69, 'min_child_samples': 50, 'subsample': 0.6173867662897264, 'colsample_bytree': 0.5918516422570919, 'reg_alpha': 3.7017243517419667, 'reg_lambda': 1.9047922484852045}. Best is trial 12 with value: 8.299000750783888.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:07:16,339] Trial 15 finished with value: 8.365116469218503 and parameters: {'max_depth': 10, 'learning_rate': 0.024735271678648987, 'n_estimators': 812, 'num_leaves': 40, 'min_child_samples': 7, 'subsample': 0.5837297203559603, 'colsample_bytree': 0.5768715287247015, 'reg_alpha': 4.229239494667492, 'reg_lambda': 0.077874632044582}. Best is trial 12 with value: 8.299000750783888.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:07:22,498] Trial 16 finished with value: 8.29667102239905 and parameters: {'max_depth': 9, 'learning_rate': 0.01051021556006268, 'n_estimators': 1298, 'num_leaves': 21, 'min_child_samples': 44, 'subsample': 0.6810673166897221, 'colsample_bytree': 0.6716507025155121, 'reg_alpha': 3.1705408332187224, 'reg_lambda': 1.35278232552988}. Best is trial 16 with value: 8.29667102239905.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:07:30,555] Trial 17 finished with value: 8.356689764122292 and parameters: {'max_depth': 8, 'learning_rate': 0.015195555321459846, 'n_estimators': 1301, 'num_leaves': 66, 'min_child_samples': 44, 'subsample': 0.6913089698055598, 'colsample_bytree': 0.6773669922501611, 'reg_alpha': 3.2593894593964103, 'reg_lambda': 0.9969247807373206}. Best is trial 16 with value: 8.29667102239905.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:07:34,204] Trial 18 finished with value: 8.372954048318164 and parameters: {'max_depth': 6, 'learning_rate': 0.028742135852771206, 'n_estimators': 843, 'num_leaves': 87, 'min_child_samples': 36, 'subsample': 0.6862360427442651, 'colsample_bytree': 0.6882541247195935, 'reg_alpha': 3.4068463884560085, 'reg_lambda': 1.453509907406839}. Best is trial 16 with value: 8.29667102239905.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:07:42,692] Trial 19 finished with value: 8.372798141085825 and parameters: {'max_depth': 9, 'learning_rate': 0.016272898676542392, 'n_estimators': 1322, 'num_leaves': 47, 'min_child_samples': 43, 'subsample': 0.9997657130999839, 'colsample_bytree': 0.6188349589409301, 'reg_alpha': 4.287479162857684, 'reg_lambda': 2.3530479855787503}. Best is trial 16 with value: 8.29667102239905.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:07:45,351] Trial 20 finished with value: 8.34733011063547 and parameters: {'max_depth': 3, 'learning_rate': 0.020663942785186067, 'n_estimators': 1496, 'num_leaves': 32, 'min_child_samples': 20, 'subsample': 0.5886238759689484, 'colsample_bytree': 0.7267046695527317, 'reg_alpha': 2.7595418770424383, 'reg_lambda': 0.0729323151553174}. Best is trial 16 with value: 8.29667102239905.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:07:50,341] Trial 21 finished with value: 8.305626410773188 and parameters: {'max_depth': 9, 'learning_rate': 0.010593650645080622, 'n_estimators': 1188, 'num_leaves': 26, 'min_child_samples': 46, 'subsample': 0.5006380889855366, 'colsample_bytree': 0.5416094089555689, 'reg_alpha': 4.621187951041295, 'reg_lambda': 2.2367432311786732}. Best is trial 16 with value: 8.29667102239905.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:07:55,650] Trial 22 finished with value: 8.315880722814692 and parameters: {'max_depth': 9, 'learning_rate': 0.013630901303049612, 'n_estimators': 1298, 'num_leaves': 20, 'min_child_samples': 40, 'subsample': 0.561186250448177, 'colsample_bytree': 0.5435635740450467, 'reg_alpha': 3.789561797544381, 'reg_lambda': 1.4024252002332886}. Best is trial 16 with value: 8.29667102239905.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:08:03,090] Trial 23 finished with value: 8.323998872547428 and parameters: {'max_depth': 10, 'learning_rate': 0.01012059727332931, 'n_estimators': 1134, 'num_leaves': 46, 'min_child_samples': 47, 'subsample': 0.5792744247949836, 'colsample_bytree': 0.6225409746244142, 'reg_alpha': 4.988485030176605, 'reg_lambda': 1.513080097126894}. Best is trial 16 with value: 8.29667102239905.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:08:07,224] Trial 24 finished with value: 8.306977430588415 and parameters: {'max_depth': 7, 'learning_rate': 0.014860824807856757, 'n_estimators': 931, 'num_leaves': 33, 'min_child_samples': 50, 'subsample': 0.5367783191671079, 'colsample_bytree': 0.545453397617697, 'reg_alpha': 4.4457422911140085, 'reg_lambda': 2.1881228478880113}. Best is trial 16 with value: 8.29667102239905.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:08:13,185] Trial 25 finished with value: 8.34269845294839 and parameters: {'max_depth': 9, 'learning_rate': 0.018649361278755064, 'n_estimators': 1358, 'num_leaves': 30, 'min_child_samples': 34, 'subsample': 0.6668466646366557, 'colsample_bytree': 0.509279171844502, 'reg_alpha': 4.594514035480594, 'reg_lambda': 0.7637471057173988}. Best is trial 16 with value: 8.29667102239905.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:08:18,483] Trial 26 finished with value: 8.304481683426832 and parameters: {'max_depth': 10, 'learning_rate': 0.012884739895103723, 'n_estimators': 688, 'num_leaves': 60, 'min_child_samples': 40, 'subsample': 0.6053630466095274, 'colsample_bytree': 0.6307364630904693, 'reg_alpha': 3.911616228115773, 'reg_lambda': 1.3096870842173267}. Best is trial 16 with value: 8.29667102239905.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:08:23,426] Trial 27 finished with value: 8.313648408565111 and parameters: {'max_depth': 7, 'learning_rate': 0.010332229961982766, 'n_estimators': 967, 'num_leaves': 77, 'min_child_samples': 46, 'subsample': 0.5562555049308429, 'colsample_bytree': 0.5695344677289698, 'reg_alpha': 4.443785309433099, 'reg_lambda': 0.7567121736937943}. Best is trial 16 with value: 8.29667102239905.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:08:27,455] Trial 28 finished with value: 8.37004307323341 and parameters: {'max_depth': 5, 'learning_rate': 0.030217084089813988, 'n_estimators': 1207, 'num_leaves': 49, 'min_child_samples': 43, 'subsample': 0.6440552872496084, 'colsample_bytree': 0.6490635257543513, 'reg_alpha': 3.5538661417413224, 'reg_lambda': 1.7786105552050264}. Best is trial 16 with value: 8.29667102239905.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:08:35,620] Trial 29 finished with value: 8.34568119233006 and parameters: {'max_depth': 9, 'learning_rate': 0.01383246029309699, 'n_estimators': 1074, 'num_leaves': 112, 'min_child_samples': 46, 'subsample': 0.7281537483791967, 'colsample_bytree': 0.6990857753313819, 'reg_alpha': 3.1590901745300144, 'reg_lambda': 1.151359039750181}. Best is trial 16 with value: 8.29667102239905.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:08:41,211] Trial 30 finished with value: 8.466711435993375 and parameters: {'max_depth': 6, 'learning_rate': 0.037514567162408925, 'n_estimators': 1378, 'num_leaves': 52, 'min_child_samples': 38, 'subsample': 0.5898316735412295, 'colsample_bytree': 0.6019298206289391, 'reg_alpha': 4.030258885630266, 'reg_lambda': 2.545244308612141}. Best is trial 16 with value: 8.29667102239905.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:08:46,534] Trial 31 finished with value: 8.31826327614789 and parameters: {'max_depth': 10, 'learning_rate': 0.014248304052184411, 'n_estimators': 632, 'num_leaves': 57, 'min_child_samples': 42, 'subsample': 0.61079217051133, 'colsample_bytree': 0.6356109729878576, 'reg_alpha': 3.904942284800287, 'reg_lambda': 1.3031479127606782}. Best is trial 16 with value: 8.29667102239905.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:08:49,989] Trial 32 finished with value: 8.300955665545299 and parameters: {'max_depth': 10, 'learning_rate': 0.012596341922897521, 'n_estimators': 678, 'num_leaves': 28, 'min_child_samples': 45, 'subsample': 0.5297706048287019, 'colsample_bytree': 0.6420187141559529, 'reg_alpha': 4.676500378759174, 'reg_lambda': 1.5370366089175524}. Best is trial 16 with value: 8.29667102239905.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:08:53,159] Trial 33 finished with value: 8.317064302273247 and parameters: {'max_depth': 8, 'learning_rate': 0.012451566938261313, 'n_estimators': 572, 'num_leaves': 38, 'min_child_samples': 46, 'subsample': 0.5275568187102879, 'colsample_bytree': 0.6563851835490082, 'reg_alpha': 4.71460308566063, 'reg_lambda': 1.587483651966204}. Best is trial 16 with value: 8.29667102239905.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:08:56,633] Trial 34 finished with value: 8.30313584349217 and parameters: {'max_depth': 9, 'learning_rate': 0.01752561899750285, 'n_estimators': 739, 'num_leaves': 27, 'min_child_samples': 48, 'subsample': 0.525908592960616, 'colsample_bytree': 0.7225416195846958, 'reg_alpha': 4.268901599723295, 'reg_lambda': 2.0438540395607405}. Best is trial 16 with value: 8.29667102239905.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:09:02,675] Trial 35 finished with value: 8.324309727800898 and parameters: {'max_depth': 8, 'learning_rate': 0.012274887636807561, 'n_estimators': 875, 'num_leaves': 42, 'min_child_samples': 32, 'subsample': 0.5049563852180706, 'colsample_bytree': 0.5629343877510056, 'reg_alpha': 4.659904741014099, 'reg_lambda': 1.605285309953949}. Best is trial 16 with value: 8.29667102239905.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:09:05,618] Trial 36 finished with value: 8.297840210808635 and parameters: {'max_depth': 10, 'learning_rate': 0.02110696163746147, 'n_estimators': 516, 'num_leaves': 28, 'min_child_samples': 38, 'subsample': 0.5603447224808734, 'colsample_bytree': 0.6069434277096752, 'reg_alpha': 4.995441304786031, 'reg_lambda': 1.9169121531327893}. Best is trial 16 with value: 8.29667102239905.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:09:08,554] Trial 37 finished with value: 8.299659528286199 and parameters: {'max_depth': 8, 'learning_rate': 0.022053452249391082, 'n_estimators': 526, 'num_leaves': 34, 'min_child_samples': 38, 'subsample': 0.567089801705724, 'colsample_bytree': 0.6564516105221455, 'reg_alpha': 3.5544689210493736, 'reg_lambda': 2.6027851449511616}. Best is trial 16 with value: 8.29667102239905.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:09:10,999] Trial 38 finished with value: 8.30644187578724 and parameters: {'max_depth': 8, 'learning_rate': 0.021838460413456252, 'n_estimators': 436, 'num_leaves': 36, 'min_child_samples': 38, 'subsample': 0.6320952608458568, 'colsample_bytree': 0.6118127457150884, 'reg_alpha': 3.0165025009698114, 'reg_lambda': 2.523259317933964}. Best is trial 16 with value: 8.29667102239905.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:09:11,742] Trial 39 finished with value: 8.553390382031512 and parameters: {'max_depth': 2, 'learning_rate': 0.017482200742481545, 'n_estimators': 510, 'num_leaves': 44, 'min_child_samples': 27, 'subsample': 0.5609962221601088, 'colsample_bytree': 0.6645689713019258, 'reg_alpha': 3.6030518969432963, 'reg_lambda': 3.146715889328501}. Best is trial 16 with value: 8.29667102239905.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:09:14,098] Trial 40 finished with value: 8.330385982980369 and parameters: {'max_depth': 7, 'learning_rate': 0.02436905071943488, 'n_estimators': 349, 'num_leaves': 123, 'min_child_samples': 30, 'subsample': 0.5693937306754377, 'colsample_bytree': 0.6875490089582329, 'reg_alpha': 2.847820627978108, 'reg_lambda': 3.2424947991809625}. Best is trial 16 with value: 8.29667102239905.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:09:16,951] Trial 41 finished with value: 8.289247326572998 and parameters: {'max_depth': 9, 'learning_rate': 0.016202268325081547, 'n_estimators': 508, 'num_leaves': 32, 'min_child_samples': 38, 'subsample': 0.5372531657324826, 'colsample_bytree': 0.6503856560422789, 'reg_alpha': 4.178774741626879, 'reg_lambda': 2.0198052064847136}. Best is trial 41 with value: 8.289247326572998.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:09:19,800] Trial 42 finished with value: 8.30427315789287 and parameters: {'max_depth': 9, 'learning_rate': 0.02061050364273148, 'n_estimators': 519, 'num_leaves': 33, 'min_child_samples': 37, 'subsample': 0.5470004279136522, 'colsample_bytree': 0.6024953598964419, 'reg_alpha': 4.160962454074879, 'reg_lambda': 2.0908805769428325}. Best is trial 41 with value: 8.289247326572998.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:09:21,529] Trial 43 finished with value: 8.298184442188429 and parameters: {'max_depth': 8, 'learning_rate': 0.016325856907516853, 'n_estimators': 402, 'num_leaves': 21, 'min_child_samples': 32, 'subsample': 0.6083241080266372, 'colsample_bytree': 0.6638113320925995, 'reg_alpha': 3.471543186032251, 'reg_lambda': 2.6651750952330993}. Best is trial 41 with value: 8.289247326572998.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:09:22,918] Trial 44 finished with value: 8.315574584814074 and parameters: {'max_depth': 9, 'learning_rate': 0.016150840789866203, 'n_estimators': 331, 'num_leaves': 20, 'min_child_samples': 33, 'subsample': 0.604429394141181, 'colsample_bytree': 0.7092469342419457, 'reg_alpha': 4.390171308502277, 'reg_lambda': 2.2536879985161216}. Best is trial 41 with value: 8.289247326572998.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:09:25,312] Trial 45 finished with value: 8.312127372694205 and parameters: {'max_depth': 8, 'learning_rate': 0.01201917877537458, 'n_estimators': 445, 'num_leaves': 26, 'min_child_samples': 29, 'subsample': 0.6246238229919902, 'colsample_bytree': 0.7517534074465884, 'reg_alpha': 4.027796655490608, 'reg_lambda': 2.0573645704219277}. Best is trial 41 with value: 8.289247326572998.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:09:26,591] Trial 46 finished with value: 8.345751275726847 and parameters: {'max_depth': 10, 'learning_rate': 0.016105427792259555, 'n_estimators': 254, 'num_leaves': 23, 'min_child_samples': 40, 'subsample': 0.5285832536806883, 'colsample_bytree': 0.6777418594218043, 'reg_alpha': 4.987744821118999, 'reg_lambda': 2.7486806146144915}. Best is trial 41 with value: 8.289247326572998.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:09:27,085] Trial 47 finished with value: 8.932586607822781 and parameters: {'max_depth': 1, 'learning_rate': 0.018754351412215924, 'n_estimators': 376, 'num_leaves': 40, 'min_child_samples': 35, 'subsample': 0.6535994694772361, 'colsample_bytree': 0.5915565273498845, 'reg_alpha': 3.768279953165382, 'reg_lambda': 1.8298391038872384}. Best is trial 41 with value: 8.289247326572998.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:09:29,559] Trial 48 finished with value: 8.36796110383171 and parameters: {'max_depth': 7, 'learning_rate': 0.011494726173807014, 'n_estimators': 276, 'num_leaves': 102, 'min_child_samples': 22, 'subsample': 0.5936980696919184, 'colsample_bytree': 0.631532593963928, 'reg_alpha': 3.296751064702764, 'reg_lambda': 2.357742484784777}. Best is trial 41 with value: 8.289247326572998.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574


[I 2024-11-21 22:09:32,606] Trial 49 finished with value: 8.303611902793087 and parameters: {'max_depth': 9, 'learning_rate': 0.013663310100697235, 'n_estimators': 612, 'num_leaves': 27, 'min_child_samples': 41, 'subsample': 0.5493030506085124, 'colsample_bytree': 0.6712911758496289, 'reg_alpha': 4.8115551898331566, 'reg_lambda': 1.2052449876351163}. Best is trial 41 with value: 8.289247326572998.


Best LightGBM parameters for margin: {'max_depth': 9, 'learning_rate': 0.016202268325081547, 'n_estimators': 508, 'num_leaves': 32, 'min_child_samples': 38, 'subsample': 0.5372531657324826, 'colsample_bytree': 0.6503856560422789, 'reg_alpha': 4.178774741626879, 'reg_lambda': 2.0198052064847136}
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37940
[LightGBM] [Info] Number of data points in the train set: 12399, number of used features: 169
[LightGBM] [Info] Start training from score 11.917574
LightGBM with Outlier Score Feature R^2: 0.2147
LightGBM with Outlier Score Feature MAE: 6.4156
LightGBM with Outlier Score Feature RMSE: 8.2892


### LIGHTGBM OUTLIER REDUCTION

In [62]:
import optuna
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.preprocessing import LabelEncoder
import lightgbm as lgb
import joblib
import os
import pandas as pd

os.makedirs("lgb_models", exist_ok=True)

# Define thresholds for capping target values
target_outlier_thresholds = {
    'home_score': {'low': 50, 'high': 100},
    'away_score': {'low': 50, 'high': 100},
    'total': {'low': 110, 'high': 180},
    'margin': {'low': -30, 'high': 30},
}

# Specify the features and target column for each target
for target_column in ['margin', 'total', 'away_score', 'home_score']:
    categorical_columns = ['Conf_home', 'Conf_away']  # Update to actual categorical columns

    # Prepare data
    model_df = df[[target_column, 'Date','Rk_home','Conf_home', 'AdjEM_home', 'AdjO_home',
       'AdjO_Rk_home', 'AdjD_home', 'AdjD_Rk_home', 'AdjT_home',
       'AdjT_Rk_home', 'Rk_rolling_1_home',
       'AdjEM_rolling_1_home', 'AdjO_rolling_1_home',
       'AdjO_Rk_rolling_1_home', 'AdjD_rolling_1_home',
       'AdjD_Rk_rolling_1_home', 'AdjT_rolling_1_home',
       'AdjT_Rk_rolling_1_home', 'Rk_rolling_3_home',
       'AdjEM_rolling_3_home', 'AdjO_rolling_3_home',
       'AdjO_Rk_rolling_3_home', 'AdjD_rolling_3_home',
       'AdjD_Rk_rolling_3_home', 'AdjT_rolling_3_home',
       'AdjT_Rk_rolling_3_home', 'Rk_rolling_5_home',
       'AdjEM_rolling_5_home', 'AdjO_rolling_5_home',
       'AdjO_Rk_rolling_5_home', 'AdjD_rolling_5_home',
       'AdjD_Rk_rolling_5_home', 'AdjT_rolling_5_home',
       'AdjT_Rk_rolling_5_home', 'Rk_rolling_10_home',
       'AdjEM_rolling_10_home', 'AdjO_rolling_10_home',
       'AdjO_Rk_rolling_10_home', 'AdjD_rolling_10_home',
       'AdjD_Rk_rolling_10_home', 'AdjT_rolling_10_home',
       'AdjT_Rk_rolling_10_home', 'Rk_rolling_20_home',
       'AdjEM_rolling_20_home', 'AdjO_rolling_20_home',
       'AdjO_Rk_rolling_20_home', 'AdjD_rolling_20_home',
       'AdjD_Rk_rolling_20_home', 'AdjT_rolling_20_home',
       'AdjT_Rk_rolling_20_home', 'Rk_away', 'Conf_away',
       'AdjEM_away', 'AdjO_away', 'AdjO_Rk_away', 'AdjD_away',
       'AdjD_Rk_away', 'AdjT_away', 'AdjT_Rk_away',
       'Rk_rolling_1_away', 'AdjEM_rolling_1_away', 'AdjO_rolling_1_away',
       'AdjO_Rk_rolling_1_away', 'AdjD_rolling_1_away',
       'AdjD_Rk_rolling_1_away', 'AdjT_rolling_1_away',
       'AdjT_Rk_rolling_1_away', 'Rk_rolling_3_away',
       'AdjEM_rolling_3_away', 'AdjO_rolling_3_away',
       'AdjO_Rk_rolling_3_away', 'AdjD_rolling_3_away',
       'AdjD_Rk_rolling_3_away', 'AdjT_rolling_3_away',
       'AdjT_Rk_rolling_3_away', 'Rk_rolling_5_away',
       'AdjEM_rolling_5_away', 'AdjO_rolling_5_away',
       'AdjO_Rk_rolling_5_away', 'AdjD_rolling_5_away',
       'AdjD_Rk_rolling_5_away', 'AdjT_rolling_5_away',
       'AdjT_Rk_rolling_5_away', 'Rk_rolling_10_away',
       'AdjEM_rolling_10_away', 'AdjO_rolling_10_away',
       'AdjO_Rk_rolling_10_away', 'AdjD_rolling_10_away',
       'AdjD_Rk_rolling_10_away', 'AdjT_rolling_10_away',
       'AdjT_Rk_rolling_10_away', 'Rk_rolling_20_away',
       'AdjEM_rolling_20_away', 'AdjO_rolling_20_away',
       'AdjO_Rk_rolling_20_away', 'AdjD_rolling_20_away',
       'AdjD_Rk_rolling_20_away', 'AdjT_rolling_20_away',
       'AdjT_Rk_rolling_20_away',  'home_sos',
       'away_sos', 'home_days_since_last_game',
       'home_rolling_avg_score_1', 'home_rolling_avg_score_3',
       'home_rolling_avg_score_7', 'home_rolling_sos_1',
       'home_rolling_sos_3', 'home_rolling_sos_7',
       'home_rolling_avg_score_allowed_1',
       'home_rolling_avg_score_allowed_3',
       'home_rolling_avg_score_allowed_7',
       'home_rolling_opp_score_rank_1', 'home_rolling_opp_score_rank_3',
       'home_rolling_opp_score_rank_7', 'away_days_since_last_game',
       'away_rolling_avg_score_1', 'away_rolling_avg_score_3',
       'away_rolling_avg_score_7', 'away_rolling_sos_1',
       'away_rolling_sos_3', 'away_rolling_sos_7',
       'away_rolling_avg_score_allowed_1',
       'away_rolling_avg_score_allowed_3',
       'away_rolling_avg_score_allowed_7',
       'away_rolling_opp_score_rank_1', 'away_rolling_opp_score_rank_3',
       'away_rolling_opp_score_rank_7']]

    # Handle outliers in the target column using capping
    low_threshold = target_outlier_thresholds[target_column]['low']
    high_threshold = target_outlier_thresholds[target_column]['high']
    model_df[target_column] = model_df[target_column].clip(lower=low_threshold, upper=high_threshold)

    # Encode categorical columns
    for cat_col in categorical_columns:
        le = LabelEncoder()
        model_df[cat_col] = le.fit_transform(df[cat_col])

    # Ensure the dataframe is sorted by the "date" column
    model_df['date'] = pd.to_datetime(df['Date'])  # Convert to datetime if not already
    model_df = model_df.sort_values(by='date')

    # Define the split ratio
    test_ratio = 0.2  # 20% of the data will be used for testing
    split_index = int(len(model_df) * (1 - test_ratio))

    # Split data into training and testing sets
    train_data = model_df.iloc[:split_index]
    test_data = model_df.iloc[split_index:]

    # Separate features and target
    X_train = train_data.drop(columns=['date', 'Date', target_column])
    y_train = train_data[target_column]

    X_test = test_data.drop(columns=['date', 'Date', target_column])
    y_test = test_data[target_column]

    # Define Optuna objective function for LightGBM
    def objective_lgb(trial):
        param = {
            'objective': 'regression',
            'metric': 'rmse',
            'boosting_type': 'gbdt',
            'max_depth': trial.suggest_int('max_depth', -1, 10),
            'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
            'n_estimators': trial.suggest_int('n_estimators', 100, 2000),
            'num_leaves': trial.suggest_int('num_leaves', 20, 150),
            'min_child_samples': trial.suggest_int('min_child_samples', 5, 50),
            'subsample': trial.suggest_float('subsample', 0.5, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
            'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 5.0),
            'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 5.0),
        }
        
        # Convert training and testing sets to lightgbm.Dataset
        train_data = lgb.Dataset(X_train, label=y_train)
        valid_data = lgb.Dataset(X_test, label=y_test)
        
        # Train LightGBM model with early stopping
        model = lgb.train(
            params=param,
            train_set=train_data,
            valid_sets=[train_data, valid_data],
            valid_names=["train", "valid"],
        )
        
        # Use best iteration for prediction
        y_pred = model.predict(X_test, num_iteration=model.best_iteration)
        rmse = mean_squared_error(y_test, y_pred, squared=False)
        return rmse

    # Optimize LightGBM with Optuna
    study_lgb = optuna.create_study(direction='minimize')
    study_lgb.optimize(objective_lgb, n_trials=50)
    print(f"Best LightGBM parameters for {target_column}: {study_lgb.best_params}")

    # Train the best LightGBM model with the optimized parameters
    best_lgb = lgb.LGBMRegressor(**study_lgb.best_params)
    best_lgb.fit(X_train, y_train)
    y_pred_lgb = best_lgb.predict(X_test)

    # Save the final best model for this target column
    best_model_filename = f"lgb_models/{target_column}_best_model.joblib"
    joblib.dump(best_lgb, best_model_filename)

    # Define a function to calculate and display metrics
    def display_metrics(y_true, y_pred, model_name):
        r2 = r2_score(y_true, y_pred)
        mae = mean_absolute_error(y_true, y_pred)
        rmse = mean_squared_error(y_true, y_pred, squared=False)
        print(f"{model_name} R^2: {r2:.4f}")
        print(f"{model_name} MAE: {mae:.4f}")
        print(f"{model_name} RMSE: {rmse:.4f}")

    # Display metrics for the LightGBM model
    display_metrics(y_test, y_pred_lgb, "LightGBM with Capped Targets")

[I 2024-11-22 08:00:06,002] A new study created in memory with name: no-name-246d25bb-a638-4c3c-a031-484546ab076b


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30540
[LightGBM] [Info] Number of data points in the train set: 55835, number of used features: 126
[LightGBM] [Info] Start training from score -0.004191


[I 2024-11-22 08:00:20,918] Trial 0 finished with value: 11.794189331037376 and parameters: {'max_depth': 0, 'learning_rate': 0.18373481246223472, 'n_estimators': 928, 'num_leaves': 61, 'min_child_samples': 22, 'subsample': 0.9765909567114052, 'colsample_bytree': 0.7223046476596681, 'reg_alpha': 3.807303491886786, 'reg_lambda': 1.8642158978293506}. Best is trial 0 with value: 11.794189331037376.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30540
[LightGBM] [Info] Number of data points in the train set: 55835, number of used features: 126
[LightGBM] [Info] Start training from score -0.004191


[I 2024-11-22 08:00:26,432] Trial 1 finished with value: 11.159007201491523 and parameters: {'max_depth': 1, 'learning_rate': 0.01698889754123644, 'n_estimators': 1751, 'num_leaves': 88, 'min_child_samples': 20, 'subsample': 0.8459178804084735, 'colsample_bytree': 0.535642473259419, 'reg_alpha': 0.552887898793446, 'reg_lambda': 1.6958827815119693}. Best is trial 1 with value: 11.159007201491523.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30540
[LightGBM] [Info] Number of data points in the train set: 55835, number of used features: 126
[LightGBM] [Info] Start training from score -0.004191


[I 2024-11-22 08:00:34,834] Trial 2 finished with value: 11.16095075690304 and parameters: {'max_depth': 2, 'learning_rate': 0.036423485666774634, 'n_estimators': 1699, 'num_leaves': 31, 'min_child_samples': 40, 'subsample': 0.658834409723692, 'colsample_bytree': 0.6394599967154572, 'reg_alpha': 0.8783380863179557, 'reg_lambda': 4.41073231477847}. Best is trial 1 with value: 11.159007201491523.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30540
[LightGBM] [Info] Number of data points in the train set: 55835, number of used features: 126
[LightGBM] [Info] Start training from score -0.004191


[I 2024-11-22 08:00:39,671] Trial 3 finished with value: 11.304443719844778 and parameters: {'max_depth': 9, 'learning_rate': 0.18555212764984635, 'n_estimators': 297, 'num_leaves': 21, 'min_child_samples': 14, 'subsample': 0.6483123072557083, 'colsample_bytree': 0.9775182832091296, 'reg_alpha': 0.7257786648501852, 'reg_lambda': 3.267990005996844}. Best is trial 1 with value: 11.159007201491523.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30540
[LightGBM] [Info] Number of data points in the train set: 55835, number of used features: 126
[LightGBM] [Info] Start training from score -0.004191


[I 2024-11-22 08:00:48,510] Trial 4 finished with value: 11.290875721223388 and parameters: {'max_depth': 9, 'learning_rate': 0.09960835091407445, 'n_estimators': 317, 'num_leaves': 59, 'min_child_samples': 27, 'subsample': 0.6632363414246435, 'colsample_bytree': 0.9314444739018537, 'reg_alpha': 1.715443106766994, 'reg_lambda': 0.6688185579863276}. Best is trial 1 with value: 11.159007201491523.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30540
[LightGBM] [Info] Number of data points in the train set: 55835, number of used features: 126
[LightGBM] [Info] Start training from score -0.004191


[I 2024-11-22 08:01:00,073] Trial 5 finished with value: 11.511534619638246 and parameters: {'max_depth': 7, 'learning_rate': 0.12879031431656535, 'n_estimators': 465, 'num_leaves': 82, 'min_child_samples': 6, 'subsample': 0.5232585128163335, 'colsample_bytree': 0.6817925798277795, 'reg_alpha': 0.10788892293116581, 'reg_lambda': 0.975958787210523}. Best is trial 1 with value: 11.159007201491523.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 30540
[LightGBM] [Info] Number of data points in the train set: 55835, number of used features: 126
[LightGBM] [Info] Start training from score -0.004191


[I 2024-11-22 08:01:09,515] Trial 6 finished with value: 11.24898162384347 and parameters: {'max_depth': 1, 'learning_rate': 0.01253617152198292, 'n_estimators': 1205, 'num_leaves': 20, 'min_child_samples': 13, 'subsample': 0.9100070733140366, 'colsample_bytree': 0.9645879723799308, 'reg_alpha': 4.0138719143911255, 'reg_lambda': 3.7302372321720845}. Best is trial 1 with value: 11.159007201491523.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30540
[LightGBM] [Info] Number of data points in the train set: 55835, number of used features: 126
[LightGBM] [Info] Start training from score -0.004191


[I 2024-11-22 08:01:18,869] Trial 7 finished with value: 11.134428645865617 and parameters: {'max_depth': 9, 'learning_rate': 0.01876885451802679, 'n_estimators': 441, 'num_leaves': 49, 'min_child_samples': 7, 'subsample': 0.9407069381158955, 'colsample_bytree': 0.583062818003536, 'reg_alpha': 3.1273558967689445, 'reg_lambda': 4.664159537617658}. Best is trial 7 with value: 11.134428645865617.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 30540
[LightGBM] [Info] Number of data points in the train set: 55835, number of used features: 126
[LightGBM] [Info] Start training from score -0.004191


[I 2024-11-22 08:02:06,217] Trial 8 finished with value: 12.025092979367384 and parameters: {'max_depth': 6, 'learning_rate': 0.19266472151756528, 'n_estimators': 1989, 'num_leaves': 92, 'min_child_samples': 11, 'subsample': 0.9984285972842645, 'colsample_bytree': 0.5157252914079838, 'reg_alpha': 4.672302550417526, 'reg_lambda': 1.441923263533114}. Best is trial 7 with value: 11.134428645865617.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30540
[LightGBM] [Info] Number of data points in the train set: 55835, number of used features: 126
[LightGBM] [Info] Start training from score -0.004191


[I 2024-11-22 08:02:20,970] Trial 9 finished with value: 11.135205923876894 and parameters: {'max_depth': 3, 'learning_rate': 0.020470897341601325, 'n_estimators': 1673, 'num_leaves': 32, 'min_child_samples': 34, 'subsample': 0.9521382642026008, 'colsample_bytree': 0.6196539310427356, 'reg_alpha': 3.113632720871548, 'reg_lambda': 2.5512400064404894}. Best is trial 7 with value: 11.134428645865617.


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 30540
[LightGBM] [Info] Number of data points in the train set: 55835, number of used features: 126
[LightGBM] [Info] Start training from score -0.004191


[I 2024-11-22 08:02:48,379] Trial 10 finished with value: 11.279301191951756 and parameters: {'max_depth': 10, 'learning_rate': 0.03875113223176095, 'n_estimators': 770, 'num_leaves': 145, 'min_child_samples': 50, 'subsample': 0.837428577893223, 'colsample_bytree': 0.798054529522819, 'reg_alpha': 2.300832203727409, 'reg_lambda': 4.932868716889569}. Best is trial 7 with value: 11.134428645865617.
[W 2024-11-22 08:02:48,882] Trial 11 failed with parameters: {'max_depth': 4, 'learning_rate': 0.01813579353960066, 'n_estimators': 1540, 'num_leaves': 48, 'min_child_samples': 36, 'subsample': 0.9068317625867457, 'colsample_bytree': 0.6051409272933892, 'reg_alpha': 3.0874090524739928, 'reg_lambda': 2.7548597753822968} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/var/folde

KeyboardInterrupt: 

### XGBOOST OUTLIER REDUCTION

In [3]:
import optuna
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, matthews_corrcoef
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb
import joblib
import os
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

os.makedirs("xgb_models", exist_ok=True)

# Specify the features and target column for each target
for target_column in ['home_win', 'away_win']:
    categorical_columns = ['Conf_home', 'Conf_away']  # Update to actual categorical columns

    # Prepare data
    model_df = df[[target_column, 'Date','Rk_home','Conf_home', 'AdjEM_home', 'AdjO_home',
       'AdjO_Rk_home', 'AdjD_home', 'AdjD_Rk_home', 'AdjT_home',
       'AdjT_Rk_home', 'Rk_rolling_1_home',
       'AdjEM_rolling_1_home', 'AdjO_rolling_1_home',
       'AdjO_Rk_rolling_1_home', 'AdjD_rolling_1_home',
       'AdjD_Rk_rolling_1_home', 'AdjT_rolling_1_home',
       'AdjT_Rk_rolling_1_home', 'Rk_rolling_3_home',
       'AdjEM_rolling_3_home', 'AdjO_rolling_3_home',
       'AdjO_Rk_rolling_3_home', 'AdjD_rolling_3_home',
       'AdjD_Rk_rolling_3_home', 'AdjT_rolling_3_home',
       'AdjT_Rk_rolling_3_home', 'Rk_rolling_5_home',
       'AdjEM_rolling_5_home', 'AdjO_rolling_5_home',
       'AdjO_Rk_rolling_5_home', 'AdjD_rolling_5_home',
       'AdjD_Rk_rolling_5_home', 'AdjT_rolling_5_home',
       'AdjT_Rk_rolling_5_home', 'Rk_rolling_10_home',
       'AdjEM_rolling_10_home', 'AdjO_rolling_10_home',
       'AdjO_Rk_rolling_10_home', 'AdjD_rolling_10_home',
       'AdjD_Rk_rolling_10_home', 'AdjT_rolling_10_home',
       'AdjT_Rk_rolling_10_home', 'Rk_rolling_20_home',
       'AdjEM_rolling_20_home', 'AdjO_rolling_20_home',
       'AdjO_Rk_rolling_20_home', 'AdjD_rolling_20_home',
       'AdjD_Rk_rolling_20_home', 'AdjT_rolling_20_home',
       'AdjT_Rk_rolling_20_home', 'Rk_away', 'Conf_away',
       'AdjEM_away', 'AdjO_away', 'AdjO_Rk_away', 'AdjD_away',
       'AdjD_Rk_away', 'AdjT_away', 'AdjT_Rk_away',
       'Rk_rolling_1_away', 'AdjEM_rolling_1_away', 'AdjO_rolling_1_away',
       'AdjO_Rk_rolling_1_away', 'AdjD_rolling_1_away',
       'AdjD_Rk_rolling_1_away', 'AdjT_rolling_1_away',
       'AdjT_Rk_rolling_1_away', 'Rk_rolling_3_away',
       'AdjEM_rolling_3_away', 'AdjO_rolling_3_away',
       'AdjO_Rk_rolling_3_away', 'AdjD_rolling_3_away',
       'AdjD_Rk_rolling_3_away', 'AdjT_rolling_3_away',
       'AdjT_Rk_rolling_3_away', 'Rk_rolling_5_away',
       'AdjEM_rolling_5_away', 'AdjO_rolling_5_away',
       'AdjO_Rk_rolling_5_away', 'AdjD_rolling_5_away',
       'AdjD_Rk_rolling_5_away', 'AdjT_rolling_5_away',
       'AdjT_Rk_rolling_5_away', 'Rk_rolling_10_away',
       'AdjEM_rolling_10_away', 'AdjO_rolling_10_away',
       'AdjO_Rk_rolling_10_away', 'AdjD_rolling_10_away',
       'AdjD_Rk_rolling_10_away', 'AdjT_rolling_10_away',
       'AdjT_Rk_rolling_10_away', 'Rk_rolling_20_away',
       'AdjEM_rolling_20_away', 'AdjO_rolling_20_away',
       'AdjO_Rk_rolling_20_away', 'AdjD_rolling_20_away',
       'AdjD_Rk_rolling_20_away', 'AdjT_rolling_20_away',
       'AdjT_Rk_rolling_20_away',  'home_sos',
       'away_sos', 'home_days_since_last_game',
       'home_rolling_avg_score_1', 'home_rolling_avg_score_3',
       'home_rolling_avg_score_7', 'home_rolling_sos_1',
       'home_rolling_sos_3', 'home_rolling_sos_7',
       'home_rolling_avg_score_allowed_1',
       'home_rolling_avg_score_allowed_3',
       'home_rolling_avg_score_allowed_7',
       'home_rolling_opp_score_rank_1', 'home_rolling_opp_score_rank_3',
       'home_rolling_opp_score_rank_7', 'away_days_since_last_game',
       'away_rolling_avg_score_1', 'away_rolling_avg_score_3',
       'away_rolling_avg_score_7', 'away_rolling_sos_1',
       'away_rolling_sos_3', 'away_rolling_sos_7',
       'away_rolling_avg_score_allowed_1',
       'away_rolling_avg_score_allowed_3',
       'away_rolling_avg_score_allowed_7',
       'away_rolling_opp_score_rank_1', 'away_rolling_opp_score_rank_3',
       'away_rolling_opp_score_rank_7']]

    # Encode categorical columns
    for cat_col in categorical_columns:
        le = LabelEncoder()
        model_df[cat_col] = le.fit_transform(df[cat_col])

    # Ensure the dataframe is sorted by the "date" column
    model_df['date'] = pd.to_datetime(df['Date'])  # Convert to datetime if not already
    model_df = model_df.sort_values(by='date')

    # Define the split ratio
    test_ratio = 0.2  # 20% of the data will be used for testing
    split_index = int(len(model_df) * (1 - test_ratio))

    # Split data into training and testing sets
    train_data = model_df.iloc[:split_index]
    test_data = model_df.iloc[split_index:]

    # Separate features and target
    X_train = train_data.drop(columns=['date', 'Date', target_column])
    y_train = train_data[target_column]

    X_test = test_data.drop(columns=['date', 'Date', target_column])
    y_test = test_data[target_column]

    def objective_xgb(trial):
        param = {
            'objective': 'binary:logistic',
            'eval_metric': 'logloss',
            'max_depth': trial.suggest_int('max_depth', 3, 10),
            'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
            'n_estimators': trial.suggest_int('n_estimators', 100, 1500),
            'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
            'subsample': trial.suggest_float('subsample', 0.5, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
            'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 5.0),
            'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 5.0),
        }

        # Train XGBoost model
        model = xgb.XGBClassifier(**param, use_label_encoder=False)
        model.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=10, verbose=False)

        # Evaluate the model using AUC as the optimization metric
        y_pred_proba = model.predict_proba(X_test)[:, 1]
        auc = roc_auc_score(y_test, y_pred_proba)
        return 1 - auc  # Minimize (1 - AUC)

    # Optimize XGBoost with Optuna
    study_xgb = optuna.create_study(direction='minimize')
    study_xgb.optimize(objective_xgb, n_trials=15)
    print(f"Best XGBoost parameters for {target_column}: {study_xgb.best_params}")

    # Train the final XGBoost model with the best parameters
    best_xgb = xgb.XGBClassifier(**study_xgb.best_params, use_label_encoder=False)
    best_xgb.fit(X_train, y_train)
    y_pred = best_xgb.predict(X_test)
    y_pred_proba = best_xgb.predict_proba(X_test)[:, 1]

    # Calculate metrics
    auc = roc_auc_score(y_test, y_pred_proba)
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    mcc = matthews_corrcoef(y_test, y_pred)

    # Display metrics
    print(f"Metrics for {target_column}:")
    print(f"AUC: {auc:.4f}")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"MCC: {mcc:.4f}")

    # Save the final model
    model_path = f"xgb_models/{target_column}_best_model.joblib"
    joblib.dump(best_xgb, model_path)
    print(f"Model saved to {model_path}")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  model_df[cat_col] = le.fit_transform(df[cat_col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  model_df['date'] = pd.to_datetime(df['Date'])  # Convert to datetime if not already
[I 2024-11-25 08:50:15,847] A new study created in memory with name: no-name-ea40788b-c51a-436a-bb40-d599880170e9
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
[I 2024-11-25 08:50:49,591] Trial 0 finished with value: 0.22554254679059993 and parameters: {'max_depth': 3, 'learning_rate': 0.01747064207537779, '

Best XGBoost parameters for home_win: {'max_depth': 4, 'learning_rate': 0.010829648023162497, 'n_estimators': 1106, 'min_child_weight': 6, 'subsample': 0.5067993980450405, 'colsample_bytree': 0.5164111685918681, 'reg_alpha': 0.8945854676306653, 'reg_lambda': 4.788248512991332}
Metrics for home_win:
AUC: 0.7747
Accuracy: 0.6965
F1 Score: 0.6975
MCC: 0.3931
Model saved to xgb_models/home_win_best_model.joblib


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  model_df[cat_col] = le.fit_transform(df[cat_col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  model_df['date'] = pd.to_datetime(df['Date'])  # Convert to datetime if not already
[I 2024-11-25 08:58:15,362] A new study created in memory with name: no-name-b71aafd1-aa0f-4866-9026-100bdf76f1ba
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
[I 2024-11-25 08:58:27,270] Trial 0 finished with value: 0.23228625810892334 and parameters: {'max_depth': 10, 'learning_rate': 0.074880270868692, 'n

Best XGBoost parameters for away_win: {'max_depth': 4, 'learning_rate': 0.010794438699945744, 'n_estimators': 1424, 'min_child_weight': 1, 'subsample': 0.5060593858397434, 'colsample_bytree': 0.9061760984354007, 'reg_alpha': 2.043618456321129, 'reg_lambda': 1.5824029012988672}
Metrics for away_win:
AUC: 0.7735
Accuracy: 0.6955
F1 Score: 0.6950
MCC: 0.3911
Model saved to xgb_models/away_win_best_model.joblib


## ADABOOST

In [9]:
import os
import pandas as pd
import joblib
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import optuna

os.makedirs("adaboost_models", exist_ok=True)


for target_column in ['home_win', 'away_win']:
    categorical_columns = ['Conf_home', 'Conf_away']  # Update to actual categorical columns

    # Prepare data
    model_df = df[[target_column, 'Date','Rk_home','Conf_home', 'AdjEM_home', 'AdjO_home',
       'AdjO_Rk_home', 'AdjD_home', 'AdjD_Rk_home', 'AdjT_home',
       'AdjT_Rk_home', 'Rk_rolling_1_home',
       'AdjEM_rolling_1_home', 'AdjO_rolling_1_home',
       'AdjO_Rk_rolling_1_home', 'AdjD_rolling_1_home',
       'AdjD_Rk_rolling_1_home', 'AdjT_rolling_1_home',
       'AdjT_Rk_rolling_1_home', 'Rk_rolling_3_home',
       'AdjEM_rolling_3_home', 'AdjO_rolling_3_home',
       'AdjO_Rk_rolling_3_home', 'AdjD_rolling_3_home',
       'AdjD_Rk_rolling_3_home', 'AdjT_rolling_3_home',
       'AdjT_Rk_rolling_3_home', 'Rk_rolling_5_home',
       'AdjEM_rolling_5_home', 'AdjO_rolling_5_home',
       'AdjO_Rk_rolling_5_home', 'AdjD_rolling_5_home',
       'AdjD_Rk_rolling_5_home', 'AdjT_rolling_5_home',
       'AdjT_Rk_rolling_5_home', 'Rk_rolling_10_home',
       'AdjEM_rolling_10_home', 'AdjO_rolling_10_home',
       'AdjO_Rk_rolling_10_home', 'AdjD_rolling_10_home',
       'AdjD_Rk_rolling_10_home', 'AdjT_rolling_10_home',
       'AdjT_Rk_rolling_10_home', 'Rk_rolling_20_home',
       'AdjEM_rolling_20_home', 'AdjO_rolling_20_home',
       'AdjO_Rk_rolling_20_home', 'AdjD_rolling_20_home',
       'AdjD_Rk_rolling_20_home', 'AdjT_rolling_20_home',
       'AdjT_Rk_rolling_20_home', 'Rk_away', 'Conf_away',
       'AdjEM_away', 'AdjO_away', 'AdjO_Rk_away', 'AdjD_away',
       'AdjD_Rk_away', 'AdjT_away', 'AdjT_Rk_away',
       'Rk_rolling_1_away', 'AdjEM_rolling_1_away', 'AdjO_rolling_1_away',
       'AdjO_Rk_rolling_1_away', 'AdjD_rolling_1_away',
       'AdjD_Rk_rolling_1_away', 'AdjT_rolling_1_away',
       'AdjT_Rk_rolling_1_away', 'Rk_rolling_3_away',
       'AdjEM_rolling_3_away', 'AdjO_rolling_3_away',
       'AdjO_Rk_rolling_3_away', 'AdjD_rolling_3_away',
       'AdjD_Rk_rolling_3_away', 'AdjT_rolling_3_away',
       'AdjT_Rk_rolling_3_away', 'Rk_rolling_5_away',
       'AdjEM_rolling_5_away', 'AdjO_rolling_5_away',
       'AdjO_Rk_rolling_5_away', 'AdjD_rolling_5_away',
       'AdjD_Rk_rolling_5_away', 'AdjT_rolling_5_away',
       'AdjT_Rk_rolling_5_away', 'Rk_rolling_10_away',
       'AdjEM_rolling_10_away', 'AdjO_rolling_10_away',
       'AdjO_Rk_rolling_10_away', 'AdjD_rolling_10_away',
       'AdjD_Rk_rolling_10_away', 'AdjT_rolling_10_away',
       'AdjT_Rk_rolling_10_away', 'Rk_rolling_20_away',
       'AdjEM_rolling_20_away', 'AdjO_rolling_20_away',
       'AdjO_Rk_rolling_20_away', 'AdjD_rolling_20_away',
       'AdjD_Rk_rolling_20_away', 'AdjT_rolling_20_away',
       'AdjT_Rk_rolling_20_away',  'home_sos',
       'away_sos', 'home_days_since_last_game',
       'home_rolling_avg_score_1', 'home_rolling_avg_score_3',
       'home_rolling_avg_score_7', 'home_rolling_sos_1',
       'home_rolling_sos_3', 'home_rolling_sos_7',
       'home_rolling_avg_score_allowed_1',
       'home_rolling_avg_score_allowed_3',
       'home_rolling_avg_score_allowed_7',
       'home_rolling_opp_score_rank_1', 'home_rolling_opp_score_rank_3',
       'home_rolling_opp_score_rank_7', 'away_days_since_last_game',
       'away_rolling_avg_score_1', 'away_rolling_avg_score_3',
       'away_rolling_avg_score_7', 'away_rolling_sos_1',
       'away_rolling_sos_3', 'away_rolling_sos_7',
       'away_rolling_avg_score_allowed_1',
       'away_rolling_avg_score_allowed_3',
       'away_rolling_avg_score_allowed_7',
       'away_rolling_opp_score_rank_1', 'away_rolling_opp_score_rank_3',
       'away_rolling_opp_score_rank_7']]

    # Encode categorical columns
    for cat_col in categorical_columns:
        le = LabelEncoder()
        model_df.loc[:, cat_col] = le.fit_transform(df[cat_col])

    model_df.loc[:, 'Date'] = pd.to_datetime(model_df['Date'])

    # Sort the dataframe by date in ascending order
    model_df = model_df.sort_values(by='Date').reset_index(drop=True)

    # Calculate the index for the 80% split
    split_index = int(len(model_df) * 0.8)

    # Split the data into training and testing sets
    train_data = model_df.iloc[:split_index]
    test_data = model_df.iloc[split_index:]

    # Separate features and target
    X_train = train_data.drop(columns=['Date', target_column])
    y_train = train_data[target_column]

    X_test = test_data.drop(columns=['Date', target_column])
    y_test = test_data[target_column]

    # Define Optuna objective function for AdaBoost
    def objective_adaboost(trial):
        param = {
            'n_estimators': trial.suggest_int('n_estimators', 50, 500),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0, log=True),
            'base_estimator__max_depth': trial.suggest_int('max_depth', 1, 5),
        }

        # Define AdaBoost with a DecisionTree base estimator
        base_estimator = DecisionTreeClassifier(max_depth=param['base_estimator__max_depth'])
        model = AdaBoostClassifier(base_estimator=base_estimator,
                                   n_estimators=param['n_estimators'],
                                   learning_rate=param['learning_rate'])

        model.fit(X_train, y_train)
        y_pred_proba = model.predict_proba(X_test)[:, 1]

        # Use ROC AUC as the optimization metric
        auc = roc_auc_score(y_test, y_pred_proba)
        return auc

    # Optimize AdaBoost with Optuna
    study_adaboost = optuna.create_study(direction='maximize')
    study_adaboost.optimize(objective_adaboost, n_trials=5)
    print(f"Best AdaBoost parameters for {target_column}: {study_adaboost.best_params}")

    # Train the best AdaBoost model with optimized parameters
    best_params = study_adaboost.best_params
    best_base_estimator = DecisionTreeClassifier(max_depth=best_params['base_estimator__max_depth'])
    best_adaboost = AdaBoostClassifier(base_estimator=best_base_estimator,
                                       n_estimators=best_params['n_estimators'],
                                       learning_rate=best_params['learning_rate'])
    best_adaboost.fit(X_train, y_train)

    # Predict and evaluate
    y_pred = best_adaboost.predict(X_test)
    y_pred_proba = best_adaboost.predict_proba(X_test)[:, 1]

    # Define a function to calculate and display metrics
    def display_metrics(y_true, y_pred, y_pred_proba, model_name):
        auc = roc_auc_score(y_true, y_pred_proba)
        accuracy = accuracy_score(y_true, y_pred)
        f1 = f1_score(y_true, y_pred)
        print(f"{model_name} AUC: {auc:.4f}")
        print(f"{model_name} Accuracy: {accuracy:.4f}")
        print(f"{model_name} F1 Score: {f1:.4f}")

    # Display metrics for the AdaBoost model
    display_metrics(y_test, y_pred, y_pred_proba, "AdaBoost")

    # Save the final best model for this target column
    best_model_filename = f"adaboost_models/{target_column}_best_model.joblib"
    joblib.dump(best_adaboost, best_model_filename)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  model_df.loc[:, cat_col] = le.fit_transform(df[cat_col])
  model_df.loc[:, cat_col] = le.fit_transform(df[cat_col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  model_df.loc[:, 'Date'] = pd.to_datetime(model_df['Date'])
  model_df.loc[:, 'Date'] = pd.to_datetime(model_df['Date'])
[I 2024-11-24 13:57:17,970] A new study created in memory with name: no-name-2ec1ddf2-2b65-4a57-9287-f711a8f35025
[W 2024-11-24 13:59:13,179] Trial 0 failed with parameters: {'n_estimators': 294, 'learning_rate': 0.5527468452889346, '

KeyboardInterrupt: 

In [13]:
import pandas as pd
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
from sklearn.model_selection import train_test_split

# Sample data preparation
model_df = df[['home_win','Date','Rk_home','Conf_home', 'AdjEM_home', 'AdjO_home',
       'AdjO_Rk_home', 'AdjD_home', 'AdjD_Rk_home', 'AdjT_home',
       'AdjT_Rk_home', 'Rk_rolling_1_home',
       'AdjEM_rolling_1_home', 'AdjO_rolling_1_home',
       'AdjO_Rk_rolling_1_home', 'AdjD_rolling_1_home',
       'AdjD_Rk_rolling_1_home', 'AdjT_rolling_1_home',
       'AdjT_Rk_rolling_1_home', 'Rk_rolling_3_home',
       'AdjEM_rolling_3_home', 'AdjO_rolling_3_home',
       'AdjO_Rk_rolling_3_home', 'AdjD_rolling_3_home',
       'AdjD_Rk_rolling_3_home', 'AdjT_rolling_3_home',
       'AdjT_Rk_rolling_3_home', 'Rk_rolling_5_home',
       'AdjEM_rolling_5_home', 'AdjO_rolling_5_home',
       'AdjO_Rk_rolling_5_home', 'AdjD_rolling_5_home',
       'AdjD_Rk_rolling_5_home', 'AdjT_rolling_5_home',
       'AdjT_Rk_rolling_5_home', 'Rk_rolling_10_home',
       'AdjEM_rolling_10_home', 'AdjO_rolling_10_home',
       'AdjO_Rk_rolling_10_home', 'AdjD_rolling_10_home',
       'AdjD_Rk_rolling_10_home', 'AdjT_rolling_10_home',
       'AdjT_Rk_rolling_10_home', 'Rk_rolling_20_home',
       'AdjEM_rolling_20_home', 'AdjO_rolling_20_home',
       'AdjO_Rk_rolling_20_home', 'AdjD_rolling_20_home',
       'AdjD_Rk_rolling_20_home', 'AdjT_rolling_20_home',
       'AdjT_Rk_rolling_20_home', 'Rk_away', 'Conf_away',
       'AdjEM_away', 'AdjO_away', 'AdjO_Rk_away', 'AdjD_away',
       'AdjD_Rk_away', 'AdjT_away', 'AdjT_Rk_away',
       'Rk_rolling_1_away', 'AdjEM_rolling_1_away', 'AdjO_rolling_1_away',
       'AdjO_Rk_rolling_1_away', 'AdjD_rolling_1_away',
       'AdjD_Rk_rolling_1_away', 'AdjT_rolling_1_away',
       'AdjT_Rk_rolling_1_away', 'Rk_rolling_3_away',
       'AdjEM_rolling_3_away', 'AdjO_rolling_3_away',
       'AdjO_Rk_rolling_3_away', 'AdjD_rolling_3_away',
       'AdjD_Rk_rolling_3_away', 'AdjT_rolling_3_away',
       'AdjT_Rk_rolling_3_away', 'Rk_rolling_5_away',
       'AdjEM_rolling_5_away', 'AdjO_rolling_5_away',
       'AdjO_Rk_rolling_5_away', 'AdjD_rolling_5_away',
       'AdjD_Rk_rolling_5_away', 'AdjT_rolling_5_away',
       'AdjT_Rk_rolling_5_away', 'Rk_rolling_10_away',
       'AdjEM_rolling_10_away', 'AdjO_rolling_10_away',
       'AdjO_Rk_rolling_10_away', 'AdjD_rolling_10_away',
       'AdjD_Rk_rolling_10_away', 'AdjT_rolling_10_away',
       'AdjT_Rk_rolling_10_away', 'Rk_rolling_20_away',
       'AdjEM_rolling_20_away', 'AdjO_rolling_20_away',
       'AdjO_Rk_rolling_20_away', 'AdjD_rolling_20_away',
       'AdjD_Rk_rolling_20_away', 'AdjT_rolling_20_away',
       'AdjT_Rk_rolling_20_away',  'home_sos',
       'away_sos', 'home_days_since_last_game',
       'home_rolling_avg_score_1', 'home_rolling_avg_score_3',
       'home_rolling_avg_score_7', 'home_rolling_sos_1',
       'home_rolling_sos_3', 'home_rolling_sos_7',
       'home_rolling_avg_score_allowed_1',
       'home_rolling_avg_score_allowed_3',
       'home_rolling_avg_score_allowed_7',
       'home_rolling_opp_score_rank_1', 'home_rolling_opp_score_rank_3',
       'home_rolling_opp_score_rank_7', 'away_days_since_last_game',
       'away_rolling_avg_score_1', 'away_rolling_avg_score_3',
       'away_rolling_avg_score_7', 'away_rolling_sos_1',
       'away_rolling_sos_3', 'away_rolling_sos_7',
       'away_rolling_avg_score_allowed_1',
       'away_rolling_avg_score_allowed_3',
       'away_rolling_avg_score_allowed_7',
       'away_rolling_opp_score_rank_1', 'away_rolling_opp_score_rank_3',
       'away_rolling_opp_score_rank_7']]  # Adjust columns as needed
categorical_columns = ['Conf_home', 'Conf_away']

# Encode categorical columns
for cat_col in categorical_columns:
    le = LabelEncoder()
    model_df[cat_col] = le.fit_transform(model_df[cat_col])

# Convert 'Date' column to datetime
model_df['Date'] = pd.to_datetime(model_df['Date'])
model_df = model_df.sort_values(by='Date').reset_index(drop=True)

# Train-test split
split_index = int(len(model_df) * 0.8)
train_data = model_df.iloc[:split_index]
test_data = model_df.iloc[split_index:]
X_train = train_data.drop(columns=['Date', 'home_win'])
y_train = train_data['home_win']
X_test = test_data.drop(columns=['Date', 'home_win'])
y_test = test_data['home_win']

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train AdaBoost model
base_estimator = DecisionTreeClassifier(max_depth=3)
adaboost = AdaBoostClassifier(base_estimator=base_estimator, n_estimators=100, learning_rate=0.1)
adaboost.fit(X_train, y_train)

# Save the trained model
model_path = "adaboost_models/home_win_adaboost_model.joblib"
joblib.dump(adaboost, model_path)
print(f"AdaBoost model saved to {model_path}")

# Predict and evaluate
y_pred = adaboost.predict(X_test)
y_pred_proba = adaboost.predict_proba(X_test)[:, 1]

# Performance metrics
auc = roc_auc_score(y_test, y_pred_proba)
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"AdaBoost AUC: {auc:.4f}")
print(f"AdaBoost Accuracy: {accuracy:.4f}")
print(f"AdaBoost F1 Score: {f1:.4f}")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  model_df[cat_col] = le.fit_transform(model_df[cat_col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  model_df['Date'] = pd.to_datetime(model_df['Date'])


AdaBoost model saved to adaboost_models/home_win_adaboost_model.joblib
AdaBoost AUC: 0.7729
AdaBoost Accuracy: 0.6973
AdaBoost F1 Score: 0.6971


In [14]:
import pandas as pd
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
from sklearn.model_selection import train_test_split

# Sample data preparation
model_df = df[['away_win','Date','Rk_home','Conf_home', 'AdjEM_home', 'AdjO_home',
       'AdjO_Rk_home', 'AdjD_home', 'AdjD_Rk_home', 'AdjT_home',
       'AdjT_Rk_home', 'Rk_rolling_1_home',
       'AdjEM_rolling_1_home', 'AdjO_rolling_1_home',
       'AdjO_Rk_rolling_1_home', 'AdjD_rolling_1_home',
       'AdjD_Rk_rolling_1_home', 'AdjT_rolling_1_home',
       'AdjT_Rk_rolling_1_home', 'Rk_rolling_3_home',
       'AdjEM_rolling_3_home', 'AdjO_rolling_3_home',
       'AdjO_Rk_rolling_3_home', 'AdjD_rolling_3_home',
       'AdjD_Rk_rolling_3_home', 'AdjT_rolling_3_home',
       'AdjT_Rk_rolling_3_home', 'Rk_rolling_5_home',
       'AdjEM_rolling_5_home', 'AdjO_rolling_5_home',
       'AdjO_Rk_rolling_5_home', 'AdjD_rolling_5_home',
       'AdjD_Rk_rolling_5_home', 'AdjT_rolling_5_home',
       'AdjT_Rk_rolling_5_home', 'Rk_rolling_10_home',
       'AdjEM_rolling_10_home', 'AdjO_rolling_10_home',
       'AdjO_Rk_rolling_10_home', 'AdjD_rolling_10_home',
       'AdjD_Rk_rolling_10_home', 'AdjT_rolling_10_home',
       'AdjT_Rk_rolling_10_home', 'Rk_rolling_20_home',
       'AdjEM_rolling_20_home', 'AdjO_rolling_20_home',
       'AdjO_Rk_rolling_20_home', 'AdjD_rolling_20_home',
       'AdjD_Rk_rolling_20_home', 'AdjT_rolling_20_home',
       'AdjT_Rk_rolling_20_home', 'Rk_away', 'Conf_away',
       'AdjEM_away', 'AdjO_away', 'AdjO_Rk_away', 'AdjD_away',
       'AdjD_Rk_away', 'AdjT_away', 'AdjT_Rk_away',
       'Rk_rolling_1_away', 'AdjEM_rolling_1_away', 'AdjO_rolling_1_away',
       'AdjO_Rk_rolling_1_away', 'AdjD_rolling_1_away',
       'AdjD_Rk_rolling_1_away', 'AdjT_rolling_1_away',
       'AdjT_Rk_rolling_1_away', 'Rk_rolling_3_away',
       'AdjEM_rolling_3_away', 'AdjO_rolling_3_away',
       'AdjO_Rk_rolling_3_away', 'AdjD_rolling_3_away',
       'AdjD_Rk_rolling_3_away', 'AdjT_rolling_3_away',
       'AdjT_Rk_rolling_3_away', 'Rk_rolling_5_away',
       'AdjEM_rolling_5_away', 'AdjO_rolling_5_away',
       'AdjO_Rk_rolling_5_away', 'AdjD_rolling_5_away',
       'AdjD_Rk_rolling_5_away', 'AdjT_rolling_5_away',
       'AdjT_Rk_rolling_5_away', 'Rk_rolling_10_away',
       'AdjEM_rolling_10_away', 'AdjO_rolling_10_away',
       'AdjO_Rk_rolling_10_away', 'AdjD_rolling_10_away',
       'AdjD_Rk_rolling_10_away', 'AdjT_rolling_10_away',
       'AdjT_Rk_rolling_10_away', 'Rk_rolling_20_away',
       'AdjEM_rolling_20_away', 'AdjO_rolling_20_away',
       'AdjO_Rk_rolling_20_away', 'AdjD_rolling_20_away',
       'AdjD_Rk_rolling_20_away', 'AdjT_rolling_20_away',
       'AdjT_Rk_rolling_20_away',  'home_sos',
       'away_sos', 'home_days_since_last_game',
       'home_rolling_avg_score_1', 'home_rolling_avg_score_3',
       'home_rolling_avg_score_7', 'home_rolling_sos_1',
       'home_rolling_sos_3', 'home_rolling_sos_7',
       'home_rolling_avg_score_allowed_1',
       'home_rolling_avg_score_allowed_3',
       'home_rolling_avg_score_allowed_7',
       'home_rolling_opp_score_rank_1', 'home_rolling_opp_score_rank_3',
       'home_rolling_opp_score_rank_7', 'away_days_since_last_game',
       'away_rolling_avg_score_1', 'away_rolling_avg_score_3',
       'away_rolling_avg_score_7', 'away_rolling_sos_1',
       'away_rolling_sos_3', 'away_rolling_sos_7',
       'away_rolling_avg_score_allowed_1',
       'away_rolling_avg_score_allowed_3',
       'away_rolling_avg_score_allowed_7',
       'away_rolling_opp_score_rank_1', 'away_rolling_opp_score_rank_3',
       'away_rolling_opp_score_rank_7']]  # Adjust columns as needed
categorical_columns = ['Conf_home', 'Conf_away']

# Encode categorical columns
for cat_col in categorical_columns:
    le = LabelEncoder()
    model_df[cat_col] = le.fit_transform(model_df[cat_col])

# Convert 'Date' column to datetime
model_df['Date'] = pd.to_datetime(model_df['Date'])
model_df = model_df.sort_values(by='Date').reset_index(drop=True)

# Train-test split
split_index = int(len(model_df) * 0.8)
train_data = model_df.iloc[:split_index]
test_data = model_df.iloc[split_index:]
X_train = train_data.drop(columns=['Date', 'away_win'])
y_train = train_data['away_win']
X_test = test_data.drop(columns=['Date', 'away_win'])
y_test = test_data['away_win']

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train AdaBoost model
base_estimator = DecisionTreeClassifier(max_depth=3)
adaboost = AdaBoostClassifier(base_estimator=base_estimator, n_estimators=100, learning_rate=0.1)
adaboost.fit(X_train, y_train)

# Save the trained model
model_path = "adaboost_models/away_win_adaboost_model.joblib"
joblib.dump(adaboost, model_path)
print(f"AdaBoost model saved to {model_path}")

# Predict and evaluate
y_pred = adaboost.predict(X_test)
y_pred_proba = adaboost.predict_proba(X_test)[:, 1]

# Performance metrics
auc = roc_auc_score(y_test, y_pred_proba)
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"AdaBoost AUC: {auc:.4f}")
print(f"AdaBoost Accuracy: {accuracy:.4f}")
print(f"AdaBoost F1 Score: {f1:.4f}")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  model_df[cat_col] = le.fit_transform(model_df[cat_col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  model_df['Date'] = pd.to_datetime(model_df['Date'])


AdaBoost model saved to adaboost_models/away_win_adaboost_model.joblib
AdaBoost AUC: 0.7727
AdaBoost Accuracy: 0.6972
AdaBoost F1 Score: 0.6974
