# Datium Data Science Test

### Import dependencies

In [21]:
import os

# suppress console output
os.environ['MLFLOW_LOGGING_ENABLED'] = 'false'

import warnings

# ignore all warnings
warnings.filterwarnings('ignore')

import optuna
import mlflow
import lightgbm as lgb
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from skimpy import skim

### Load dataset

In [22]:
df_train = pd.read_csv('data/DatiumTrain.rpt', delimiter = '\t', low_memory=False)
df_test = pd.read_csv('data/DatiumTest.rpt', delimiter = '\t', low_memory=False)

### Exploratory Data Analyis

In [23]:
# Summarize training dataframe
skim(df_train)

In [24]:
# Drop columns missing above 80% data
thresh_na = len(df_train)*0.8

df_train = df_train.dropna(thresh=thresh_na, axis=1)
skim(df_train)

In [25]:
# Drop rows with no labels
df_train = df_train.dropna(subset=["Sold_Amount"])
skim(df_train)

In [26]:
# Define features and target
columns_to_drop = [
    "AvgWholesale",
    "AvgRetail",
    "GoodWholesale",
    "GoodRetail",
    "TradeMin",
    "TradeMax",
    "PrivateMax",
]
for column in columns_to_drop:
    if column in df_train.columns:
        df_train = df_train.drop(column, axis=1)

X_train = df_train.drop(["Sold_Amount"], axis=1)
y_train = df_train["Sold_Amount"]

X_test = df_test.drop(["Sold_Amount"], axis=1)
y_test = df_test["Sold_Amount"]

features = X_train.columns.to_list()

### Data Preprocessing

In [27]:
cat_cols = X_train.select_dtypes(exclude="number").columns
num_cols = X_train.select_dtypes(include="number").columns

In [28]:
# Categorical features
categorical_pipeline = Pipeline(
    steps=[
        ("SimpleImputer", SimpleImputer(strategy="most_frequent")),
        ("OneHotEncoder", OneHotEncoder(handle_unknown='ignore'))
    ]
)

# Numerical features
numeric_pipeline = Pipeline(
    steps=[("SimpleImputer", SimpleImputer(strategy="mean")), 
        ("StandardScaler", StandardScaler())]
)

full_processor = ColumnTransformer(
    transformers=[
        ("numeric", numeric_pipeline, num_cols),
        ("categorical", categorical_pipeline, cat_cols),
    ]
)

# Fit and transform the encoder on the training data
X_train_encoded = full_processor.fit_transform(X_train, y_train)

# Transform the encoder on the testing data
X_test_encoded = full_processor.transform(X_test)

### Modelling

In [29]:
class LGBMRegressorOptuna:
    def __init__(self, num_boost_round=100, n_trials=100):
        self.num_boost_round = num_boost_round
        self.n_trials = n_trials
        self.best_params = None
        self.run_name = "LGBMRegressorOptuna"
    
    def fit(self, X_train, y_train, X_test, y_test):
        # Start an MLflow run
        with mlflow.start_run(run_name=self.run_name):
            # Define the objective function for Optuna
            def objective(trial):
                # Define the hyperparameters to tune
                params = {
                    'objective': 'regression',
                    'metric': 'rmse',
                    'num_leaves': trial.suggest_int('num_leaves', 10, 100),
                    'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.1),
                    'feature_fraction': trial.suggest_uniform('feature_fraction', 0.1, 1.0),
                    'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.1, 1.0),
                    'bagging_freq': trial.suggest_int('bagging_freq', 1, 10),
                    'min_child_samples': trial.suggest_int('min_child_samples', 1, 50),
                    'reg_alpha': trial.suggest_loguniform('reg_alpha', 0.001, 10.0),
                    'reg_lambda': trial.suggest_loguniform('reg_lambda', 0.001, 10.0),
                }

                # Convert the training data into LightGBM format
                train_data = lgb.Dataset(X_train, label=y_train)

                # Train the LightGBM model
                model = lgb.train(params, train_data, num_boost_round=self.num_boost_round)

                # Use the trained model to make predictions on the test data
                y_pred = model.predict(X_test)

                # Calculate the root mean squared error (RMSE) of the predictions
                rmse = mean_squared_error(y_test, y_pred, squared=False)

                # Return the RMSE as the objective value to minimize
                return rmse

            # Split the dataset into training and testing sets
            X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

            # Create an Optuna study
            study = optuna.create_study(direction='minimize')

            # Optimize the study using the objective function
            study.optimize(objective, n_trials=self.n_trials)

            # Store the best hyperparameters
            self.best_params = study.best_trial.params

            # Train a final LightGBM model using the best hyperparameters
            self.model = lgb.train(self.best_params, lgb.Dataset(X_train, label=y_train), num_boost_round=self.num_boost_round)

            y_pred_test = self.model.predict(X_test)
            self.mse_test = mean_squared_error(y_test, y_pred_test)
            self.rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
            self.r2_test = r2_score(y_test, y_pred_test)

            # Log the best hyperparameters as MLflow tags
            mlflow.set_tags(self.best_params)

            # Log the RMSE and hyperparameters as MLflow parameters
            mlflow.log_params(self.best_params)
            mlflow.log_metric("mse", self.mse_test)
            mlflow.log_metric("rmse", self.rmse_test)
            mlflow.log_metric("r2", self.r2_test)

            mlflow.sklearn.log_model(self.model, "model")

            # Print the best hyperparameters and the best RMSE achieved
            print('Best hyperparameters:', self.best_params)
            print('Best MSE  :', self.mse_test)
            print('Best RMSE :', self.rmse_test)
            print('Best R2   :', self.r2_test)

    def predict(self, X):
        return self.model.predict(X)
    
    def score(self, X, y):
        y_pred = self.predict(X)
        return r2_score(y, y_pred)


In [30]:
# Initialize the LGBMRegressorOptuna class
lgbm = LGBMRegressorOptuna(num_boost_round=100, n_trials=100)

# Fit the model to the data
lgbm.fit(X_train_encoded, y_train, X_test_encoded, y_test)

# Make predictions on new data
y_pred = lgbm.predict(X_test_encoded)

# Compute the R^2 score on the test data
r2 = lgbm.score(X_test_encoded, y_test)

[32m[I 2023-03-16 02:49:14,993][0m A new study created in memory with name: no-name-ba545a62-94f7-4e14-a21c-9716acf05e97[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7931
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 2306
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:15,628][0m Trial 0 finished with value: 8009.657268735604 and parameters: {'num_leaves': 14, 'learning_rate': 0.003437922316116085, 'feature_fraction': 0.5583190891405722, 'bagging_fraction': 0.233576154900408, 'bagging_freq': 4, 'min_child_samples': 30, 'reg_alpha': 0.004741113188703143, 'reg_lambda': 0.010413465868789358}. Best is trial 0 with value: 8009.657268735604.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 15157
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 5919
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:18,123][0m Trial 1 finished with value: 3066.0116080686685 and parameters: {'num_leaves': 71, 'learning_rate': 0.04143378561406797, 'feature_fraction': 0.8899959573539186, 'bagging_fraction': 0.5099037700262828, 'bagging_freq': 3, 'min_child_samples': 10, 'reg_alpha': 2.5407627178981276, 'reg_lambda': 0.05088075743820339}. Best is trial 1 with value: 3066.0116080686685.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 8449
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 2565
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:19,423][0m Trial 2 finished with value: 6072.260894031727 and parameters: {'num_leaves': 89, 'learning_rate': 0.008149706550053419, 'feature_fraction': 0.4164019114717241, 'bagging_fraction': 0.8411647217950684, 'bagging_freq': 6, 'min_child_samples': 26, 'reg_alpha': 0.0015682842545745271, 'reg_lambda': 3.6572019772307107}. Best is trial 1 with value: 3066.0116080686685.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20045
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 8363
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:22,424][0m Trial 3 finished with value: 4938.77510936702 and parameters: {'num_leaves': 55, 'learning_rate': 0.010931690513013851, 'feature_fraction': 0.8481390444965656, 'bagging_fraction': 0.982170173625802, 'bagging_freq': 4, 'min_child_samples': 6, 'reg_alpha': 3.967782727470392, 'reg_lambda': 0.256876505223551}. Best is trial 1 with value: 3066.0116080686685.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 8195
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 2438
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:23,358][0m Trial 4 finished with value: 3212.7444390030464 and parameters: {'num_leaves': 56, 'learning_rate': 0.041592117298277616, 'feature_fraction': 0.8179633255101835, 'bagging_fraction': 0.17097721268249857, 'bagging_freq': 2, 'min_child_samples': 28, 'reg_alpha': 0.521702722574867, 'reg_lambda': 5.730517876404191}. Best is trial 1 with value: 3066.0116080686685.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 21915
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 9298
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:25,765][0m Trial 5 finished with value: 8668.666353663226 and parameters: {'num_leaves': 44, 'learning_rate': 0.0016405538223503591, 'feature_fraction': 0.5386593650257262, 'bagging_fraction': 0.7522602510050329, 'bagging_freq': 3, 'min_child_samples': 5, 'reg_alpha': 0.02487481526467304, 'reg_lambda': 0.45562441218076094}. Best is trial 1 with value: 3066.0116080686685.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6591
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 1636
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:26,668][0m Trial 6 finished with value: 7512.170594188678 and parameters: {'num_leaves': 59, 'learning_rate': 0.005539930736854072, 'feature_fraction': 0.16030239747853176, 'bagging_fraction': 0.9402176647698196, 'bagging_freq': 5, 'min_child_samples': 49, 'reg_alpha': 0.039316010977301086, 'reg_lambda': 0.0014617884703013314}. Best is trial 1 with value: 3066.0116080686685.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 9441
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 3061
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:28,623][0m Trial 7 finished with value: 8595.490894432796 and parameters: {'num_leaves': 84, 'learning_rate': 0.0015037709843650007, 'feature_fraction': 0.8204464853695168, 'bagging_fraction': 0.8876827533707595, 'bagging_freq': 9, 'min_child_samples': 21, 'reg_alpha': 0.03375525336865134, 'reg_lambda': 0.048704106255675034}. Best is trial 1 with value: 3066.0116080686685.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 21915
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 9298
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:30,777][0m Trial 8 finished with value: 3182.8685872744445 and parameters: {'num_leaves': 59, 'learning_rate': 0.03829634332317343, 'feature_fraction': 0.4635705805831225, 'bagging_fraction': 0.47715005581142045, 'bagging_freq': 7, 'min_child_samples': 5, 'reg_alpha': 0.02823666331291328, 'reg_lambda': 0.007784737787306918}. Best is trial 1 with value: 3066.0116080686685.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6683
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 1682
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:32,246][0m Trial 9 finished with value: 3994.5930782325 and parameters: {'num_leaves': 80, 'learning_rate': 0.01616963936852645, 'feature_fraction': 0.9111757776368901, 'bagging_fraction': 0.9095435663389079, 'bagging_freq': 10, 'min_child_samples': 47, 'reg_alpha': 0.004450069333118398, 'reg_lambda': 0.004439629433323627}. Best is trial 1 with value: 3066.0116080686685.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11115
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 3898
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:33,252][0m Trial 10 finished with value: 2943.717659483717 and parameters: {'num_leaves': 28, 'learning_rate': 0.09948150096044345, 'feature_fraction': 0.7172309585522569, 'bagging_fraction': 0.5525176186764036, 'bagging_freq': 1, 'min_child_samples': 16, 'reg_alpha': 3.8293786130562952, 'reg_lambda': 0.04936731324479798}. Best is trial 10 with value: 2943.717659483717.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11115
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 3898
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:34,148][0m Trial 11 finished with value: 2986.170222257194 and parameters: {'num_leaves': 21, 'learning_rate': 0.08988282399129005, 'feature_fraction': 0.9881094294006563, 'bagging_fraction': 0.5569439788013105, 'bagging_freq': 1, 'min_child_samples': 16, 'reg_alpha': 6.80389198243178, 'reg_lambda': 0.050944359673551654}. Best is trial 10 with value: 2943.717659483717.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12151
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 4416
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:35,006][0m Trial 12 finished with value: 3039.769282046612 and parameters: {'num_leaves': 18, 'learning_rate': 0.09777270006252706, 'feature_fraction': 0.6925910453171416, 'bagging_fraction': 0.6478729828950612, 'bagging_freq': 1, 'min_child_samples': 14, 'reg_alpha': 8.470523978931876, 'reg_lambda': 0.035562682565591065}. Best is trial 10 with value: 2943.717659483717.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10289
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 3485
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:36,212][0m Trial 13 finished with value: 3066.5509921992593 and parameters: {'num_leaves': 28, 'learning_rate': 0.08977020932554486, 'feature_fraction': 0.9859341605516193, 'bagging_fraction': 0.3845408978105064, 'bagging_freq': 1, 'min_child_samples': 18, 'reg_alpha': 0.857511535771045, 'reg_lambda': 0.14580697841199952}. Best is trial 10 with value: 2943.717659483717.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7179
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 1930
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:37,315][0m Trial 14 finished with value: 2981.3859693743566 and parameters: {'num_leaves': 32, 'learning_rate': 0.09448631923791713, 'feature_fraction': 0.9923200565541261, 'bagging_fraction': 0.6708315414596376, 'bagging_freq': 1, 'min_child_samples': 38, 'reg_alpha': 7.343168630010021, 'reg_lambda': 0.5689730039035271}. Best is trial 10 with value: 2943.717659483717.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7237
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 1959
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:38,373][0m Trial 15 finished with value: 3846.7271360016975 and parameters: {'num_leaves': 35, 'learning_rate': 0.02157716155514171, 'feature_fraction': 0.6937558902352352, 'bagging_fraction': 0.7105377752256732, 'bagging_freq': 2, 'min_child_samples': 37, 'reg_alpha': 1.2167429093274837, 'reg_lambda': 0.7961654300556542}. Best is trial 10 with value: 2943.717659483717.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7113
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 1897
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:39,210][0m Trial 16 finished with value: 3072.2265645877746 and parameters: {'num_leaves': 39, 'learning_rate': 0.05559845619055746, 'feature_fraction': 0.7381044097666989, 'bagging_fraction': 0.6100403701426068, 'bagging_freq': 8, 'min_child_samples': 39, 'reg_alpha': 9.84409811465387, 'reg_lambda': 1.3445477238264456}. Best is trial 10 with value: 2943.717659483717.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7179
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 1930
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:40,090][0m Trial 17 finished with value: 3831.884775673643 and parameters: {'num_leaves': 29, 'learning_rate': 0.0226252032483995, 'feature_fraction': 0.990714613528468, 'bagging_fraction': 0.7105244078731484, 'bagging_freq': 3, 'min_child_samples': 38, 'reg_alpha': 0.29562992253229603, 'reg_lambda': 1.4158696658773777}. Best is trial 10 with value: 2943.717659483717.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7729
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 2205
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:41,442][0m Trial 18 finished with value: 3099.6413161265887 and parameters: {'num_leaves': 45, 'learning_rate': 0.056805087271461954, 'feature_fraction': 0.7770113344976356, 'bagging_fraction': 0.79214122722012, 'bagging_freq': 5, 'min_child_samples': 32, 'reg_alpha': 2.1833728361400637, 'reg_lambda': 0.16744015679043214}. Best is trial 10 with value: 2943.717659483717.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6777
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 1729
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:43,412][0m Trial 19 finished with value: 3358.481031786304 and parameters: {'num_leaves': 99, 'learning_rate': 0.027208426152404316, 'feature_fraction': 0.6519208857824321, 'bagging_fraction': 0.42099652719463626, 'bagging_freq': 2, 'min_child_samples': 45, 'reg_alpha': 2.657960182619369, 'reg_lambda': 0.43573202531899413}. Best is trial 10 with value: 2943.717659483717.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 9441
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 3061
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:44,263][0m Trial 20 finished with value: 3363.212600871551 and parameters: {'num_leaves': 12, 'learning_rate': 0.07570218897471949, 'feature_fraction': 0.9067502257427336, 'bagging_fraction': 0.6489267070916085, 'bagging_freq': 4, 'min_child_samples': 21, 'reg_alpha': 0.2411232795360949, 'reg_lambda': 8.43812467209992}. Best is trial 10 with value: 2943.717659483717.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12151
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 4416
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:45,297][0m Trial 21 finished with value: 3012.738590750731 and parameters: {'num_leaves': 22, 'learning_rate': 0.08922317636642646, 'feature_fraction': 0.9970646918397477, 'bagging_fraction': 0.5651408511971735, 'bagging_freq': 1, 'min_child_samples': 14, 'reg_alpha': 6.014665138587467, 'reg_lambda': 0.06899951319169967}. Best is trial 10 with value: 2943.717659483717.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 9441
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 3061
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:46,179][0m Trial 22 finished with value: 3122.884653747003 and parameters: {'num_leaves': 27, 'learning_rate': 0.06086334581914596, 'feature_fraction': 0.9107085185401437, 'bagging_fraction': 0.5622140980109647, 'bagging_freq': 1, 'min_child_samples': 21, 'reg_alpha': 4.2935099396335605, 'reg_lambda': 0.08936479335592033}. Best is trial 10 with value: 2943.717659483717.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12807
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 4744
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:47,694][0m Trial 23 finished with value: 2840.168761896212 and parameters: {'num_leaves': 38, 'learning_rate': 0.0965106071321211, 'feature_fraction': 0.8225539476162963, 'bagging_fraction': 0.6398757833787688, 'bagging_freq': 2, 'min_child_samples': 13, 'reg_alpha': 9.465856477000807, 'reg_lambda': 0.024178217950624668}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 15157
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 5919
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:49,793][0m Trial 24 finished with value: 3009.6942030580212 and parameters: {'num_leaves': 47, 'learning_rate': 0.056402940825034734, 'feature_fraction': 0.7809168562937585, 'bagging_fraction': 0.6957401002178705, 'bagging_freq': 2, 'min_child_samples': 10, 'reg_alpha': 1.4699954964433937, 'reg_lambda': 0.022579923614941485}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14257
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 5469
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:51,770][0m Trial 25 finished with value: 3259.94868798098 and parameters: {'num_leaves': 36, 'learning_rate': 0.03727202485816945, 'feature_fraction': 0.8582697775611043, 'bagging_fraction': 0.7988393474667737, 'bagging_freq': 3, 'min_child_samples': 11, 'reg_alpha': 9.93450667686766, 'reg_lambda': 0.021125127938733747}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 34365
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 15523
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:55,447][0m Trial 26 finished with value: 3023.3103663210686 and parameters: {'num_leaves': 33, 'learning_rate': 0.06227830257224235, 'feature_fraction': 0.7605610557761328, 'bagging_fraction': 0.6310459259092916, 'bagging_freq': 2, 'min_child_samples': 2, 'reg_alpha': 3.536817282686853, 'reg_lambda': 0.17283462145979356}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7413
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 2047
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:56,491][0m Trial 27 finished with value: 3002.7885408919774 and parameters: {'num_leaves': 49, 'learning_rate': 0.07292390017439832, 'feature_fraction': 0.9033312853090424, 'bagging_fraction': 0.7383219106244863, 'bagging_freq': 1, 'min_child_samples': 35, 'reg_alpha': 1.501062036745367, 'reg_lambda': 0.0883685058255552}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6921
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 1801
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:57,386][0m Trial 28 finished with value: 3521.4655086306534 and parameters: {'num_leaves': 40, 'learning_rate': 0.028083261349084065, 'feature_fraction': 0.8114446878581679, 'bagging_fraction': 0.44536479953808666, 'bagging_freq': 2, 'min_child_samples': 42, 'reg_alpha': 4.7011493413549745, 'reg_lambda': 0.023048000726892896}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7729
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 2205
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:58,102][0m Trial 29 finished with value: 3287.4974456462737 and parameters: {'num_leaves': 11, 'learning_rate': 0.09293173322344989, 'feature_fraction': 0.6079779191933191, 'bagging_fraction': 0.3461050883253276, 'bagging_freq': 4, 'min_child_samples': 32, 'reg_alpha': 2.4134642826616997, 'reg_lambda': 0.01176881965795636}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 8833
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 2757
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:49:59,643][0m Trial 30 finished with value: 3057.6667318886043 and parameters: {'num_leaves': 67, 'learning_rate': 0.049413977578292495, 'feature_fraction': 0.73735571549905, 'bagging_fraction': 0.3220787835539531, 'bagging_freq': 6, 'min_child_samples': 24, 'reg_alpha': 0.7277457975740966, 'reg_lambda': 0.1412053254690696}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10687
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 3684
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:50:00,683][0m Trial 31 finished with value: 3110.993379568137 and parameters: {'num_leaves': 20, 'learning_rate': 0.0728897117387645, 'feature_fraction': 0.9550678644948806, 'bagging_fraction': 0.5154232578712659, 'bagging_freq': 1, 'min_child_samples': 17, 'reg_alpha': 6.095527230204116, 'reg_lambda': 0.06147966356157751}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11641
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 4161
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:50:01,733][0m Trial 32 finished with value: 3048.579263308402 and parameters: {'num_leaves': 23, 'learning_rate': 0.07283562207693463, 'feature_fraction': 0.926898304253324, 'bagging_fraction': 0.558662637471534, 'bagging_freq': 3, 'min_child_samples': 15, 'reg_alpha': 5.88271794647172, 'reg_lambda': 0.029908029863113063}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 8641
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 2661
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:50:02,411][0m Trial 33 finished with value: 3173.018226377156 and parameters: {'num_leaves': 16, 'learning_rate': 0.09820831772118463, 'feature_fraction': 0.8679683747877841, 'bagging_fraction': 0.5047453838039471, 'bagging_freq': 1, 'min_child_samples': 25, 'reg_alpha': 2.43640613404419, 'reg_lambda': 0.03996682731445732}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 15157
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 5919
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:50:04,249][0m Trial 34 finished with value: 3240.92543115197 and parameters: {'num_leaves': 30, 'learning_rate': 0.043747598472049315, 'feature_fraction': 0.9503983515040348, 'bagging_fraction': 0.5980955731736323, 'bagging_freq': 2, 'min_child_samples': 10, 'reg_alpha': 8.769801761954621, 'reg_lambda': 0.07924092741740933}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10289
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 3485
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:50:05,471][0m Trial 35 finished with value: 3427.7441979059636 and parameters: {'num_leaves': 25, 'learning_rate': 0.035543489294201286, 'feature_fraction': 0.8392225294943634, 'bagging_fraction': 0.6724484910591677, 'bagging_freq': 3, 'min_child_samples': 18, 'reg_alpha': 4.559372521374935, 'reg_lambda': 0.36394384145586156}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 8053
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 2367
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:50:06,560][0m Trial 36 finished with value: 3108.727029079662 and parameters: {'num_leaves': 40, 'learning_rate': 0.052533513286716634, 'feature_fraction': 0.8720434846159278, 'bagging_fraction': 0.6158301677100682, 'bagging_freq': 4, 'min_child_samples': 29, 'reg_alpha': 3.256596452799889, 'reg_lambda': 0.04708011016350711}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12807
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 4744
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:50:08,447][0m Trial 37 finished with value: 2871.595599334849 and parameters: {'num_leaves': 52, 'learning_rate': 0.07107552555591402, 'feature_fraction': 0.9590123091910742, 'bagging_fraction': 0.5377417440911278, 'bagging_freq': 2, 'min_child_samples': 13, 'reg_alpha': 1.7333163880997844, 'reg_lambda': 0.22835900613630192}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 34365
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 15523
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:50:13,981][0m Trial 38 finished with value: 2869.3559390441283 and parameters: {'num_leaves': 52, 'learning_rate': 0.07090072728270107, 'feature_fraction': 0.8214779161492235, 'bagging_fraction': 0.5023307618143212, 'bagging_freq': 2, 'min_child_samples': 2, 'reg_alpha': 1.6867863490824722, 'reg_lambda': 0.30461708055930625}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 48291
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 22486
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:50:24,743][0m Trial 39 finished with value: 3046.1584043288944 and parameters: {'num_leaves': 52, 'learning_rate': 0.04657186432648659, 'feature_fraction': 0.8276089690824411, 'bagging_fraction': 0.5193532467792663, 'bagging_freq': 3, 'min_child_samples': 1, 'reg_alpha': 1.6925779561660996, 'reg_lambda': 0.21018790217212766}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 18491
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 7586
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:50:28,961][0m Trial 40 finished with value: 2938.343443783358 and parameters: {'num_leaves': 64, 'learning_rate': 0.06770863208056727, 'feature_fraction': 0.7904202826138803, 'bagging_fraction': 0.46493171459458416, 'bagging_freq': 5, 'min_child_samples': 7, 'reg_alpha': 0.8673124432970192, 'reg_lambda': 0.244019918372624}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 18491
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 7586
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:50:31,751][0m Trial 41 finished with value: 2920.0734819439353 and parameters: {'num_leaves': 68, 'learning_rate': 0.07066335192888486, 'feature_fraction': 0.7972991106062679, 'bagging_fraction': 0.44862772839709153, 'bagging_freq': 7, 'min_child_samples': 7, 'reg_alpha': 0.9537223698226259, 'reg_lambda': 0.2423473403295258}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 18491
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 7586
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:50:34,762][0m Trial 42 finished with value: 2910.344279594738 and parameters: {'num_leaves': 67, 'learning_rate': 0.06875887816943348, 'feature_fraction': 0.7937829257206084, 'bagging_fraction': 0.47144139857655826, 'bagging_freq': 7, 'min_child_samples': 7, 'reg_alpha': 0.924733393765984, 'reg_lambda': 0.2932326821474034}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 18491
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 7586
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:50:37,982][0m Trial 43 finished with value: 3084.1902639573354 and parameters: {'num_leaves': 73, 'learning_rate': 0.0429197321451355, 'feature_fraction': 0.8187102665602604, 'bagging_fraction': 0.49003155689940203, 'bagging_freq': 7, 'min_child_samples': 7, 'reg_alpha': 0.39067595630597657, 'reg_lambda': 0.2944436192646951}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 28017
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 12349
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:50:43,220][0m Trial 44 finished with value: 3188.359497028794 and parameters: {'num_leaves': 73, 'learning_rate': 0.03373123438304333, 'feature_fraction': 0.870141536541364, 'bagging_fraction': 0.431727176887462, 'bagging_freq': 7, 'min_child_samples': 3, 'reg_alpha': 0.5419846482391062, 'reg_lambda': 0.11102713091472202}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 13479
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 5080
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:50:45,016][0m Trial 45 finished with value: 2882.3471223869014 and parameters: {'num_leaves': 60, 'learning_rate': 0.0738378481952282, 'feature_fraction': 0.7878060317377079, 'bagging_fraction': 0.39660169807509893, 'bagging_freq': 8, 'min_child_samples': 12, 'reg_alpha': 1.218869963443053, 'reg_lambda': 0.2802416010501529}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12807
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 4744
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:50:47,053][0m Trial 46 finished with value: 3036.0342862718494 and parameters: {'num_leaves': 59, 'learning_rate': 0.050787852432218734, 'feature_fraction': 0.7518963161718003, 'bagging_fraction': 0.2766582868017971, 'bagging_freq': 8, 'min_child_samples': 13, 'reg_alpha': 0.17573565992183784, 'reg_lambda': 0.5771997114392915}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 21915
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 9298
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:50:50,263][0m Trial 47 finished with value: 2897.65335143136 and parameters: {'num_leaves': 52, 'learning_rate': 0.07853081849374993, 'feature_fraction': 0.9392025883292092, 'bagging_fraction': 0.40580707442438263, 'bagging_freq': 8, 'min_child_samples': 5, 'reg_alpha': 0.5663959715940119, 'reg_lambda': 0.3124792629778613}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 21915
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 9298
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:50:53,364][0m Trial 48 finished with value: 4607.140638691755 and parameters: {'num_leaves': 54, 'learning_rate': 0.012579301753153325, 'feature_fraction': 0.9366953698385081, 'bagging_fraction': 0.39488464221863395, 'bagging_freq': 10, 'min_child_samples': 5, 'reg_alpha': 0.5356727190338991, 'reg_lambda': 0.11043955884523456}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 13479
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 5080
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:50:55,428][0m Trial 49 finished with value: 5705.89510122667 and parameters: {'num_leaves': 61, 'learning_rate': 0.008027010909911052, 'feature_fraction': 0.8809133491626151, 'bagging_fraction': 0.20773593066616022, 'bagging_freq': 9, 'min_child_samples': 12, 'reg_alpha': 1.807537156612742, 'reg_lambda': 0.9209418693867829}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 21915
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 9298
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:50:59,224][0m Trial 50 finished with value: 2880.60808485794 and parameters: {'num_leaves': 78, 'learning_rate': 0.07990354307817658, 'feature_fraction': 0.9423064752582168, 'bagging_fraction': 0.3885925620993664, 'bagging_freq': 8, 'min_child_samples': 5, 'reg_alpha': 1.36398748463216, 'reg_lambda': 0.39334050496474515}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 24443
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 10562
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:51:03,901][0m Trial 51 finished with value: 2870.413024862729 and parameters: {'num_leaves': 79, 'learning_rate': 0.07533566646527091, 'feature_fraction': 0.9518429430053904, 'bagging_fraction': 0.3934659078786354, 'bagging_freq': 8, 'min_child_samples': 4, 'reg_alpha': 1.2276942515070206, 'reg_lambda': 0.3615134480189503}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 24443
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 10562
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:51:08,416][0m Trial 52 finished with value: 2951.509564426547 and parameters: {'num_leaves': 84, 'learning_rate': 0.05961235277812683, 'feature_fraction': 0.9517727767276825, 'bagging_fraction': 0.3527529989961377, 'bagging_freq': 9, 'min_child_samples': 4, 'reg_alpha': 1.1616348165675703, 'reg_lambda': 0.48136143139637766}. Best is trial 23 with value: 2840.168761896212.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 16115
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 6398
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:51:11,210][0m Trial 53 finished with value: 2837.208074237658 and parameters: {'num_leaves': 78, 'learning_rate': 0.08089276329302422, 'feature_fraction': 0.8936054175672502, 'bagging_fraction': 0.3780683538676331, 'bagging_freq': 8, 'min_child_samples': 9, 'reg_alpha': 2.92681431604018, 'reg_lambda': 0.7228303433828158}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 16115
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 6398
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:51:13,967][0m Trial 54 finished with value: 2905.2481971562943 and parameters: {'num_leaves': 78, 'learning_rate': 0.08339584933569272, 'feature_fraction': 0.8979546267170126, 'bagging_fraction': 0.3590158128723704, 'bagging_freq': 6, 'min_child_samples': 9, 'reg_alpha': 2.0004690443058775, 'reg_lambda': 0.7116776858969507}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 48291
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 22486
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:51:24,520][0m Trial 55 finished with value: 2998.4845147905858 and parameters: {'num_leaves': 91, 'learning_rate': 0.08293818479970361, 'feature_fraction': 0.8455493146138421, 'bagging_fraction': 0.3009027277054439, 'bagging_freq': 9, 'min_child_samples': 1, 'reg_alpha': 2.9529176947556386, 'reg_lambda': 2.010299831732654}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 17169
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 6925
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:51:27,895][0m Trial 56 finished with value: 3012.4775498124795 and parameters: {'num_leaves': 79, 'learning_rate': 0.06292404464413834, 'feature_fraction': 0.9999416595262032, 'bagging_fraction': 0.5294316453595914, 'bagging_freq': 10, 'min_child_samples': 8, 'reg_alpha': 3.2428162591626046, 'reg_lambda': 0.4241539489449031}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 28017
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 12349
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:51:33,803][0m Trial 57 finished with value: 3061.289044040429 and parameters: {'num_leaves': 87, 'learning_rate': 0.04136612817262407, 'feature_fraction': 0.9544585040556431, 'bagging_fraction': 0.4277454229119449, 'bagging_freq': 8, 'min_child_samples': 3, 'reg_alpha': 1.9537422515030904, 'reg_lambda': 0.7683264888012205}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 16115
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 6398
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:51:37,163][0m Trial 58 finished with value: 2892.868763126121 and parameters: {'num_leaves': 94, 'learning_rate': 0.09908454595857352, 'feature_fraction': 0.9028854244482045, 'bagging_fraction': 0.48702311655311126, 'bagging_freq': 6, 'min_child_samples': 9, 'reg_alpha': 4.5118455003443785, 'reg_lambda': 0.17135546487789857}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 9935
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 3308
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:51:39,401][0m Trial 59 finished with value: 3016.6703855665824 and parameters: {'num_leaves': 76, 'learning_rate': 0.05050357001537283, 'feature_fraction': 0.9680397912399288, 'bagging_fraction': 0.5814232536430036, 'bagging_freq': 8, 'min_child_samples': 19, 'reg_alpha': 1.3706719801012657, 'reg_lambda': 0.4254233925750524}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 24443
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 10562
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:51:43,800][0m Trial 60 finished with value: 2941.809654017425 and parameters: {'num_leaves': 83, 'learning_rate': 0.08329224497468422, 'feature_fraction': 0.9246962071817398, 'bagging_fraction': 0.36942104876593845, 'bagging_freq': 7, 'min_child_samples': 4, 'reg_alpha': 6.67915091677749, 'reg_lambda': 1.004634351602805}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14257
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 5469
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:51:45,903][0m Trial 61 finished with value: 2951.7931588161227 and parameters: {'num_leaves': 57, 'learning_rate': 0.05880384443594309, 'feature_fraction': 0.8461615091115948, 'bagging_fraction': 0.3933043054304764, 'bagging_freq': 8, 'min_child_samples': 11, 'reg_alpha': 1.2623161271515146, 'reg_lambda': 0.214556740718712}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12807
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 4744
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:51:47,357][0m Trial 62 finished with value: 2903.487308445828 and parameters: {'num_leaves': 43, 'learning_rate': 0.08128778205779452, 'feature_fraction': 0.8913037362579318, 'bagging_fraction': 0.41280731770667706, 'bagging_freq': 9, 'min_child_samples': 13, 'reg_alpha': 2.541900798720638, 'reg_lambda': 0.613724565802233}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20045
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 8363
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:51:50,333][0m Trial 63 finished with value: 2965.07993928782 and parameters: {'num_leaves': 48, 'learning_rate': 0.06555463608531421, 'feature_fraction': 0.974560695660659, 'bagging_fraction': 0.4588736048727355, 'bagging_freq': 8, 'min_child_samples': 6, 'reg_alpha': 0.7062711047373288, 'reg_lambda': 0.34535228705433635}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 16115
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 6398
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:51:52,691][0m Trial 64 finished with value: 2929.2704284499287 and parameters: {'num_leaves': 70, 'learning_rate': 0.09835827066602239, 'feature_fraction': 0.8361035471447006, 'bagging_fraction': 0.336028690955549, 'bagging_freq': 9, 'min_child_samples': 9, 'reg_alpha': 1.5510716803660354, 'reg_lambda': 0.5505061872561323}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 28017
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 12349
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:51:57,213][0m Trial 65 finished with value: 2880.756959826278 and parameters: {'num_leaves': 64, 'learning_rate': 0.08155020587021809, 'feature_fraction': 0.9193777434862301, 'bagging_fraction': 0.37363324604069015, 'bagging_freq': 2, 'min_child_samples': 3, 'reg_alpha': 1.0168429056290542, 'reg_lambda': 0.1516343325456182}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 34365
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 15523
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:52:03,959][0m Trial 66 finished with value: 2974.9893251258936 and parameters: {'num_leaves': 75, 'learning_rate': 0.05460626022261308, 'feature_fraction': 0.9277843479974291, 'bagging_fraction': 0.533035235714399, 'bagging_freq': 2, 'min_child_samples': 2, 'reg_alpha': 3.6321198223827786, 'reg_lambda': 0.12804570941791002}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 21915
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 9298
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:52:07,270][0m Trial 67 finished with value: 2879.7485476566994 and parameters: {'num_leaves': 64, 'learning_rate': 0.08684818265804436, 'feature_fraction': 0.9695746926781993, 'bagging_fraction': 0.27451703484782386, 'bagging_freq': 2, 'min_child_samples': 5, 'reg_alpha': 2.233961032493419, 'reg_lambda': 0.1594291797814328}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 21915
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 9298
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:52:10,980][0m Trial 68 finished with value: 3005.8832958484254 and parameters: {'num_leaves': 82, 'learning_rate': 0.0646530849183306, 'feature_fraction': 0.9706787035954201, 'bagging_fraction': 0.1267823606507832, 'bagging_freq': 3, 'min_child_samples': 5, 'reg_alpha': 5.4896512615630915, 'reg_lambda': 0.1880848220075141}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 48291
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 22486
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:52:21,814][0m Trial 69 finished with value: 2955.752548596951 and parameters: {'num_leaves': 88, 'learning_rate': 0.08865456327153963, 'feature_fraction': 0.8915817913604747, 'bagging_fraction': 0.26295030135142544, 'bagging_freq': 4, 'min_child_samples': 1, 'reg_alpha': 2.257121288979579, 'reg_lambda': 0.0863751195672453}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11641
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 4161
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:52:23,417][0m Trial 70 finished with value: 3145.446194087866 and parameters: {'num_leaves': 44, 'learning_rate': 0.04513706733943696, 'feature_fraction': 0.9658159683821116, 'bagging_fraction': 0.3175428781378761, 'bagging_freq': 2, 'min_child_samples': 15, 'reg_alpha': 7.383109977734245, 'reg_lambda': 0.3929613952546704}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 28017
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 12349
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:52:28,041][0m Trial 71 finished with value: 2904.0493870993732 and parameters: {'num_leaves': 64, 'learning_rate': 0.0809457240520413, 'feature_fraction': 0.9132220568741996, 'bagging_fraction': 0.37975087415435915, 'bagging_freq': 2, 'min_child_samples': 3, 'reg_alpha': 1.8639992009284665, 'reg_lambda': 0.13984820165460743}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 24443
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 10562
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:52:33,482][0m Trial 72 finished with value: 2918.359922243667 and parameters: {'num_leaves': 71, 'learning_rate': 0.08766975186812592, 'feature_fraction': 0.9997445688078127, 'bagging_fraction': 0.43978653070050544, 'bagging_freq': 2, 'min_child_samples': 4, 'reg_alpha': 3.8302385761101565, 'reg_lambda': 0.1852456091483172}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20045
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 8363
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:52:36,455][0m Trial 73 finished with value: 2969.120091485985 and parameters: {'num_leaves': 64, 'learning_rate': 0.07316322932670259, 'feature_fraction': 0.869292523678142, 'bagging_fraction': 0.37459964941753354, 'bagging_freq': 3, 'min_child_samples': 6, 'reg_alpha': 2.572161277550109, 'reg_lambda': 0.06684083490822298}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 17169
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 6925
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:52:38,755][0m Trial 74 finished with value: 2943.654679005274 and parameters: {'num_leaves': 57, 'learning_rate': 0.09929619949633792, 'feature_fraction': 0.9300239806602495, 'bagging_fraction': 0.29274750989288995, 'bagging_freq': 2, 'min_child_samples': 8, 'reg_alpha': 1.0505402445476488, 'reg_lambda': 0.24036109637513395}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 34365
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 15523
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:52:44,414][0m Trial 75 finished with value: 2970.876827330177 and parameters: {'num_leaves': 50, 'learning_rate': 0.05680849465550846, 'feature_fraction': 0.9728180162616453, 'bagging_fraction': 0.32893161223376116, 'bagging_freq': 1, 'min_child_samples': 2, 'reg_alpha': 0.6966885741944829, 'reg_lambda': 0.10583704458140185}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14257
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 5469
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:52:46,706][0m Trial 76 finished with value: 2912.8050865210375 and parameters: {'num_leaves': 62, 'learning_rate': 0.06774928721176744, 'feature_fraction': 0.8841194742657401, 'bagging_fraction': 0.4951717890196335, 'bagging_freq': 3, 'min_child_samples': 11, 'reg_alpha': 4.834817872173396, 'reg_lambda': 0.15257640443733478}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 17169
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 6925
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:52:48,650][0m Trial 77 finished with value: 3007.5847068894373 and parameters: {'num_leaves': 55, 'learning_rate': 0.052105894911590685, 'feature_fraction': 0.8539573892740336, 'bagging_fraction': 0.25607645234423126, 'bagging_freq': 2, 'min_child_samples': 8, 'reg_alpha': 1.582155237445468, 'reg_lambda': 0.36378352957461546}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20045
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 8363
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:52:52,118][0m Trial 78 finished with value: 2869.429618398252 and parameters: {'num_leaves': 76, 'learning_rate': 0.07583155721664657, 'feature_fraction': 0.9198159301720151, 'bagging_fraction': 0.3550687876674008, 'bagging_freq': 1, 'min_child_samples': 6, 'reg_alpha': 3.1172527076077534, 'reg_lambda': 0.22217519976485106}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20045
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 8363
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:52:55,184][0m Trial 79 finished with value: 2872.5614874533258 and parameters: {'num_leaves': 78, 'learning_rate': 0.060440396784124835, 'feature_fraction': 0.8191741623180642, 'bagging_fraction': 0.3496514866518109, 'bagging_freq': 1, 'min_child_samples': 6, 'reg_alpha': 8.192837325197354, 'reg_lambda': 0.24577047887956224}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6547
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 1614
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:52:56,528][0m Trial 80 finished with value: 2933.2745519138193 and parameters: {'num_leaves': 81, 'learning_rate': 0.062428820571907725, 'feature_fraction': 0.812532935788725, 'bagging_fraction': 0.41600325746400035, 'bagging_freq': 1, 'min_child_samples': 50, 'reg_alpha': 9.798930272262943, 'reg_lambda': 0.26235863117322017}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20045
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 8363
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:52:59,595][0m Trial 81 finished with value: 2898.5228010151304 and parameters: {'num_leaves': 75, 'learning_rate': 0.07501111882759295, 'feature_fraction': 0.8678397632414836, 'bagging_fraction': 0.3441874038404714, 'bagging_freq': 1, 'min_child_samples': 6, 'reg_alpha': 7.183710572097535, 'reg_lambda': 0.2049859272977567}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 15157
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 5919
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:53:02,456][0m Trial 82 finished with value: 2859.0656132239515 and parameters: {'num_leaves': 86, 'learning_rate': 0.09050987755283287, 'feature_fraction': 0.951940746709227, 'bagging_fraction': 0.2945848799514151, 'bagging_freq': 1, 'min_child_samples': 10, 'reg_alpha': 3.1742771062721253, 'reg_lambda': 0.314450440308427}. Best is trial 53 with value: 2837.208074237658.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 15157
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 5919
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:53:05,102][0m Trial 83 finished with value: 2805.9527251651957 and parameters: {'num_leaves': 86, 'learning_rate': 0.09018960900118386, 'feature_fraction': 0.9128188556941866, 'bagging_fraction': 0.3142492548905205, 'bagging_freq': 1, 'min_child_samples': 10, 'reg_alpha': 2.986719312955511, 'reg_lambda': 0.5067009639186371}. Best is trial 83 with value: 2805.9527251651957.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12151
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 4416
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:53:07,423][0m Trial 84 finished with value: 2912.529175944094 and parameters: {'num_leaves': 86, 'learning_rate': 0.06714490294932883, 'feature_fraction': 0.8310349945514647, 'bagging_fraction': 0.3112425422649475, 'bagging_freq': 1, 'min_child_samples': 14, 'reg_alpha': 3.227699272680721, 'reg_lambda': 0.5148150189933153}. Best is trial 83 with value: 2805.9527251651957.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 15157
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 5919
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:53:10,146][0m Trial 85 finished with value: 2859.2717209909115 and parameters: {'num_leaves': 93, 'learning_rate': 0.09057741063835914, 'feature_fraction': 0.7643988030392608, 'bagging_fraction': 0.3088898123288837, 'bagging_freq': 1, 'min_child_samples': 10, 'reg_alpha': 4.9927800775088125, 'reg_lambda': 0.30610942374090233}. Best is trial 83 with value: 2805.9527251651957.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 15157
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 5919
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:53:12,728][0m Trial 86 finished with value: 2874.5484228338128 and parameters: {'num_leaves': 98, 'learning_rate': 0.09086061036235643, 'feature_fraction': 0.767645314396555, 'bagging_fraction': 0.23229907703486313, 'bagging_freq': 1, 'min_child_samples': 10, 'reg_alpha': 5.426112949954648, 'reg_lambda': 0.6668752646778181}. Best is trial 83 with value: 2805.9527251651957.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12807
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 4744
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:53:15,246][0m Trial 87 finished with value: 2947.6464567069174 and parameters: {'num_leaves': 90, 'learning_rate': 0.09920727024138708, 'feature_fraction': 0.9000656889428205, 'bagging_fraction': 0.2910690868699022, 'bagging_freq': 1, 'min_child_samples': 13, 'reg_alpha': 4.1382276237362134, 'reg_lambda': 0.35112729297017026}. Best is trial 83 with value: 2805.9527251651957.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14257
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 5469
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:53:17,755][0m Trial 88 finished with value: 2856.1553348718144 and parameters: {'num_leaves': 94, 'learning_rate': 0.0743161492503486, 'feature_fraction': 0.7174448872205536, 'bagging_fraction': 0.31744785654907237, 'bagging_freq': 1, 'min_child_samples': 11, 'reg_alpha': 2.8632889468664695, 'reg_lambda': 0.4926702654217792}. Best is trial 83 with value: 2805.9527251651957.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14257
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 5469
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:53:20,271][0m Trial 89 finished with value: 2844.0921379785577 and parameters: {'num_leaves': 92, 'learning_rate': 0.07660355212811787, 'feature_fraction': 0.7301356574314816, 'bagging_fraction': 0.31459559408918664, 'bagging_freq': 1, 'min_child_samples': 11, 'reg_alpha': 3.203337989819038, 'reg_lambda': 0.4800627861590364}. Best is trial 83 with value: 2805.9527251651957.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10687
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 3684
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:53:22,270][0m Trial 90 finished with value: 2883.3787729307705 and parameters: {'num_leaves': 94, 'learning_rate': 0.08935851918431383, 'feature_fraction': 0.7408403068551493, 'bagging_fraction': 0.31559502261450734, 'bagging_freq': 1, 'min_child_samples': 17, 'reg_alpha': 2.881404715439862, 'reg_lambda': 0.4693313474482758}. Best is trial 83 with value: 2805.9527251651957.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 15157
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 5919
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:53:24,934][0m Trial 91 finished with value: 2873.0899194242534 and parameters: {'num_leaves': 93, 'learning_rate': 0.0721617290000271, 'feature_fraction': 0.6992655324390092, 'bagging_fraction': 0.300032348936394, 'bagging_freq': 1, 'min_child_samples': 10, 'reg_alpha': 3.835684903998502, 'reg_lambda': 0.5511173106561738}. Best is trial 83 with value: 2805.9527251651957.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14257
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 5469
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:53:27,352][0m Trial 92 finished with value: 2903.0594957471403 and parameters: {'num_leaves': 97, 'learning_rate': 0.05629270634945288, 'feature_fraction': 0.7168858394295973, 'bagging_fraction': 0.3370805452661089, 'bagging_freq': 1, 'min_child_samples': 11, 'reg_alpha': 5.797728111473296, 'reg_lambda': 0.3017306692826553}. Best is trial 83 with value: 2805.9527251651957.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11641
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 4161
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:53:29,556][0m Trial 93 finished with value: 3006.0427141607515 and parameters: {'num_leaves': 85, 'learning_rate': 0.047396876010602315, 'feature_fraction': 0.7697901686408974, 'bagging_fraction': 0.3597184471455027, 'bagging_freq': 1, 'min_child_samples': 15, 'reg_alpha': 4.814030829655139, 'reg_lambda': 0.8778815742946787}. Best is trial 83 with value: 2805.9527251651957.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 13479
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 5080
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:53:31,857][0m Trial 94 finished with value: 2871.4600248572738 and parameters: {'num_leaves': 100, 'learning_rate': 0.07394212726252705, 'feature_fraction': 0.6494805264166316, 'bagging_fraction': 0.3312638325424672, 'bagging_freq': 1, 'min_child_samples': 12, 'reg_alpha': 3.138657739911993, 'reg_lambda': 0.6835460458407511}. Best is trial 83 with value: 2805.9527251651957.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 16115
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 6398
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:53:34,569][0m Trial 95 finished with value: 2824.245475610874 and parameters: {'num_leaves': 92, 'learning_rate': 0.09024044392586392, 'feature_fraction': 0.7975793656281157, 'bagging_fraction': 0.245253673515186, 'bagging_freq': 1, 'min_child_samples': 9, 'reg_alpha': 2.9617977306358925, 'reg_lambda': 0.4531533349925369}. Best is trial 83 with value: 2805.9527251651957.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 16115
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 6398
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:53:37,505][0m Trial 96 finished with value: 2807.89708228935 and parameters: {'num_leaves': 91, 'learning_rate': 0.09219150819346023, 'feature_fraction': 0.8058781983682965, 'bagging_fraction': 0.24707138872821097, 'bagging_freq': 1, 'min_child_samples': 9, 'reg_alpha': 2.7083115821595873, 'reg_lambda': 1.1421850098565556}. Best is trial 83 with value: 2805.9527251651957.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 16115
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 6398
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:53:40,856][0m Trial 97 finished with value: 2858.739885829914 and parameters: {'num_leaves': 92, 'learning_rate': 0.09008918800566139, 'feature_fraction': 0.791598608462538, 'bagging_fraction': 0.24041180570131424, 'bagging_freq': 1, 'min_child_samples': 9, 'reg_alpha': 6.598502841891692, 'reg_lambda': 1.2176303915768185}. Best is trial 83 with value: 2805.9527251651957.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 17169
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 6925
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:53:43,610][0m Trial 98 finished with value: 2860.9889693980817 and parameters: {'num_leaves': 96, 'learning_rate': 0.09131326877892325, 'feature_fraction': 0.8020665552009496, 'bagging_fraction': 0.23756878737310466, 'bagging_freq': 1, 'min_child_samples': 8, 'reg_alpha': 6.891664073755378, 'reg_lambda': 1.2594177438259198}. Best is trial 83 with value: 2805.9527251651957.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 16115
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 6398
[LightGBM] [Info] Start training from score 16423.053517


[32m[I 2023-03-16 02:53:45,890][0m Trial 99 finished with value: 2886.2040293946625 and parameters: {'num_leaves': 92, 'learning_rate': 0.09108103136847853, 'feature_fraction': 0.7742344506624493, 'bagging_fraction': 0.20557244361613375, 'bagging_freq': 1, 'min_child_samples': 9, 'reg_alpha': 8.271182548391527, 'reg_lambda': 1.8576220744674532}. Best is trial 83 with value: 2805.9527251651957.[0m


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 15157
[LightGBM] [Info] Number of data points in the train set: 40560, number of used features: 5919
[LightGBM] [Info] Start training from score 16423.053517
Best hyperparameters: {'num_leaves': 86, 'learning_rate': 0.09018960900118386, 'feature_fraction': 0.9128188556941866, 'bagging_fraction': 0.3142492548905205, 'bagging_freq': 1, 'min_child_samples': 10, 'reg_alpha': 2.986719312955511, 'reg_lambda': 0.5067009639186371}
Best MSE  : 7873370.69586199
Best RMSE : 2805.9527251651957
Best R2   : 0.9156389250279962


### Model Performance

| Model           | Metrics         | Score           |
| --------------- | --------------- | --------------- |
| LightGBM        | RMSE            | 2,806           |
|                 | R2              | 0.916           |


### Discussion

- In this experiment, I started with exploring simpler models e.g. Linear Regression, Ridge and Lasso, paired with different permutations of feature encoders e.g. One Hot Encoder, Ordinal Encoder and Standard Scaler
- It is found that **Ridge model** with **One Hot Encoder + Standard Scaler** works best: 
    - R2: 0.769 
    - RMSE: 4,945
- Next, I explored using another regression model - **LightGBM Regression**, paired with One Hot Encoder + Standard Scaler preprocessing steps
- After auto-tuning using Optuna, the best performance is:
    - R2: 0.916
    - RMSE: 2,806

### Potential improvements
- Using Feature Importance to decide which features should be included and excluded
- Train on more data