In [1]:
import pandas as pd
import numpy as np
import mlflow

In [2]:
df = pd.read_csv(r'C:/Users/User/Desktop/MLOPs/youtube-comment/youtube-comment-analysis/data/processed/reddit_preprocessing.csv').dropna(subset=['clean_comment'])
df.shape

(36662, 2)

In [3]:
# Drop rows with NaN values in 'clean_comment'
cleaned_dataset = df.dropna()

In [4]:
# Separate features and target
X_cleaned = cleaned_dataset['clean_comment']
y_cleaned = cleaned_dataset['category']

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
# Split the cleaned data into train and test sets (80-20 split)
X_train_cleaned, X_test_cleaned, y_train_cleaned, y_test_cleaned = train_test_split(X_cleaned, y_cleaned, test_size=0.2, random_state=42)

In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report
import mlflow.sklearn
import optuna
from lightgbm import LGBMClassifier
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
# Apply TfidfVectorizer with trigram setting and max_features=1000
tfidf_cleaned = TfidfVectorizer(ngram_range=(1, 3), max_features=10000)

In [10]:
# Fit the vectorizer on the training data and transform both train and test sets
X_train_tfidf_cleaned = tfidf_cleaned.fit_transform(X_train_cleaned)
X_test_tfidf_cleaned = tfidf_cleaned.transform(X_test_cleaned)

In [11]:
import lightgbm as lgb
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import GridSearchCV
import optuna

In [12]:
# Function to optimize LightGBM hyperparameters
def objective(trial):
    # Define hyperparameters to be tuned
    param = {
        "objective": "multiclass",
        "num_class": 3,  # Assuming 3 categories (-1, 0, 1)
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 1e-1),
        "n_estimators": trial.suggest_int("n_estimators", 50, 500),
        "max_depth": trial.suggest_int("max_depth", 3, 20),
        "metric": "multi_logloss",
        "is_unbalance": True,
        "class_weight": "balanced",
    }

    # Define the LightGBM model with the trial parameters
    model = lgb.LGBMClassifier(**param)

    # Perform cross-validation
    scores = cross_val_score(model, X_train_tfidf_cleaned, y_train_cleaned, cv=3, scoring='accuracy')

    # Return the average score across folds
    return scores.mean()

In [13]:
# Create an Optuna study to optimize the hyperparameters
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

[I 2024-12-17 10:18:53,972] A new study created in memory with name: no-name-78ca4483-4914-4dd5-bc50-7a7010e6ddb4


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.247814 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.230672 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.242646 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:19:11,697] Trial 0 finished with value: 0.7850932379627058 and parameters: {'learning_rate': 0.09946746832928899, 'n_estimators': 294, 'max_depth': 3}. Best is trial 0 with value: 0.7850932379627058.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.221755 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.202391 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.169261 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:19:38,842] Trial 1 finished with value: 0.7415868026382771 and parameters: {'learning_rate': 0.027445544161250585, 'n_estimators': 124, 'max_depth': 13}. Best is trial 0 with value: 0.7850932379627058.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.164611 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.167644 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.160899 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:21:07,132] Trial 2 finished with value: 0.8448634430506236 and parameters: {'learning_rate': 0.07520920234274267, 'n_estimators': 495, 'max_depth': 16}. Best is trial 2 with value: 0.8448634430506236.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.162227 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.202567 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.167388 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:22:39,169] Trial 3 finished with value: 0.8448974738970364 and parameters: {'learning_rate': 0.09978324249290124, 'n_estimators': 407, 'max_depth': 19}. Best is trial 3 with value: 0.8448974738970364.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.163938 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.168851 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.157299 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:22:51,449] Trial 4 finished with value: 0.7390296031954483 and parameters: {'learning_rate': 0.06259881242691888, 'n_estimators': 94, 'max_depth': 7}. Best is trial 3 with value: 0.8448974738970364.






[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.162089 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.165757 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.178354 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:23:11,271] Trial 5 finished with value: 0.775341712524261 and parameters: {'learning_rate': 0.06425452270386565, 'n_estimators': 105, 'max_depth': 11}. Best is trial 3 with value: 0.8448974738970364.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.177211 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.211945 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.237110 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:24:55,760] Trial 6 finished with value: 0.8445907219818092 and parameters: {'learning_rate': 0.06942261276567246, 'n_estimators': 356, 'max_depth': 19}. Best is trial 3 with value: 0.8448974738970364.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.235356 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.228827 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.238474 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:25:33,759] Trial 7 finished with value: 0.8190186473414425 and parameters: {'learning_rate': 0.07833325919070334, 'n_estimators': 122, 'max_depth': 18}. Best is trial 3 with value: 0.8448974738970364.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.306002 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.245288 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.198849 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:26:47,561] Trial 8 finished with value: 0.8057553533820565 and parameters: {'learning_rate': 0.02980173813872301, 'n_estimators': 318, 'max_depth': 14}. Best is trial 3 with value: 0.8448974738970364.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.278594 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.219580 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.242208 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:28:22,437] Trial 9 finished with value: 0.8037095756928886 and parameters: {'learning_rate': 0.021059160847641433, 'n_estimators': 389, 'max_depth': 15}. Best is trial 3 with value: 0.8448974738970364.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.228810 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.275005 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.290371 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:31:03,174] Trial 10 finished with value: 0.8443519689847371 and parameters: {'learning_rate': 0.09431324052182262, 'n_estimators': 488, 'max_depth': 20}. Best is trial 3 with value: 0.8448974738970364.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.230820 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.219100 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.223431 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:33:01,050] Trial 11 finished with value: 0.8437382559053807 and parameters: {'learning_rate': 0.08534888456707351, 'n_estimators': 486, 'max_depth': 17}. Best is trial 3 with value: 0.8448974738970364.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.176609 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.220245 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.348904 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:34:54,945] Trial 12 finished with value: 0.8368168288401456 and parameters: {'learning_rate': 0.04276730350744626, 'n_estimators': 421, 'max_depth': 16}. Best is trial 3 with value: 0.8448974738970364.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.257030 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.284615 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.279349 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:35:42,780] Trial 13 finished with value: 0.7989361199930295 and parameters: {'learning_rate': 0.05104031661337233, 'n_estimators': 224, 'max_depth': 10}. Best is trial 3 with value: 0.8448974738970364.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.226798 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.225635 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.204892 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:37:56,210] Trial 14 finished with value: 0.8449998175349576 and parameters: {'learning_rate': 0.08189783746288573, 'n_estimators': 430, 'max_depth': 20}. Best is trial 14 with value: 0.8449998175349576.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.217253 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.694183 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.337610 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:39:44,902] Trial 15 finished with value: 0.8433973824692163 and parameters: {'learning_rate': 0.08920596055097677, 'n_estimators': 230, 'max_depth': 20}. Best is trial 14 with value: 0.8449998175349576.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.406376 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.286016 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.203867 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:40:54,845] Trial 16 finished with value: 0.840942463889638 and parameters: {'learning_rate': 0.0987404585550806, 'n_estimators': 427, 'max_depth': 9}. Best is trial 14 with value: 0.8449998175349576.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.220946 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.237452 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.238683 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:42:43,534] Trial 17 finished with value: 0.7274711228145416 and parameters: {'learning_rate': 0.004415506521225326, 'n_estimators': 353, 'max_depth': 20}. Best is trial 14 with value: 0.8449998175349576.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.212282 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.243114 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.209019 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:44:07,956] Trial 18 finished with value: 0.8428518287321731 and parameters: {'learning_rate': 0.08245788070910151, 'n_estimators': 425, 'max_depth': 13}. Best is trial 14 with value: 0.8449998175349576.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.209299 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.201349 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.208554 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:45:07,670] Trial 19 finished with value: 0.8276450170219797 and parameters: {'learning_rate': 0.05414275231727444, 'n_estimators': 216, 'max_depth': 18}. Best is trial 14 with value: 0.8449998175349576.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.164870 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.186217 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.206876 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:46:05,703] Trial 20 finished with value: 0.83483926978048 and parameters: {'learning_rate': 0.07202790807916826, 'n_estimators': 448, 'max_depth': 8}. Best is trial 14 with value: 0.8449998175349576.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.242805 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.195419 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.195585 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:47:43,312] Trial 21 finished with value: 0.8444883992687783 and parameters: {'learning_rate': 0.0755964694947439, 'n_estimators': 484, 'max_depth': 17}. Best is trial 14 with value: 0.8449998175349576.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.166729 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.198904 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.240465 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:49:08,123] Trial 22 finished with value: 0.8458181307031136 and parameters: {'learning_rate': 0.0894521367501731, 'n_estimators': 384, 'max_depth': 16}. Best is trial 22 with value: 0.8458181307031136.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.253811 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.182337 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.240607 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:51:00,767] Trial 23 finished with value: 0.8445225556645323 and parameters: {'learning_rate': 0.09153737382070642, 'n_estimators': 386, 'max_depth': 18}. Best is trial 22 with value: 0.8458181307031136.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.249202 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.250395 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.223557 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:52:57,331] Trial 24 finished with value: 0.8453749031665833 and parameters: {'learning_rate': 0.08409959266245677, 'n_estimators': 332, 'max_depth': 19}. Best is trial 22 with value: 0.8458181307031136.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.261305 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.244497 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.229779 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:54:13,460] Trial 25 finished with value: 0.8433632818731693 and parameters: {'learning_rate': 0.08517647013812327, 'n_estimators': 331, 'max_depth': 15}. Best is trial 22 with value: 0.8458181307031136.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.246034 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.270104 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.227771 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:55:14,681] Trial 26 finished with value: 0.8314637153197149 and parameters: {'learning_rate': 0.059574649146775696, 'n_estimators': 254, 'max_depth': 16}. Best is trial 22 with value: 0.8458181307031136.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.262784 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.253302 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.284541 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:57:00,682] Trial 27 finished with value: 0.8362712751031025 and parameters: {'learning_rate': 0.038707615110999086, 'n_estimators': 362, 'max_depth': 19}. Best is trial 22 with value: 0.8458181307031136.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.277225 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.266415 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.216665 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:57:19,032] Trial 28 finished with value: 0.7606805138382996 and parameters: {'learning_rate': 0.06765171760881301, 'n_estimators': 168, 'max_depth': 5}. Best is trial 22 with value: 0.8458181307031136.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.263360 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.264610 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.238329 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:57:37,073] Trial 29 finished with value: 0.7725800036671568 and parameters: {'learning_rate': 0.08150561694707999, 'n_estimators': 286, 'max_depth': 3}. Best is trial 22 with value: 0.8458181307031136.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.247298 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.229258 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.263152 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 10:58:39,465] Trial 30 finished with value: 0.8405674131328293 and parameters: {'learning_rate': 0.09074772387901892, 'n_estimators': 324, 'max_depth': 12}. Best is trial 22 with value: 0.8458181307031136.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.248069 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.225356 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.265083 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 11:00:21,210] Trial 31 finished with value: 0.8452725769660706 and parameters: {'learning_rate': 0.09963929519340779, 'n_estimators': 397, 'max_depth': 19}. Best is trial 22 with value: 0.8458181307031136.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.244963 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.261700 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.243868 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 11:02:12,268] Trial 32 finished with value: 0.8447952418585297 and parameters: {'learning_rate': 0.09483857068509545, 'n_estimators': 452, 'max_depth': 17}. Best is trial 22 with value: 0.8458181307031136.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.298746 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.206784 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.228589 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 11:03:55,923] Trial 33 finished with value: 0.845613575951576 and parameters: {'learning_rate': 0.08739533835466076, 'n_estimators': 382, 'max_depth': 20}. Best is trial 22 with value: 0.8458181307031136.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.253014 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.266494 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.239841 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 11:05:30,956] Trial 34 finished with value: 0.8459886511207566 and parameters: {'learning_rate': 0.09881308871074221, 'n_estimators': 382, 'max_depth': 18}. Best is trial 34 with value: 0.8459886511207566.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.258637 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.226350 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.246272 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 11:05:49,269] Trial 35 finished with value: 0.7889118595358435 and parameters: {'learning_rate': 0.08846467645717017, 'n_estimators': 59, 'max_depth': 18}. Best is trial 34 with value: 0.8459886511207566.






[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.228467 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.242904 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.232736 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 11:06:53,517] Trial 36 finished with value: 0.8420335678762424 and parameters: {'learning_rate': 0.09445472689713717, 'n_estimators': 304, 'max_depth': 14}. Best is trial 34 with value: 0.8459886511207566.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.241450 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.263067 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.192101 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 11:08:21,309] Trial 37 finished with value: 0.8431927928428617 and parameters: {'learning_rate': 0.07607222763670987, 'n_estimators': 376, 'max_depth': 16}. Best is trial 34 with value: 0.8459886511207566.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.294587 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.273592 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.207383 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 11:09:27,516] Trial 38 finished with value: 0.8427836380025244 and parameters: {'learning_rate': 0.09970446257635374, 'n_estimators': 266, 'max_depth': 17}. Best is trial 34 with value: 0.8459886511207566.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.196294 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.252274 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.310753 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 11:10:57,165] Trial 39 finished with value: 0.8447271069285883 and parameters: {'learning_rate': 0.07173513433741459, 'n_estimators': 335, 'max_depth': 19}. Best is trial 34 with value: 0.8459886511207566.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.214827 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.241248 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.260868 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 11:12:17,387] Trial 40 finished with value: 0.8395786388342191 and parameters: {'learning_rate': 0.061019753654193216, 'n_estimators': 358, 'max_depth': 15}. Best is trial 34 with value: 0.8459886511207566.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.198610 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.265306 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.272696 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 11:14:02,578] Trial 41 finished with value: 0.845374847366876 and parameters: {'learning_rate': 0.09683017286749242, 'n_estimators': 397, 'max_depth': 19}. Best is trial 34 with value: 0.8459886511207566.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.206918 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.270137 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.291402 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 11:15:46,522] Trial 42 finished with value: 0.8445224998648252 and parameters: {'learning_rate': 0.08724235472799484, 'n_estimators': 405, 'max_depth': 18}. Best is trial 34 with value: 0.8459886511207566.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.250717 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.287683 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.261374 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 11:17:25,454] Trial 43 finished with value: 0.8448293494295401 and parameters: {'learning_rate': 0.09433752682623861, 'n_estimators': 373, 'max_depth': 19}. Best is trial 34 with value: 0.8459886511207566.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.272316 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.228289 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.228195 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 11:19:33,348] Trial 44 finished with value: 0.844761141262483 and parameters: {'learning_rate': 0.07954483974839237, 'n_estimators': 462, 'max_depth': 20}. Best is trial 34 with value: 0.8459886511207566.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.233971 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.228290 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.284558 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 11:21:03,927] Trial 45 finished with value: 0.8448975541091155 and parameters: {'learning_rate': 0.09562445672384977, 'n_estimators': 344, 'max_depth': 19}. Best is trial 34 with value: 0.8459886511207566.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.203725 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.258830 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.193962 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 11:22:21,287] Trial 46 finished with value: 0.8443520387343711 and parameters: {'learning_rate': 0.08548200657055914, 'n_estimators': 311, 'max_depth': 18}. Best is trial 34 with value: 0.8459886511207566.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.261370 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.242384 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.266944 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 11:23:59,548] Trial 47 finished with value: 0.8442838201048689 and parameters: {'learning_rate': 0.09099496014096782, 'n_estimators': 406, 'max_depth': 17}. Best is trial 34 with value: 0.8459886511207566.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.256666 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.249447 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.286170 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 11:25:50,921] Trial 48 finished with value: 0.844999904722 and parameters: {'learning_rate': 0.06667715255453513, 'n_estimators': 384, 'max_depth': 20}. Best is trial 34 with value: 0.8459886511207566.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.274999 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84026
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2999
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.250622 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 83803
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.227815 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84965
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3045
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-17 11:27:02,865] Trial 49 finished with value: 0.8418630439711179 and parameters: {'learning_rate': 0.07765465688146472, 'n_estimators': 298, 'max_depth': 16}. Best is trial 34 with value: 0.8459886511207566.


In [14]:
# Extract the best hyperparameters
best_params = study.best_params
best_params

{'learning_rate': 0.09881308871074221, 'n_estimators': 382, 'max_depth': 18}

In [15]:
best_model = lgb.LGBMClassifier(

    objective='multiclass',
    num_class=3,
    metric="multi_logloss",
    is_unbalance= True,
    class_weight= "balanced",
    reg_alpha= 0.1,  # L1 regularization
    reg_lambda= 0.1,  # L2 regularization
    learning_rate= 0.0988,
    max_depth= 18,
    n_estimators=382
)

In [16]:
# Fit the model on the resampled training data
best_model.fit(X_train_tfidf_cleaned, y_train_cleaned)



[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.449667 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 131997
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4439
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




In [17]:
# Predict on the train set
y_train_pred = best_model.predict(X_train_tfidf_cleaned)



In [18]:
# Calculate accuracy on the test set
accuracy_train = accuracy_score(y_train_cleaned, y_train_pred)
accuracy_train

0.9348085512632548

In [19]:
# Generate classification report
report_train = classification_report(y_train_cleaned, y_train_pred)
print(report_train)

              precision    recall  f1-score   support

          -1       0.92      0.91      0.92      6601
           0       0.89      0.98      0.93     10134
           1       0.98      0.91      0.94     12594

    accuracy                           0.93     29329
   macro avg       0.93      0.93      0.93     29329
weighted avg       0.94      0.93      0.93     29329



In [20]:
# Predict on the test set
y_pred = best_model.predict(X_test_tfidf_cleaned)



In [21]:
# Calculate accuracy on the test set
accuracy = accuracy_score(y_test_cleaned, y_pred)
accuracy

0.8636301650075003

In [22]:
# Generate classification report
report = classification_report(y_test_cleaned, y_pred)
print(report)

              precision    recall  f1-score   support

          -1       0.81      0.77      0.79      1647
           0       0.84      0.97      0.90      2510
           1       0.92      0.83      0.87      3176

    accuracy                           0.86      7333
   macro avg       0.86      0.86      0.85      7333
weighted avg       0.87      0.86      0.86      7333



In [25]:
import re
import numpy as np

# Assuming you have pre-trained tfidf_vectorizer and lgbm_model loaded
# tfidf_vectorizer: Your trained TF-IDF vectorizer
# lgbm_model: Your trained LightGBM model

# Function to clean and preprocess a YouTube comment (same as used during training)
def preprocess_comment(comment):
    # Lowercasing
    comment = comment.lower()

    # Remove special characters, URLs, punctuation, and extra spaces
    comment = re.sub(r"http\S+|www\S+|https\S+", '', comment, flags=re.MULTILINE)  # Remove URLs
    comment = re.sub(r'\W', ' ', comment)  # Remove special characters
    comment = re.sub(r'\s+', ' ', comment).strip()  # Remove extra spaces and newlines

    return comment

# Prediction function
def predict_sentiment(comment, tfidf_vectorizer, lgbm_model):
    # Step 1: Preprocess the YouTube comment
    cleaned_comment = preprocess_comment(comment)

    # Step 2: Transform the comment using the trained TF-IDF vectorizer
    comment_tfidf = tfidf_vectorizer.transform([cleaned_comment])

    # Step 3: Use the trained LightGBM model to predict the sentiment
    prediction = lgbm_model.predict(comment_tfidf)
    prediction_proba = lgbm_model.predict_proba(comment_tfidf)

    # Step 4: Get the predicted sentiment (label) and probability
    sentiment_class = np.argmax(prediction_proba)
    sentiment_proba = np.max(prediction_proba)

    # Step 5: Return the sentiment label and confidence
    return {
        'sentiment_class': int(prediction[0]),  # -1, 0, or 1 depending on your labels
        'confidence': sentiment_proba
    }

# Example usage:
comment1 = "I absolutely hate this video!"
comment2 = "The explanations were confusing and the video quality was poor."
comment3 = "I didn’t learn anything useful. Really disappointed."
comment4 = "Wow, the explanation was so clear and helpful. Definitely subscribing!"
comment5 = "This is the worst video I’ve seen on this topic, very misleading"
comment6 = "Not much to say about this, just a standard video."
comment7 = "The video is okay, but I expected more depth in the content."
comment8 = "Superb content! Mazaa aa gaya dekh ke. Best video on this topic!"
comment9 = "Poor video quality aur explanation bhi weak tha."
comment10 = "Yeh video theek tha, but I was expecting more depth."
result = predict_sentiment(comment8, tfidf_cleaned, best_model)
print(f"Predicted Sentiment: {result['sentiment_class']}, Confidence: {result['confidence']}")

Predicted Sentiment: 1, Confidence: 0.9913037009654252


