In [1]:
import pandas as pd
import numpy as np
import mlflow

import dagshub
dagshub.init(repo_owner='himadri06', repo_name='youtube-comment-analysis', mlflow=True)

import mlflow

mlflow.set_tracking_uri("https://dagshub.com/himadri06/youtube-comment-analysis.mlflow")
mlflow.set_experiment("Experiment 5-LightGBM with HPT")

2024/12/15 12:50:57 INFO mlflow.tracking.fluent: Experiment with name 'Experiment 5-LightGBM with HPT' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/6f37b7835add40d78e5a71b838cfe798', creation_time=1734247258740, experiment_id='7', last_update_time=1734247258740, lifecycle_stage='active', name='Experiment 5-LightGBM with HPT', tags={}>

In [2]:
df = pd.read_csv(r'C:/Users/User/Desktop/MLOPs/youtube-comment/youtube-comment-analysis/data/processed/reddit_preprocessing.csv').dropna(subset=['clean_comment'])
df.shape

(36662, 2)

In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
from lightgbm import LGBMClassifier
import mlflow
import mlflow.sklearn
import optuna

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Step 1: Remap the class labels from [-1, 0, 1] to [2, 0, 1]
df['category'] = df['category'].map({-1: 2, 0: 0, 1: 1})

# Step 2: Remove rows where the target labels (category) are NaN
df = df.dropna(subset=['category'])

# Step 3: TF-IDF vectorizer setup
ngram_range = (1, 3)  # Trigram
max_features = 1000  # Set max_features to 1000
vectorizer = TfidfVectorizer(ngram_range=ngram_range, max_features=max_features)
X = vectorizer.fit_transform(df['clean_comment'])
y = df['category']

# Step 4: Apply SMOTE to handle class imbalance
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Step 5: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled)

# Function to log results in MLflow
def log_mlflow(model_name, model, X_train, X_test, y_train, y_test):
    with mlflow.start_run():
        # Log model type
        mlflow.set_tag("mlflow.runName", f"{model_name}_SMOTE_TFIDF_Trigrams")
        mlflow.set_tag("experiment_type", "algorithm_comparison")

        # Log algorithm name as a parameter
        mlflow.log_param("algo_name", model_name)

        # Train model
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Log accuracy
        accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", accuracy)

        # Log classification report
        classification_rep = classification_report(y_test, y_pred, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        # Log the model
        mlflow.sklearn.log_model(model, f"{model_name}_model")


# Step 6: Optuna objective function for LightGBM
def objective_lightgbm(trial):
    n_estimators = trial.suggest_int('n_estimators', 50, 300)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)
    max_depth = trial.suggest_int('max_depth', 3, 10)

    model = LGBMClassifier(n_estimators=n_estimators, learning_rate=learning_rate, max_depth=max_depth, random_state=42)
    return accuracy_score(y_test, model.fit(X_train, y_train).predict(X_test))


# Step 7: Run Optuna for LightGBM, log the best model only
def run_optuna_experiment():
    study = optuna.create_study(direction="maximize")
    study.optimize(objective_lightgbm, n_trials=30)

    # Get the best parameters and log only the best model
    best_params = study.best_params
    best_model = LGBMClassifier(n_estimators=best_params['n_estimators'], learning_rate=best_params['learning_rate'], max_depth=best_params['max_depth'], random_state=42)

    # Log the best model with MLflow, passing the algo_name as "LightGBM"
    log_mlflow("LightGBM", best_model, X_train, X_test, y_train, y_test)

# Run the experiment for LightGBM
run_optuna_experiment()


[I 2024-12-15 12:52:18,353] A new study created in memory with name: no-name-92f5045c-4694-48c8-8b55-3443a5ed969b


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.932150 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 12:52:46,380] Trial 0 finished with value: 0.670999788628197 and parameters: {'n_estimators': 59, 'learning_rate': 0.015181633822008725, 'max_depth': 10}. Best is trial 0 with value: 0.670999788628197.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.312238 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 12:53:18,666] Trial 1 finished with value: 0.7508983301627563 and parameters: {'n_estimators': 203, 'learning_rate': 0.03823523851992057, 'max_depth': 5}. Best is trial 1 with value: 0.7508983301627563.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.345784 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 12:55:58,745] Trial 2 finished with value: 0.7163390403720143 and parameters: {'n_estimators': 267, 'learning_rate': 0.014392600298064294, 'max_depth': 6}. Best is trial 1 with value: 0.7508983301627563.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.259080 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 12:56:31,761] Trial 3 finished with value: 0.6063200169097442 and parameters: {'n_estimators': 101, 'learning_rate': 0.0004318067999053045, 'max_depth': 8}. Best is trial 1 with value: 0.7508983301627563.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.267006 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 12:57:38,993] Trial 4 finished with value: 0.7940181779750581 and parameters: {'n_estimators': 187, 'learning_rate': 0.08323526932485534, 'max_depth': 6}. Best is trial 4 with value: 0.7940181779750581.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.278358 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 12:58:54,611] Trial 5 finished with value: 0.6497569224265483 and parameters: {'n_estimators': 289, 'learning_rate': 0.004200241631149015, 'max_depth': 6}. Best is trial 4 with value: 0.7940181779750581.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.012035 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 12:59:20,656] Trial 6 finished with value: 0.7068273092369478 and parameters: {'n_estimators': 68, 'learning_rate': 0.03128826872901187, 'max_depth': 9}. Best is trial 4 with value: 0.7940181779750581.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.241798 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 12:59:57,505] Trial 7 finished with value: 0.6695201860071867 and parameters: {'n_estimators': 176, 'learning_rate': 0.012609910002869028, 'max_depth': 5}. Best is trial 4 with value: 0.7940181779750581.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.468899 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 13:02:25,226] Trial 8 finished with value: 0.598287888395688 and parameters: {'n_estimators': 163, 'learning_rate': 0.0020037823963290624, 'max_depth': 6}. Best is trial 4 with value: 0.7940181779750581.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.339208 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 13:03:34,722] Trial 9 finished with value: 0.639294018177975 and parameters: {'n_estimators': 214, 'learning_rate': 0.0014139909257402678, 'max_depth': 9}. Best is trial 4 with value: 0.7940181779750581.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.750748 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 13:03:53,689] Trial 10 finished with value: 0.7455083491862186 and parameters: {'n_estimators': 134, 'learning_rate': 0.0915704912716962, 'max_depth': 3}. Best is trial 4 with value: 0.7940181779750581.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.350854 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 13:04:17,715] Trial 11 finished with value: 0.7892623124075249 and parameters: {'n_estimators': 221, 'learning_rate': 0.09970719053533832, 'max_depth': 4}. Best is trial 4 with value: 0.7940181779750581.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.335509 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 13:04:37,025] Trial 12 finished with value: 0.7809131261889664 and parameters: {'n_estimators': 236, 'learning_rate': 0.09954558530377633, 'max_depth': 3}. Best is trial 4 with value: 0.7940181779750581.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.323831 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 13:05:09,234] Trial 13 finished with value: 0.543859649122807 and parameters: {'n_estimators': 239, 'learning_rate': 0.00010342278286258394, 'max_depth': 4}. Best is trial 4 with value: 0.7940181779750581.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.281387 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 13:06:06,581] Trial 14 finished with value: 0.7586134009723103 and parameters: {'n_estimators': 169, 'learning_rate': 0.03536709381872369, 'max_depth': 7}. Best is trial 4 with value: 0.7940181779750581.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.262045 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 13:06:22,729] Trial 15 finished with value: 0.5986049461001902 and parameters: {'n_estimators': 130, 'learning_rate': 0.004282415232556684, 'max_depth': 4}. Best is trial 4 with value: 0.7940181779750581.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.224744 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 13:06:47,736] Trial 16 finished with value: 0.7694990488268865 and parameters: {'n_estimators': 205, 'learning_rate': 0.0515698985581037, 'max_depth': 5}. Best is trial 4 with value: 0.7940181779750581.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.252847 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 13:07:42,451] Trial 17 finished with value: 0.6986894948213909 and parameters: {'n_estimators': 256, 'learning_rate': 0.009183587131698059, 'max_depth': 7}. Best is trial 4 with value: 0.7940181779750581.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.221021 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 13:08:01,845] Trial 18 finished with value: 0.549566687803847 and parameters: {'n_estimators': 186, 'learning_rate': 0.0006273939482094129, 'max_depth': 4}. Best is trial 4 with value: 0.7940181779750581.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.368783 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 13:10:08,438] Trial 19 finished with value: 0.8044810822236313 and parameters: {'n_estimators': 300, 'learning_rate': 0.062368934034233846, 'max_depth': 8}. Best is trial 19 with value: 0.8044810822236313.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.369361 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 13:12:07,993] Trial 20 finished with value: 0.772563939970408 and parameters: {'n_estimators': 288, 'learning_rate': 0.02321390664612517, 'max_depth': 8}. Best is trial 19 with value: 0.8044810822236313.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.472711 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 13:14:54,068] Trial 21 finished with value: 0.8014161910801099 and parameters: {'n_estimators': 229, 'learning_rate': 0.06676468316948539, 'max_depth': 8}. Best is trial 19 with value: 0.8044810822236313.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.619346 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 13:16:50,673] Trial 22 finished with value: 0.8024730500951173 and parameters: {'n_estimators': 300, 'learning_rate': 0.05722436308164144, 'max_depth': 8}. Best is trial 19 with value: 0.8044810822236313.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.321620 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 13:18:00,310] Trial 23 finished with value: 0.7058761361234411 and parameters: {'n_estimators': 300, 'learning_rate': 0.007942342342687725, 'max_depth': 8}. Best is trial 19 with value: 0.8044810822236313.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.405224 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 13:20:31,755] Trial 24 finished with value: 0.8018389346861129 and parameters: {'n_estimators': 267, 'learning_rate': 0.05308745112816504, 'max_depth': 9}. Best is trial 19 with value: 0.8044810822236313.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 7.268948 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 13:22:31,613] Trial 25 finished with value: 0.7764743183259353 and parameters: {'n_estimators': 271, 'learning_rate': 0.021921211104151756, 'max_depth': 10}. Best is trial 19 with value: 0.8044810822236313.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.224776 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 13:23:21,263] Trial 26 finished with value: 0.796871697315578 and parameters: {'n_estimators': 252, 'learning_rate': 0.047036864732642104, 'max_depth': 9}. Best is trial 19 with value: 0.8044810822236313.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.221230 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 13:24:17,839] Trial 27 finished with value: 0.7008032128514057 and parameters: {'n_estimators': 277, 'learning_rate': 0.006674168801800163, 'max_depth': 9}. Best is trial 19 with value: 0.8044810822236313.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.270364 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 13:25:02,949] Trial 28 finished with value: 0.7655886704713591 and parameters: {'n_estimators': 296, 'learning_rate': 0.023249084924426976, 'max_depth': 7}. Best is trial 19 with value: 0.8044810822236313.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.265251 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2024-12-15 13:26:21,875] Trial 29 finished with value: 0.8044810822236313 and parameters: {'n_estimators': 258, 'learning_rate': 0.05557948974049355, 'max_depth': 10}. Best is trial 19 with value: 0.8044810822236313.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.253194 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/12/15 13:27:36 INFO mlflow.tracking._tracking_service.client: 🏃 View run LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/himadri06/youtube-comment-analysis.mlflow/#/experiments/7/runs/440d4ffe615b4c08a7d867bf74a8a2d7.
2024/12/15 13:27:36 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/himadri06/youtube-comment-analysis.mlflow/#/experiments/7.
