In [1]:
!pip install mlflow boto3 awscli optuna lightgbm imbalanced-learn

Collecting mlflow
  Downloading mlflow-3.5.0-py3-none-any.whl.metadata (30 kB)
Collecting boto3
  Downloading boto3-1.40.55-py3-none-any.whl.metadata (6.6 kB)
Collecting awscli
  Downloading awscli-1.42.55-py3-none-any.whl.metadata (11 kB)
Collecting optuna
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting mlflow-skinny==3.5.0 (from mlflow)
  Downloading mlflow_skinny-3.5.0-py3-none-any.whl.metadata (31 kB)
Collecting mlflow-tracing==3.5.0 (from mlflow)
  Downloading mlflow_tracing-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting Flask-CORS<7 (from mlflow)
  Downloading flask_cors-6.0.1-py3-none-any.whl.metadata (5.3 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting fastmcp<3,>=2.0.0 (from mlflow)
  Downloading fastmcp-2.12.5-py3-none-any.whl.metadata (19 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlfl

In [2]:
import os, getpass

os.environ["AWS_ACCESS_KEY_ID"] = getpass.getpass("Enter AWS Access Key ID: ")
os.environ["AWS_SECRET_ACCESS_KEY"] = getpass.getpass("Enter AWS Secret Access Key: ")
os.environ["AWS_DEFAULT_REGION"] = "us-east-1"

Enter AWS Access Key ID: ··········
Enter AWS Secret Access Key: ··········


In [3]:
import boto3
s3 = boto3.client('s3')
print(s3.list_buckets())

{'ResponseMetadata': {'RequestId': 'V6NAW1C4GCCG7HPZ', 'HostId': 'Angc8FHaqKWiAP3PA/8YXSmS9LY5tqXxhyrFAYCRMylmnRklaCl8y2e0ELFfs+/VLeFpkjiZ5Uk=', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amz-id-2': 'Angc8FHaqKWiAP3PA/8YXSmS9LY5tqXxhyrFAYCRMylmnRklaCl8y2e0ELFfs+/VLeFpkjiZ5Uk=', 'x-amz-request-id': 'V6NAW1C4GCCG7HPZ', 'date': 'Fri, 17 Oct 2025 23:50:20 GMT', 'content-type': 'application/xml', 'transfer-encoding': 'chunked', 'server': 'AmazonS3'}, 'RetryAttempts': 0}, 'Buckets': [{'Name': 'project1-mlflow-bucket', 'CreationDate': datetime.datetime(2025, 10, 12, 12, 4, 13, tzinfo=tzlocal())}], 'Owner': {'ID': '866466d6c4d8a9893e39cdce3c468d75133f23de2c907deeda4f1a78a7ca565b'}}


In [4]:
import mlflow
mlflow.set_tracking_uri("http://ec2-3-15-32-230.us-east-2.compute.amazonaws.com:5000/")

print("Tracking URI:", mlflow.get_tracking_uri())

Tracking URI: http://ec2-3-15-32-230.us-east-2.compute.amazonaws.com:5000/


In [5]:
mlflow.set_experiment("exp 6 -LightGBM with HP Tuning")

<Experiment: artifact_location='s3://project1-mlflow-bucket/131227091685296502', creation_time=1760614823705, experiment_id='131227091685296502', last_update_time=1760614823705, lifecycle_stage='active', name='exp 6 -LightGBM with HP Tuning', tags={'mlflow.experimentKind': 'custom_model_development'}>

In [6]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
from lightgbm import LGBMClassifier
import mlflow
import mlflow.sklearn
import optuna

In [7]:
url = 'https://raw.githubusercontent.com/adityasahusomu/Youtube_Comment_Analyzer/refs/heads/main/cleaned_reddit_dataset.csv'
df = pd.read_csv(url).dropna(subset=['clean_comment'])
df.shape

(36662, 2)

In [8]:
df['category'] = df['category'].map({-1: 2, 0: 0, 1: 1})

df = df.dropna(subset=['category'])

ngram_range = (1, 3)
max_features = 10000
vectorizer = TfidfVectorizer(ngram_range=ngram_range, max_features=max_features)
X = vectorizer.fit_transform(df['clean_comment'])
y = df['category']

smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled)

def log_mlflow(model_name, model, X_train, X_test, y_train, y_test):
    with mlflow.start_run():
        mlflow.set_tag("mlflow.runName", f"Retrained_{model_name}_SMOTE_TFIDF_Trigrams")
        mlflow.set_tag("experiment_type", "algorithm_comparison")

        mlflow.log_param("algo_name", model_name)

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", accuracy)

        reverse_map = {2: -1, 0: 0, 1: 1}

        y_test_original = y_test.map(reverse_map)
        y_pred_original = pd.Series(y_pred).map(reverse_map)

        classification_rep = classification_report(y_test_original, y_pred_original, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        # Log the model
        mlflow.sklearn.log_model(model, f"{model_name}_model")


def objective_lightgbm(trial):
    n_estimators = trial.suggest_int('n_estimators', 50, 300)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)
    max_depth = trial.suggest_int('max_depth', 3, 10)

    model = LGBMClassifier(n_estimators=n_estimators, learning_rate=learning_rate, max_depth=max_depth, random_state=42)
    return accuracy_score(y_test, model.fit(X_train, y_train).predict(X_test))


# Run Optuna for LightGBM, log the best model only
def run_optuna_experiment():
    study = optuna.create_study(direction="maximize")
    study.optimize(objective_lightgbm, n_trials=40)

    best_params = study.best_params
    best_model = LGBMClassifier(n_estimators=best_params['n_estimators'], learning_rate=best_params['learning_rate'], max_depth=best_params['max_depth'], random_state=42)

    log_mlflow("LightGBM", best_model, X_train, X_test, y_train, y_test)

run_optuna_experiment()


[I 2025-10-17 23:57:16,573] A new study created in memory with name: no-name-cef2af41-fb38-4fca-b30c-70fdaa039aee


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 2.582238 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-17 23:57:34,619] Trial 0 finished with value: 0.6012470936377088 and parameters: {'n_estimators': 66, 'learning_rate': 0.00011846616335079846, 'max_depth': 8}. Best is trial 0 with value: 0.6012470936377088.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.700314 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-17 23:57:44,631] Trial 1 finished with value: 0.5525258930458676 and parameters: {'n_estimators': 75, 'learning_rate': 0.0011295929704917542, 'max_depth': 4}. Best is trial 0 with value: 0.6012470936377088.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.697340 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-17 23:58:08,224] Trial 2 finished with value: 0.6618051151976326 and parameters: {'n_estimators': 125, 'learning_rate': 0.009310351885162896, 'max_depth': 8}. Best is trial 2 with value: 0.6618051151976326.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 4.088058 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-17 23:58:38,611] Trial 3 finished with value: 0.6373916719509617 and parameters: {'n_estimators': 131, 'learning_rate': 0.004062901172796931, 'max_depth': 8}. Best is trial 2 with value: 0.6618051151976326.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.458172 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-17 23:59:17,499] Trial 4 finished with value: 0.670999788628197 and parameters: {'n_estimators': 270, 'learning_rate': 0.006914882670394229, 'max_depth': 6}. Best is trial 4 with value: 0.670999788628197.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.614925 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:00:11,803] Trial 5 finished with value: 0.6278799408158952 and parameters: {'n_estimators': 295, 'learning_rate': 0.0005723210735241619, 'max_depth': 9}. Best is trial 4 with value: 0.670999788628197.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.497918 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:00:41,446] Trial 6 finished with value: 0.6569435637285986 and parameters: {'n_estimators': 248, 'learning_rate': 0.007432046782241323, 'max_depth': 5}. Best is trial 4 with value: 0.670999788628197.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.631173 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:00:50,465] Trial 7 finished with value: 0.5734517015430142 and parameters: {'n_estimators': 84, 'learning_rate': 0.005170848140471022, 'max_depth': 3}. Best is trial 4 with value: 0.670999788628197.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.646194 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:01:31,682] Trial 8 finished with value: 0.6235468188543648 and parameters: {'n_estimators': 217, 'learning_rate': 0.000730421318471571, 'max_depth': 9}. Best is trial 4 with value: 0.670999788628197.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.762103 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:01:43,266] Trial 9 finished with value: 0.573134643838512 and parameters: {'n_estimators': 54, 'learning_rate': 0.00017247588315003341, 'max_depth': 6}. Best is trial 4 with value: 0.670999788628197.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.726471 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:02:16,996] Trial 10 finished with value: 0.8300570703868104 and parameters: {'n_estimators': 300, 'learning_rate': 0.076236158077571, 'max_depth': 6}. Best is trial 10 with value: 0.8300570703868104.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.649449 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:02:51,477] Trial 11 finished with value: 0.8394631156203762 and parameters: {'n_estimators': 300, 'learning_rate': 0.09173999880614682, 'max_depth': 6}. Best is trial 11 with value: 0.8394631156203762.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.489038 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:03:11,581] Trial 12 finished with value: 0.8148383005707038 and parameters: {'n_estimators': 211, 'learning_rate': 0.09948100221837189, 'max_depth': 5}. Best is trial 11 with value: 0.8394631156203762.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 2.843091 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:03:50,021] Trial 13 finished with value: 0.844853096596914 and parameters: {'n_estimators': 300, 'learning_rate': 0.09060017304423033, 'max_depth': 7}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.570191 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:04:25,860] Trial 14 finished with value: 0.7535404777002748 and parameters: {'n_estimators': 245, 'learning_rate': 0.023635367318534353, 'max_depth': 7}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.486791 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:04:57,852] Trial 15 finished with value: 0.7708729655463961 and parameters: {'n_estimators': 176, 'learning_rate': 0.029653224333827968, 'max_depth': 10}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 3.037355 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:05:35,356] Trial 16 finished with value: 0.7844007609384908 and parameters: {'n_estimators': 268, 'learning_rate': 0.03446854052970797, 'max_depth': 7}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.572108 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:05:55,930] Trial 17 finished with value: 0.6814626928767702 and parameters: {'n_estimators': 202, 'learning_rate': 0.017401809588836366, 'max_depth': 4}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.521183 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:06:29,761] Trial 18 finished with value: 0.8203339674487423 and parameters: {'n_estimators': 271, 'learning_rate': 0.0596362392783557, 'max_depth': 7}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 2.680735 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:06:58,740] Trial 19 finished with value: 0.696153033185373 and parameters: {'n_estimators': 241, 'learning_rate': 0.01461217096396342, 'max_depth': 5}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.584566 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:07:10,883] Trial 20 finished with value: 0.7119002325089833 and parameters: {'n_estimators': 162, 'learning_rate': 0.045783619749502236, 'max_depth': 3}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 2.931881 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:07:44,929] Trial 21 finished with value: 0.8300570703868104 and parameters: {'n_estimators': 296, 'learning_rate': 0.08136922205383693, 'max_depth': 6}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.663129 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:08:17,401] Trial 22 finished with value: 0.8405199746353836 and parameters: {'n_estimators': 298, 'learning_rate': 0.09786036763266638, 'max_depth': 6}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.508784 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:09:03,787] Trial 23 finished with value: 0.6230183893468612 and parameters: {'n_estimators': 278, 'learning_rate': 0.0018190951196051203, 'max_depth': 7}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.719454 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:09:28,626] Trial 24 finished with value: 0.7754174593109279 and parameters: {'n_estimators': 232, 'learning_rate': 0.0468278019805662, 'max_depth': 5}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.707045 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:09:53,734] Trial 25 finished with value: 0.6837877827097865 and parameters: {'n_estimators': 261, 'learning_rate': 0.013632488221612366, 'max_depth': 4}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.581104 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:10:26,052] Trial 26 finished with value: 0.8403086028323822 and parameters: {'n_estimators': 285, 'learning_rate': 0.0990703066025973, 'max_depth': 6}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.710945 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:11:07,294] Trial 27 finished with value: 0.8092369477911646 and parameters: {'n_estimators': 283, 'learning_rate': 0.043010474750127134, 'max_depth': 8}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.621823 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:11:48,989] Trial 28 finished with value: 0.6276685690128937 and parameters: {'n_estimators': 254, 'learning_rate': 0.0022930993692145737, 'max_depth': 7}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 2.525533 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:12:28,986] Trial 29 finished with value: 0.7664341576833651 and parameters: {'n_estimators': 228, 'learning_rate': 0.023640391940669244, 'max_depth': 9}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 2.981058 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:12:55,799] Trial 30 finished with value: 0.5612978228704291 and parameters: {'n_estimators': 193, 'learning_rate': 0.00021370966372605502, 'max_depth': 5}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 3.276231 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:13:28,830] Trial 31 finished with value: 0.8389346861128726 and parameters: {'n_estimators': 285, 'learning_rate': 0.09859270245379062, 'max_depth': 6}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.762530 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:14:00,747] Trial 32 finished with value: 0.8063834284506447 and parameters: {'n_estimators': 282, 'learning_rate': 0.0542547196076624, 'max_depth': 6}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.520871 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:14:40,393] Trial 33 finished with value: 0.8277319805537942 and parameters: {'n_estimators': 298, 'learning_rate': 0.06522342540699905, 'max_depth': 7}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.535371 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:15:03,888] Trial 34 finished with value: 0.7401183682096808 and parameters: {'n_estimators': 264, 'learning_rate': 0.032229150152895944, 'max_depth': 4}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.521811 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:15:42,522] Trial 35 finished with value: 0.8325935320228282 and parameters: {'n_estimators': 283, 'learning_rate': 0.06549146386154255, 'max_depth': 8}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.570931 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:15:59,427] Trial 36 finished with value: 0.7934897484675544 and parameters: {'n_estimators': 128, 'learning_rate': 0.09561380768972454, 'max_depth': 6}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.642549 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:16:16,885] Trial 37 finished with value: 0.6918199112238428 and parameters: {'n_estimators': 91, 'learning_rate': 0.02157232049884551, 'max_depth': 8}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 2.917014 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:16:56,968] Trial 38 finished with value: 0.6986894948213909 and parameters: {'n_estimators': 300, 'learning_rate': 0.01212569662006585, 'max_depth': 5}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 2.964650 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-18 00:17:19,156] Trial 39 finished with value: 0.7552314521242867 and parameters: {'n_estimators': 149, 'learning_rate': 0.04563126477588753, 'max_depth': 6}. Best is trial 13 with value: 0.844853096596914.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.607291 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 170427
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 5487
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run Retrained_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-3-15-32-230.us-east-2.compute.amazonaws.com:5000/#/experiments/131227091685296502/runs/6061459dc82a49219a73b39b88af25de
🧪 View experiment at: http://ec2-3-15-32-230.us-east-2.compute.amazonaws.com:5000/#/experiments/131227091685296502
