In [1]:
!pip install mlflow boto3 awscli optuna lightgbm imbalanced-learn

Collecting mlflow
  Downloading mlflow-3.4.0-py3-none-any.whl.metadata (30 kB)
Collecting boto3
  Downloading boto3-1.40.53-py3-none-any.whl.metadata (6.6 kB)
Collecting awscli
  Downloading awscli-1.42.53-py3-none-any.whl.metadata (11 kB)
Collecting optuna
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting mlflow-skinny==3.4.0 (from mlflow)
  Downloading mlflow_skinny-3.4.0-py3-none-any.whl.metadata (31 kB)
Collecting mlflow-tracing==3.4.0 (from mlflow)
  Downloading mlflow_tracing-3.4.0-py3-none-any.whl.metadata (19 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting fastmcp<3,>=2.0.0 (from mlflow)
  Downloading fastmcp-2.12.4-py3-none-any.whl.metadata (19 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting databricks-sdk<1,>=0.20.

In [2]:
import os, getpass

os.environ["AWS_ACCESS_KEY_ID"] = getpass.getpass("Enter AWS Access Key ID: ")
os.environ["AWS_SECRET_ACCESS_KEY"] = getpass.getpass("Enter AWS Secret Access Key: ")
os.environ["AWS_DEFAULT_REGION"] = "us-east-1"

Enter AWS Access Key ID: ··········
Enter AWS Secret Access Key: ··········


In [3]:
import boto3
s3 = boto3.client('s3')
print(s3.list_buckets())

{'ResponseMetadata': {'RequestId': 'PTKVDS2ED1VXC8X4', 'HostId': 'QN9/w9HoWnB7WESqNADfHGhBtTRDc3/LqTtrSd7AZkqx+mBN2CRvFXKo/Alv0QariqwrNLmMfHG6gtw1muD3B0Qk1dnawLXft6FWGprDyvw=', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amz-id-2': 'QN9/w9HoWnB7WESqNADfHGhBtTRDc3/LqTtrSd7AZkqx+mBN2CRvFXKo/Alv0QariqwrNLmMfHG6gtw1muD3B0Qk1dnawLXft6FWGprDyvw=', 'x-amz-request-id': 'PTKVDS2ED1VXC8X4', 'date': 'Thu, 16 Oct 2025 11:39:35 GMT', 'content-type': 'application/xml', 'transfer-encoding': 'chunked', 'server': 'AmazonS3'}, 'RetryAttempts': 0}, 'Buckets': [{'Name': 'project1-mlflow-bucket', 'CreationDate': datetime.datetime(2025, 10, 12, 12, 4, 13, tzinfo=tzlocal())}], 'Owner': {'ID': '866466d6c4d8a9893e39cdce3c468d75133f23de2c907deeda4f1a78a7ca565b'}}


In [4]:
import mlflow
mlflow.set_tracking_uri("http://ec2-3-15-32-230.us-east-2.compute.amazonaws.com:5000/")

print("Tracking URI:", mlflow.get_tracking_uri())

Tracking URI: http://ec2-3-15-32-230.us-east-2.compute.amazonaws.com:5000/


In [5]:
mlflow.set_experiment("exp 6 -LightGBM with HP Tuning")

2025/10/16 11:40:23 INFO mlflow.tracking.fluent: Experiment with name 'exp 6 -LightGBM with HP Tuning' does not exist. Creating a new experiment.


<Experiment: artifact_location='s3://project1-mlflow-bucket/131227091685296502', creation_time=1760614823705, experiment_id='131227091685296502', last_update_time=1760614823705, lifecycle_stage='active', name='exp 6 -LightGBM with HP Tuning', tags={}>

In [6]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
from lightgbm import LGBMClassifier
import mlflow
import mlflow.sklearn
import optuna

In [7]:
url = 'https://raw.githubusercontent.com/adityasahusomu/Youtube_Comment_Analyzer/refs/heads/main/cleaned_reddit_dataset.csv'
df = pd.read_csv(url).dropna(subset=['clean_comment'])
df.shape

(36662, 2)

In [8]:
df['category'] = df['category'].map({-1: 2, 0: 0, 1: 1})

df = df.dropna(subset=['category'])

ngram_range = (1, 3)
max_features = 1000
vectorizer = TfidfVectorizer(ngram_range=ngram_range, max_features=max_features)
X = vectorizer.fit_transform(df['clean_comment'])
y = df['category']

smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled)

def log_mlflow(model_name, model, X_train, X_test, y_train, y_test):
    with mlflow.start_run():
        mlflow.set_tag("mlflow.runName", f"{model_name}_SMOTE_TFIDF_Trigrams")
        mlflow.set_tag("experiment_type", "algorithm_comparison")

        mlflow.log_param("algo_name", model_name)

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", accuracy)

        classification_rep = classification_report(y_test, y_pred, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        # Log the model
        mlflow.sklearn.log_model(model, f"{model_name}_model")


def objective_lightgbm(trial):
    n_estimators = trial.suggest_int('n_estimators', 50, 300)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)
    max_depth = trial.suggest_int('max_depth', 3, 10)

    model = LGBMClassifier(n_estimators=n_estimators, learning_rate=learning_rate, max_depth=max_depth, random_state=42)
    return accuracy_score(y_test, model.fit(X_train, y_train).predict(X_test))


# Run Optuna for LightGBM, log the best model only
def run_optuna_experiment():
    study = optuna.create_study(direction="maximize")
    study.optimize(objective_lightgbm, n_trials=30)

    best_params = study.best_params
    best_model = LGBMClassifier(n_estimators=best_params['n_estimators'], learning_rate=best_params['learning_rate'], max_depth=best_params['max_depth'], random_state=42)

    log_mlflow("LightGBM", best_model, X_train, X_test, y_train, y_test)

run_optuna_experiment()


[I 2025-10-16 12:04:30,574] A new study created in memory with name: no-name-fb67aed8-e09b-4639-95c8-dcadbbd31b73


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.259766 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:04:50,842] Trial 0 finished with value: 0.6002959205242021 and parameters: {'n_estimators': 211, 'learning_rate': 0.0006664776261648478, 'max_depth': 7}. Best is trial 0 with value: 0.6002959205242021.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.278887 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:04:56,283] Trial 1 finished with value: 0.6043119847812302 and parameters: {'n_estimators': 52, 'learning_rate': 0.0004391603575299274, 'max_depth': 8}. Best is trial 1 with value: 0.6043119847812302.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.251141 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:05:11,004] Trial 2 finished with value: 0.6682519551891778 and parameters: {'n_estimators': 234, 'learning_rate': 0.0092132670240146, 'max_depth': 5}. Best is trial 2 with value: 0.6682519551891778.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.249627 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:05:36,754] Trial 3 finished with value: 0.6279856267173959 and parameters: {'n_estimators': 226, 'learning_rate': 0.0001451213635611626, 'max_depth': 10}. Best is trial 2 with value: 0.6682519551891778.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.256155 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:05:54,840] Trial 4 finished with value: 0.6430987106320016 and parameters: {'n_estimators': 154, 'learning_rate': 0.0015081631343313473, 'max_depth': 10}. Best is trial 2 with value: 0.6682519551891778.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.255423 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:06:00,594] Trial 5 finished with value: 0.7522722468822659 and parameters: {'n_estimators': 59, 'learning_rate': 0.07258668740270972, 'max_depth': 9}. Best is trial 5 with value: 0.7522722468822659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.251749 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:06:08,170] Trial 6 finished with value: 0.6767068273092369 and parameters: {'n_estimators': 84, 'learning_rate': 0.02333733120727803, 'max_depth': 6}. Best is trial 5 with value: 0.7522722468822659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.255776 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:06:31,311] Trial 7 finished with value: 0.7491016698372437 and parameters: {'n_estimators': 235, 'learning_rate': 0.01766020285650718, 'max_depth': 9}. Best is trial 5 with value: 0.7522722468822659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.254133 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:06:58,146] Trial 8 finished with value: 0.6223842739378567 and parameters: {'n_estimators': 248, 'learning_rate': 0.0003056025956087288, 'max_depth': 9}. Best is trial 5 with value: 0.7522722468822659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.268535 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:07:08,463] Trial 9 finished with value: 0.6018812090467132 and parameters: {'n_estimators': 147, 'learning_rate': 0.0036217721790395094, 'max_depth': 5}. Best is trial 5 with value: 0.7522722468822659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.269989 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:07:12,107] Trial 10 finished with value: 0.7272246882265906 and parameters: {'n_estimators': 118, 'learning_rate': 0.07830976222651695, 'max_depth': 3}. Best is trial 5 with value: 0.7522722468822659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.250499 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:07:32,108] Trial 11 finished with value: 0.8100824350031706 and parameters: {'n_estimators': 277, 'learning_rate': 0.09611011085297354, 'max_depth': 8}. Best is trial 11 with value: 0.8100824350031706.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.418607 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:07:53,920] Trial 12 finished with value: 0.8042697104206299 and parameters: {'n_estimators': 289, 'learning_rate': 0.06390371402286878, 'max_depth': 8}. Best is trial 11 with value: 0.8100824350031706.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.259030 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:08:14,060] Trial 13 finished with value: 0.8101881209046713 and parameters: {'n_estimators': 300, 'learning_rate': 0.08951827837207484, 'max_depth': 7}. Best is trial 13 with value: 0.8101881209046713.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.247702 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:08:36,562] Trial 14 finished with value: 0.7712957091523991 and parameters: {'n_estimators': 292, 'learning_rate': 0.025876757093775987, 'max_depth': 7}. Best is trial 13 with value: 0.8101881209046713.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.256703 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:08:58,253] Trial 15 finished with value: 0.6528218135700697 and parameters: {'n_estimators': 268, 'learning_rate': 0.00463078933007225, 'max_depth': 6}. Best is trial 13 with value: 0.8101881209046713.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.245122 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:09:06,737] Trial 16 finished with value: 0.7292327203551047 and parameters: {'n_estimators': 193, 'learning_rate': 0.036061742523897745, 'max_depth': 4}. Best is trial 13 with value: 0.8101881209046713.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.249358 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:09:31,842] Trial 17 finished with value: 0.7146480659480026 and parameters: {'n_estimators': 261, 'learning_rate': 0.010775142473328619, 'max_depth': 8}. Best is trial 13 with value: 0.8101881209046713.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.253419 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:09:51,026] Trial 18 finished with value: 0.8094483195941662 and parameters: {'n_estimators': 293, 'learning_rate': 0.09231207342970532, 'max_depth': 7}. Best is trial 13 with value: 0.8101881209046713.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.429434 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:10:03,780] Trial 19 finished with value: 0.5919467343056436 and parameters: {'n_estimators': 186, 'learning_rate': 0.0016993967432867485, 'max_depth': 5}. Best is trial 13 with value: 0.8101881209046713.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.424389 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:10:20,400] Trial 20 finished with value: 0.7821813570069752 and parameters: {'n_estimators': 263, 'learning_rate': 0.045105558811630066, 'max_depth': 6}. Best is trial 13 with value: 0.8101881209046713.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.243633 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:10:39,400] Trial 21 finished with value: 0.8079687169731558 and parameters: {'n_estimators': 295, 'learning_rate': 0.0911954658073507, 'max_depth': 7}. Best is trial 13 with value: 0.8101881209046713.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.250264 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:10:59,745] Trial 22 finished with value: 0.8101881209046713 and parameters: {'n_estimators': 278, 'learning_rate': 0.09944270750503464, 'max_depth': 8}. Best is trial 13 with value: 0.8101881209046713.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.257153 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:11:21,745] Trial 23 finished with value: 0.7912703445360388 and parameters: {'n_estimators': 274, 'learning_rate': 0.03982597053644128, 'max_depth': 8}. Best is trial 13 with value: 0.8101881209046713.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.249193 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:11:43,169] Trial 24 finished with value: 0.7036567321919256 and parameters: {'n_estimators': 210, 'learning_rate': 0.00909293839179296, 'max_depth': 9}. Best is trial 13 with value: 0.8101881209046713.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.408396 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:12:03,592] Trial 25 finished with value: 0.7960262101035722 and parameters: {'n_estimators': 257, 'learning_rate': 0.04862851519459837, 'max_depth': 8}. Best is trial 13 with value: 0.8101881209046713.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.248263 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:12:27,003] Trial 26 finished with value: 0.7376875924751638 and parameters: {'n_estimators': 283, 'learning_rate': 0.015650656108642975, 'max_depth': 7}. Best is trial 13 with value: 0.8101881209046713.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.253515 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:12:49,354] Trial 27 finished with value: 0.7808074402874656 and parameters: {'n_estimators': 249, 'learning_rate': 0.03062587474181578, 'max_depth': 9}. Best is trial 13 with value: 0.8101881209046713.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.244305 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:13:11,692] Trial 28 finished with value: 0.8016275628831114 and parameters: {'n_estimators': 300, 'learning_rate': 0.05440560322226029, 'max_depth': 8}. Best is trial 13 with value: 0.8101881209046713.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.411552 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-16 12:13:31,465] Trial 29 finished with value: 0.7185584443035299 and parameters: {'n_estimators': 211, 'learning_rate': 0.016277616704441597, 'max_depth': 7}. Best is trial 13 with value: 0.8101881209046713.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.257209 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




🏃 View run LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-3-15-32-230.us-east-2.compute.amazonaws.com:5000/#/experiments/131227091685296502/runs/8de92f47ecad448bb88f86bb1fb5dfa9
🧪 View experiment at: http://ec2-3-15-32-230.us-east-2.compute.amazonaws.com:5000/#/experiments/131227091685296502
