In [12]:
import mlflow
# Step 2: Set up the MLflow tracking server
mlflow.set_tracking_uri("http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/")

In [13]:
# Set or create an experiment
mlflow.set_experiment("Exp 6 - LightGBM HP Tuning")

<Experiment: artifact_location='s3://mlflow-bucket-youtube-sentiments-classifier/6', creation_time=1765528637240, experiment_id='6', last_update_time=1765528637240, lifecycle_stage='active', name='Exp 6 - LightGBM HP Tuning', tags={}>

In [14]:
import mlflow
import mlflow.sklearn
import optuna
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
from lightgbm import LGBMClassifier

In [15]:
import pandas as pd

df = pd.read_csv('reddit_preprocessing.csv').dropna()
df.shape

(36662, 2)

In [16]:
# Step 1: Remap the class labels from [-1, 0, 1] to [2, 0, 1]
df['category'] = df['category'].map({-1: 2, 0: 0, 1: 1})

# Step 2: Remove rows where the target labels (category) are NaN
df = df.dropna(subset=['category'])

In [17]:
# Step 3: TF-IDF vectorizer setup
ngram_range = (1, 3)  # Trigram
max_features = 1000  # Set max_features to 1000
vectorizer = TfidfVectorizer(ngram_range=ngram_range, max_features=max_features)
X = vectorizer.fit_transform(df['clean_comment'])
y = df['category']

# Step 4: Apply SMOTE to handle class imbalance
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

In [18]:
# Step 5: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled)

In [19]:
# Function to log results in MLflow
def log_mlflow(model_name, model, X_train, X_test, y_train, y_test, params, trial_number):
    with mlflow.start_run(run_name=f"Trial_{trial_number}_{model_name}_SMOTE_TFIDF_Trigrams"):
        mlflow.set_tag("experiment_type", "algorithm_comparison")
        mlflow.log_param("algo_name", model_name)

        # Log hyperparameters
        for key, value in params.items():
            mlflow.log_param(key, value)

        # Train model
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Log accuracy
        accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", accuracy)

        # Log classification report
        classification_rep = classification_report(y_test, y_pred, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        # Log the model
        mlflow.sklearn.log_model(model, f"{model_name}_model")

        return accuracy


In [20]:
# Optuna objective function
def objective_lightgbm(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'num_leaves': trial.suggest_int('num_leaves', 20, 150),
        'min_child_samples': trial.suggest_int('min_child_samples', 10, 100),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-4, 10.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-4, 10.0, log=True),
    }

    model = LGBMClassifier(**params, random_state=42)
    accuracy = log_mlflow("LightGBM", model, X_train, X_test, y_train, y_test, params, trial.number)
    return accuracy

In [21]:
# Run Optuna experiment
def run_optuna_experiment():
    study = optuna.create_study(direction="maximize")
    study.optimize(objective_lightgbm, n_trials=100)

    best_params = study.best_params
    best_model = LGBMClassifier(**best_params, random_state=42)

    log_mlflow("LightGBM", best_model, X_train, X_test, y_train, y_test, best_params, "Best")

    # Visualization (works in Jupyter; for scripts, save instead)
    optuna.visualization.plot_param_importances(study).show()
    optuna.visualization.plot_optimization_history(study).show()

In [22]:
# Run the experiment for LightGBM
run_optuna_experiment()

[I 2025-12-12 14:23:01,225] A new study created in memory with name: no-name-af130b56-f462-47ca-af23-31c4e5176741


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.078872 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98828
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 958
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 14:25:16 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_0_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/1aab2f663c194d0cb2f9b73f6e8436a1.
2025/12/12 14:25:16 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 14:25:16,656] Trial 0 finished with value: 0.7595645740858169 and parameters: {'n_estimators': 867, 'learning_rate': 0.005604083951813162, 'max_depth': 9, 'num_leaves': 114, 'min_child_samples': 64, 'colsample_bytree': 0.8760234072369922, 'subsample': 0.5900091800775751, 'reg_alpha': 0.00015109959117332294, 'reg_lambda': 0.011408954629374456}. Best is trial 0 with value: 0.7595645740858169.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.077890 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99009
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 969
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 14:27:09 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_1_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/208c3e60e1a64665a1e368cebe5f4d8d.
2025/12/12 14:27:09 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 14:27:09,716] Trial 1 finished with value: 0.6613823715916297 and parameters: {'n_estimators': 760, 'learning_rate': 0.00018256210916826632, 'max_depth': 12, 'num_leaves': 92, 'min_child_samples': 23, 'colsample_bytree': 0.9151572628713265, 'subsample': 0.880745788841932, 'reg_alpha': 7.105910873906025, 'reg_lambda': 0.12862374824688289}. Best is trial 0 with value: 0.7595645740858169.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.065243 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98406
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 944
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 14:28:59 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_2_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/5b2ea84f09cc434aa8946aff34bcf167.
2025/12/12 14:28:59 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 14:29:00,503] Trial 2 finished with value: 0.5984992601986895 and parameters: {'n_estimators': 281, 'learning_rate': 0.00038459075113513107, 'max_depth': 3, 'num_leaves': 26, 'min_child_samples': 97, 'colsample_bytree': 0.5192558701397698, 'subsample': 0.6761135066009101, 'reg_alpha': 0.00047031428753367335, 'reg_lambda': 0.0034586280804491166}. Best is trial 0 with value: 0.7595645740858169.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.079317 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99060
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 976
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 14:33:03 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_3_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/8db491d1914d46f88102ba4f2d522514.
2025/12/12 14:33:03 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 14:33:04,710] Trial 3 finished with value: 0.806277742549144 and parameters: {'n_estimators': 777, 'learning_rate': 0.029135080711196953, 'max_depth': 7, 'num_leaves': 119, 'min_child_samples': 16, 'colsample_bytree': 0.9544474891843511, 'subsample': 0.8115068823334237, 'reg_alpha': 1.169811558791753, 'reg_lambda': 0.028402717802553837}. Best is trial 3 with value: 0.806277742549144.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.075348 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98406
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 944
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 14:35:15 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_4_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/5575eaaad58e484d889db0e26ad41b48.
2025/12/12 14:35:15 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 14:35:15,960] Trial 4 finished with value: 0.776580004227436 and parameters: {'n_estimators': 511, 'learning_rate': 0.024069735385811614, 'max_depth': 8, 'num_leaves': 148, 'min_child_samples': 97, 'colsample_bytree': 0.8426656712069338, 'subsample': 0.9819333535922583, 'reg_alpha': 8.239631926627856, 'reg_lambda': 0.30133644285705546}. Best is trial 3 with value: 0.806277742549144.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.068059 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98978
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 14:37:09 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_5_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/ec76e3b554eb4ad592f5cbef53bcf7dd.
2025/12/12 14:37:09 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 14:37:09,654] Trial 5 finished with value: 0.7002747833439019 and parameters: {'n_estimators': 757, 'learning_rate': 0.0001859016608938982, 'max_depth': 13, 'num_leaves': 134, 'min_child_samples': 47, 'colsample_bytree': 0.6252473129777737, 'subsample': 0.8892065865474121, 'reg_alpha': 0.007283120840151045, 'reg_lambda': 0.6575340655130723}. Best is trial 3 with value: 0.806277742549144.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.071974 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98870
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 960
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 14:39:21 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_6_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/5272371fea274441b189caec70008c52.
2025/12/12 14:39:21 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 14:39:22,233] Trial 6 finished with value: 0.5791587402240541 and parameters: {'n_estimators': 553, 'learning_rate': 0.00014628800458556788, 'max_depth': 6, 'num_leaves': 90, 'min_child_samples': 58, 'colsample_bytree': 0.9612988425871023, 'subsample': 0.8356303516512114, 'reg_alpha': 5.5898957069079955, 'reg_lambda': 1.0448887562944429}. Best is trial 3 with value: 0.806277742549144.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.068223 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99001
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 968
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 14:41:18 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_7_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/0447811e6fe74b9fa40eaf3fdb48b2c0.
2025/12/12 14:41:18 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 14:41:18,938] Trial 7 finished with value: 0.7974001268230818 and parameters: {'n_estimators': 786, 'learning_rate': 0.009711431409746618, 'max_depth': 13, 'num_leaves': 84, 'min_child_samples': 25, 'colsample_bytree': 0.571980362230063, 'subsample': 0.9307695508540288, 'reg_alpha': 0.02325380263109848, 'reg_lambda': 0.026252099144710814}. Best is trial 3 with value: 0.806277742549144.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.088815 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99060
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 976
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 14:43:56 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_8_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/724eebd8edac4cc8963387b92c6b2454.
2025/12/12 14:43:56 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 14:43:57,274] Trial 8 finished with value: 0.5630944831959417 and parameters: {'n_estimators': 343, 'learning_rate': 0.000777207312252749, 'max_depth': 3, 'num_leaves': 111, 'min_child_samples': 16, 'colsample_bytree': 0.8554058616399558, 'subsample': 0.7555448792466508, 'reg_alpha': 0.36453792336826746, 'reg_lambda': 0.0002523942127769524}. Best is trial 3 with value: 0.806277742549144.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.085046 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98906
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 962
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 14:46:06 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_9_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/e50e4239420047cfbfc9dcb74841fb51.
2025/12/12 14:46:06 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 14:46:06,837] Trial 9 finished with value: 0.7362079898541535 and parameters: {'n_estimators': 910, 'learning_rate': 0.011017104945718156, 'max_depth': 4, 'num_leaves': 64, 'min_child_samples': 52, 'colsample_bytree': 0.8559882751406462, 'subsample': 0.8445425905991764, 'reg_alpha': 7.377662735358554, 'reg_lambda': 0.00010760578312885253}. Best is trial 3 with value: 0.806277742549144.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.075726 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98978
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 14:50:28 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_10_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/1b9d1f4624af4da4ad9b9c439167fe77.
2025/12/12 14:50:28 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 14:50:29,066] Trial 10 finished with value: 0.8079687169731558 and parameters: {'n_estimators': 518, 'learning_rate': 0.06234844135454427, 'max_depth': 7, 'num_leaves': 48, 'min_child_samples': 36, 'colsample_bytree': 0.7247323077974055, 'subsample': 0.5241402122867868, 'reg_alpha': 0.3191356991982003, 'reg_lambda': 4.385835978556548}. Best is trial 10 with value: 0.8079687169731558.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.072145 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98978
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 14:51:52 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_11_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/f54426324c9d4d35bcfbb3f0556ce3ac.
2025/12/12 14:51:52 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 14:51:52,666] Trial 11 finished with value: 0.8126188966391883 and parameters: {'n_estimators': 571, 'learning_rate': 0.09971211885456116, 'max_depth': 7, 'num_leaves': 47, 'min_child_samples': 37, 'colsample_bytree': 0.7090838237799358, 'subsample': 0.5113517629367776, 'reg_alpha': 0.29319525359174253, 'reg_lambda': 8.668319472023727}. Best is trial 11 with value: 0.8126188966391883.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.083152 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98978
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 14:54:11 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_12_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/948ad28676c244f299dc6bfce7948aa4.
2025/12/12 14:54:11 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 14:54:11,680] Trial 12 finished with value: 0.8157894736842105 and parameters: {'n_estimators': 526, 'learning_rate': 0.08508707772117202, 'max_depth': 10, 'num_leaves': 36, 'min_child_samples': 39, 'colsample_bytree': 0.726218729255887, 'subsample': 0.5032831242870889, 'reg_alpha': 0.15632556180010662, 'reg_lambda': 6.1669231935506446}. Best is trial 12 with value: 0.8157894736842105.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.085244 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98781
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 956
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 14:56:38 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_13_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/3eec6579c5c54ef986f1e0ba0587aff8.
2025/12/12 14:56:38 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 14:56:39,219] Trial 13 finished with value: 0.8149439864722046 and parameters: {'n_estimators': 615, 'learning_rate': 0.07603816274525395, 'max_depth': 11, 'num_leaves': 20, 'min_child_samples': 75, 'colsample_bytree': 0.7243578028150143, 'subsample': 0.515343382030195, 'reg_alpha': 0.09146265189977117, 'reg_lambda': 6.98674065513769}. Best is trial 12 with value: 0.8157894736842105.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.079941 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98781
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 956
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 14:59:08 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_14_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/be831911106b4bd597c18bdde96a397c.
2025/12/12 14:59:08 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 14:59:08,899] Trial 14 finished with value: 0.6591629676601142 and parameters: {'n_estimators': 134, 'learning_rate': 0.0021876264173337997, 'max_depth': 10, 'num_leaves': 26, 'min_child_samples': 75, 'colsample_bytree': 0.7775982794859929, 'subsample': 0.6171792130441957, 'reg_alpha': 0.03826530818880319, 'reg_lambda': 2.4799546911611476}. Best is trial 12 with value: 0.8157894736842105.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.081793 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98781
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 956
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 15:00:12 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_15_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/8ec4f3bd0cf641e18598cfe374a72949.
2025/12/12 15:00:12 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 15:00:13,012] Trial 15 finished with value: 0.8050095117311351 and parameters: {'n_estimators': 636, 'learning_rate': 0.03791615527685614, 'max_depth': 11, 'num_leaves': 43, 'min_child_samples': 77, 'colsample_bytree': 0.6655717997246289, 'subsample': 0.5915708393543848, 'reg_alpha': 0.00378102301651555, 'reg_lambda': 9.871702286806272}. Best is trial 12 with value: 0.8157894736842105.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.083734 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98781
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 956
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 15:02:01 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_16_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/066751b98e8c4146894936f5e27c4850.
2025/12/12 15:02:01 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 15:02:01,603] Trial 16 finished with value: 0.7056647643204397 and parameters: {'n_estimators': 412, 'learning_rate': 0.002493961806531757, 'max_depth': 15, 'num_leaves': 21, 'min_child_samples': 79, 'colsample_bytree': 0.780519950688061, 'subsample': 0.6776824661758667, 'reg_alpha': 0.08228329363329387, 'reg_lambda': 0.14331178387270624}. Best is trial 12 with value: 0.8157894736842105.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.087274 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98828
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 958
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 15:04:55 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_17_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/2956e918db9846789d5638a1188e37ba.
2025/12/12 15:04:55 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 15:04:56,482] Trial 17 finished with value: 0.8023673641936165 and parameters: {'n_estimators': 680, 'learning_rate': 0.015098081619251722, 'max_depth': 15, 'num_leaves': 68, 'min_child_samples': 65, 'colsample_bytree': 0.6336658426697727, 'subsample': 0.5076507671714161, 'reg_alpha': 0.0044297589605252, 'reg_lambda': 2.2422533945735057}. Best is trial 12 with value: 0.8157894736842105.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.081498 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98725
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 15:05:45 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_18_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/9209c5de1644481eb7491e63bcad4c15.
2025/12/12 15:05:45 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 15:05:46,437] Trial 18 finished with value: 0.8118790953286832 and parameters: {'n_estimators': 411, 'learning_rate': 0.08830923534029538, 'max_depth': 10, 'num_leaves': 37, 'min_child_samples': 87, 'colsample_bytree': 0.8027147451131884, 'subsample': 0.6659474341935786, 'reg_alpha': 1.229353837007485, 'reg_lambda': 0.6752338129405708}. Best is trial 12 with value: 0.8157894736842105.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.088144 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98978
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 15:08:48 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_19_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/253d46bb34084f758e24d1b59c51d34d.
2025/12/12 15:08:48 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 15:08:49,279] Trial 19 finished with value: 0.8174804481082224 and parameters: {'n_estimators': 987, 'learning_rate': 0.04419414337667225, 'max_depth': 12, 'num_leaves': 66, 'min_child_samples': 41, 'colsample_bytree': 0.6867187411166911, 'subsample': 0.5708183150652317, 'reg_alpha': 0.09266298338875222, 'reg_lambda': 0.001848588168901063}. Best is trial 19 with value: 0.8174804481082224.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.084399 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98978
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 15:10:39 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_20_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/5c1073c106ab4a648cc5823c29b18053.
2025/12/12 15:10:39 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 15:10:39,654] Trial 20 finished with value: 0.8169520186007186 and parameters: {'n_estimators': 980, 'learning_rate': 0.032739022150486755, 'max_depth': 13, 'num_leaves': 64, 'min_child_samples': 41, 'colsample_bytree': 0.6759037133565772, 'subsample': 0.5570346086828433, 'reg_alpha': 0.01891247029049464, 'reg_lambda': 0.0012739997103513696}. Best is trial 19 with value: 0.8174804481082224.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.080705 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98978
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 15:13:12 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_21_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/97750da1454c4a8a902383ce1724b886.
2025/12/12 15:13:12 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 15:13:13,160] Trial 21 finished with value: 0.8177975058127246 and parameters: {'n_estimators': 960, 'learning_rate': 0.04243488624897648, 'max_depth': 13, 'num_leaves': 64, 'min_child_samples': 40, 'colsample_bytree': 0.6900526149106018, 'subsample': 0.565503072511517, 'reg_alpha': 0.011869589282555864, 'reg_lambda': 0.0015649125859066462}. Best is trial 21 with value: 0.8177975058127246.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.084860 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98978
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 15:15:25 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_22_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/a5ffa38150c64a15ad740655a09485a6.
2025/12/12 15:15:25 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 15:15:25,574] Trial 22 finished with value: 0.818431621221729 and parameters: {'n_estimators': 999, 'learning_rate': 0.039930833282807575, 'max_depth': 14, 'num_leaves': 67, 'min_child_samples': 43, 'colsample_bytree': 0.683294133877181, 'subsample': 0.569753394191402, 'reg_alpha': 0.017429485985215536, 'reg_lambda': 0.0010597103535495828}. Best is trial 22 with value: 0.818431621221729.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.085400 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98991
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 967
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 15:18:39 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_23_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/0f759046ba0b4ceba1979e31d4d7124f.
2025/12/12 15:18:39 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 15:18:40,154] Trial 23 finished with value: 0.7907419150285352 and parameters: {'n_estimators': 980, 'learning_rate': 0.005889811203072261, 'max_depth': 14, 'num_leaves': 71, 'min_child_samples': 29, 'colsample_bytree': 0.5840040830025541, 'subsample': 0.6466372539502134, 'reg_alpha': 0.0018036845736728996, 'reg_lambda': 0.001045529634680303}. Best is trial 22 with value: 0.818431621221729.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.082456 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98978
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 15:20:55 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_24_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/1ff118a42e3a42e1b4b294fd420723ae.
2025/12/12 15:20:55 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 15:20:56,393] Trial 24 finished with value: 0.8089198900866624 and parameters: {'n_estimators': 865, 'learning_rate': 0.01801260323351812, 'max_depth': 14, 'num_leaves': 78, 'min_child_samples': 48, 'colsample_bytree': 0.6484790137750435, 'subsample': 0.727258357684306, 'reg_alpha': 0.009326372638630969, 'reg_lambda': 0.004157444295481155}. Best is trial 22 with value: 0.818431621221729.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.085869 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98991
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 967
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 15:23:17 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_25_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/395f3dd143984856971157c71c46b71c.
2025/12/12 15:23:17 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 15:23:17,706] Trial 25 finished with value: 0.8196998520397379 and parameters: {'n_estimators': 998, 'learning_rate': 0.05077258689190266, 'max_depth': 12, 'num_leaves': 55, 'min_child_samples': 30, 'colsample_bytree': 0.5705523309793324, 'subsample': 0.579176120714812, 'reg_alpha': 0.001679996672135107, 'reg_lambda': 0.0006886750244914857}. Best is trial 25 with value: 0.8196998520397379.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.067935 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98991
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 967
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 15:25:56 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_26_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/490a3933663440f58039c24e894fd962.
2025/12/12 15:25:56 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 15:25:56,902] Trial 26 finished with value: 0.7844007609384908 and parameters: {'n_estimators': 894, 'learning_rate': 0.005577829908880647, 'max_depth': 14, 'num_leaves': 55, 'min_child_samples': 31, 'colsample_bytree': 0.5188705887570343, 'subsample': 0.6304481590383517, 'reg_alpha': 0.0010758353535554393, 'reg_lambda': 0.0004139162688350673}. Best is trial 25 with value: 0.8196998520397379.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.073801 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99097
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 983
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 15:30:27 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_27_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/de1c548c06ae4ed7b2fd8c2dcb7b6842.
2025/12/12 15:30:27 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 15:30:28,029] Trial 27 finished with value: 0.8207567110547453 and parameters: {'n_estimators': 925, 'learning_rate': 0.04701792244167326, 'max_depth': 15, 'num_leaves': 101, 'min_child_samples': 12, 'colsample_bytree': 0.5685262746289387, 'subsample': 0.7180122103307387, 'reg_alpha': 0.0007027584644469137, 'reg_lambda': 0.0005091842007182014}. Best is trial 27 with value: 0.8207567110547453.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.074463 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99107
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 985
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 15:34:22 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_28_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/f7a3048e6b404c16bed966c9504db782.
2025/12/12 15:34:22 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 15:34:22,537] Trial 28 finished with value: 0.8145212428662016 and parameters: {'n_estimators': 829, 'learning_rate': 0.018718345898084198, 'max_depth': 15, 'num_leaves': 99, 'min_child_samples': 11, 'colsample_bytree': 0.5688835409339579, 'subsample': 0.7304396958148202, 'reg_alpha': 0.0002491455701248255, 'reg_lambda': 0.0004208562705126048}. Best is trial 27 with value: 0.8207567110547453.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.085339 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99119
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 988
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 15:37:01 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_29_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/91f52e1acc694fdfa8fffe8f73546dd2.
2025/12/12 15:37:01 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 15:37:02,240] Trial 29 finished with value: 0.7136968928344959 and parameters: {'n_estimators': 915, 'learning_rate': 0.0014986145480140743, 'max_depth': 12, 'num_leaves': 103, 'min_child_samples': 10, 'colsample_bytree': 0.6059705432733489, 'subsample': 0.7841019964579535, 'reg_alpha': 0.00015887568431620933, 'reg_lambda': 0.008291745816853826}. Best is trial 27 with value: 0.8207567110547453.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.081800 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99025
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 15:39:06 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_30_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/158f0d7edee04b4898327fd005cc978d.
2025/12/12 15:39:06 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 15:39:07,261] Trial 30 finished with value: 0.7882054533925175 and parameters: {'n_estimators': 831, 'learning_rate': 0.006378643546548048, 'max_depth': 14, 'num_leaves': 77, 'min_child_samples': 20, 'colsample_bytree': 0.5479055376293988, 'subsample': 0.7048758512882072, 'reg_alpha': 0.0007898447080986283, 'reg_lambda': 0.00011472417774511907}. Best is trial 27 with value: 0.8207567110547453.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.073880 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98991
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 967
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 15:41:29 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_31_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/43e51b0dd6804982b2a61d0dedbb9cc6.
2025/12/12 15:41:29 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 15:41:30,198] Trial 31 finished with value: 0.8191714225322342 and parameters: {'n_estimators': 948, 'learning_rate': 0.03919037367997551, 'max_depth': 13, 'num_leaves': 55, 'min_child_samples': 33, 'colsample_bytree': 0.5009304251453662, 'subsample': 0.5998854817081483, 'reg_alpha': 0.002356855982343916, 'reg_lambda': 0.0006617171635115184}. Best is trial 27 with value: 0.8207567110547453.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.077311 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98991
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 967
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2025/12/12 15:43:16 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_32_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6/runs/b41615bd1556421496d2006a44ed1e73.
2025/12/12 15:43:16 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/#/experiments/6.
[I 2025-12-12 15:43:17,142] Trial 32 finished with value: 0.8171633904037201 and parameters: {'n_estimators': 937, 'learning_rate': 0.05299813087635911, 'max_depth': 15, 'num_leaves': 54, 'min_child_samples': 29, 'colsample_bytree': 0.5405426740291186, 'subsample': 0.6081158677899804, 'reg_alpha': 0.0020177578913449313, 'reg_lambda': 0.000635304660543714}. Best is trial 27 with value: 0.8207567110547453.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.077364 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99001
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 968
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[W 2025-12-12 15:50:06,011] Trial 33 failed with parameters: {'n_estimators': 715, 'learning_rate': 0.026285737556424087, 'max_depth': 11, 'num_leaves': 53, 'min_child_samples': 24, 'colsample_bytree': 0.5036987345283499, 'subsample': 0.5552418213692347, 'reg_alpha': 0.0006460359843929025, 'reg_lambda': 0.0037761843581023305} because of the following error: MlflowException('API request to http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/api/2.0/mlflow/runs/get failed with exception HTTPConnectionPool(host=\'ec2-13-221-127-40.compute-1.amazonaws.com\', port=5000): Max retries exceeded with url: /api/2.0/mlflow/runs/get?run_uuid=90f91c29cdd643f7af7225735de62bee&run_id=90f91c29cdd643f7af7225735de62bee (Caused by NewConnectionError("HTTPConnection(host=\'ec2-13-221-127-40.compute-1.amazonaws.com\', port=5000): Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it"))').
Traceback (most recent call last):
  File

MlflowException: API request to http://ec2-13-221-127-40.compute-1.amazonaws.com:5000/api/2.0/mlflow/runs/get failed with exception HTTPConnectionPool(host='ec2-13-221-127-40.compute-1.amazonaws.com', port=5000): Max retries exceeded with url: /api/2.0/mlflow/runs/get?run_uuid=90f91c29cdd643f7af7225735de62bee&run_id=90f91c29cdd643f7af7225735de62bee (Caused by NewConnectionError("HTTPConnection(host='ec2-13-221-127-40.compute-1.amazonaws.com', port=5000): Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it"))