In [1]:
# Change the working directory to the parent of the current one.
# Presumably this is the project root that `utilities` and the relative data paths resolve from — confirm.
# NOTE(review): the target is relative, so re-running this cell moves up one more level each time.
%cd ..

/Users/ishamrashik/Desktop/machine-learning-and-ai-projects/community-mcp-session/aws_community_day/youtube-sentiment-insights


In [2]:
import mlflow
import os 

# AWS credentials must come from the environment (never hard-code them here);
# the experiment's artifact location is an s3:// bucket, so they are needed to upload artifacts.
# Re-assigning os.environ[name] = os.getenv(name) is a no-op when the variable is set and
# raises an opaque "TypeError: str expected, not NoneType" when it is not, so validate
# up front and fail with an actionable message instead.
_required_aws_vars = ('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY', 'AWS_DEFAULT_REGION')
_missing_aws_vars = [name for name in _required_aws_vars if not os.getenv(name)]
if _missing_aws_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_aws_vars)
        + ". Export them before starting the notebook kernel."
    )

# Then set MLflow tracking URI (export MLFLOW_TRACKING_URI to point at a different server)
mlflow.set_tracking_uri(os.getenv("MLFLOW_TRACKING_URI", "http://3.29.137.20:5000"))

In [3]:
# Set or create an experiment: runs started after this call are logged under it,
# and MLflow creates the experiment on the tracking server if the name does not exist yet.
mlflow.set_experiment("LightGBM HP Tuning")

2025/10/12 19:18:07 INFO mlflow.tracking.fluent: Experiment with name 'LightGBM HP Tuning' does not exist. Creating a new experiment.


<Experiment: artifact_location='s3://mlflow-artifacts-5fb77f1c/688158500548363559', creation_time=1760282287992, experiment_id='688158500548363559', last_update_time=1760282287992, lifecycle_stage='active', name='LightGBM HP Tuning', tags={}>

In [5]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
import mlflow
import mlflow.sklearn
import optuna
from lightgbm import LGBMClassifier
import matplotlib.pyplot as plt

In [6]:
from utilities import load_data, INTERIM_DATA_PATH

In [9]:
# Locations of the processed train / validation / test splits inside the interim data directory
train_df_path, val_df_path, test_df_path = (
    os.path.join(INTERIM_DATA_PATH, filename)
    for filename in ("train_processed.csv", "val_processed.csv", "test_processed.csv")
)

# Load the three splits, in train -> val -> test order
train_df, val_df, test_df = (
    load_data(path) for path in (train_df_path, val_df_path, test_df_path)
)

2025-10-12 19:20:20,889 - model_building - DEBUG - Data loaded from data/interim/train_processed.csv


2025-10-12 19:20:20,894 - model_building - DEBUG - Data loaded from data/interim/val_processed.csv
2025-10-12 19:20:20,900 - model_building - DEBUG - Data loaded from data/interim/test_processed.csv


In [10]:
import numpy as np

# TF-IDF configuration
ngram_range = (1, 3)  # unigrams, bigrams and trigrams
max_features = 10000  # cap the TF-IDF vocabulary at 10,000 terms

# Step 1: TF-IDF vectorization of the cleaned comments. The vectorizer is fitted on the
# training split only and then applied unchanged to the validation split.
vectorizer = TfidfVectorizer(max_features=max_features, ngram_range=ngram_range)
X_train_text = vectorizer.fit_transform(train_df['clean_comment']).toarray()
X_val_text = vectorizer.transform(val_df['clean_comment']).toarray()

# Step 2: Numerical feature columns taken as-is from the processed frames
numerical_features = ['word_count', 'num_stop_words', 'num_chars', 'num_chars_cleaned']
X_train_numerical = train_df[numerical_features].values
X_val_numerical = val_df[numerical_features].values

# Step 3: Place the numerical columns to the right of the dense TF-IDF matrix
X_train = np.hstack((X_train_text, X_train_numerical))
X_val = np.hstack((X_val_text, X_val_numerical))

# Target variable
y_train, y_val = train_df['category'], val_df['category']

# Step 4: Oversample with SMOTE — applied to the training split only; validation stays untouched
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train, y_train)

In [None]:
# Function to log results in MLflow
def log_mlflow(model_name, model, X_train, X_val, y_train, y_val, params, trial_number):
    """Fit ``model``, score it on the validation split and record everything as one MLflow run.

    Args:
        model_name: Algorithm label; used in the run name, the ``algo_name``
            parameter and the artifact path of the logged model.
        model: Estimator exposing ``fit`` and ``predict``; ``fit`` is called on it here.
        X_train, y_train: Training features and labels passed to ``model.fit``.
        X_val, y_val: Validation features and labels used for scoring.
        params: Mapping of hyperparameter name to value, logged as run parameters.
        trial_number: Identifier embedded in the run name (an Optuna trial
            number, or a label such as ``"Best"``).

    Returns:
        The validation accuracy of the fitted model.
    """
    run_name = f"Trial_{trial_number}_{model_name}_SMOTE_TFIDF_Trigrams"

    # Passing run_name to start_run names the run at creation time instead of
    # writing the reserved "mlflow.runName" tag afterwards.
    with mlflow.start_run(run_name=run_name):
        mlflow.set_tag("experiment_type", "algorithm_comparison")

        # Log the algorithm name and all hyperparameters in a single batched call
        # (one request to the tracking server instead of one per parameter).
        # Done before training so a run that fails in fit() still shows its parameters.
        mlflow.log_params({"algo_name": model_name, **params})

        # Train model
        model.fit(X_train, y_train)
        y_pred = model.predict(X_val)

        # Collect accuracy plus every per-label / averaged score of the classification report
        accuracy = accuracy_score(y_val, y_pred)
        metrics = {"accuracy": accuracy}

        classification_rep = classification_report(y_val, y_pred, output_dict=True)
        for label, scores in classification_rep.items():
            # Only nested dicts carry per-label scores; scalar entries are skipped
            if isinstance(scores, dict):
                for metric, value in scores.items():
                    metrics[f"{label}_{metric}"] = value

        # Single batched call for all metrics
        mlflow.log_metrics(metrics)

        # Log the model
        mlflow.sklearn.log_model(model, f"{model_name}_model")

        return accuracy

In [13]:
# Step 6: Optuna objective function for LightGBM
def objective_lightgbm(trial):
    """Optuna objective: sample one LightGBM configuration and return its validation accuracy.

    Args:
        trial: The Optuna trial used to draw hyperparameter values; its
            ``number`` is forwarded to ``log_mlflow`` for the run name.

    Returns:
        Whatever ``log_mlflow`` returns for the sampled model (the accuracy on
        the module-level ``X_val`` / ``y_val``).
    """
    # Hyperparameter space to explore. The dict doubles as the set of values
    # logged to MLflow and as the keyword arguments for the classifier.
    # NOTE(review): per the LightGBM docs, bagging (`subsample`) only takes effect when
    # `subsample_freq` > 0, which is not set here — confirm whether tuning it does anything.
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'num_leaves': trial.suggest_int('num_leaves', 20, 150),
        'min_child_samples': trial.suggest_int('min_child_samples', 10, 100),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-4, 10.0, log=True),  # L1 regularization
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-4, 10.0, log=True),  # L2 regularization
    }

    # Create LightGBM model from the sampled values with a fixed seed
    classifier = LGBMClassifier(random_state=42, **params)

    # Each trial becomes its own MLflow run; the accuracy it reports is the objective value
    return log_mlflow("LightGBM", classifier, X_train, X_val, y_train, y_val, params, trial.number)



In [14]:
# Step 7: Run Optuna for LightGBM, log the best model, and plot the importance of each parameter
def run_optuna_experiment(n_trials=100, seed=42):
    """Run the LightGBM hyperparameter search, log the best configuration and plot diagnostics.

    Args:
        n_trials: Number of Optuna trials to run (default 100, as before).
        seed: Seed for the TPE sampler so that repeated runs propose the same
            sequence of configurations. Pass ``None`` for an unseeded search.

    Returns:
        None. Results are recorded in MLflow (one run per trial via
        ``objective_lightgbm`` plus a final ``"Best"`` run) and two Optuna
        figures are shown.
    """
    # TPE is Optuna's default single-objective sampler; constructing it explicitly
    # only adds the seed, which makes the search reproducible.
    sampler = optuna.samplers.TPESampler(seed=seed)
    study = optuna.create_study(direction="maximize", sampler=sampler)
    study.optimize(objective_lightgbm, n_trials=n_trials)

    # Rebuild a model from the best parameters. Unpacking the dict keeps this in
    # sync with whatever hyperparameters the objective suggests.
    best_params = study.best_params
    best_model = LGBMClassifier(**best_params, random_state=42)

    # Re-train and log the best configuration as its own MLflow run named "Trial_Best_..."
    log_mlflow("LightGBM", best_model, X_train, X_val, y_train, y_val, best_params, "Best")

    # Plot parameter importance
    optuna.visualization.plot_param_importances(study).show()

    # Plot optimization history
    optuna.visualization.plot_optimization_history(study).show()

In [None]:
# Run the experiment for LightGBM: the Optuna search (one MLflow run per trial),
# the final "Best" run, and the two Optuna diagnostic plots.
run_optuna_experiment()

[I 2025-10-12 19:22:02,485] A new study created in memory with name: no-name-b5113580-f9fd-45e1-9366-ecb2fdb6a035


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.099641 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 119160
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1813
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:22:19,410] Trial 0 finished with value: 0.5875769445998881 and parameters: {'n_estimators': 246, 'learning_rate': 0.0005639336751050803, 'max_depth': 14, 'num_leaves': 62, 'min_child_samples': 64, 'colsample_bytree': 0.9949638648840109, 'subsample': 0.7148386862154588, 'reg_alpha': 0.0002134627272484808, 'reg_lambda': 2.579866935078037}. Best is trial 0 with value: 0.5875769445998881.


🏃 View run Trial_0_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/5496f7975a43433f992042f100ea949e
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.119432 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 125378
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 2122
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:22:28,084] Trial 1 finished with value: 0.6944599888080581 and parameters: {'n_estimators': 586, 'learning_rate': 0.023208376081236694, 'max_depth': 4, 'num_leaves': 34, 'min_child_samples': 57, 'colsample_bytree': 0.7890174948017576, 'subsample': 0.6034459594604498, 'reg_alpha': 0.17753851215132846, 'reg_lambda': 0.2586018313798269}. Best is trial 1 with value: 0.6944599888080581.


🏃 View run Trial_1_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/c82f0131d276491292efcfae38c7d81f
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.121151 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 124458
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 2076
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:22:42,474] Trial 2 finished with value: 0.6502518186905428 and parameters: {'n_estimators': 372, 'learning_rate': 0.008576276705296119, 'max_depth': 8, 'num_leaves': 23, 'min_child_samples': 58, 'colsample_bytree': 0.5812979674623733, 'subsample': 0.894855484810206, 'reg_alpha': 0.5411048124400231, 'reg_lambda': 0.10697732589006617}. Best is trial 1 with value: 0.6944599888080581.


🏃 View run Trial_2_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/735363d0028841baa2d06cc09a439743
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.179690 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 142356
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 3173
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:23:51,543] Trial 3 finished with value: 0.6502518186905428 and parameters: {'n_estimators': 929, 'learning_rate': 0.0011789895341467141, 'max_depth': 12, 'num_leaves': 102, 'min_child_samples': 37, 'colsample_bytree': 0.533620334014156, 'subsample': 0.7380026927559413, 'reg_alpha': 0.09317332077182354, 'reg_lambda': 8.68086102661169}. Best is trial 1 with value: 0.6944599888080581.


🏃 View run Trial_3_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/2131ce7ed4a44e058511f5d187a263ee
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.142122 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 131982
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 2480
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:24:00,552] Trial 4 finished with value: 0.6743144935646335 and parameters: {'n_estimators': 532, 'learning_rate': 0.016869815912537542, 'max_depth': 4, 'num_leaves': 102, 'min_child_samples': 48, 'colsample_bytree': 0.9613892872490676, 'subsample': 0.8807129102510378, 'reg_alpha': 0.9373384473877794, 'reg_lambda': 0.36986199212780735}. Best is trial 1 with value: 0.6944599888080581.


🏃 View run Trial_4_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/6f337b2c128b4e4793965dee922edb7e
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.122628 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 126032
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 2155
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:24:19,344] Trial 5 finished with value: 0.7285954113038612 and parameters: {'n_estimators': 260, 'learning_rate': 0.03534994259492832, 'max_depth': 15, 'num_leaves': 138, 'min_child_samples': 56, 'colsample_bytree': 0.5641496658351339, 'subsample': 0.9050052276538025, 'reg_alpha': 0.0021619324052754747, 'reg_lambda': 0.0005008453299548682}. Best is trial 5 with value: 0.7285954113038612.


🏃 View run Trial_5_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/384552557b3f4ea194b4cd69d5052328
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.067708 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 101451
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1147
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:24:25,030] Trial 6 finished with value: 0.6088416340235031 and parameters: {'n_estimators': 269, 'learning_rate': 0.014120198872995648, 'max_depth': 3, 'num_leaves': 112, 'min_child_samples': 98, 'colsample_bytree': 0.9248567378432204, 'subsample': 0.7386963455496762, 'reg_alpha': 0.0017636605963582334, 'reg_lambda': 0.12460132672466388}. Best is trial 5 with value: 0.7285954113038612.


🏃 View run Trial_6_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/25c0cfe62cd14f41b512eba46f198807
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.447867 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181576
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 8057
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:24:47,048] Trial 7 finished with value: 0.6726357022943481 and parameters: {'n_estimators': 354, 'learning_rate': 0.010396100183196933, 'max_depth': 9, 'num_leaves': 52, 'min_child_samples': 10, 'colsample_bytree': 0.6736309652455017, 'subsample': 0.9200646460109771, 'reg_alpha': 0.00035215382398887167, 'reg_lambda': 0.19413942661631767}. Best is trial 5 with value: 0.7285954113038612.


🏃 View run Trial_7_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/461242df5c70494a93f6e368bbe5eb51
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.133561 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 128617
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 2290
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:24:54,646] Trial 8 finished with value: 0.5730274202574147 and parameters: {'n_estimators': 283, 'learning_rate': 0.004514598365179067, 'max_depth': 4, 'num_leaves': 108, 'min_child_samples': 52, 'colsample_bytree': 0.6903805939471894, 'subsample': 0.9998153128878182, 'reg_alpha': 0.0024066463558791674, 'reg_lambda': 0.03686393535389523}. Best is trial 5 with value: 0.7285954113038612.


🏃 View run Trial_8_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/bbe386de899c4c99be1492b96272b60e
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.090771 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 112695
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1536
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:25:01,546] Trial 9 finished with value: 0.6149972020145495 and parameters: {'n_estimators': 447, 'learning_rate': 0.010240801111434345, 'max_depth': 3, 'num_leaves': 97, 'min_child_samples': 74, 'colsample_bytree': 0.7908813130541268, 'subsample': 0.7963046187828337, 'reg_alpha': 0.3587204635800886, 'reg_lambda': 0.010048903539988599}. Best is trial 5 with value: 0.7285954113038612.


🏃 View run Trial_9_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/6f80414b004f4e47809f2c439923deb4
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.245925 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 154940
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 4272
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:25:15,949] Trial 10 finished with value: 0.7325125909345271 and parameters: {'n_estimators': 115, 'learning_rate': 0.09318284674410046, 'max_depth': 15, 'num_leaves': 145, 'min_child_samples': 27, 'colsample_bytree': 0.510360365908516, 'subsample': 0.5253161412697429, 'reg_alpha': 0.009856958202427227, 'reg_lambda': 0.00013772864964777212}. Best is trial 10 with value: 0.7325125909345271.


🏃 View run Trial_10_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/57f5b85c346a46e29d7be5cadb12100e
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.270743 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 161472
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 4958
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:25:28,640] Trial 11 finished with value: 0.7263570229434807 and parameters: {'n_estimators': 105, 'learning_rate': 0.09774691556734004, 'max_depth': 14, 'num_leaves': 147, 'min_child_samples': 23, 'colsample_bytree': 0.5259338905851821, 'subsample': 0.5193471173654032, 'reg_alpha': 0.010757436242015537, 'reg_lambda': 0.00016957871304649106}. Best is trial 10 with value: 0.7325125909345271.


🏃 View run Trial_11_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/924bf53e5ae94f79809fb4212024bb0f
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.199577 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 147928
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 3620
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:25:38,169] Trial 12 finished with value: 0.7112479015109121 and parameters: {'n_estimators': 106, 'learning_rate': 0.09787615662164474, 'max_depth': 15, 'num_leaves': 148, 'min_child_samples': 32, 'colsample_bytree': 0.6242096148145511, 'subsample': 0.6105227959020537, 'reg_alpha': 7.869495199979705, 'reg_lambda': 0.00015737050257769476}. Best is trial 10 with value: 0.7325125909345271.


🏃 View run Trial_12_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/00bcf7f6af9f401d990381d057b9e966
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.081954 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 109228
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1404
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:26:00,657] Trial 13 finished with value: 0.7381085618354785 and parameters: {'n_estimators': 625, 'learning_rate': 0.04640966729696016, 'max_depth': 12, 'num_leaves': 131, 'min_child_samples': 80, 'colsample_bytree': 0.5231873748276997, 'subsample': 0.5237782439694341, 'reg_alpha': 0.014397538965706882, 'reg_lambda': 0.0018733767042289763}. Best is trial 13 with value: 0.7381085618354785.


🏃 View run Trial_13_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/2c15b3e7986c48dfbe9536c12d36836d
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.071808 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 106276
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1300
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:26:25,258] Trial 14 finished with value: 0.7347509792949076 and parameters: {'n_estimators': 704, 'learning_rate': 0.045848611945221524, 'max_depth': 12, 'num_leaves': 125, 'min_child_samples': 86, 'colsample_bytree': 0.5091447553897349, 'subsample': 0.5055527743223778, 'reg_alpha': 0.019466878921873418, 'reg_lambda': 0.0023279642612097444}. Best is trial 13 with value: 0.7381085618354785.


🏃 View run Trial_14_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/2de19d619b2b47d991369bc76eb327b6
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.074268 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105576
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1276
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:27:13,184] Trial 15 finished with value: 0.630106323447118 and parameters: {'n_estimators': 761, 'learning_rate': 0.00015201015687520353, 'max_depth': 11, 'num_leaves': 124, 'min_child_samples': 88, 'colsample_bytree': 0.6206501851552317, 'subsample': 0.6042538073657039, 'reg_alpha': 0.037936448397263615, 'reg_lambda': 0.0030759300966436234}. Best is trial 13 with value: 0.7381085618354785.


🏃 View run Trial_15_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/2b125e8dcefc490ba1627a850e21e8e0
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.083428 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 110332
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1445
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:27:32,645] Trial 16 finished with value: 0.7364297705651931 and parameters: {'n_estimators': 711, 'learning_rate': 0.04225984384290288, 'max_depth': 11, 'num_leaves': 81, 'min_child_samples': 78, 'colsample_bytree': 0.8630991777834351, 'subsample': 0.6566533189658993, 'reg_alpha': 0.017981811536398374, 'reg_lambda': 0.0015576019932632247}. Best is trial 13 with value: 0.7381085618354785.


🏃 View run Trial_16_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/814b71cbce7d4ce190a37b09de1b8aa0
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.088616 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 112695
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1536
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:28:04,280] Trial 17 finished with value: 0.6200335758254057 and parameters: {'n_estimators': 808, 'learning_rate': 0.0025189628269299707, 'max_depth': 7, 'num_leaves': 77, 'min_child_samples': 74, 'colsample_bytree': 0.8664013166691226, 'subsample': 0.6591013000228586, 'reg_alpha': 0.04626095721174222, 'reg_lambda': 0.0011715262738669463}. Best is trial 13 with value: 0.7381085618354785.


🏃 View run Trial_17_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/50d97ff598c447658883793584c9bcc6
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.092792 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 112695
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1536
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:28:21,810] Trial 18 finished with value: 0.731952993844432 and parameters: {'n_estimators': 669, 'learning_rate': 0.039925557882489, 'max_depth': 10, 'num_leaves': 83, 'min_child_samples': 74, 'colsample_bytree': 0.8661932534387222, 'subsample': 0.6703006202703735, 'reg_alpha': 2.0977967868314913, 'reg_lambda': 0.007444354255917563}. Best is trial 13 with value: 0.7381085618354785.


🏃 View run Trial_18_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/7fec340767264bbb82bbf43a5735fd24
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.066339 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100911
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1131
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:28:50,539] Trial 19 finished with value: 0.6743144935646335 and parameters: {'n_estimators': 1000, 'learning_rate': 0.004905049938976538, 'max_depth': 7, 'num_leaves': 67, 'min_child_samples': 99, 'colsample_bytree': 0.7268108862149227, 'subsample': 0.562950071980136, 'reg_alpha': 0.005066883786240248, 'reg_lambda': 0.013022743155699073}. Best is trial 13 with value: 0.7381085618354785.


🏃 View run Trial_19_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/c060de855c3d485d8ff4ccf0a3e67d37
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.077040 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107035
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1326
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:29:28,308] Trial 20 finished with value: 0.6094012311135982 and parameters: {'n_estimators': 845, 'learning_rate': 0.00014903936363091884, 'max_depth': 12, 'num_leaves': 47, 'min_child_samples': 84, 'colsample_bytree': 0.8422756752455103, 'subsample': 0.8064021335166902, 'reg_alpha': 0.0005326611677091958, 'reg_lambda': 0.0005568632596348744}. Best is trial 13 with value: 0.7381085618354785.


🏃 View run Trial_20_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/244fdbab957847ef8fe6a3ee45f92579
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.077996 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 107035
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1326
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:29:50,228] Trial 21 finished with value: 0.7397873531057638 and parameters: {'n_estimators': 654, 'learning_rate': 0.04019806865120706, 'max_depth': 12, 'num_leaves': 126, 'min_child_samples': 84, 'colsample_bytree': 0.6192878812268102, 'subsample': 0.5010874759774191, 'reg_alpha': 0.01580553267257938, 'reg_lambda': 0.002058406714996538}. Best is trial 21 with value: 0.7397873531057638.


🏃 View run Trial_21_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/12cc029a604a4b1c95d302a25f5d4ddd
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.098621 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 116961
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1712
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:30:11,494] Trial 22 finished with value: 0.7414661443760493 and parameters: {'n_estimators': 585, 'learning_rate': 0.047728371798613396, 'max_depth': 13, 'num_leaves': 127, 'min_child_samples': 67, 'colsample_bytree': 0.6254146804205554, 'subsample': 0.5564239900250472, 'reg_alpha': 0.07102805352045706, 'reg_lambda': 0.0031352333827538315}. Best is trial 22 with value: 0.7414661443760493.


🏃 View run Trial_22_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/cac70ada7dbd4c6f81cf3e6929b153cc
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.098319 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 117501
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1736
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:30:37,670] Trial 23 finished with value: 0.731952993844432 and parameters: {'n_estimators': 586, 'learning_rate': 0.02466974154697163, 'max_depth': 13, 'num_leaves': 130, 'min_child_samples': 66, 'colsample_bytree': 0.6208419830268492, 'subsample': 0.5611940727509832, 'reg_alpha': 0.09981313073171035, 'reg_lambda': 0.0048191599869032205}. Best is trial 22 with value: 0.7414661443760493.


🏃 View run Trial_23_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/fc78cdeb776044db9c25a84f10839be1
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.108713 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 117501
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1736
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:30:58,955] Trial 24 finished with value: 0.7347509792949076 and parameters: {'n_estimators': 512, 'learning_rate': 0.04951292536002632, 'max_depth': 13, 'num_leaves': 120, 'min_child_samples': 66, 'colsample_bytree': 0.5770787501715584, 'subsample': 0.5582259521836338, 'reg_alpha': 0.005589264159380168, 'reg_lambda': 0.0005746275242908061}. Best is trial 22 with value: 0.7414661443760493.


🏃 View run Trial_24_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/8ae622db29384ed89fa3756170f6b3be
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.068786 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 103196
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1200
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:31:17,090] Trial 25 finished with value: 0.7341913822048125 and parameters: {'n_estimators': 655, 'learning_rate': 0.061008000877902, 'max_depth': 11, 'num_leaves': 131, 'min_child_samples': 93, 'colsample_bytree': 0.6732256239351695, 'subsample': 0.5545521367096001, 'reg_alpha': 0.0010018304605576636, 'reg_lambda': 0.022456809174894666}. Best is trial 22 with value: 0.7414661443760493.


🏃 View run Trial_25_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/3f9950d40479475885745ebc2ad3ab2e
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.077756 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 107922
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1357
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:31:33,739] Trial 26 finished with value: 0.7168438724118634 and parameters: {'n_estimators': 475, 'learning_rate': 0.020646252203256124, 'max_depth': 10, 'num_leaves': 116, 'min_child_samples': 82, 'colsample_bytree': 0.6502733927302562, 'subsample': 0.5041683732453744, 'reg_alpha': 0.06366575476535453, 'reg_lambda': 0.0012989233239175742}. Best is trial 22 with value: 0.7414661443760493.


🏃 View run Trial_26_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/2d560d8b36ba4a5e9df4b94885d8af15
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.149244 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 135415
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 2689
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:32:18,385] Trial 27 finished with value: 0.6843872411863459 and parameters: {'n_estimators': 613, 'learning_rate': 0.006604415776580504, 'max_depth': 13, 'num_leaves': 91, 'min_child_samples': 44, 'colsample_bytree': 0.717141461505668, 'subsample': 0.625485502823576, 'reg_alpha': 0.18488385068567081, 'reg_lambda': 0.03626920127220561}. Best is trial 22 with value: 0.7414661443760493.


🏃 View run Trial_27_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/1a304cdfb373447296482dbc6e727bdf
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.067924 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 104057
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1227
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:32:43,631] Trial 28 finished with value: 0.634023503077784 and parameters: {'n_estimators': 422, 'learning_rate': 0.00240805577885325, 'max_depth': 10, 'num_leaves': 139, 'min_child_samples': 91, 'colsample_bytree': 0.5598908459514369, 'subsample': 0.5814085689327315, 'reg_alpha': 0.024342187997819505, 'reg_lambda': 0.0041188076412793885}. Best is trial 22 with value: 0.7414661443760493.


🏃 View run Trial_28_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/c89300471f864ce8ae34d3afb8692b5b
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.093469 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 116130
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1677
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:33:49,660] Trial 29 finished with value: 0.6446558477895915 and parameters: {'n_estimators': 772, 'learning_rate': 0.0003392541353283751, 'max_depth': 14, 'num_leaves': 132, 'min_child_samples': 68, 'colsample_bytree': 0.5958157849973961, 'subsample': 0.6785686169403622, 'reg_alpha': 0.005410982309400866, 'reg_lambda': 0.7302627318362129}. Best is trial 22 with value: 0.7414661443760493.


🏃 View run Trial_29_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/3c21344bcca24c7bad3c00b0e62ab2cb
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.085476 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 108507
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1378
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:34:24,295] Trial 30 finished with value: 0.7392277560156687 and parameters: {'n_estimators': 868, 'learning_rate': 0.028775694058315444, 'max_depth': 13, 'num_leaves': 113, 'min_child_samples': 81, 'colsample_bytree': 0.5429178214765973, 'subsample': 0.6969424157239626, 'reg_alpha': 0.17688755960140626, 'reg_lambda': 0.00033403902972016655}. Best is trial 22 with value: 0.7414661443760493.


🏃 View run Trial_30_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/ac7f406fb1564983bbe70f8980ed570c
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.081304 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 109728
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1422
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:34:58,533] Trial 31 finished with value: 0.7442641298265249 and parameters: {'n_estimators': 939, 'learning_rate': 0.06681592880542604, 'max_depth': 13, 'num_leaves': 113, 'min_child_samples': 79, 'colsample_bytree': 0.5433578086051002, 'subsample': 0.7067145051670146, 'reg_alpha': 0.00010570822989732897, 'reg_lambda': 0.000346014582726571}. Best is trial 31 with value: 0.7442641298265249.


🏃 View run Trial_31_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/517142e72e08433385df1b2b344fea56
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.090773 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 114301
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1601
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:35:35,165] Trial 32 finished with value: 0.7347509792949076 and parameters: {'n_estimators': 890, 'learning_rate': 0.02656921248281728, 'max_depth': 14, 'num_leaves': 108, 'min_child_samples': 71, 'colsample_bytree': 0.5506328962828358, 'subsample': 0.6998516660112962, 'reg_alpha': 0.0001259528439323378, 'reg_lambda': 0.0002837571472041999}. Best is trial 31 with value: 0.7442641298265249.


🏃 View run Trial_32_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/50c614d137ca4555a122b494eb653329
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.110238 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 122107
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1953
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:36:10,453] Trial 33 finished with value: 0.7420257414661444 and parameters: {'n_estimators': 983, 'learning_rate': 0.07141812526985418, 'max_depth': 13, 'num_leaves': 118, 'min_child_samples': 60, 'colsample_bytree': 0.6036267329435303, 'subsample': 0.7831090359542278, 'reg_alpha': 0.21391533096617, 'reg_lambda': 0.00035432981918371973}. Best is trial 31 with value: 0.7442641298265249.


🏃 View run Trial_33_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/45dab1ae831747758df9a8dd53b7bff2
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.109381 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 121120
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1905
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:36:45,275] Trial 34 finished with value: 0.74482372691662 and parameters: {'n_estimators': 998, 'learning_rate': 0.0637856153965358, 'max_depth': 13, 'num_leaves': 119, 'min_child_samples': 61, 'colsample_bytree': 0.5975958914307973, 'subsample': 0.8235276171487949, 'reg_alpha': 1.835739680755617, 'reg_lambda': 0.0008191881632847142}. Best is trial 34 with value: 0.74482372691662.


🏃 View run Trial_34_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/60636b50c2fc4bc2835c8e9dbb8b0cb7
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.108902 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 121120
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1905
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:37:22,732] Trial 35 finished with value: 0.7465025181869054 and parameters: {'n_estimators': 979, 'learning_rate': 0.06666217288087542, 'max_depth': 14, 'num_leaves': 101, 'min_child_samples': 61, 'colsample_bytree': 0.5913104500300685, 'subsample': 0.78869181469351, 'reg_alpha': 1.4047821710100246, 'reg_lambda': 0.0007315472843583373}. Best is trial 35 with value: 0.7465025181869054.


🏃 View run Trial_35_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/0df94178864f4248a763337c56f915c8
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.143181 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 133733
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 2583
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:38:14,969] Trial 36 finished with value: 0.7420257414661444 and parameters: {'n_estimators': 980, 'learning_rate': 0.07320039682047508, 'max_depth': 15, 'num_leaves': 93, 'min_child_samples': 46, 'colsample_bytree': 0.5776156328988543, 'subsample': 0.7867599185346763, 'reg_alpha': 2.249916399387457, 'reg_lambda': 0.0009125155866965222}. Best is trial 35 with value: 0.7465025181869054.


🏃 View run Trial_36_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/387e8e8366244dcea36b9fcd68a613c5
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.112124 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 122107
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1953
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:39:03,268] Trial 37 finished with value: 0.6485730274202575 and parameters: {'n_estimators': 929, 'learning_rate': 0.0010712343318571515, 'max_depth': 14, 'num_leaves': 102, 'min_child_samples': 60, 'colsample_bytree': 0.6008185235345772, 'subsample': 0.8407767174790465, 'reg_alpha': 8.689448504722137, 'reg_lambda': 0.00010247120280718473}. Best is trial 35 with value: 0.7465025181869054.


🏃 View run Trial_37_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/26e19489aa124abc975fd996cee743c6
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.113271 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 121120
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 1905
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:39:41,528] Trial 38 finished with value: 0.7291550083939563 and parameters: {'n_estimators': 920, 'learning_rate': 0.01522502118362927, 'max_depth': 14, 'num_leaves': 89, 'min_child_samples': 61, 'colsample_bytree': 0.6523019220252034, 'subsample': 0.7622629750904191, 'reg_alpha': 1.969650055218111, 'reg_lambda': 0.0002976838614248435}. Best is trial 35 with value: 0.7465025181869054.


🏃 View run Trial_38_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/3892fe5d382443fe9d4275b11bfe9a5a
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.130607 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 128617
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 2290
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:40:07,580] Trial 39 finished with value: 0.7437045327364298 and parameters: {'n_estimators': 966, 'learning_rate': 0.07547316696330439, 'max_depth': 11, 'num_leaves': 105, 'min_child_samples': 52, 'colsample_bytree': 0.7691074672166897, 'subsample': 0.860780521152517, 'reg_alpha': 0.7938507572843295, 'reg_lambda': 0.0007373834898697169}. Best is trial 35 with value: 0.7465025181869054.


🏃 View run Trial_39_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/3eb5284dc1644d759f8f79fe230b67be
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.135574 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 129711
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 2351
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:40:33,808] Trial 40 finished with value: 0.7269166200335758 and parameters: {'n_estimators': 931, 'learning_rate': 0.019126626004435876, 'max_depth': 9, 'num_leaves': 72, 'min_child_samples': 51, 'colsample_bytree': 0.7641969097888682, 'subsample': 0.8623006465078543, 'reg_alpha': 0.9672610623084757, 'reg_lambda': 0.0007449940309021891}. Best is trial 35 with value: 0.7465025181869054.


🏃 View run Trial_40_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/59153a69c2c44d1ba7889b24c8ddc6e2
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.131088 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 126648
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 2186
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:40:59,586] Trial 41 finished with value: 0.7437045327364298 and parameters: {'n_estimators': 968, 'learning_rate': 0.062051866319133626, 'max_depth': 11, 'num_leaves': 103, 'min_child_samples': 55, 'colsample_bytree': 0.8147192313773091, 'subsample': 0.8318301260805167, 'reg_alpha': 0.6696910057086446, 'reg_lambda': 0.0003727783703986542}. Best is trial 35 with value: 0.7465025181869054.


🏃 View run Trial_41_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/d3202fbc8bfa4299bf7030868e7c993d
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.156783 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 136341
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 2747
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:41:29,088] Trial 42 finished with value: 0.7470621152770005 and parameters: {'n_estimators': 955, 'learning_rate': 0.06461764160601154, 'max_depth': 11, 'num_leaves': 102, 'min_child_samples': 43, 'colsample_bytree': 0.8118796752106685, 'subsample': 0.8280650294320107, 'reg_alpha': 1.0181573082126334, 'reg_lambda': 8.489415685659853}. Best is trial 42 with value: 0.7470621152770005.


🏃 View run Trial_42_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/57a031ca86b94cf5b2d65ad4990d13fc
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.162639 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 138830
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 2913
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:42:02,068] Trial 43 finished with value: 0.7392277560156687 and parameters: {'n_estimators': 842, 'learning_rate': 0.0711647166161858, 'max_depth': 12, 'num_leaves': 97, 'min_child_samples': 40, 'colsample_bytree': 0.9102739571858774, 'subsample': 0.9449154449994015, 'reg_alpha': 3.9698435005306614, 'reg_lambda': 7.468228677053931}. Best is trial 42 with value: 0.7470621152770005.


🏃 View run Trial_43_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/0e9e5f5d1d2f4129bbe78f0853bd9d1d
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.160119 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 137931
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 2853
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:42:22,296] Trial 44 finished with value: 0.7341913822048125 and parameters: {'n_estimators': 896, 'learning_rate': 0.030839636469206688, 'max_depth': 8, 'num_leaves': 106, 'min_child_samples': 41, 'colsample_bytree': 0.7681341304352226, 'subsample': 0.7492402550949746, 'reg_alpha': 1.2085222403496814, 'reg_lambda': 1.246323162963502}. Best is trial 42 with value: 0.7470621152770005.


🏃 View run Trial_44_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/47fc3b9ddbd54ddbb2a6f4a7bd040317
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.140817 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 131982
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 2480
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-10-12 19:42:50,535] Trial 45 finished with value: 0.7453833240067151 and parameters: {'n_estimators': 947, 'learning_rate': 0.09860982704052265, 'max_depth': 10, 'num_leaves': 110, 'min_child_samples': 48, 'colsample_bytree': 0.8092777248562248, 'subsample': 0.8182380111633543, 'reg_alpha': 4.565427689338645, 'reg_lambda': 0.06643393905246957}. Best is trial 42 with value: 0.7470621152770005.


🏃 View run Trial_45_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/868beefba6944721a9fc6643467ea51d
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.188269 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 145409
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 3409
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


Exception ignored on calling ctypes callback function: <function _log_callback at 0x14d7eafc0>
Traceback (most recent call last):
  File "/opt/homebrew/Caskroom/miniforge/base/envs/mlops_aws_community/lib/python3.12/site-packages/lightgbm/basic.py", line 287, in _log_callback
    def _log_callback(msg: bytes) -> None:
    
KeyboardInterrupt: 


No further splits with positive gain, best gain: -inf


[I 2025-10-12 19:43:21,028] Trial 46 finished with value: 0.7129266927811976 and parameters: {'n_estimators': 946, 'learning_rate': 0.013961400386068315, 'max_depth': 9, 'num_leaves': 111, 'min_child_samples': 34, 'colsample_bytree': 0.8217441927378564, 'subsample': 0.8200522051436397, 'reg_alpha': 4.536858643747722, 'reg_lambda': 0.06642272775289775}. Best is trial 42 with value: 0.7470621152770005.


🏃 View run Trial_46_LightGBM_SMOTE_TFIDF_Trigrams at: http://3.29.137.20:5000/#/experiments/688158500548363559/runs/bf47680c6ee44235b827975cd411d0d5
🧪 View experiment at: http://3.29.137.20:5000/#/experiments/688158500548363559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.137187 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 131200
[LightGBM] [Info] Number of data points in the train set: 23397, number of used features: 2434
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
