In [1]:
!pip install mlflow boto3 awscli

Collecting mlflow
  Downloading mlflow-3.4.0-py3-none-any.whl.metadata (30 kB)
Collecting boto3
  Downloading boto3-1.40.42-py3-none-any.whl.metadata (6.7 kB)
Collecting awscli
  Downloading awscli-1.42.42-py3-none-any.whl.metadata (11 kB)
Collecting mlflow-skinny==3.4.0 (from mlflow)
  Downloading mlflow_skinny-3.4.0-py3-none-any.whl.metadata (31 kB)
Collecting mlflow-tracing==3.4.0 (from mlflow)
  Downloading mlflow_tracing-3.4.0-py3-none-any.whl.metadata (19 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting fastmcp<3,>=2.0.0 (from mlflow)
  Downloading fastmcp-2.12.4-py3-none-any.whl.metadata (19 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==3.4.0->mlflow)
  Downloading databricks_sdk-0.67.0-py3-n

In [2]:
import mlflow
# Step 2: Set up the MLflow tracking server
mlflow.set_tracking_uri("http://ec2-35-92-223-149.us-west-2.compute.amazonaws.com:8000/")

In [4]:
mlflow.set_experiment("ML Algos with HP Tuning")

2025/10/01 04:37:43 INFO mlflow.tracking.fluent: Experiment with name 'ML Algos with HP Tuning' does not exist. Creating a new experiment.


<Experiment: artifact_location='s3://mlflow-bucket-damini/543202711501021658', creation_time=1759293463518, experiment_id='543202711501021658', last_update_time=1759293463518, lifecycle_stage='active', name='ML Algos with HP Tuning', tags={}>

In [6]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.5.0-py3-none-any.whl (400 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.9/400.9 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.9.0 optuna-4.5.0


In [7]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
from lightgbm import LGBMClassifier
import mlflow
import mlflow.sklearn
import optuna

In [8]:
from google.colab import files

uploaded = files.upload()

Saving reddit_preprocessing.xls to reddit_preprocessing.xls


In [9]:
df = pd.read_csv('reddit_preprocessing.xls')
df.head()

Unnamed: 0,clean_comment,category
0,family mormon have never tried explain them th...,1
1,buddhism has very much lot compatible with chr...,1
2,seriously don say thing first all they won get...,-1
3,what you have learned yours and only yours wha...,0
4,for your own benefit you may want read living ...,1


In [13]:
df.shape

(36607, 2)

In [19]:
# Step 1: Remap the class labels from [-1, 0, 1] to [2, 0, 1]
df['category'] = df['category'].map({-1: 2, 0: 0, 1: 1})

# Step 2: Remove rows where the target labels (category) are NaN
df = df.dropna(subset=['category'])
X_train,X_test,y_train,y_test= train_test_split(df['clean_comment'],df['category'],random_state=42,test_size=0.2,stratify=df['category'])

# Step 3: TF-IDF vectorizer setup
ngram_range = (1, 3)  # Trigram
max_features = 1000  # Set max_features to 1000
vectorizer = TfidfVectorizer(ngram_range=ngram_range, max_features=max_features)
X_train_vec=vectorizer.fit_transform(X_train)
X_test_vec=vectorizer.transform(X_test)

# Step 4: Apply SMOTE to handle class imbalance
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_vec, y_train)

# Step 5: Train-test split
# Function to log results in MLflow
def log_mlflow(model_name, model, X_train_resampled, X_test_vec, y_train_resampled, y_test):
    with mlflow.start_run():
        # Log model type
        mlflow.set_tag("mlflow.runName", f"{model_name}_SMOTE_TFIDF_Trigrams")
        mlflow.set_tag("experiment_type", "algorithm_comparison")

        # Log algorithm name as a parameter
        mlflow.log_param("algo_name", model_name)

        # Train model
        model.fit(X_train_resampled, y_train_resampled)
        y_pred = model.predict(X_test_vec)

        # Log accuracy
        accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", accuracy)

        # Log classification report
        classification_rep = classification_report(y_test, y_pred, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        # Log the model
        mlflow.sklearn.log_model(model, f"{model_name}_model")


# Step 6: Optuna objective function for LightGBM
def objective_lightgbm(trial):
    n_estimators = trial.suggest_int('n_estimators', 50, 300)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)
    max_depth = trial.suggest_int('max_depth', 3, 10)

    model = LGBMClassifier(n_estimators=n_estimators, learning_rate=learning_rate, max_depth=max_depth, random_state=42)
    return accuracy_score(y_test, model.fit(X_train_resampled, y_train_resampled).predict(X_test_vec))


# Step 7: Run Optuna for LightGBM, log the best model only
def run_optuna_experiment():
    study = optuna.create_study(direction="maximize")
    study.optimize(objective_lightgbm, n_trials=30)

    # Get the best parameters and log only the best model
    best_params = study.best_params
    best_model = LGBMClassifier(n_estimators=best_params['n_estimators'], learning_rate=best_params['learning_rate'], max_depth=best_params['max_depth'], random_state=42)

    # Log the best model with MLflow, passing the algo_name as "LightGBM"
    log_mlflow("LightGBM", best_model, X_train_resampled, X_test_vec, y_train_resampled, y_test)

# Run the experiment for LightGBM
run_optuna_experiment()


[I 2025-10-01 05:09:44,334] A new study created in memory with name: no-name-9c2ee655-2150-4ff5-af3c-f9fd4e2c3988


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.160589 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:09:46,978] Trial 0 finished with value: 0.894625550660793 and parameters: {'n_estimators': 76, 'learning_rate': 0.08640013598972707, 'max_depth': 10}. Best is trial 0 with value: 0.894625550660793.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.167102 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:09:50,171] Trial 1 finished with value: 0.7154185022026431 and parameters: {'n_estimators': 181, 'learning_rate': 0.0017350214365563776, 'max_depth': 4}. Best is trial 0 with value: 0.894625550660793.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.275695 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:10:00,838] Trial 2 finished with value: 0.7517180616740088 and parameters: {'n_estimators': 285, 'learning_rate': 0.00021623958800334476, 'max_depth': 8}. Best is trial 0 with value: 0.894625550660793.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.160931 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:10:05,499] Trial 3 finished with value: 0.7978854625550661 and parameters: {'n_estimators': 98, 'learning_rate': 0.006982899203753769, 'max_depth': 10}. Best is trial 0 with value: 0.894625550660793.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.161680 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:10:14,368] Trial 4 finished with value: 0.7570044052863436 and parameters: {'n_estimators': 255, 'learning_rate': 0.00013812977617161983, 'max_depth': 9}. Best is trial 0 with value: 0.894625550660793.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.157893 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:10:18,083] Trial 5 finished with value: 0.7844933920704846 and parameters: {'n_estimators': 108, 'learning_rate': 0.015590823832508535, 'max_depth': 5}. Best is trial 0 with value: 0.894625550660793.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.164259 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:10:20,953] Trial 6 finished with value: 0.7245814977973568 and parameters: {'n_estimators': 158, 'learning_rate': 0.0027628919884742638, 'max_depth': 4}. Best is trial 0 with value: 0.894625550660793.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.176238 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:10:30,795] Trial 7 finished with value: 0.761409691629956 and parameters: {'n_estimators': 263, 'learning_rate': 0.00039197494251206076, 'max_depth': 8}. Best is trial 0 with value: 0.894625550660793.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.163246 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:10:37,309] Trial 8 finished with value: 0.8019383259911894 and parameters: {'n_estimators': 174, 'learning_rate': 0.005401733901986217, 'max_depth': 10}. Best is trial 0 with value: 0.894625550660793.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.157143 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:10:38,594] Trial 9 finished with value: 0.7275770925110132 and parameters: {'n_estimators': 53, 'learning_rate': 0.01016242975861308, 'max_depth': 4}. Best is trial 0 with value: 0.894625550660793.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.174893 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:10:40,497] Trial 10 finished with value: 0.85568281938326 and parameters: {'n_estimators': 55, 'learning_rate': 0.0800174672579352, 'max_depth': 6}. Best is trial 0 with value: 0.894625550660793.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.271685 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:10:42,827] Trial 11 finished with value: 0.8630837004405286 and parameters: {'n_estimators': 52, 'learning_rate': 0.0971683766759935, 'max_depth': 6}. Best is trial 0 with value: 0.894625550660793.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.175552 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:10:45,559] Trial 12 finished with value: 0.8903964757709251 and parameters: {'n_estimators': 102, 'learning_rate': 0.08240971983723529, 'max_depth': 7}. Best is trial 0 with value: 0.894625550660793.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.161063 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:10:49,305] Trial 13 finished with value: 0.8562114537444934 and parameters: {'n_estimators': 113, 'learning_rate': 0.030855447454094395, 'max_depth': 8}. Best is trial 0 with value: 0.894625550660793.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.155216 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:10:53,530] Trial 14 finished with value: 0.8704845814977974 and parameters: {'n_estimators': 140, 'learning_rate': 0.03495274119168277, 'max_depth': 7}. Best is trial 0 with value: 0.894625550660793.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.283214 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:10:57,422] Trial 15 finished with value: 0.7640528634361233 and parameters: {'n_estimators': 87, 'learning_rate': 0.0010460887449722698, 'max_depth': 9}. Best is trial 0 with value: 0.894625550660793.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.156180 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:10:59,586] Trial 16 finished with value: 0.8355947136563877 and parameters: {'n_estimators': 204, 'learning_rate': 0.03471431180722403, 'max_depth': 3}. Best is trial 0 with value: 0.894625550660793.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.176530 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:11:04,104] Trial 17 finished with value: 0.8359471365638766 and parameters: {'n_estimators': 132, 'learning_rate': 0.021809355659859583, 'max_depth': 7}. Best is trial 0 with value: 0.894625550660793.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.160239 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:11:07,809] Trial 18 finished with value: 0.8771806167400881 and parameters: {'n_estimators': 86, 'learning_rate': 0.05254744796485662, 'max_depth': 9}. Best is trial 0 with value: 0.894625550660793.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.171788 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:11:15,420] Trial 19 finished with value: 0.852511013215859 and parameters: {'n_estimators': 213, 'learning_rate': 0.012349301938496097, 'max_depth': 10}. Best is trial 0 with value: 0.894625550660793.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.169150 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:11:17,239] Trial 20 finished with value: 0.8304845814977974 and parameters: {'n_estimators': 70, 'learning_rate': 0.05274701313843077, 'max_depth': 5}. Best is trial 0 with value: 0.894625550660793.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.161534 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:11:22,532] Trial 21 finished with value: 0.9018502202643172 and parameters: {'n_estimators': 83, 'learning_rate': 0.09948996391475523, 'max_depth': 9}. Best is trial 21 with value: 0.9018502202643172.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.189702 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:11:26,718] Trial 22 finished with value: 0.9124229074889868 and parameters: {'n_estimators': 124, 'learning_rate': 0.09781264765632024, 'max_depth': 9}. Best is trial 22 with value: 0.9124229074889868.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.164944 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:11:30,251] Trial 23 finished with value: 0.9131277533039648 and parameters: {'n_estimators': 130, 'learning_rate': 0.09521502310895262, 'max_depth': 9}. Best is trial 23 with value: 0.9131277533039648.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.157219 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:11:34,989] Trial 24 finished with value: 0.8842290748898678 and parameters: {'n_estimators': 126, 'learning_rate': 0.04684395993407276, 'max_depth': 9}. Best is trial 23 with value: 0.9131277533039648.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.178146 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:11:39,978] Trial 25 finished with value: 0.8537444933920705 and parameters: {'n_estimators': 152, 'learning_rate': 0.022098510129461565, 'max_depth': 8}. Best is trial 23 with value: 0.9131277533039648.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.162061 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:11:43,650] Trial 26 finished with value: 0.8858149779735682 and parameters: {'n_estimators': 118, 'learning_rate': 0.05122530120215901, 'max_depth': 9}. Best is trial 23 with value: 0.9131277533039648.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.159167 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:11:49,350] Trial 27 finished with value: 0.8491629955947136 and parameters: {'n_estimators': 147, 'learning_rate': 0.02079178452477557, 'max_depth': 8}. Best is trial 23 with value: 0.9131277533039648.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.159049 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:11:55,394] Trial 28 finished with value: 0.771806167400881 and parameters: {'n_estimators': 171, 'learning_rate': 0.0009715613752755998, 'max_depth': 9}. Best is trial 23 with value: 0.9131277533039648.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.156822 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-10-01 05:11:58,421] Trial 29 finished with value: 0.8960352422907489 and parameters: {'n_estimators': 77, 'learning_rate': 0.09188410872195027, 'max_depth': 10}. Best is trial 23 with value: 0.9131277533039648.


[LightGBM] [Info] Number of positive: 12579, number of negative: 12579
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.161687 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84072
[LightGBM] [Info] Number of data points in the train set: 25158, number of used features: 968
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




🏃 View run LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-35-92-223-149.us-west-2.compute.amazonaws.com:8000/#/experiments/543202711501021658/runs/f9df0b7d934345f2ba3694fd5d930677
🧪 View experiment at: http://ec2-35-92-223-149.us-west-2.compute.amazonaws.com:8000/#/experiments/543202711501021658
