In [1]:
import dagshub
import mlflow
mlflow.set_tracking_uri("https://dagshub.com/aryan0147/Capstone-Project-2.mlflow")

dagshub.init(repo_owner='aryan0147', repo_name='Capstone-Project-2', mlflow=True)

In [2]:
# Set or create an experiment
mlflow.set_experiment("ML-Algos")

2025/02/10 19:51:10 INFO mlflow.tracking.fluent: Experiment with name 'ML-Algos' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/d43192cc72bb48deb03d07938721755b', creation_time=1739197271560, experiment_id='12', last_update_time=1739197271560, lifecycle_stage='active', name='ML-Algos', tags={}>

In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
from lightgbm import LGBMClassifier
import mlflow
import mlflow.sklearn
import optuna

In [6]:
df = pd.read_csv('../reddit_preprocessing.csv').dropna()
df.shape

(36662, 2)

In [7]:
import mlflow
import optuna
import numpy as np
import pandas as pd
from lightgbm import LGBMClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score, classification_report

# Step 1: Remap the class labels from [-1, 0, 1] to [2, 0, 1]
df['category'] = df['category'].map({-1: 2, 0: 0, 1: 1})

# Step 2: Remove rows where the target labels (category) are NaN
df = df.dropna(subset=['category'])

# Step 3: TF-IDF vectorization
ngram_range = (1, 3)  # Trigram
max_features = 10000  # Max features for TF-IDF
vectorizer = TfidfVectorizer(ngram_range=ngram_range, max_features=max_features)

# **Train-test split happens BEFORE any resampling or class balancing**
X_train, X_test, y_train, y_test = train_test_split(
    df['clean_comment'], df['category'], test_size=0.2, random_state=42, stratify=df['category']
)

# Step 4: Fit TF-IDF on training data only
X_train_vec = vectorizer.fit_transform(X_train)  # Fit on training data
X_test_vec = vectorizer.transform(X_test)  # Transform test data without refitting

# Step 5: Compute class weights instead of SMOTE
class_weights = compute_class_weight(class_weight="balanced", classes=np.unique(y_train), y=y_train)
class_weight_dict = {cls: weight for cls, weight in zip(np.unique(y_train), class_weights)}

# Function to log results in MLflow
def log_mlflow(model_name, model, X_train, X_test, y_train, y_test):
    with mlflow.start_run():
        mlflow.set_tag("mlflow.runName", f"{model_name}_ClassWeights_TFIDF_Trigrams")
        mlflow.set_tag("experiment_type", "algorithm_comparison")
        mlflow.log_param("algo_name", model_name)

        # Train model
        model.fit(X_train, y_train, sample_weight=[class_weight_dict[y] for y in y_train])
        y_pred = model.predict(X_test)

        # Log accuracy
        accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", accuracy)

        # Log classification report
        classification_rep = classification_report(y_test, y_pred, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        # Log the model
        mlflow.sklearn.log_model(model, f"{model_name}_model")

# Step 6: Optuna objective function for LightGBM
def objective_lightgbm(trial):
    n_estimators = trial.suggest_int('n_estimators', 50, 300)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)
    max_depth = trial.suggest_int('max_depth', 3, 10)

    model = LGBMClassifier(
        n_estimators=n_estimators, learning_rate=learning_rate, max_depth=max_depth, 
        class_weight=class_weight_dict, random_state=42
    )

    return accuracy_score(y_test, model.fit(X_train_vec, y_train).predict(X_test_vec))

# Step 7: Run Optuna for LightGBM, log the best model only
def run_optuna_experiment():
    study = optuna.create_study(direction="maximize")
    study.optimize(objective_lightgbm, n_trials=30)

    # Get the best parameters and log only the best model
    best_params = study.best_params
    best_model = LGBMClassifier(
        n_estimators=best_params['n_estimators'], learning_rate=best_params['learning_rate'],
        max_depth=best_params['max_depth'], class_weight=class_weight_dict, random_state=42
    )

    # Log the best model with MLflow
    log_mlflow("LightGBM", best_model, X_train_vec, X_test_vec, y_train, y_test)

# Run the experiment for LightGBM
run_optuna_experiment()


[I 2025-02-10 19:51:56,032] A new study created in memory with name: no-name-2c880757-ebd8-4dbc-8c44-3ede5f067f26


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.100378 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:51:58,744] Trial 0 finished with value: 0.565525705713896 and parameters: {'n_estimators': 158, 'learning_rate': 0.004523665305133133, 'max_depth': 3}. Best is trial 0 with value: 0.565525705713896.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.099380 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:52:02,489] Trial 1 finished with value: 0.582708304922951 and parameters: {'n_estimators': 248, 'learning_rate': 0.00429956614954293, 'max_depth': 3}. Best is trial 1 with value: 0.582708304922951.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.099456 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:52:05,492] Trial 2 finished with value: 0.5554343379244511 and parameters: {'n_estimators': 104, 'learning_rate': 0.00028948730343299434, 'max_depth': 5}. Best is trial 1 with value: 0.582708304922951.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.092421 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:52:13,017] Trial 3 finished with value: 0.6960316378017183 and parameters: {'n_estimators': 202, 'learning_rate': 0.014986594041322902, 'max_depth': 7}. Best is trial 3 with value: 0.6960316378017183.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.091189 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:52:19,947] Trial 4 finished with value: 0.6099822719214509 and parameters: {'n_estimators': 139, 'learning_rate': 0.00025278045982934244, 'max_depth': 10}. Best is trial 3 with value: 0.6960316378017183.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.102849 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:52:29,393] Trial 5 finished with value: 0.598527205782081 and parameters: {'n_estimators': 182, 'learning_rate': 0.001077077655041351, 'max_depth': 8}. Best is trial 3 with value: 0.6960316378017183.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.096733 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:52:36,716] Trial 6 finished with value: 0.6357561707350334 and parameters: {'n_estimators': 166, 'learning_rate': 0.005591778932096012, 'max_depth': 6}. Best is trial 3 with value: 0.6960316378017183.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.102870 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:52:50,711] Trial 7 finished with value: 0.610255011591436 and parameters: {'n_estimators': 284, 'learning_rate': 0.0017117909302139442, 'max_depth': 7}. Best is trial 3 with value: 0.6960316378017183.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.100765 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:52:54,431] Trial 8 finished with value: 0.5389335878903586 and parameters: {'n_estimators': 134, 'learning_rate': 0.0006607901979886339, 'max_depth': 4}. Best is trial 3 with value: 0.6960316378017183.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.107210 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:52:58,421] Trial 9 finished with value: 0.597708986772126 and parameters: {'n_estimators': 82, 'learning_rate': 0.007358372681483365, 'max_depth': 6}. Best is trial 3 with value: 0.6960316378017183.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.106439 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:53:07,660] Trial 10 finished with value: 0.8058093549706805 and parameters: {'n_estimators': 225, 'learning_rate': 0.06631891259971366, 'max_depth': 9}. Best is trial 10 with value: 0.8058093549706805.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.105133 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:53:16,414] Trial 11 finished with value: 0.800490931405973 and parameters: {'n_estimators': 210, 'learning_rate': 0.06508737783124173, 'max_depth': 9}. Best is trial 10 with value: 0.8058093549706805.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.095136 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:53:25,862] Trial 12 finished with value: 0.8304922951043229 and parameters: {'n_estimators': 227, 'learning_rate': 0.09834849616801479, 'max_depth': 10}. Best is trial 12 with value: 0.8304922951043229.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.105290 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:53:36,194] Trial 13 finished with value: 0.8303559252693304 and parameters: {'n_estimators': 242, 'learning_rate': 0.09002942387006788, 'max_depth': 10}. Best is trial 12 with value: 0.8304922951043229.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.112755 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:53:49,144] Trial 14 finished with value: 0.7717168962225556 and parameters: {'n_estimators': 278, 'learning_rate': 0.026859613146240312, 'max_depth': 10}. Best is trial 12 with value: 0.8304922951043229.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.097816 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:53:58,772] Trial 15 finished with value: 0.833492431474158 and parameters: {'n_estimators': 255, 'learning_rate': 0.09959343454134817, 'max_depth': 9}. Best is trial 15 with value: 0.833492431474158.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.101961 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:54:07,164] Trial 16 finished with value: 0.7553525160234557 and parameters: {'n_estimators': 259, 'learning_rate': 0.026779338426869883, 'max_depth': 8}. Best is trial 15 with value: 0.833492431474158.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.103081 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:54:16,809] Trial 17 finished with value: 0.7777171689622255 and parameters: {'n_estimators': 297, 'learning_rate': 0.030761942226088326, 'max_depth': 9}. Best is trial 15 with value: 0.833492431474158.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.091009 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:54:25,258] Trial 18 finished with value: 0.6960316378017183 and parameters: {'n_estimators': 221, 'learning_rate': 0.011900817729696643, 'max_depth': 8}. Best is trial 15 with value: 0.833492431474158.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.089309 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:54:31,852] Trial 19 finished with value: 0.7693986090276831 and parameters: {'n_estimators': 193, 'learning_rate': 0.04256797547588724, 'max_depth': 9}. Best is trial 15 with value: 0.833492431474158.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.093250 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:54:34,628] Trial 20 finished with value: 0.6466657575344333 and parameters: {'n_estimators': 50, 'learning_rate': 0.013077868636398744, 'max_depth': 10}. Best is trial 15 with value: 0.833492431474158.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.101217 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:54:43,390] Trial 21 finished with value: 0.832401472794218 and parameters: {'n_estimators': 244, 'learning_rate': 0.08842905425707494, 'max_depth': 10}. Best is trial 15 with value: 0.833492431474158.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.099921 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:54:52,901] Trial 22 finished with value: 0.8284467475794355 and parameters: {'n_estimators': 264, 'learning_rate': 0.07999140289486636, 'max_depth': 10}. Best is trial 15 with value: 0.833492431474158.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.102553 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:55:00,561] Trial 23 finished with value: 0.8302195554343379 and parameters: {'n_estimators': 242, 'learning_rate': 0.09825646316929018, 'max_depth': 9}. Best is trial 15 with value: 0.833492431474158.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.115869 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:55:08,522] Trial 24 finished with value: 0.775944361107323 and parameters: {'n_estimators': 234, 'learning_rate': 0.04195027213271016, 'max_depth': 8}. Best is trial 15 with value: 0.833492431474158.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.094739 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:55:23,264] Trial 25 finished with value: 0.6099822719214509 and parameters: {'n_estimators': 269, 'learning_rate': 0.0001235369490551048, 'max_depth': 10}. Best is trial 15 with value: 0.833492431474158.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.106108 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:55:32,536] Trial 26 finished with value: 0.781399154507023 and parameters: {'n_estimators': 220, 'learning_rate': 0.04509051973822032, 'max_depth': 9}. Best is trial 15 with value: 0.833492431474158.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.097393 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:55:44,040] Trial 27 finished with value: 0.7290331378699032 and parameters: {'n_estimators': 291, 'learning_rate': 0.018394702135690586, 'max_depth': 7}. Best is trial 15 with value: 0.833492431474158.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.087251 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:55:52,545] Trial 28 finished with value: 0.8058093549706805 and parameters: {'n_estimators': 253, 'learning_rate': 0.052311772263506306, 'max_depth': 10}. Best is trial 15 with value: 0.833492431474158.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.092190 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


[I 2025-02-10 19:56:00,347] Trial 29 finished with value: 0.6586663030137734 and parameters: {'n_estimators': 152, 'learning_rate': 0.00830055313659483, 'max_depth': 8}. Best is trial 15 with value: 0.833492431474158.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.101329 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 130941
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4402
[LightGBM] [Info] Start training from score -1.204413
[LightGBM] [Info] Start training from score -1.425359
[LightGBM] [Info] Start training from score -0.777160




🏃 View run LightGBM_ClassWeights_TFIDF_Trigrams at: https://dagshub.com/aryan0147/Capstone-Project-2.mlflow/#/experiments/12/runs/9700ec73a91e45e59d6afd1e0c6b5a33
🧪 View experiment at: https://dagshub.com/aryan0147/Capstone-Project-2.mlflow/#/experiments/12
