In [1]:
import os
import pandas as pd
import joblib
import time
import json
import lightgbm as lgb

In [2]:
from google.colab import drive

# Mount Google Drive so the project folders below are reachable from Colab.
drive.mount('/content/drive')

# Paths
SPLITS_DIR = "/content/drive/My Drive/FDM Project Files/splits"
LGBM_DIR = "/content/drive/My Drive/FDM Project Files/models/lightgbm"
os.makedirs(LGBM_DIR, exist_ok=True)  # create the model output folder if it is missing

# Load train data
X_train = pd.read_csv(os.path.join(SPLITS_DIR, "X_train.csv"))
# Squeeze only the column axis: a bare .squeeze() collapses every length-1
# axis, so a one-row label file would become a scalar instead of a Series.
y_train = pd.read_csv(os.path.join(SPLITS_DIR, "y_train.csv")).squeeze("columns")

# Fail early, with a readable message, if features and labels are misaligned.
assert len(X_train) == len(y_train), (
    f"X_train has {len(X_train)} rows but y_train has {len(y_train)}"
)

Mounted at /content/drive


In [12]:
# Save column order for evaluation
columns_path = os.path.join(LGBM_DIR, "lgbm_columns.json")
with open(columns_path, "w") as f:
    json.dump(list(X_train.columns), f)

# Define LightGBM model
lgbm = lgb.LGBMClassifier(
    n_estimators=300,
    learning_rate=0.05,
    max_depth=-1,        # <= 0 means no depth limit in LightGBM
    subsample=0.8,
    # Row subsampling (bagging) is only performed when subsample_freq > 0;
    # with the default of 0 the subsample=0.8 setting above is silently ignored.
    subsample_freq=1,
    colsample_bytree=0.8,
    random_state=42
)

In [14]:
# Train and time it
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() is wall-clock time and can jump if the system clock
# is adjusted while training runs.
start_train = time.perf_counter()
lgbm.fit(X_train, y_train)
end_train = time.perf_counter()
train_time = round(end_train - start_train, 3)  # elapsed seconds, millisecond precision

print(f" LightGBM trained successfully in {train_time} seconds")

[LightGBM] [Info] Number of positive: 25008, number of negative: 22670
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005675 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 619
[LightGBM] [Info] Number of data points in the train set: 47678, number of used features: 31
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.524519 -> initscore=0.098153
[LightGBM] [Info] Start training from score 0.098153
 LightGBM trained successfully in 2.244 seconds


In [15]:
# Persist the fitted estimator under the LightGBM model directory
model_path = os.path.join(LGBM_DIR, "lgbm_model.pkl")
joblib.dump(lgbm, model_path)
print(" Model saved at:", model_path)

# Record the measured training time in a JSON file next to the model
meta = dict(Train_Time_sec=train_time)
meta_path = os.path.join(LGBM_DIR, "lgbm_meta.json")
with open(meta_path, "w") as f:
    f.write(json.dumps(meta))
print(" Training metadata saved")

 Model saved at: /content/drive/My Drive/FDM Project Files/models/lightgbm/lgbm_model.pkl
 Training metadata saved
