In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from lightgbm import LGBMClassifier
import joblib

# Load data
df = pd.read_csv("../data/Loan_default.csv")

# Separate features and target.
# LoanID is removed together with the label so it is not fed to the model.
X = df.drop(columns=["Default", "LoanID"])
y = df["Default"]

# Column types.
# "number" matches every numeric width (int32, float32, ...) instead of only
# int64/float64. A column matched by neither selector below is silently
# discarded, because ColumnTransformer defaults to remainder="drop".
num_cols = X.select_dtypes(include="number").columns.tolist()
cat_cols = X.select_dtypes(include=["object", "category"]).columns.tolist()

# Preprocessing: standardise numeric features, one-hot encode categorical ones.
# handle_unknown="ignore" lets the encoder accept categories at predict time
# that were not present when it was fitted.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), num_cols),
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols)
    ]
)

# Train-test split (80/20), stratified on the target so both splits keep the
# same class proportions; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# FINAL LightGBM model (best balance of speed + accuracy)
# The preprocessor is a pipeline step, so it is fitted together with the
# classifier on whatever data is passed to .fit() below.
lgbm_clf = Pipeline(steps=[
    ("preprocess", preprocessor),
    ("model", LGBMClassifier(
        n_estimators=300,
        learning_rate=0.05,
        max_depth=-1,  # no explicit depth limit; tree size is capped by num_leaves
        num_leaves=31,
        subsample=0.8,
        # LightGBM applies row subsampling only when the bagging frequency is
        # positive. With the default subsample_freq=0 the subsample=0.8 above
        # is ignored, so resample the rows at every boosting iteration.
        subsample_freq=1,
        colsample_bytree=0.8,
        class_weight="balanced",  # reweight classes to offset the imbalanced target
        random_state=42
    ))
])

# Train
lgbm_clf.fit(X_train, y_train)

print("Final LightGBM model trained successfully")


[LightGBM] [Info] Number of positive: 23722, number of negative: 180555
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.011760 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1330
[LightGBM] [Info] Number of data points in the train set: 204277, number of used features: 31
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Start training from score 0.000000
Final LightGBM model trained successfully


In [3]:
import joblib
import os

# Destination of the serialized pipeline, relative to the notebook's working directory
model_path = "../models/lightgbm_model.pkl"

# Make sure the target folder exists; exist_ok avoids an error on re-runs
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# Write the fitted pipeline object to disk as a single file
joblib.dump(lgbm_clf, model_path)

print("Model saved successfully at models/lightgbm_model.pkl")


Model saved successfully at models/lightgbm_model.pkl
