Step 1: Import Required Libraries, Load the Data, and Prepare the Train/Test Split

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import StackingClassifier, GradientBoostingClassifier
import lightgbm as lgb
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

# Load dataset
df = pd.read_csv("dataset.csv")

# The last column is assumed to be the target variable.
# Rows with a missing target are dropped BEFORE features and target are
# separated, so X and y are built exactly once, from the cleaned frame.
df = df.dropna(subset=[df.columns[-1]])
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Binning the target variable into discrete classes
num_bins = 5  # Number of equal-width bins to create

# Equal-width binning needs a non-degenerate range. An empty target (min/max
# are NaN) or a constant target would otherwise fail inside np.linspace/pd.cut
# with a much less helpful message, so fail early and say why.
y_min, y_max = y.min(), y.max()
if not y_min < y_max:
    raise ValueError(
        f"Cannot bin the target into {num_bins} equal-width classes: "
        f"target range is [{y_min}, {y_max}]"
    )

bins = np.linspace(y_min, y_max, num_bins + 1)  # num_bins + 1 equally spaced edges
labels = range(num_bins)  # Integer class labels 0 .. num_bins - 1

# Apply binning; include_lowest=True keeps the minimum value inside the first bin
# NOTE(review): equal-width bins can put nearly all rows of a skewed target
# into one class - inspect y.value_counts() before trusting accuracy numbers.
y = pd.cut(y, bins=bins, labels=labels, include_lowest=True)

# Replace any character other than letters, digits, or underscores in the
# feature names with underscores. This is done once, on a copy, before the
# split: X_train and X_test then share identical sanitized names, while X
# itself keeps the original column names.
X_model = X.copy()
X_model.columns = X_model.columns.str.replace('[^a-zA-Z0-9_]', '_', regex=True)

# Train-test split (80/20, fixed seed for reproducibility)
# NOTE(review): stratify=y is not passed here because it raises when a class
# has fewer than two members; consider it once class counts are confirmed.
X_train, X_test, y_train, y_test = train_test_split(X_model, y, test_size=0.2, random_state=42)



Step 2: Define Base Models

In [2]:
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

# Base learners for the stack: each estimator is constructed on its own,
# then paired with the name it will be registered under.
lightgbm_clf = lgb.LGBMClassifier(n_estimators=200, learning_rate=0.05, random_state=42)
random_forest_clf = RandomForestClassifier(n_estimators=200, random_state=42)
xgboost_clf = XGBClassifier(n_estimators=200, learning_rate=0.05, random_state=42)

# List of (name, estimator) tuples, in the same order as constructed above
base_models = list(zip(
    ("lightgbm", "random_forest", "xgboost"),
    (lightgbm_clf, random_forest_clf, xgboost_clf),
))



Step 3: Create, Train, and Evaluate the Stacking Classifier with Gradient Boosting as Meta Model


In [3]:
# Meta model (Final Estimator): combines the base models' predictions
meta_model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, random_state=42)

# Stacking Model: cv=5 sets the cross-validation used to produce the
# base-model predictions on which the final estimator is trained
stacked_model = StackingClassifier(estimators=base_models, final_estimator=meta_model, cv=5)

# Train the stacked model
stacked_model.fit(X_train, y_train)

# Predictions on the held-out test set
stacked_pred = stacked_model.predict(X_test)

# Accuracy
stacked_acc = accuracy_score(y_test, stacked_pred)
print(f"🔥 Advanced Stacking Model Accuracy: {stacked_acc:.4f}")

# Reference point: accuracy of always predicting the most frequent training
# class. When one class dominates the binned target, raw accuracy can sit
# near 1.0 without the model having learned anything, so the score above is
# only meaningful relative to this baseline.
majority_class = y_train.mode().iloc[0]
baseline_acc = float((y_test == majority_class).mean())
print(f"Majority-class baseline accuracy: {baseline_acc:.4f}")

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.020225 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4838
[LightGBM] [Info] Number of data points in the train set: 342100, number of used features: 24
[LightGBM] [Info] Start training from score -0.000015
[LightGBM] [Info] Start training from score -11.356564
[LightGBM] [Info] Start training from score -12.742858




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.028740 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4843
[LightGBM] [Info] Number of data points in the train set: 273680, number of used features: 24
[LightGBM] [Info] Start training from score -0.000015
[LightGBM] [Info] Start training from score -11.421103
[LightGBM] [Info] Start training from score -12.519715
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.023818 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4835
[LightGBM] [Info] Number of data points in the train set: 273680, number of used features: 24
[LightGBM] [Info] Start training from score -0.000015
[LightGBM] [Info] Start training from score -11.421103
[LightGBM] [Info] Start training from score -12.519715
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.0275



🔥 Advanced Stacking Model Accuracy: 0.9999


Step 4: Save the model

In [4]:
import joblib
# Persist the fitted stacking model to disk with joblib, then confirm.
model_path = "stacked_fraud_model.pkl"
joblib.dump(stacked_model, model_path)
print("✅ Model saved successfully!")

✅ Model saved successfully!
