# 🤖 Fraud Detection - Machine Learning Models
This notebook trains 4 ML models and an ensemble model to detect fraudulent transactions.

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load dataset
df = pd.read_csv("enhanced_online_fraud_dataset.csv")
df.head(20)

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,...,is_receiver_zero_before,is_receiver_exact_amount,is_large_txn,org_to_dest_same,sender_is_customer,receiver_is_customer,receiver_is_merchant,risk_combo,hour,is_night
0,1,3,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,0,...,1,0,0,0,1,0,1,0,1,1
1,1,3,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,0,...,1,0,0,0,1,0,1,0,1,1
2,1,4,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,...,1,0,0,0,1,1,0,0,1,1
3,1,1,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,...,0,0,0,0,1,1,0,0,1,1
4,1,3,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,0,...,1,0,0,0,1,0,1,0,1,1
5,1,3,7817.71,C90045638,53860.0,46042.29,M573487274,0.0,0.0,0,...,1,0,0,0,1,0,1,0,1,1
6,1,3,7107.77,C154988899,183195.0,176087.23,M408069119,0.0,0.0,0,...,1,0,0,0,1,0,1,0,1,1
7,1,3,7861.64,C1912850431,176087.23,168225.59,M633326333,0.0,0.0,0,...,1,0,0,0,1,0,1,0,1,1
8,1,3,4024.36,C1265012928,2671.0,0.0,M1176932104,0.0,0.0,0,...,1,0,0,0,1,0,1,0,1,1
9,1,2,5337.77,C712410124,41720.0,36382.23,C195600860,41898.0,40348.79,0,...,0,0,0,0,1,1,0,0,1,1


## 📥 Step 1: Load and Preprocess Data

In [2]:

# Define features and target
X = df.drop(columns=['isFraud', 'nameOrig', 'nameDest', 'step'])
y = df['isFraud']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Standardize
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## 📊 Step 2: Train ML Models

In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

# Initialize models
lr = LogisticRegression(class_weight='balanced', max_iter=1000)
rf = RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42)
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
lgbm = LGBMClassifier(class_weight='balanced')

# Fit models
lr.fit(X_train_scaled, y_train)
rf.fit(X_train, y_train)
xgb.fit(X_train, y_train)
lgbm.fit(X_train, y_train)

[LightGBM] [Info] Number of positive: 6570, number of negative: 5083526
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.275811 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2597
[LightGBM] [Info] Number of data points in the train set: 5090096, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


## 📈 Step 3: Evaluate ML Models

In [4]:
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

models = {'Logistic Regression': lr, 'Random Forest': rf, 'XGBoost': xgb, 'LightGBM': lgbm}

for name, model in models.items():
    y_pred = model.predict(X_test_scaled if name == 'Logistic Regression' else X_test)
    print(f"\n📌 {name}")
    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))
    print("ROC AUC:", roc_auc_score(y_test, model.predict_proba(X_test_scaled if name == 'Logistic Regression' else X_test)[:, 1]))


📌 Logistic Regression
[[1217260   53621]
 [     43    1600]]
              precision    recall  f1-score   support

           0       1.00      0.96      0.98   1270881
           1       0.03      0.97      0.06      1643

    accuracy                           0.96   1272524
   macro avg       0.51      0.97      0.52   1272524
weighted avg       1.00      0.96      0.98   1272524

ROC AUC: 0.995587192845495

📌 Random Forest
[[1270881       0]
 [      4    1639]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00   1270881
           1       1.00      1.00      1.00      1643

    accuracy                           1.00   1272524
   macro avg       1.00      1.00      1.00   1272524
weighted avg       1.00      1.00      1.00   1272524

ROC AUC: 0.9987822595782435

📌 XGBoost
[[1270881       0]
 [      4    1639]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00   1270881
           1  

## Step 4: ML Ensemble - Voting Classifier

In [5]:
from sklearn.ensemble import VotingClassifier

# Voting classifier using best models
ensemble = VotingClassifier(
    estimators=[('rf', rf), ('xgb', xgb), ('lgbm', lgbm)],
    voting='soft'
)
ensemble.fit(X_train, y_train)

y_pred_ens = ensemble.predict(X_test)
print("📊 Ensemble Model Performance")
print(confusion_matrix(y_test, y_pred_ens))
print(classification_report(y_test, y_pred_ens))
print("ROC AUC:", roc_auc_score(y_test, ensemble.predict_proba(X_test)[:, 1]))

[LightGBM] [Info] Number of positive: 6570, number of negative: 5083526
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.310830 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2597
[LightGBM] [Info] Number of data points in the train set: 5090096, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
📊 Ensemble Model Performance
[[1270881       0]
 [      4    1639]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00   1270881
           1       1.00      1.00      1.00      1643

    accuracy                           1.00   1272524
   macro avg       1.00      1.00      1.00   1272524
weighted avg       1.00      1.00      1.00   1272524

ROC AUC: 0.9996766729338169


In [19]:
joblib.dump(X.columns.tolist(), "ml_features_used.pkl")


['ml_features_used.pkl']

In [20]:
joblib.dump(X_train.columns.tolist(), "ml_train_features_used.pkl")

['ml_train_features_used.pkl']

In [6]:
import joblib

# Save the ensemble model
joblib.dump(ensemble, 'fraud_ml_ensemble.pkl')
print("✅ ML Ensemble model saved as 'fraud_ml_ensemble.pkl'")

# Save the scaler too
joblib.dump(scaler, 'fraud_scaler.pkl')
print("✅ Scaler saved as 'fraud_scaler.pkl'")


✅ ML Ensemble model saved as 'fraud_ml_ensemble.pkl'
✅ Scaler saved as 'fraud_scaler.pkl'


In [26]:
# Load model and scaler
ensemble = joblib.load('fraud_ml_ensemble.pkl')
scaler = joblib.load('fraud_scaler.pkl')

# Example prediction
sample = X_test.iloc[:10]
sample_scaled = scaler.transform(sample)
probs = ensemble.predict_proba(sample_scaled)

for i, p in enumerate(probs):
    print(f"Sample {i+1} - Fraud Probability: {p[1]:.4f}")
    print("Predicted Class:", "Fraud" if p[1] > 0.5 else "Not Fraud")


Sample 1 - Fraud Probability: 0.0200
Predicted Class: Not Fraud
Sample 2 - Fraud Probability: 0.0935
Predicted Class: Not Fraud
Sample 3 - Fraud Probability: 0.4477
Predicted Class: Not Fraud
Sample 4 - Fraud Probability: 0.5272
Predicted Class: Fraud
Sample 5 - Fraud Probability: 0.8730
Predicted Class: Fraud
Sample 6 - Fraud Probability: 0.5339
Predicted Class: Fraud
Sample 7 - Fraud Probability: 0.5372
Predicted Class: Fraud
Sample 8 - Fraud Probability: 0.9156
Predicted Class: Fraud
Sample 9 - Fraud Probability: 0.5339
Predicted Class: Fraud
Sample 10 - Fraud Probability: 0.4474
Predicted Class: Not Fraud




# PREDICTION EXAMPLE

In [37]:
import joblib
import pandas as pd

# Load saved model and scaler
model = joblib.load("fraud_ml_ensemble.pkl")
scaler = joblib.load("fraud_scaler.pkl")
features = joblib.load("ml_features_used.pkl")


In [36]:
# Use the same columns used in training (23 features)
X_columns = df.drop(columns=['isFraud', 'nameOrig', 'nameDest', 'step']).columns.tolist()

# Get 5 sample rows for prediction
sample = df[X_columns].sample(20, random_state=42)

# Scale using saved scaler
sample_scaled = scaler.transform(sample)


In [35]:
# Predict probability and label
probs = model.predict_proba(sample_scaled)
labels = model.predict(sample_scaled)

# Output predictions
for i, (p, lbl) in enumerate(zip(probs, labels)):
    print(f"Sample {i+1}: Fraud Probability = {p[1]:.4f} → Predicted Label: {'FRAUD' if lbl == 1 else 'LEGIT'}")
    #print(sample.iloc[i])
    

Sample 1: Fraud Probability = 0.0200 → Predicted Label: LEGIT
Sample 2: Fraud Probability = 0.4474 → Predicted Label: LEGIT
Sample 3: Fraud Probability = 0.0600 → Predicted Label: LEGIT
Sample 4: Fraud Probability = 0.0301 → Predicted Label: LEGIT
Sample 5: Fraud Probability = 0.0935 → Predicted Label: LEGIT
Sample 6: Fraud Probability = 0.7917 → Predicted Label: FRAUD
Sample 7: Fraud Probability = 0.8764 → Predicted Label: FRAUD
Sample 8: Fraud Probability = 0.8730 → Predicted Label: FRAUD
Sample 9: Fraud Probability = 0.7917 → Predicted Label: FRAUD
Sample 10: Fraud Probability = 0.8663 → Predicted Label: FRAUD
Sample 11: Fraud Probability = 0.5342 → Predicted Label: FRAUD
Sample 12: Fraud Probability = 0.0767 → Predicted Label: LEGIT
Sample 13: Fraud Probability = 0.5372 → Predicted Label: FRAUD
Sample 14: Fraud Probability = 0.5304 → Predicted Label: FRAUD
Sample 15: Fraud Probability = 0.7917 → Predicted Label: FRAUD
Sample 16: Fraud Probability = 0.4474 → Predicted Label: LEGIT
S

