#  Online Fraud Detection - Deep Learning Models
This notebook trains 4 deep learning models and an ensemble model to detect online fraudulent transactions.

##  Step 1: Load and Preprocess Data

In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

# Load dataset
df = pd.read_csv("enhanced_online_fraud_dataset.csv")
df.head()



Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,...,is_receiver_zero_before,is_receiver_exact_amount,is_large_txn,org_to_dest_same,sender_is_customer,receiver_is_customer,receiver_is_merchant,risk_combo,hour,is_night
0,1,3,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,0,...,1,0,0,0,1,0,1,0,1,1
1,1,3,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,0,...,1,0,0,0,1,0,1,0,1,1
2,1,4,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,...,1,0,0,0,1,1,0,0,1,1
3,1,1,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,...,0,0,0,0,1,1,0,0,1,1
4,1,3,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,0,...,1,0,0,0,1,0,1,0,1,1


In [None]:
# Prepare features and labels
X = df.drop(columns=['isFraud', 'nameOrig', 'nameDest', 'step'])
y = df['isFraud']

# Split and scale
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert to tensors
import tensorflow as tf
X_train_tf = tf.convert_to_tensor(X_train_scaled, dtype=tf.float32)
X_test_tf = tf.convert_to_tensor(X_test_scaled, dtype=tf.float32)
y_train_tf = tf.convert_to_tensor(y_train.values, dtype=tf.float32)
y_test_tf = tf.convert_to_tensor(y_test.values, dtype=tf.float32)

: 

## Step 2: Build Deep Learning Models

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LSTM, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping

input_dim = X_train.shape[1]
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Model 1: Simple MLP
def build_simple_mlp():
    model = Sequential([
        Dense(64, activation='relu', input_shape=(input_dim,)),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Model 2: MLP with Dropout
def build_dropout_mlp():
    model = Sequential([
        Dense(128, activation='relu', input_shape=(input_dim,)),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Model 3: MLP with Batch Normalization
def build_batchnorm_mlp():
    model = Sequential([
        Dense(128, input_shape=(input_dim,)),
        BatchNormalization(),
        tf.keras.layers.ReLU(),
        Dense(64),
        BatchNormalization(),
        tf.keras.layers.ReLU(),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Model 4: BiLSTM (optional sequence modeling)
def build_lstm_model():
    model = Sequential([
        tf.keras.layers.Reshape((input_dim, 1), input_shape=(input_dim,)),
        Bidirectional(LSTM(32)),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

: 

## Step 3: Train All Models

In [None]:
models = {
    'Simple MLP': build_simple_mlp(),
    'Dropout MLP': build_dropout_mlp(),
    'BatchNorm MLP': build_batchnorm_mlp(),
    'BiLSTM': build_lstm_model()
}

history = {}
for name, model in models.items():
    print(f"\n📌 Training: {name}")
    hist = model.fit(X_train_tf, y_train_tf, validation_split=0.2, epochs=5, batch_size=256, callbacks=[early_stop], verbose=1)
    history[name] = hist


📌 Training: Simple MLP
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

📌 Training: Dropout MLP
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

📌 Training: BatchNorm MLP
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

📌 Training: BiLSTM
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


: 

## Step 4: Evaluate Models

In [None]:
from sklearn.metrics import classification_report, roc_auc_score

for name, model in models.items():
    preds = model.predict(X_test_tf)
    preds_bin = (preds > 0.5).astype(int)
    print(f"\n📌 {name}")
    print(classification_report(y_test, preds_bin))
    print("ROC AUC:", roc_auc_score(y_test, preds))


📌 Simple MLP
              precision    recall  f1-score   support

           0       1.00      1.00      1.00   1270881
           1       0.98      0.77      0.86      1643

    accuracy                           1.00   1272524
   macro avg       0.99      0.89      0.93   1272524
weighted avg       1.00      1.00      1.00   1272524

ROC AUC: 0.9980663310120185

📌 Dropout MLP
              precision    recall  f1-score   support

           0       1.00      1.00      1.00   1270881
           1       0.98      0.74      0.84      1643

    accuracy                           1.00   1272524
   macro avg       0.99      0.87      0.92   1272524
weighted avg       1.00      1.00      1.00   1272524

ROC AUC: 0.9969519579552686

📌 BatchNorm MLP
              precision    recall  f1-score   support

           0       1.00      1.00      1.00   1270881
           1       0.98      0.72      0.83      1643

    accuracy                           1.00   1272524
   macro avg       0.99   

: 

## Step 5: DL Ensemble (Soft Averaging)

In [None]:
# Average predictions
ensemble_preds = sum(model.predict(X_test_tf) for model in models.values()) / len(models)
ensemble_bin = (ensemble_preds > 0.5).astype(int)

print("📊 DL Ensemble Performance")
print(classification_report(y_test, ensemble_bin))
print("ROC AUC:", roc_auc_score(y_test, ensemble_preds))

📊 DL Ensemble Performance
              precision    recall  f1-score   support

           0       1.00      1.00      1.00   1270881
           1       0.98      0.75      0.85      1643

    accuracy                           1.00   1272524
   macro avg       0.99      0.87      0.92   1272524
weighted avg       1.00      1.00      1.00   1272524

ROC AUC: 0.9981764661504772


: 

In [None]:
# Save any DL model
models['ensemble_preds'].save('fraud_dl_model.h5')
print("✅ DL model saved as 'fraud_dl_model.h5'")


KeyError: 'ensemble_preds'

: 

In [None]:
from tensorflow.keras.models import load_model

# Load model
dl_model = load_model('fraud_dl_model.h5')

# Predict on test samples
sample = X_test[:20]
sample_scaled = scaler.transform(sample)
dl_preds = dl_model.predict(sample_scaled)

for i, prob in enumerate(dl_preds):
    print(f"Sample {i+1} - Fraud Probability: {prob[0]:.4f}")
    print(f"Sample {i+1} - Fraud Prediction: {'Fraud' if prob[0] > 0.5 else 'Not Fraud'}")


: 

: 

: 

: 