<a href="https://colab.research.google.com/github/aakarshit28/Fraud-detection-Model/blob/main/Fraud_detection_and_Monitoring_model_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
!pip install pandas scikit-learn torch

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, random_split
import sqlite3
from google.colab import files
import io
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import torch.optim as optim



In [11]:
# Function to upload and load dataset
def load_dataset():
    print("Please upload your CSV file (e.g., indian_fraud_dataset.csv)")
    uploaded = files.upload()  # Opens file picker in Colab
    if not uploaded:
        raise ValueError("No file uploaded.")
    file_name = list(uploaded.keys())[0]
    df = pd.read_csv(io.BytesIO(uploaded[file_name]))
    print(f"Dataset loaded with {len(df)} transactions from {file_name}.")
    return df

try:
    df = load_dataset()
except ValueError as e:
    print(e)
    df = None

if df is not None:
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df['hour'] = df['timestamp'].dt.hour

    df['is_fraud'] = ((df['amount_inr'] > 500000) | (df['location'].isin(['Delhi', 'Bangalore']))).astype(int)
    print("\nFirst 5 rows of the preprocessed data:")
    print(df.head())

Please upload your CSV file (e.g., indian_fraud_dataset.csv)


Saving indian_fraud_dataset (1).csv to indian_fraud_dataset (1) (1).csv
Dataset loaded with 10000 transactions from indian_fraud_dataset (1) (1).csv.

First 5 rows of the preprocessed data:
   transaction_id customer_id    amount_inr           timestamp   location  \
0            8352     Cust080  36325.446078 2025-01-01 00:01:08     Mumbai   
1            3895     Cust052  41121.055974 2025-01-01 00:01:13       Pune   
2            8078     Cust022   4132.508824 2025-01-01 00:01:17     Mumbai   
3            6715     Cust016   2648.136361 2025-01-01 00:01:18    Chennai   
4            9951     Cust078  29384.712669 2025-01-01 00:01:31  Hyderabad   

   device  is_fraud  hour  
0  mobile         0     0  
1  mobile         0     0  
2  tablet         0     0  
3  tablet         0     0  
4  tablet         0     0  


In [13]:
# Supervised Fraud Detector model
class FraudDetector(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 32)
        self.fc2 = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.sigmoid(self.fc2(x))
        return x

# Unsupervised Autoencoder for anomaly detection
class Autoencoder(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_size, 16),
            nn.ReLU(),
            nn.Linear(16, 8)
        )
        self.decoder = nn.Sequential(
            nn.Linear(8, 16),
            nn.ReLU(),
            nn.Linear(16, input_size)
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

In [14]:
if df is not None:
    # Encode categorical features
    loc_encoder = {loc: idx for idx, loc in enumerate(df['location'].unique())}
    dev_encoder = {dev: idx for idx, dev in enumerate(df['device'].unique())}
    df['loc_idx'] = df['location'].map(loc_encoder)
    df['dev_idx'] = df['device'].map(dev_encoder)

    # Normalize the amount column
    mean_amount = df['amount_inr'].mean()
    std_amount = df['amount_inr'].std()
    df['amount_norm'] = (df['amount_inr'] - mean_amount) / std_amount

    # Select features and labels for training
    features = ['amount_norm', 'hour', 'loc_idx', 'dev_idx']
    X = df[features].values
    y = df['is_fraud'].values

    # Split data for supervised model
    X_train_sup, X_test_sup, y_train_sup, y_test_sup = train_test_split(X, y, test_size=0.2, random_state=42)
    train_sup_dataset = TensorDataset(torch.tensor(X_train_sup, dtype=torch.float32), torch.tensor(y_train_sup, dtype=torch.float32).unsqueeze(1))
    train_sup_loader = DataLoader(train_sup_dataset, batch_size=64, shuffle=True)

    # Training the supervised model
    sup_model = FraudDetector(input_size=len(features))
    criterion_sup = nn.BCELoss()
    optimizer_sup = optim.Adam(sup_model.parameters(), lr=0.001)

    print("\nStarting Supervised Model Training...")
    for epoch in range(10):
        for inputs, labels in train_sup_loader:
            optimizer_sup.zero_grad()
            outputs = sup_model(inputs)
            loss = criterion_sup(outputs, labels)
            loss.backward()
            optimizer_sup.step()
        print(f"Epoch {epoch+1}/10, Supervised Loss: {loss.item():.4f}")

    torch.save(sup_model.state_dict(), 'supervised_fraud_model.pth')
    print("Supervised model saved to 'supervised_fraud_model.pth'")

    # Training the autoencoder on normal transactions
    normal_transactions = df[df['is_fraud'] == 0]
    X_normal = normal_transactions[features].values
    train_auto_dataset = TensorDataset(torch.tensor(X_normal, dtype=torch.float32))
    train_auto_loader = DataLoader(train_auto_dataset, batch_size=64, shuffle=True)

    autoencoder_model = Autoencoder(input_size=len(features))
    criterion_auto = nn.MSELoss()
    optimizer_auto = optim.Adam(autoencoder_model.parameters(), lr=0.001)

    print("\nStarting Autoencoder Model Training...")
    for epoch in range(20):
        for inputs, in train_auto_loader:
            optimizer_auto.zero_grad()
            outputs = autoencoder_model(inputs)
            loss = criterion_auto(outputs, inputs)
            loss.backward()
            optimizer_auto.step()
        print(f"Epoch {epoch+1}/20, Autoencoder Loss: {loss.item():.4f}")

    torch.save(autoencoder_model.state_dict(), 'autoencoder_model.pth')
    print("Autoencoder model saved to 'autoencoder_model.pth'")


Starting Supervised Model Training...
Epoch 1/10, Supervised Loss: 0.6486
Epoch 2/10, Supervised Loss: 0.5400
Epoch 3/10, Supervised Loss: 0.4726
Epoch 4/10, Supervised Loss: 0.4739
Epoch 5/10, Supervised Loss: 0.5233
Epoch 6/10, Supervised Loss: 0.4828
Epoch 7/10, Supervised Loss: 0.3825
Epoch 8/10, Supervised Loss: 0.4151
Epoch 9/10, Supervised Loss: 0.3843
Epoch 10/10, Supervised Loss: 0.4142
Supervised model saved to 'supervised_fraud_model.pth'

Starting Autoencoder Model Training...
Epoch 1/20, Autoencoder Loss: 20.6203
Epoch 2/20, Autoencoder Loss: 0.5433
Epoch 3/20, Autoencoder Loss: 0.2303
Epoch 4/20, Autoencoder Loss: 0.2328
Epoch 5/20, Autoencoder Loss: 0.0648
Epoch 6/20, Autoencoder Loss: 0.0534
Epoch 7/20, Autoencoder Loss: 0.0531
Epoch 8/20, Autoencoder Loss: 0.0232
Epoch 9/20, Autoencoder Loss: 0.0025
Epoch 10/20, Autoencoder Loss: 0.0019
Epoch 11/20, Autoencoder Loss: 0.0014
Epoch 12/20, Autoencoder Loss: 0.0005
Epoch 13/20, Autoencoder Loss: 0.0006
Epoch 14/20, Autoen

In [15]:

def load_models(sup_model_path='supervised_fraud_model.pth', auto_model_path='autoencoder_model.pth', input_size=4):
    try:
        supervised_model = FraudDetector(input_size)
        supervised_model.load_state_dict(torch.load(sup_model_path))
        supervised_model.eval()

        autoencoder = Autoencoder(input_size)
        autoencoder.load_state_dict(torch.load(auto_model_path))
        autoencoder.eval()

        return supervised_model, autoencoder
    except FileNotFoundError:
        print("Error: Model files not found. Please run the training cell first to create them.")
        return None, None

# Simulate real-time fraud detection for a single transaction
def simulate_real_time(sup_model, autoencoder, trans, mean_amount, std_amount, loc_encoder, dev_encoder, anomaly_threshold=0.5, supervised_threshold=0.5):
    hour = pd.to_datetime(trans['timestamp']).hour
    loc_idx = loc_encoder.get(trans['location'], -1)
    dev_idx = dev_encoder.get(trans['device'], -1)
    amount_norm = (trans['amount_inr'] - mean_amount) / std_amount
    features = np.array([amount_norm, hour, loc_idx, dev_idx])
    features_tensor = torch.tensor(features, dtype=torch.float32).unsqueeze(0)

    with torch.no_grad():
        sup_pred = sup_model(features_tensor).item()
        recon = autoencoder(features_tensor)
        loss = torch.mean((recon - features_tensor)**2).item()

    is_anomaly = loss > anomaly_threshold
    is_fraud = sup_pred > supervised_threshold or is_anomaly

    return is_fraud, sup_pred, is_anomaly, loss

# Process dataset for real-time fraud monitoring
def real_time_monitoring(df, supervised_model, autoencoder, mean_amount, std_amount, loc_encoder, dev_encoder):
    if supervised_model is None or autoencoder is None:
        return pd.DataFrame()

    fraud_detected = []
    print("Starting real-time fraud monitoring...")
    for idx, trans in df.iterrows():
        is_fraud, sup_pred, is_anomaly, loss = simulate_real_time(
            supervised_model, autoencoder, trans, mean_amount, std_amount, loc_encoder, dev_encoder
        )
        if is_fraud:
            fraud_detected.append({
                'transaction_id': trans['transaction_id'],
                'customer_id': trans['customer_id'],
                'amount_inr': trans['amount_inr'],
                'timestamp': trans['timestamp'],
                'location': trans['location'],
                'device': trans['device'],
                'fraud_reason': f"Supervised score: {sup_pred:.4f}, Anomaly: {is_anomaly}",
                'autoencoder_loss': f"{loss:.4f}"
            })
            print(f"ALERT: Fraud detected! Transaction ID: {trans['transaction_id']}, Customer: {trans['customer_id']}, Amount: ₹{trans['amount_inr']:.2f}")
    return pd.DataFrame(fraud_detected)

if df is not None:
    # Load the trained models
    supervised_model, autoencoder = load_models()

    # Run the monitoring simulation
    fraud_df = real_time_monitoring(df, supervised_model, autoencoder, mean_amount, std_amount, loc_encoder, dev_encoder)

    # Output results and save to file/database
    if not fraud_df.empty:
        print("\nFraud Detected Transactions/People:")
        print(fraud_df[['transaction_id', 'customer_id', 'amount_inr', 'timestamp', 'location', 'device', 'fraud_reason', 'autoencoder_loss']])
        fraud_df.to_csv('fraud_detected.csv', index=False)
        print("\nFraud detected data saved to 'fraud_detected.csv'.")

        conn = sqlite3.connect("fraud_detection_db.db")
        fraud_df.to_sql("fraud_transactions", conn, if_exists="replace", index=False)
        conn.commit()
        conn.close()
        print("Fraud transactions inserted into SQLite database.")
    else:
        print("No fraud detected in the dataset.")

Starting real-time fraud monitoring...
ALERT: Fraud detected! Transaction ID: 6990, Customer: Cust029, Amount: ₹351689.21
ALERT: Fraud detected! Transaction ID: 5998, Customer: Cust022, Amount: ₹929243.33
ALERT: Fraud detected! Transaction ID: 3892, Customer: Cust063, Amount: ₹387566.95
ALERT: Fraud detected! Transaction ID: 2565, Customer: Cust029, Amount: ₹174181.52
ALERT: Fraud detected! Transaction ID: 8899, Customer: Cust025, Amount: ₹1785728.23
ALERT: Fraud detected! Transaction ID: 415, Customer: Cust049, Amount: ₹805661.51
ALERT: Fraud detected! Transaction ID: 7567, Customer: Cust010, Amount: ₹7444932.60
ALERT: Fraud detected! Transaction ID: 3163, Customer: Cust041, Amount: ₹709831.75
ALERT: Fraud detected! Transaction ID: 8241, Customer: Cust075, Amount: ₹8926048.17
ALERT: Fraud detected! Transaction ID: 4734, Customer: Cust001, Amount: ₹7599443.59
ALERT: Fraud detected! Transaction ID: 6964, Customer: Cust001, Amount: ₹9255815.27
ALERT: Fraud detected! Transaction ID: 7103,