# Online Payment Fraud Detection - Complete Study

This notebook replicates the comprehensive study from `Online_Payment_Fraud_Detection.ipynb`.

The study includes:
1. Exploratory Data Analysis (EDA)
2. Data Preprocessing (outliers, encoding, scaling)
3. Class Imbalance Handling (SMOTE)
4. Train/Validation/Test Split
5. Deep Neural Network Model Training
6. Model Evaluation
7. Saving the Trained Model

In [None]:
# Load IPython's autoreload extension so edits to the imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload modules automatically before each cell is executed.
# (Comments stay on their own lines: text after a line magic is passed to it.)
%autoreload 2

## 1. Import Libraries and Load Data

In [None]:
import os
import sys
from pathlib import Path

sys.path.append("..")

from src.data_ingestion import DataIngestion
from src.fraud_eda import FraudEDA
from src.fraud_preprocessing import FraudPreprocessing
from src.fraud_model import FraudDNNModel
from src.fraud_evaluation import FraudModelEvaluation
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

# Fetch the fraud dataset through the project's ingestion helper.
ingestion = DataIngestion()
df = ingestion.load_fraud_dataset()

# Emit the confirmation and the frame's dimensions in one call;
# sep="\n" keeps each message on its own line.
print("Dataset loaded successfully!", f"Shape: {df.shape}", sep="\n")

## 2. Exploratory Data Analysis (EDA)

In [None]:
# Wrap the loaded frame in the project's EDA helper.
eda = FraudEDA(df)

# Run the two text reports back to back: basic info first, then the
# comprehensive summary.
for emit_report in (eda.print_basic_info, eda.print_summary_report):
    emit_report()

In [None]:
# Distribution plots for the key features; each entry is (column, bins, color).
for column, n_bins, shade in (
    ('step', 10, 'skyblue'),
    ('amount', 15, 'lightgreen'),
):
    eda.plot_feature_distribution(column, bins=n_bins, color=shade)

# Fraud broken down by transaction type, shown as text...
crosstab = eda.analyze_fraud_by_type()
print(crosstab)

# ...and as a chart.
eda.plot_fraud_by_type()

# Fraud percentages: a header line, then one line per transaction type
# formatted to two decimals.
fraud_percentages = eda.calculate_fraud_percentages()
print("\nFraud Percentage by Transaction Type:")
for trans_type, pct in fraud_percentages.items():
    print(f"  {trans_type}: {pct:.2f}%")

## 3. Data Preprocessing

In [None]:
# Build the project's preprocessing helper.
preprocessor = FraudPreprocessing()

# Every pipeline stage is switched on. The flags are gathered in one mapping
# so the enabled stages can be read (and toggled) at a glance.
# NOTE(review): handle_imbalance=True and scale_features=True run here, on the
# whole dataset, before the train/validation/test split in the next section.
# If the imbalance stage oversamples (the notebook intro names SMOTE), then
# resampled rows also land in the validation and test sets and the scaler sees
# held-out rows -- confirm whether this is intended, since it can inflate the
# reported metrics.
preprocess_options = dict(
    target_col='isFraud',
    handle_outliers=True,
    handle_missing=True,
    remove_irrelevant=True,
    encode_categorical=True,
    scale_features=True,
    handle_imbalance=True,
    return_X_y=True,
)
X, y = preprocessor.preprocess(df, **preprocess_options)

# Confirm completion and show the resulting feature/target dimensions.
print("\nPreprocessing completed!")
print(f"Features shape: {X.shape}")
print(f"Target shape: {y.shape}")

## 4. Train-Validation-Test Split

In [None]:
# Two-stage stratified split: train 80% / validation 10% / test 10%.
# Stage 1 holds out 20% of the rows.
X_train, X_temp, y_train, y_temp = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Stage 2 halves the held-out 20% into validation and test.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=42,
    stratify=y_temp,
)

# Row count of each split, thousands-separated.
for split_name, features in (
    ("Training", X_train),
    ("Validation", X_val),
    ("Test", X_test),
):
    print(f"{split_name} set: {features.shape[0]:,} samples")

# Label counts in the training split.
print("\nClass distribution in training set:")
print(y_train.value_counts())

## 5. Model Training

In [None]:
# The number of columns in the training matrix is passed as the model's
# input dimension.
input_dim = X_train.shape[1]

# Hyperparameters collected in one mapping and handed to the constructor
# as keyword arguments.
dnn_settings = {
    "hidden_layers": [128, 64, 32],
    "dropout_rate": 0.2,
    "learning_rate": 0.001,
    "random_state": 42,
}
model = FraudDNNModel(input_dim=input_dim, **dnn_settings)

# Construct the network.
model.build_model()

# Show the model summary returned by the wrapper.
model_summary = model.get_model_summary()
print(model_summary)

In [None]:
# Validation split handed to the trainer alongside the training data.
validation_split = {"X_val": X_val, "y_val": y_val}

# Training schedule: 10 epochs, batches of 32, early-stopping patience of 3,
# progress output enabled, and no best-model saving during training.
training_schedule = {
    "epochs": 10,
    "batch_size": 32,
    "verbose": 1,
    "early_stopping_patience": 3,
    "save_best_model": False,
}

# Fit the model and keep whatever the trainer returns as `history`.
history = model.train(X_train, y_train, **validation_split, **training_schedule)

## 6. Model Evaluation

In [None]:
# Score the test split: labels at a 0.5 threshold plus the model's
# probability output.
# NOTE(review): X_test/y_test come from data preprocessed with
# handle_imbalance=True before splitting; if that stage oversamples, these
# metrics are measured on a rebalanced test set -- confirm before quoting them.
y_pred = model.predict(X_test, threshold=0.5)
y_pred_proba = model.predict_proba(X_test)

print("Predictions completed!")
print(f"Predicted shape: {y_pred.shape}")

# Bundle ground truth, predicted labels and probabilities for evaluation.
evaluator = FraudModelEvaluation(y_test, y_pred, y_pred_proba)

# Create the report directory up front (as the save-model cell does for its
# own output folder) so a fresh checkout does not depend on
# generate_full_report creating it. The string passed on is unchanged.
report_dir = "../reports/fraud_detection"
Path(report_dir).mkdir(parents=True, exist_ok=True)

# Generate the full evaluation report into the report directory.
evaluation_report = evaluator.generate_full_report(save_dir=report_dir)

# Print the headline metrics.
evaluator.print_key_metrics()

## 7. Save Model

In [None]:
# Resolve the destination file first, then make sure its folder exists
# (parents included; an existing folder is fine).
model_dir = Path("..", "models", "fraud_detection")
model_path = model_dir.joinpath("fraud_dnn_model.h5")
model_path.parent.mkdir(parents=True, exist_ok=True)

# The wrapper is handed the path as a plain string.
model.save_model(str(model_path))

print(f"\nModel saved successfully at: {model_path}")