# ReconGuard Demo Skeleton

This notebook provides a skeleton for Exploratory Data Analysis (EDA), model metrics evaluation, and reconstruction error plotting for the ReconGuard anomaly detection system.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os

# Add src to path
sys.path.append(os.path.join(os.getcwd(), '..'))
from src.features import create_features

## 1. Load Data & Feature Engineering

In [None]:
# Load your dataset here
# df = pd.read_csv('../data/transactions.csv')

# For demo purposes, generate synthetic transactions instead.
# A seeded generator makes the dummy data identical on every
# "Restart Kernel -> Run All", so plots and thresholds are reproducible.
N_DEMO_ROWS = 1000
DEMO_SEED = 42
demo_rng = np.random.default_rng(DEMO_SEED)

# One timestamp per hour. Lowercase 'h' is the hourly alias; uppercase 'H'
# is deprecated in recent pandas releases.
dates = pd.date_range(start='2023-01-01', periods=N_DEMO_ROWS, freq='h')
df = pd.DataFrame({
    'user_id': demo_rng.choice(['user_1', 'user_2', 'user_3'], N_DEMO_ROWS),
    'merchant_id': demo_rng.choice(['merch_A', 'merch_B', 'merch_C'], N_DEMO_ROWS),
    'amount': demo_rng.exponential(scale=100, size=N_DEMO_ROWS),
    'timestamp': dates
})

# Apply feature engineering (create_features is imported from src.features)
df_features = create_features(df)
df_features.head()

## 2. Exploratory Data Analysis (EDA)

In [None]:
# Histogram of transaction amounts with a KDE overlay, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df_features['amount'], bins=50, kde=True, ax=ax)
ax.set_title('Distribution of Transaction Amounts')
plt.show()

In [None]:
# Count of transactions per 'time_of_day' bucket, drawn on an explicit Axes.
# NOTE(review): the 0-3 bucket encoding in the axis label is produced by
# create_features, which is not visible here -- confirm it matches.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(x='time_of_day', data=df_features, ax=ax)
ax.set_title('Transactions by Time of Day Bucket')
ax.set_xlabel('Time of Day (0:Night, 1:Morning, 2:Afternoon, 3:Evening)')
plt.show()

## 3. Reconstruction Error Analysis

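This section assumes that a trained model has already scored the transactions (or that previously scored data has been loaded). For the demo, synthetic reconstruction errors are generated instead.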

In [None]:
# Load scored data if available
# df_scored = pd.read_csv('../scored_transactions.csv')

# For demo, generate dummy reconstruction errors.
# Draw exactly one value per row of df_features (not a hard-coded count) so
# the assignment still works when a real dataset of a different length is
# loaded. A seeded generator keeps the values stable across re-runs.
error_rng = np.random.default_rng(0)
df_features['reconstruction_error'] = error_rng.beta(2, 5, len(df_features)) * 0.5

# Plot reconstruction error distribution
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df_features['reconstruction_error'], bins=50, kde=True, color='red', ax=ax)
ax.set_title('Distribution of Reconstruction Errors')
plt.show()

In [None]:
# Define a threshold for anomalies.
# The quantile is a named constant so the cut-off and the printed message
# cannot drift apart when it is tuned.
ANOMALY_QUANTILE = 0.95
threshold = df_features['reconstruction_error'].quantile(ANOMALY_QUANTILE)
# ':g' trims floating-point noise (0.95 * 100 is 94.999...) and prints '95'.
print(f"Anomaly Threshold ({ANOMALY_QUANTILE * 100:g}th percentile): {threshold:.4f}")

# Rows whose error is strictly above the threshold are flagged as anomalies.
anomalies = df_features[df_features['reconstruction_error'] > threshold]
print(f"Number of anomalies found: {len(anomalies)}")

In [None]:
# Visualize Anomalies vs Normal
# Split out the non-flagged rows so the 'Normal' series really contains only
# normal points; otherwise anomalies are drawn twice and labelled 'Normal'.
is_anomaly = df_features.index.isin(anomalies.index)
normal = df_features[~is_anomaly]

fig, ax = plt.subplots(figsize=(12, 6))
ax.scatter(normal.index, normal['reconstruction_error'], alpha=0.5, label='Normal')
ax.scatter(anomalies.index, anomalies['reconstruction_error'], color='red', label='Anomaly')
ax.axhline(y=threshold, color='k', linestyle='--', label='Threshold')
ax.set_title('Reconstruction Error over Time')
# The x-axis is the DataFrame index (row position), not the timestamp column.
ax.set_xlabel('Transaction index')
ax.set_ylabel('Reconstruction error')
ax.legend()
plt.show()