# User 30-Day Anomaly Dashboard

This notebook visualises 30 days of activity for a selected `user_id`, counted back from that user's most recent transaction, using:

- Daily aggregated amount (sum) as a line.
- 7-day rolling mean and std, with a shaded anomaly band (mean ± 2 * std).
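- Transaction points, with anomalies highlighted in red: a transaction is flagged when its IsolationForest `anomaly_score` is at or above the threshold stored in the training metadata (0.5 if the metadata file is missing), or when its fuzzy risk category is 'high'.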
- A subpanel showing per-transaction `anomaly_score` over time.

Running the final cell will also save:
- `user_last30_anomaly.png` (Matplotlib figure).
- `user_last30_anomaly.html` (Plotly interactive dashboard).


In [None]:
import json
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Data loading and user selection.
# All inputs are resolved relative to the notebook's working directory.
BASE_DIR = Path('.')

tx_path = BASE_DIR / 'tx_with_fuzzy.csv'
meta_path = BASE_DIR / 'feature_importance.json'

df = pd.read_csv(tx_path)
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Load anomaly score threshold from training metadata (if available).
# The metadata is optional, so a missing file falls back to the default
# silently; a file that exists but cannot be used falls back with a warning
# so the problem is visible instead of aborting the notebook.
DEFAULT_ANOM_THRESHOLD = 0.5
try:
    with open(meta_path, 'r', encoding='utf-8') as f:
        meta = json.load(f)
    ANOM_THRESHOLD = float(
        meta.get('metrics', {}).get('anomaly_score_threshold', DEFAULT_ANOM_THRESHOLD)
    )
except FileNotFoundError:
    ANOM_THRESHOLD = DEFAULT_ANOM_THRESHOLD
except (AttributeError, TypeError, ValueError) as exc:
    # ValueError covers json.JSONDecodeError and non-numeric thresholds;
    # AttributeError/TypeError cover unexpected shapes (e.g. "metrics": null).
    print(f'Could not read anomaly threshold from {meta_path} ({exc!r}); using default.')
    ANOM_THRESHOLD = DEFAULT_ANOM_THRESHOLD

print(f'Loaded {len(df)} transactions from', tx_path)
print('Anomaly score threshold used:', ANOM_THRESHOLD)

# Set the user to inspect. Edit this cell to change the user.
# Rows without a user_id are dropped: they can never match a selection and
# NaN cannot be ordered against string ids by sorted().
available_users = sorted(df['user_id'].dropna().unique())
if not available_users:
    raise ValueError(f'No user_id values found in {tx_path}')

# str() each id so the preview also works when user_id is numeric.
users_preview = ', '.join(map(str, available_users[:20]))
if len(available_users) > 20:
    users_preview += ' ...'  # only hint at more users when the list was truncated
print('Available users (first 20):', users_preview)
USER_ID = available_users[0]  # <-- change this to inspect a different user, e.g. 'U0005'
print('Using USER_ID =', USER_ID)


In [None]:
# Restrict the data to the selected user, ordered chronologically
user_df = df.loc[df['user_id'] == USER_ID].sort_values('timestamp').copy()

if user_df.empty:
    raise ValueError(f'No transactions found for user {USER_ID}')

# The 30-day window is anchored at the user's most recent transaction and
# includes both end points.
end_time = user_df['timestamp'].max()
start_time = end_time - pd.Timedelta(days=30)

in_window = (user_df['timestamp'] >= start_time) & (user_df['timestamp'] <= end_time)
user_30 = user_df[in_window].copy()

if user_30.empty:
    raise ValueError(f'No transactions in last 30 days for user {USER_ID}')

# One row per calendar day holding the summed transaction amount
amount_by_day = user_30.set_index('timestamp')['amount'].resample('D').sum()
daily = amount_by_day.to_frame(name='amount_sum')

# 7-day rolling statistics; the std of a single observation is NaN, so it is
# replaced by 0 to keep the band defined from the first day.
week_window = daily['amount_sum'].rolling(window=7, min_periods=1)
daily['rolling_mean'] = week_window.mean()
daily['rolling_std'] = week_window.std().fillna(0.0)

# Band of mean +/- 2 std, with the lower edge floored at zero
two_std = 2 * daily['rolling_std']
daily['upper_band'] = daily['rolling_mean'] + two_std
daily['lower_band'] = (daily['rolling_mean'] - two_std).clip(lower=0.0)

# A transaction is flagged when its anomaly score reaches the threshold or,
# if the fuzzy risk column is present, when that category is 'high'
# (compared case-insensitively).
if 'fuzzy_risk_category' not in user_30.columns:
    is_fuzzy_high = pd.Series(False, index=user_30.index)
else:
    is_fuzzy_high = user_30['fuzzy_risk_category'].astype(str).str.lower() == 'high'
is_if_anom = user_30['anomaly_score'] >= ANOM_THRESHOLD

user_30['is_anomaly_flag'] = is_if_anom | is_fuzzy_high
anom = user_30.loc[user_30['is_anomaly_flag']].copy()

print(f'User {USER_ID} has {len(user_30)} transactions in the last 30 days, of which {len(anom)} are flagged as anomalies.')


In [None]:
# Static Matplotlib figure: amounts on top (3/4 of the height), anomaly scores below
fig, (ax_main, ax_sub) = plt.subplots(
    nrows=2,
    ncols=1,
    sharex=True,
    figsize=(12, 8),
    gridspec_kw={'height_ratios': [3, 1]},
)

# Top panel: daily totals, their 7-day rolling mean, and the +/- 2 std band
days = daily.index
ax_main.plot(days, daily['amount_sum'], color='C0', label='Daily sum')
ax_main.plot(days, daily['rolling_mean'], color='C1', label='7d mean')
ax_main.fill_between(
    days,
    daily['lower_band'],
    daily['upper_band'],
    alpha=0.2,
    color='C1',
    label='7d mean ± 2 std',
)

# Every transaction is drawn in grey; flagged ones are over-plotted larger and
# in red. The anomaly layer is skipped when nothing was flagged so the legend
# does not gain an empty entry.
tx_layers = [(user_30, dict(s=20, color='gray', alpha=0.7, label='Tx'))]
if not anom.empty:
    tx_layers.append((anom, dict(s=40, color='red', label='Anomaly')))
for layer_df, marker_style in tx_layers:
    ax_main.scatter(layer_df['timestamp'], layer_df['amount'], **marker_style)

ax_main.set(ylabel='Amount (INR)', title=f'User {USER_ID} – last 30 days')
ax_main.legend(loc='upper left')

# Bottom panel: per-transaction anomaly score with the decision threshold
ax_sub.plot(
    user_30['timestamp'],
    user_30['anomaly_score'],
    label='anomaly_score',
    color='C2',
)
ax_sub.axhline(y=ANOM_THRESHOLD, linestyle='--', color='red', alpha=0.6, label='threshold')
ax_sub.set(xlabel='Date', ylabel='Anomaly score')
ax_sub.legend(loc='upper left')

# Rotate date labels and tighten the layout before writing to disk
fig.autofmt_xdate()
fig.tight_layout()

out_png = BASE_DIR / 'user_last30_anomaly.png'
fig.savefig(out_png, dpi=150)
print('Saved matplotlib PNG to', out_png)

plt.show()

# Plotly interactive dashboard with subplots.
# Row 1 shows amounts (daily sum, rolling mean, band, transactions); row 2
# shows the per-transaction anomaly score. Both rows share the x (time) axis.
fig_p = make_subplots(
    rows=2, cols=1, shared_xaxes=True, row_heights=[0.7, 0.3], vertical_spacing=0.08
)

# Row 1: daily sums and bands
fig_p.add_trace(
    go.Scatter(x=daily.index, y=daily['amount_sum'], mode='lines', name='Daily sum'),
    row=1, col=1,
)
fig_p.add_trace(
    go.Scatter(x=daily.index, y=daily['rolling_mean'], mode='lines', name='7d mean'),
    row=1, col=1,
)
# Bands via two traces with fill='tonexty': the upper edge is added first
# (invisible line, no legend entry), then the lower edge fills up to it.
fig_p.add_trace(
    go.Scatter(
        x=daily.index,
        y=daily['upper_band'],
        mode='lines',
        line=dict(width=0),
        showlegend=False,
        hoverinfo='skip',
    ),
    row=1, col=1,
)
fig_p.add_trace(
    go.Scatter(
        x=daily.index,
        y=daily['lower_band'],
        mode='lines',
        line=dict(width=0),
        fill='tonexty',
        name='Band ±2 std',
        hoverinfo='skip',
        opacity=0.2,
    ),
    row=1, col=1,
)

# Transaction points
fig_p.add_trace(
    go.Scatter(
        x=user_30['timestamp'],
        y=user_30['amount'],
        mode='markers',
        marker=dict(size=6, color='gray'),
        name='Tx',
    ),
    row=1, col=1,
)

# Anomaly points (skipped when nothing is flagged, to avoid an empty legend entry)
if not anom.empty:
    fig_p.add_trace(
        go.Scatter(
            x=anom['timestamp'],
            y=anom['amount'],
            mode='markers',
            marker=dict(size=8, color='red'),
            name='Anomaly',
        ),
        row=1, col=1,
    )

# Row 2: anomaly score over time
fig_p.add_trace(
    go.Scatter(
        x=user_30['timestamp'],
        y=user_30['anomaly_score'],
        mode='lines+markers',
        marker=dict(size=5),
        name='anomaly_score',
    ),
    row=2, col=1,
)

# Decision threshold, drawn only on the score panel
fig_p.add_hline(
    y=ANOM_THRESHOLD, line=dict(color='red', dash='dash'), row=2, col=1
)

fig_p.update_layout(
    title=f'User {USER_ID} – last 30 days anomaly dashboard',
    height=800,
)
# Axis titles are set per subplot. The layout-level `xaxis_title` /
# `yaxis_title` shortcuts only address the first subplot's axes, which would
# put 'Date' under the top panel and leave the score panel unlabelled.
fig_p.update_yaxes(title_text='Amount (INR)', row=1, col=1)
fig_p.update_yaxes(title_text='Anomaly score', row=2, col=1)
fig_p.update_xaxes(title_text='Date', row=2, col=1)

out_html = BASE_DIR / 'user_last30_anomaly.html'
# include_plotlyjs='cdn' keeps the HTML small; viewing it needs network access
# to load plotly.js.
fig_p.write_html(out_html, include_plotlyjs='cdn')
print('Saved plotly HTML to', out_html)

fig_p.show()
