# 7. Outlier Analysis

Detect unusually profitable bars — rows whose PnL z-score is above +3 (more than three standard deviations above the mean PnL) — and summarize the findings.


In [None]:

import pandas as pd
from pathlib import Path
import numpy as np

# Load the 5-minute feature table, parsing the timestamp column as datetimes.
DATA_DIR = Path("../data")
df = pd.read_csv(
    DATA_DIR / "nifty_features_5min.csv",
    parse_dates=["timestamp"],
)

# Discard rows lacking any input the strategy needs, then renumber the index
# so positional shifts further down operate on contiguous rows.
df = df.dropna(subset=["spot_ret", "ema_5", "ema_15"])
df = df.reset_index(drop=True)

# Fall back to a constant regime when the file does not provide one.
# NOTE: a constant 0 never satisfies the `regime == 1` / `regime == -1`
# filters applied to the crossovers later in this cell, so no signal fires
# in that case.
if "regime" not in df:
    df["regime"] = 0

# EMA crossover flags. An up-cross is the fast EMA (ema_5) going from
# at-or-below the slow EMA (ema_15) on the previous row to strictly above it
# on the current row; a down-cross is the mirror image. The first row compares
# against NaN (from shift(1)), which evaluates to False, so it is never a cross.
df["cross_up"] = (df["ema_5"] > df["ema_15"]) & (df["ema_5"].shift(1) <= df["ema_15"].shift(1))
df["cross_dn"] = (df["ema_5"] < df["ema_15"]) & (df["ema_5"].shift(1) >= df["ema_15"].shift(1))

# Entry signal: +1 on an up-cross while regime == 1, -1 on a down-cross while
# regime == -1, and 0 everywhere else.
df["signal"] = 0
df.loc[df["cross_up"] & (df["regime"] == 1), "signal"] = 1
df.loc[df["cross_dn"] & (df["regime"] == -1), "signal"] = -1

# Carry the most recent non-zero signal forward as the held position.
# `Series.replace(0, method="ffill")` is deprecated in recent pandas, so the
# same result is built explicitly: blank out the zeros, forward-fill, and put
# 0 back for rows that precede the first signal. The original dtype is kept.
df["position"] = (
    df["signal"]
    .where(df["signal"] != 0)
    .ffill()
    .fillna(0)
    .astype(df["signal"].dtype)
)

# PnL of each row uses the position held on the previous row, so a signal
# only earns the return of the rows after it (the first row's pnl is NaN).
df["pnl"] = df["position"].shift() * df["spot_ret"]

# Standardise pnl into a z-score using the population standard deviation
# (ddof=0). A standard deviation of exactly zero is swapped for a tiny
# constant so the division below cannot divide by zero.
mu = df["pnl"].mean()
sd = df["pnl"].std(ddof=0)
if sd == 0:
    sd = 1e-9
df["z"] = df["pnl"].sub(mu).div(sd)

# Keep only the rows more than three standard deviations above the mean;
# large negative z-scores are deliberately not selected here.
outliers = df.loc[df["z"] > 3].copy()
print("Outliers:", len(outliers), "of", len(df))

# Last expression of the cell: preview the first few outlier rows.
outliers[["timestamp", "pnl", "z", "regime"]].head()


In [None]:

from pathlib import Path

# Output directory for the text summary; created on first use.
RES_DIR = Path("../results")
RES_DIR.mkdir(exist_ok=True)

# Share of rows flagged as outliers, as a percentage (0 for an empty frame).
if len(df):
    pct = (len(outliers) / len(df)) * 100
else:
    pct = 0

# Mean pnl of the outlier rows, reported as 0 when there are none.
outlier_mean_pnl = outliers["pnl"].mean() if len(outliers) else 0

# One line per reported figure.
summary = [
    f"Outliers (z>3): {len(outliers)}",
    f"Total rows: {len(df)}",
    f"Outlier percentage: {pct:.2f}%",
    f"Outlier mean pnl: {outlier_mean_pnl:.6f}",
    f"Overall mean pnl: {df['pnl'].mean():.6f}",
]

# Write the report next to the other results and confirm on stdout.
(RES_DIR / "outlier_insights.txt").write_text("\n".join(summary))
print("Saved results/outlier_insights.txt")
