# 04_backtesting_and_robustness

04 — Backtesting & Robustness (Costs, Turnover, Risk Metrics)
Goal: Compute portfolio performance for:

- Pure TA (pos_ta → equal-weight among active positions)
- Pure Fundamentals (w_fund)
- Hybrid (w_hybrid)

Include transaction costs, turnover, and summary metrics. Save results to reports/.

### Setup

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Widen pandas' display limits so wide frames are shown without truncation.
for _option, _value in (("display.max_columns", 200), ("display.width", 150)):
    pd.set_option(_option, _value)

# All paths hang off the parent of the current working directory.
# NOTE(review): presumably the notebook is launched from a sub-folder of the
# repository — confirm, since REPO_ROOT depends on the working directory.
REPO_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))
REPORTS = os.path.join(REPO_ROOT, "reports")
DATA_PROCESSED = os.path.join(REPO_ROOT, "data", "processed")
os.makedirs(REPORTS, exist_ok=True)

# The signals file is a hard prerequisite; stop early with a pointer to the
# notebook that produces it.
signals_path = os.path.join(DATA_PROCESSED, "signals.parquet")
assert os.path.exists(signals_path), f"Missing {signals_path}. Run Notebook 03 first."

# Load, parse dates, and order rows by ticker then date so that per-ticker
# operations further down see each ticker's rows in chronological order.
df = (
    pd.read_parquet(signals_path)
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .sort_values(["ticker", "date"])
    .reset_index(drop=True)
)

df.head()


### Compute returns per ticker



In [None]:
# Simple period-over-period return of the adjusted close, computed separately
# for each ticker. Missing returns (e.g. each ticker's first row, which has no
# previous price) are set to 0.
per_ticker_close = df.groupby("ticker")["adj_close"]
df["ret"] = per_ticker_close.pct_change().fillna(0.0)

df[["date", "ticker", "adj_close", "ret"]].head(10)


### Build TA weights (equal-weight among active TA positions)

In [None]:
# Flag rows whose TA position is on (pos_ta equal to 1) as 1, everything else 0.
df["sig_ta_member"] = df["pos_ta"].eq(1).astype(int)

# Number of active TA names on each date, broadcast back onto every row.
# Dates with no active name become NaN so the division below never hits zero.
ta_count = df.groupby("date")["sig_ta_member"].transform("sum").replace(0, np.nan)

# Active rows receive 1 / count, inactive rows receive 0; rows on dates with
# no active name come out as NaN from the division and are reset to 0.
df["w_ta"] = df["sig_ta_member"].div(ta_count).fillna(0.0)

# Sanity check: distribution of the per-date weight totals for each strategy.
weight_columns = ["w_ta", "w_fund", "w_hybrid"]
df.groupby("date")[weight_columns].sum().describe()


### Portfolio returns (gross, before costs)

In [None]:
def portfolio_returns(df, weight_col, ret_col="ret"):
    """Return the per-date portfolio return as the sum of weight * return.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format frame with a "date" column plus the weight and return
        columns named below (one row per date/ticker pair).
    weight_col : str
        Name of the column holding the portfolio weights.
    ret_col : str, default "ret"
        Name of the column holding the per-row returns.

    Returns
    -------
    pandas.Series
        Indexed by date and named ``f"port_{weight_col}"``. A date on which
        any weight * return product is missing yields NaN for that date,
        matching a plain (non-NaN-skipping) sum. An empty frame yields an
        empty Series.

    Notes
    -----
    NOTE(review): weights and returns are multiplied on the *same* date. If
    the weights are derived from that date's closing data this would use
    information not available at trade time — confirm the weights are lagged
    upstream.
    """
    dates = df["date"]
    contrib = df[weight_col] * df[ret_col]

    # Vectorised group sum instead of groupby.apply with a Python lambda:
    # no per-date Python call, no reliance on apply touching the grouping
    # column, and a well-defined (empty) result for an empty frame.
    totals = contrib.fillna(0.0).groupby(dates).sum()

    # Keep NaN propagation: blank out dates that contain a missing product
    # rather than silently treating the missing piece as zero.
    has_missing = contrib.isna().groupby(dates).any()

    return totals.mask(has_missing).rename(f"port_{weight_col}")

# Gross (pre-cost) return series for the three strategies, in the order
# TA, fundamentals, hybrid.
port_ta, port_fund, port_hybrid = (
    portfolio_returns(df, col) for col in ("w_ta", "w_fund", "w_hybrid")
)

# Side-by-side table with one column per strategy; missing values become 0.
ports = pd.concat([port_ta, port_fund, port_hybrid], axis=1).fillna(0.0)
ports.head()


### Turnover + transaction costs

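Daily turnover is defined as 0.5 × sum(|w_t - w_{t-1}|) across all tickers (standard convention). The first date has no previous weights to compare against, so its turnover is 0.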

In [None]:
def daily_turnover(df, weight_col):
    """Return per-date turnover: half the summed absolute weight changes.

    The frame is reshaped to a date x ticker grid of ``weight_col`` (missing
    cells count as a weight of 0), each row is compared with the row before
    it, and the absolute changes are summed across tickers and halved.
    The first date has no previous row, so its turnover is 0.

    The result is a Series indexed by date, named
    ``f"turnover_{weight_col}"``.
    """
    weights = df.pivot(index="date", columns="ticker", values=weight_col).fillna(0.0)
    change = (weights - weights.shift(1)).abs()
    # Halving avoids counting both the buy and the sell leg of a rebalance
    # in a fully invested portfolio.
    half_traded = change.sum(axis=1) / 2
    return half_traded.rename(f"turnover_{weight_col}")

# Daily turnover series for the three strategies, in the order
# TA, fundamentals, hybrid.
turn_ta, turn_fund, turn_hybrid = (
    daily_turnover(df, col) for col in ("w_ta", "w_fund", "w_hybrid")
)

# Side-by-side table with one column per strategy; missing values become 0.
turn = pd.concat([turn_ta, turn_fund, turn_hybrid], axis=1).fillna(0.0)
turn.head()


### Apply costs

Set cost assumptions (you can tune later):

A cost of 5 bps (0.05%) per 1.0 of turnover is a reasonable “simple model”.

In [None]:
COST_BPS = 5  # cost in basis points charged per 1.0 of turnover
cost_rate = COST_BPS / 10000.0  # the same cost expressed as a decimal fraction

# Net return = gross return minus cost_rate times that day's turnover.
# The result is laid out on the gross-return table's index and holds only the
# three net columns.
ports_net = pd.DataFrame(
    {
        f"net_{w}": ports[f"port_{w}"] - cost_rate * turn[f"turnover_{w}"]
        for w in ("w_ta", "w_fund", "w_hybrid")
    },
    index=ports.index,
)
ports_net.head()


### Performance metrics

In [None]:
TRADING_DAYS = 252  # periods per year used to annualise the daily statistics

def max_drawdown(equity_curve):
    """Return ``(worst_drawdown, drawdown_series)`` for an equity curve.

    The drawdown at each point is the equity divided by its running maximum,
    minus 1 (so it is 0 at a new high and negative below it). The first
    element of the result is the minimum of that series.
    """
    peak = equity_curve.cummax()
    dd = equity_curve / peak - 1.0
    return dd.min(), dd

def perf_stats(returns, name="strategy"):
    """Summarise a series of periodic returns as a dict of performance metrics.

    Parameters
    ----------
    returns : pandas.Series
        Periodic (daily) simple returns. Missing values are treated as 0.
    name : str, default "strategy"
        Label stored under the "name" key.

    Returns
    -------
    dict
        Keys: name, total_return, cagr, volatility, sharpe, sortino,
        max_drawdown, calmar, win_rate_daily, days. Ratios whose denominator
        is exactly zero are NaN. For an empty input every metric is NaN and
        "days" is 0 (instead of failing on the last-element lookup).

    Notes
    -----
    Sharpe and Sortino use the raw mean return (no risk-free rate is
    subtracted). Sortino divides by the standard deviation of the strictly
    negative returns only.
    """
    returns = returns.fillna(0.0)
    n_days = len(returns)

    if n_days == 0:
        # Nothing to compound: report NaN metrics rather than raising.
        return {
            "name": name,
            "total_return": np.nan,
            "cagr": np.nan,
            "volatility": np.nan,
            "sharpe": np.nan,
            "sortino": np.nan,
            "max_drawdown": np.nan,
            "calmar": np.nan,
            "win_rate_daily": np.nan,
            "days": 0,
        }

    eq = (1.0 + returns).cumprod()
    final_equity = eq.iloc[-1]
    total_return = final_equity - 1.0
    # Geometric annualisation of the final equity over the observed periods.
    cagr = final_equity ** (TRADING_DAYS / n_days) - 1.0

    annualiser = np.sqrt(TRADING_DAYS)
    mean_ret = returns.mean()
    std_ret = returns.std()
    vol = std_ret * annualiser
    sharpe = (mean_ret / std_ret) * annualiser if std_ret != 0 else np.nan

    # With fewer than two negative returns this std is NaN, which makes the
    # Sortino ratio NaN as well.
    downside_std = returns[returns < 0].std()
    sortino = (mean_ret / downside_std) * annualiser if downside_std != 0 else np.nan

    mdd, _ = max_drawdown(eq)
    calmar = cagr / abs(mdd) if mdd != 0 else np.nan

    win_rate = (returns > 0).mean()

    return {
        "name": name,
        "total_return": total_return,
        "cagr": cagr,
        "volatility": vol,
        "sharpe": sharpe,
        "sortino": sortino,
        "max_drawdown": mdd,
        "calmar": calmar,
        "win_rate_daily": win_rate,
        "days": n_days,
    }

# Net-return column -> display label, in the order the rows should appear.
strategy_labels = {
    "net_w_ta": "TA (EMA5+Stoch+Candles)",
    "net_w_fund": "Fundamentals (Top-N Rank)",
    "net_w_hybrid": "Hybrid (Fund+TA timing)",
}

# One row of summary metrics per strategy, computed on net-of-cost returns.
stats = pd.DataFrame(
    [perf_stats(ports_net[col], label) for col, label in strategy_labels.items()]
)

stats


### Equity curves

In [None]:
# Net-return column -> curve label.
equity_columns = {
    "net_w_ta": "TA",
    "net_w_fund": "Fundamentals",
    "net_w_hybrid": "Hybrid",
}

# Growth of 1 unit: running product of (1 + net return) for each strategy.
eq = (
    ports_net[list(equity_columns)]
    .add(1)
    .cumprod()
    .rename(columns=equity_columns)
)

eq.plot(figsize=(12, 4), title="Equity Curves (Net of Costs)")
plt.show()


### Drawdowns

In [None]:
def drawdown_series(returns):
    """Return the drawdown path implied by a series of periodic returns.

    The returns are compounded into an equity curve, which is then expressed
    relative to its running maximum: 0 at a new high, negative below it.
    """
    equity = returns.add(1).cumprod()
    running_high = equity.cummax()
    return equity.div(running_high).sub(1)

# Curve label -> net-return column.
drawdown_sources = {
    "TA": "net_w_ta",
    "Fundamentals": "net_w_fund",
    "Hybrid": "net_w_hybrid",
}

# Drawdown path of each strategy, computed on net-of-cost returns.
dd = pd.DataFrame(
    {label: drawdown_series(ports_net[col]) for label, col in drawdown_sources.items()}
)

dd.plot(figsize=(12, 3), title="Drawdowns")
plt.show()


### Robustness checks (simple but credible)

A) Parameter sensitivity: Stochastic oversold threshold

We’ll quickly test a couple of oversold thresholds by filtering entries (an approximation).
This is not a full re-run of Notebook 03, but a quick sanity lens.

In [None]:
# Quick lens: how many entry signals fire per day at each oversold threshold?
k = df["stoch_k_14"]
d = df["stoch_d_14_3"]

# Previous row's %K / %D within each ticker.
by_ticker = df.groupby("ticker")
k_prev = by_ticker["stoch_k_14"].shift(1)
d_prev = by_ticker["stoch_d_14_3"].shift(1)

# %K was below %D on the previous row and is above it on the current row.
stoch_cross_up = (k_prev < d_prev) & (k > d)
# NOTE(review): the trend filter uses ema20, while the TA strategy label in
# this notebook mentions EMA5 — confirm which one is intended.
trend_ok = df["adj_close"] > df["ema20"]
bull_candle = (df["is_hammer"] == 1) | (df["is_bull_engulf"] == 1)

# Conditions that do not depend on the threshold, combined once.
threshold_free = trend_ok & stoch_cross_up & bull_candle

thresholds = [10, 20, 30]
rows = [
    {
        "oversold_threshold": th,
        # Signals are counted per date, then averaged across dates.
        "avg_entries_per_day": (threshold_free & (k_prev < th))
        .groupby(df["date"])
        .sum()
        .mean(),
    }
    for th in thresholds
]

pd.DataFrame(rows)


B) Turnover summary

In [None]:
# Single-row table: mean turnover of each strategy first, then the medians.
turnover_columns = (
    ("TA", "turnover_w_ta"),
    ("Fund", "turnover_w_fund"),
    ("Hybrid", "turnover_w_hybrid"),
)
summary_row = {}
for tag, reducer in (("avg", pd.Series.mean), ("median", pd.Series.median)):
    for label, col in turnover_columns:
        summary_row[f"{label}_{tag}_turnover"] = reducer(turn[col])

turn_summary = pd.DataFrame(summary_row, index=[0])

turn_summary


### Save outputs to reports

In [None]:
# Destination files inside the reports folder.
stats_path = os.path.join(REPORTS, "performance_metrics.csv")
eq_path = os.path.join(REPORTS, "equity_curves.csv")
dd_path = os.path.join(REPORTS, "drawdowns.csv")

# The metrics table is written without its index; the equity and drawdown
# tables keep theirs.
stats.to_csv(stats_path, index=False)
for frame, target in ((eq, eq_path), (dd, dd_path)):
    frame.to_csv(target)

# Report the paths only after every file has been written.
for saved_path in (stats_path, eq_path, dd_path):
    print("Saved:", saved_path)
