# 📊 Signal Backtester

Runs the pipeline's anomaly detectors against historical yfinance data and evaluates signal quality.

**Metrics computed:**
- Signal frequency per ticker
- Post-signal return distribution (1m, 5m, 15m forward returns)
- Sharpe-like ratio per signal type
- Hit rate (% of signals followed by a positive forward return; signals carry no direction here, so "up" is treated as the expected move)

---

In [None]:
import sys
sys.path.insert(0, '..')  # allow importing from project root

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import yfinance as yf
from datetime import datetime, timedelta
from collections import defaultdict

from src.fetcher import Tick
from src.signals import SignalDetector, SignalType

# Dark theme applied to every figure produced by this notebook.
plt.style.use('dark_background')
plt.rcParams.update({
    'figure.facecolor':  '#0d1117',
    'axes.facecolor':    '#0d1117',
    'axes.edgecolor':    '#1e2a36',
    'axes.labelcolor':   '#c9d8e8',
    'xtick.color':       '#5a7a96',
    'ytick.color':       '#5a7a96',
    'grid.color':        '#1e2a36',
    'text.color':        '#c9d8e8',
    'font.family':       'monospace',
    'font.size':         10,
})

# Colour per signal type, looked up by the plotting cells via
# COLORS.get(signal_type, '#888'), so unlisted types fall back to grey.
COLORS = {
    'PRICE_SPIKE':      '#ff4d6a',
    'VOLUME_SURGE':     '#4dabf7',
    'VOLATILITY_BURST': '#f5a623',
    'VWAP_DEVIATION':   '#00e5a0',
}

# The check mark was stored as mis-decoded UTF-8 and printed as garbage.
print('✓ Imports OK')

## 1. Configuration

In [None]:
# ── Backtest parameters ──────────────────────────────────────────────────────
# Universe of tickers to backtest.
SYMBOLS       = ['AAPL', 'MSFT', 'SPY', 'NVDA', 'TSLA']
# Days of 1-min bars to download per symbol.
LOOKBACK_DAYS = 30
# Number of trailing bars handed to the detector as history.
HISTORY_BARS  = 20
# Forward return horizons, expressed in bars (1 bar = 1 minute).
FORWARD_BARS  = [1, 5, 15]

# Signal detector thresholds (tune these!). They are collected in one mapping
# so they can be inspected or logged alongside the results.
_detector_thresholds = {
    'price_spike_zscore':          2.5,
    'volume_surge_multiplier':     3.0,
    'volatility_burst_multiplier': 2.5,
    'vwap_deviation_pct':          0.5,
    'min_history':                 HISTORY_BARS,
}
detector = SignalDetector(**_detector_thresholds)

print(f'Symbols: {SYMBOLS}')
print(f'Lookback: {LOOKBACK_DAYS} days | History window: {HISTORY_BARS} bars')

## 2. Download Historical Data

In [None]:
def download_ticks(symbol: str, days: int) -> list[Tick]:
    """Download 1-minute OHLCV bars for *symbol* and convert them to Ticks.

    Args:
        symbol: Ticker symbol passed to ``yf.Ticker``.
        days: How many calendar days back from now to request.

    Returns:
        One ``Tick`` per bar in chronological order, with timezone-naive
        timestamps and a per-session VWAP (``None`` while the session's
        cumulative volume is still zero). An empty list when nothing could
        be downloaded.
    """
    # NOTE(review): Yahoo is understood to reject 1-minute requests that span
    # more than roughly a week, so a single `period=f'{days}d'` call comes back
    # empty for large lookbacks. The range is therefore fetched in 7-day
    # windows; windows Yahoo refuses (e.g. too far in the past) are skipped.
    # Confirm the exact cap against the installed yfinance version.
    window = timedelta(days=7)
    ticker = yf.Ticker(symbol)
    end = datetime.now()
    cursor = end - timedelta(days=days)

    frames = []
    while cursor < end:
        window_end = min(cursor + window, end)
        part = ticker.history(start=cursor, end=window_end, interval='1m')
        if not part.empty:
            frames.append(part)
        cursor = window_end

    if not frames:
        print(f'  ⚠ {symbol}: no data')
        return []

    df = pd.concat(frames).sort_index()
    # Guard against a bar being returned by two adjacent windows.
    df = df[~df.index.duplicated(keep='first')]

    # VWAP is a per-session quantity: accumulate price*volume and volume
    # within each trading date so the value resets at every open instead of
    # drifting towards a multi-day average.
    session = df.index.date
    volume = df['Volume']
    cum_volume = volume.groupby(session).cumsum()
    cum_turnover = (df['Close'] * volume).groupby(session).cumsum()
    vwap = cum_turnover / cum_volume.replace(0, np.nan)

    columns = zip(df.index, df['Close'], volume, df['Open'], df['High'], df['Low'], vwap)
    return [
        Tick(
            symbol    = symbol,
            # Drop tzinfo: downstream code works with naive timestamps.
            timestamp = ts.to_pydatetime().replace(tzinfo=None),
            price     = float(close),
            volume    = int(vol),
            open      = float(open_),
            high      = float(high),
            low       = float(low),
            vwap      = float(vw) if pd.notna(vw) else None,
        )
        for ts, close, vol, open_, high, low, vw in columns
    ]


# Download every configured symbol up front; symbols with no data map to an
# empty list (download_ticks prints its own warning in that case).
all_ticks: dict[str, list[Tick]] = {}
for sym in SYMBOLS:
    print(f'Downloading {sym}...')
    all_ticks[sym] = download_ticks(sym, LOOKBACK_DAYS)
    # The check mark was stored as mis-decoded UTF-8 and printed as garbage.
    print(f'  ✓ {len(all_ticks[sym])} bars')

print(f'\nTotal bars: {sum(len(v) for v in all_ticks.values()):,}')

## 3. Run Signal Detection

In [None]:
def run_backtest(ticks: list[Tick], fwd_bars: list[int]) -> pd.DataFrame:
    """Replay the detector over a tick history and record forward returns.

    For every bar from index ``HISTORY_BARS`` onwards, the module-level
    ``detector`` is called with the current tick and the window of the
    previous ``HISTORY_BARS`` ticks plus the current one. Each signal it
    returns becomes one output row.

    Args:
        ticks: Chronologically ordered ticks for a single symbol.
        fwd_bars: Forward horizons, in bars, at which to measure returns.

    Returns:
        DataFrame with columns ``symbol``, ``signal_type``, ``ts``, ``price``,
        ``bar_idx`` and one ``ret_{h}m`` column (percent return) per horizon.
        A return is NaN when the forward bar lies beyond the data or in a
        different trading session. The columns are present even when no
        signal fired.
    """
    ret_cols = list(dict.fromkeys(f'ret_{fwd}m' for fwd in fwd_bars))
    columns = ['symbol', 'signal_type', 'ts', 'price', 'bar_idx', *ret_cols]

    records = []
    n = len(ticks)

    for i in range(HISTORY_BARS, n):
        tick = ticks[i]
        # i >= HISTORY_BARS, so the window start can never be negative.
        history = ticks[i - HISTORY_BARS: i + 1]
        # assumes Tick.timestamp is a datetime, as built by download_ticks
        session = tick.timestamp.date()

        for sig in detector.detect(tick, history):
            row = {
                'symbol':      sig.symbol,
                'signal_type': sig.signal_type,
                'ts':          sig.timestamp,
                'price':       sig.price,
                'bar_idx':     i,
            }
            for fwd in fwd_bars:
                future_idx = i + fwd
                # Bars are indexed, not clocked: without the session check a
                # signal near the close would pick up the next day's bar and
                # report an overnight gap as an "N-minute" return.
                if future_idx < n and ticks[future_idx].timestamp.date() == session:
                    fwd_price = ticks[future_idx].price
                    fwd_ret = (fwd_price - sig.price) / sig.price * 100
                    row[f'ret_{fwd}m'] = round(fwd_ret, 4)
                else:
                    row[f'ret_{fwd}m'] = np.nan
            records.append(row)

    # Explicit columns keep the schema stable when `records` is empty.
    return pd.DataFrame(records, columns=columns)


# A symbol needs the detector's history window plus the longest forward
# horizon before any signal can be both detected and scored.
min_bars = HISTORY_BARS + max(FORWARD_BARS, default=0)

results_by_sym: dict[str, pd.DataFrame] = {}
for sym, ticks in all_ticks.items():
    if len(ticks) < min_bars:
        print(f'⚠ {sym}: insufficient data, skipping')
        continue
    df = run_backtest(ticks, FORWARD_BARS)
    results_by_sym[sym] = df
    print(f'{sym}: {len(df)} signals fired')

# pd.concat raises on an empty list, and frames without signals may carry no
# columns, so build from the non-empty frames and fall back to an empty frame
# with the expected schema. Later cells can then still index by column.
result_columns = ['symbol', 'signal_type', 'ts', 'price', 'bar_idx',
                  *[f'ret_{fwd}m' for fwd in FORWARD_BARS]]
non_empty = [frame for frame in results_by_sym.values() if not frame.empty]
if non_empty:
    all_results = pd.concat(non_empty, ignore_index=True)
else:
    all_results = pd.DataFrame(columns=result_columns)

print(f'\nTotal signals: {len(all_results)}')
all_results.head(10)

## 4. Signal Frequency Analysis

In [None]:
# Two side-by-side bar charts: signal counts per type and per symbol.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax_by_type, ax_by_symbol = axes[0], axes[1]
fig.suptitle('Signal Frequency', fontsize=13, color='#c9d8e8', y=1.02)

# By type
type_counts = all_results['signal_type'].value_counts()
# Break multi-word type names across lines so the tick labels do not overlap.
type_labels = [name.replace('_', '\n') for name in type_counts.index]
type_colors = [COLORS.get(name, '#888') for name in type_counts.index]
bars = ax_by_type.bar(
    type_labels,
    type_counts.values,
    color=type_colors,
    alpha=0.8,
    width=0.5,
)
ax_by_type.set_title('By Signal Type', color='#c9d8e8')
ax_by_type.set_ylabel('Count')
# Write each count just above its bar, centred horizontally.
for rect, count in zip(bars, type_counts.values):
    x_centre = rect.get_x() + rect.get_width()/2
    y_above = rect.get_height() + 0.5
    ax_by_type.text(x_centre, y_above, str(count),
                    ha='center', va='bottom', fontsize=9)

# By symbol
sym_counts = all_results['symbol'].value_counts()
ax_by_symbol.bar(sym_counts.index, sym_counts.values,
                 color='#4dabf7', alpha=0.7, width=0.5)
ax_by_symbol.set_title('By Symbol', color='#c9d8e8')
ax_by_symbol.set_ylabel('Count')

plt.tight_layout()
plt.savefig('signal_frequency.png', dpi=120, bbox_inches='tight', facecolor='#0d1117')
plt.show()

## 5. Forward Return Distribution

In [None]:
# Grid of histograms: one row per signal type, one column per forward horizon.
signal_types = all_results['signal_type'].unique() if 'signal_type' in all_results else []
n_types = len(signal_types)

if n_types == 0 or not FORWARD_BARS:
    # plt.subplots rejects a zero-row or zero-column grid.
    print('No signals to plot')
else:
    # squeeze=False always yields a 2-D axes array, so indexing works for a
    # single signal type and for a single horizon alike.
    fig, axes = plt.subplots(n_types, len(FORWARD_BARS),
                             figsize=(14, 3.5 * n_types), squeeze=False)
    fig.suptitle('Forward Return Distributions by Signal Type', fontsize=13, color='#c9d8e8', y=1.01)

    for row_idx, stype in enumerate(signal_types):
        subset = all_results[all_results['signal_type'] == stype]
        color  = COLORS.get(stype, '#888')
        for col_idx, fwd in enumerate(FORWARD_BARS):
            ax  = axes[row_idx][col_idx]
            col = f'ret_{fwd}m'
            data = subset[col].dropna()

            ax.set_title(f'{stype.replace("_"," ")} | +{fwd}m', fontsize=9, color=color)
            ax.set_xlabel('Return (%)', fontsize=8)

            if data.empty:
                # Every return at this horizon was NaN: nothing to draw, and
                # the mean and hit rate would be undefined.
                ax.text(0.5, 0.5, 'no data', transform=ax.transAxes,
                        ha='center', va='center', fontsize=8, color='#c9d8e8')
                continue

            mean_ret = data.mean()
            ax.hist(data, bins=40, color=color, alpha=0.7, edgecolor='none')
            ax.axvline(0,        color='#fff', linewidth=1, linestyle='--', alpha=0.5)
            ax.axvline(mean_ret, color=color,  linewidth=1.5, label=f'mean={mean_ret:.3f}%')
            ax.legend(fontsize=8)

            # Hit rate here is the share of strictly positive forward returns.
            hit_rate = (data > 0).mean() * 100
            ax.text(0.98, 0.95, f'Hit rate: {hit_rate:.1f}%', transform=ax.transAxes,
                    ha='right', va='top', fontsize=8, color='#c9d8e8')

    plt.tight_layout()
    plt.savefig('forward_returns.png', dpi=120, bbox_inches='tight', facecolor='#0d1117')
    plt.show()

## 6. Summary Statistics

In [None]:
# Per (signal type, horizon) summary: sample size, mean/std of the forward
# return, share of positive returns, and an annualised Sharpe-like ratio.
ret_cols = [f'ret_{f}m' for f in FORWARD_BARS]
BARS_PER_YEAR = 252 * 390  # trading days x 1-min bars per regular session
rows = []

present_types = all_results['signal_type'].unique() if 'signal_type' in all_results else []
for stype in present_types:
    sub = all_results[all_results['signal_type'] == stype]
    for fwd, col in zip(FORWARD_BARS, ret_cols):
        data = sub[col].dropna()
        if data.empty:
            continue
        mean_ret = data.mean()
        std_ret = data.std()  # NaN for a single observation; propagates to Sharpe*
        # An h-bar return fits BARS_PER_YEAR / h times into a year, so the
        # annualisation factor must shrink with the horizon. Using the 1-min
        # factor everywhere overstated the 5m and 15m ratios.
        periods_per_year = BARS_PER_YEAR / fwd
        sharpe = (mean_ret / std_ret) * np.sqrt(periods_per_year) if std_ret != 0 else 0
        rows.append({
            'Signal Type':  stype,
            'Horizon':      col,
            'N':            len(data),
            'Mean Ret (%)': round(mean_ret, 4),
            'Std (%)':      round(std_ret, 4),  # key previously had a stray '):'
            'Hit Rate (%)': round((data > 0).mean() * 100, 1),
            'Sharpe*':      round(sharpe, 2),
        })

summary = pd.DataFrame(rows)
print('\n=== Backtest Summary ===')
print(summary.to_string(index=False))
summary

---

## Notes

- **Sharpe\*** is annualised assuming 252 trading days × 390 1-min bars/day. Treat as directional, not absolute.
- **Hit rate > 50%** suggests the signal has directional predictive power.
- **Lookahead bias**: detection uses only data available at signal time ✓
- **Next steps**: add transaction costs, test on out-of-sample data, combine signals into a composite score.
