# Momentum Swing — Pattern Recognition + Backtest Starter

This notebook is a **starter framework** for: 
- building momentum/swing features on OHLC data
- defining pattern-based entry signals
- running a simple event-driven backtest with R-multiples

It is intentionally modular so you can iterate quickly.

Assumptions:
- `ohlc` table has `open_time` / `close_time` in epoch ms
- price columns are numeric
- time zone conversion is performed to `Australia/Sydney` for reporting/EDA; **trading logic should generally stay in UTC** to avoid DST edge cases.


In [None]:
import sqlite3
from dataclasses import dataclass
from pathlib import Path

import numpy as np
import pandas as pd

# SQLite database file that load_ohlc() opens.
DB_PATH = Path('ohlc.sqlite3')
# Time zone used for the reporting column open_dt_syd.
TZ = 'Australia/Sydney'
# Widen pandas' display limits so wide feature frames are shown in full.
pd.set_option('display.max_columns', 200)
pd.set_option('display.width', 140)


## Load a symbol+interval dataset

In [None]:
def load_ohlc(exchange: str, symbol: str, interval: str, limit: int | None = None) -> pd.DataFrame:
    """Load OHLC bars for one exchange/symbol/interval from the SQLite database.

    Rows are read from the ``ohlc`` table in the file at ``DB_PATH``, ordered
    by ``open_time`` ascending.

    Args:
        exchange: Value matched against the ``exchange`` column.
        symbol: Value matched against the ``symbol`` column.
        interval: Value matched against the ``interval`` column.
        limit: Optional maximum number of rows. Because rows are ordered
            ascending by ``open_time``, this keeps the earliest bars.

    Returns:
        A DataFrame holding the table's columns, with the price/volume
        columns coerced to numeric (unparseable values become NaN), plus
        ``open_dt_utc`` (tz-aware UTC, derived from ``open_time`` in epoch
        milliseconds) and ``open_dt_syd`` (the same instants in ``TZ``).
    """
    # Function-scope import so this cell does not depend on the import cell
    # being edited.
    from contextlib import closing

    q = 'SELECT * FROM ohlc WHERE exchange=? AND symbol=? AND interval=? ORDER BY open_time'
    params = [exchange, symbol, interval]
    if limit is not None:
        # Bind the limit like every other value instead of formatting it
        # into the SQL text.
        q += ' LIMIT ?'
        params.append(int(limit))

    # sqlite3's own context manager only commits/rolls back the transaction;
    # it does not close the connection. closing() releases the handle.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        df = pd.read_sql_query(q, conn, params=tuple(params))

    # numeric types
    for c in ['open', 'high', 'low', 'close', 'volume']:
        if c in df.columns:
            df[c] = pd.to_numeric(df[c], errors='coerce')

    # time columns
    df['open_dt_utc'] = pd.to_datetime(pd.to_numeric(df['open_time'], errors='coerce'), unit='ms', utc=True)
    df['open_dt_syd'] = df['open_dt_utc'].dt.tz_convert(TZ)
    return df


# TODO: set these to a liquid symbol you track
# These three values are the exchange/symbol/interval filters passed to load_ohlc().
EXCHANGE = 'binance'
SYMBOL = 'BTCUSDT'
INTERVAL = '1h'

# No row limit is passed, so every stored bar for the selection is loaded.
df = load_ohlc(EXCHANGE, SYMBOL, INTERVAL)
# Last expression of the cell: the final rows of the loaded frame, as a sanity check.
df.tail()


## Feature engineering (momentum + volatility + regime)
Typical building blocks for swing momentum strategies:
- log returns
- moving averages (trend)
- ATR (volatility for stops / position sizing)
- breakout levels (highest close/high over lookback)
- volume expansion

In [None]:
def ema(s: pd.Series, span: int) -> pd.Series:
    """Return the exponential moving average of *s* for the given span.

    Uses the recursive form (``adjust=False``), so the first output equals
    the first input value.
    """
    smoother = s.ewm(adjust=False, span=span)
    return smoother.mean()


def atr(high: pd.Series, low: pd.Series, close: pd.Series, n: int = 14) -> pd.Series:
    """Return the average true range over *n* bars (simple rolling mean).

    True range is the largest of: high - low, |high - previous close| and
    |low - previous close|. The result is NaN until *n* true-range values
    are available.
    """
    previous_close = close.shift(1)
    candidates = [
        (high - low).abs(),
        (high - previous_close).abs(),
        (low - previous_close).abs(),
    ]
    # Row-wise max skips NaN, so the first bar (no previous close) falls
    # back to its high-low range.
    true_range = pd.concat(candidates, axis=1).max(axis=1)
    window = true_range.rolling(n, min_periods=n)
    return window.mean()


def add_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with momentum, volatility and breakout columns.

    Reads ``close``, ``high`` and ``low`` (and ``volume`` when present) and
    adds: ``log_close``, ``ret_1``, ``ema_20``, ``ema_50``, ``trend_up``,
    ``atr_14``, ``atrp_14``, ``hh_20``, ``hc_20``, ``ll_20`` and, if a
    ``volume`` column exists, ``vol_sma_20`` and ``vol_z_20``. The input
    frame is not modified.
    """
    feats = df.copy()
    close = feats['close']

    # Log price and one-bar log return.
    feats['log_close'] = np.log(close)
    feats['ret_1'] = feats['log_close'].diff()

    # Trend regime: fast EMA above slow EMA.
    feats['ema_20'] = ema(close, 20)
    feats['ema_50'] = ema(close, 50)
    feats['trend_up'] = feats['ema_20'] > feats['ema_50']

    # Volatility in price units and as a fraction of price.
    feats['atr_14'] = atr(feats['high'], feats['low'], close, 14)
    feats['atrp_14'] = feats['atr_14'] / close

    # Breakout helpers: rolling extremes, NaN until the window is full.
    lookback = 20
    feats['hh_20'] = feats['high'].rolling(lookback, min_periods=lookback).max()
    feats['hc_20'] = close.rolling(lookback, min_periods=lookback).max()
    feats['ll_20'] = feats['low'].rolling(lookback, min_periods=lookback).min()

    # Volume expansion (optional)
    if 'volume' in feats.columns:
        vol_window = feats['volume'].rolling(20, min_periods=20)
        feats['vol_sma_20'] = vol_window.mean()
        feats['vol_z_20'] = (feats['volume'] - feats['vol_sma_20']) / vol_window.std()

    return feats

# Replace df with the feature-augmented copy returned by add_features().
df = add_features(df)
# Last expression of the cell: the final 10 rows of a few price/feature columns.
df[['open_dt_syd','open','high','low','close','ema_20','ema_50','atr_14','atrp_14','hh_20']].tail(10)


## Pattern definitions (examples)
Below are a few **starter** momentum swing patterns you can test:

1. **Trend + Breakout**: trend up AND close breaks prior N-bar high.
2. **Pullback to EMA**: trend up AND price pulls back near EMA20 then reclaims.
3. **Volatility contraction → expansion**: ATR% is compressed (low percentile rank) for M bars, then expands together with a breakout. The starter code below uses M = 1 (the prior bar only).

You should treat these as templates and iterate based on your data.

In [None]:
def add_signals(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with three boolean long-entry signal columns.

    Reads ``trend_up``, ``close``, ``hh_20``, ``ema_20``, ``atr_14`` and
    ``atrp_14`` and adds:

    * ``sig_breakout_long``: trend up and close above the previous bar's
      ``hh_20``.
    * ``sig_pullback_long``: trend up, close within 0.5 ATR of ``ema_20``,
      and close crossing from at-or-below to above ``ema_20``.
    * ``atrp_rank_50``: percentile rank (0-1] of the current ``atrp_14``
      within its trailing 50-bar window; NaN until the window is full.
    * ``sig_vol_squeeze_breakout``: previous bar's rank below 0.25, current
      rank above 0.60, and close above the previous bar's ``hh_20``.

    Comparisons against NaN inputs evaluate to False, so signals are False
    while the underlying features are still warming up.
    """
    out = df.copy()

    # Use the prior bar's rolling high to avoid lookahead: the current bar's
    # own high is part of hh_20 on the same row.
    broke_prior_high = out['close'] > out['hh_20'].shift(1)

    # 1) Trend + breakout
    out['sig_breakout_long'] = out['trend_up'] & broke_prior_high

    # 2) Pullback to EMA20 then reclaim
    # 'near' means within 0.5 ATR (tune)
    near = (out['close'] - out['ema_20']).abs() <= (0.5 * out['atr_14'])
    reclaim = (out['close'] > out['ema_20']) & (out['close'].shift(1) <= out['ema_20'].shift(1))
    out['sig_pullback_long'] = out['trend_up'] & near & reclaim

    # 3) ATR% contraction then expansion
    # Built-in rolling rank gives the rank of the window's last value
    # (average method for ties) without building a Series per window.
    out['atrp_rank_50'] = out['atrp_14'].rolling(50, min_periods=50).rank(pct=True)
    contraction = out['atrp_rank_50'] < 0.25
    expansion = out['atrp_rank_50'] > 0.60
    # fill_value=False keeps the shifted mask boolean; a plain shift would
    # insert NaN and turn the column into object dtype before the `&`.
    was_contracted = contraction.shift(1, fill_value=False)
    out['sig_vol_squeeze_breakout'] = was_contracted & expansion & broke_prior_high

    return out

# Replace df with the signal-augmented copy returned by add_signals().
df = add_signals(df)
# Last expression of the cell: the final 30 rows of close plus the three signal columns.
df[['open_dt_syd','close','sig_breakout_long','sig_pullback_long','sig_vol_squeeze_breakout']].tail(30)


## Backtest framework (simple long-only R-multiple model)
This is a pragmatic starter:
- enter on next bar open after signal
- stop at entry - k * ATR
- take profit at entry + m * ATR (single target)
- if neither is hit within the max holding bars, exit at the close of the last held bar

Outputs trade list and basic performance metrics.

In [None]:
@dataclass
class BacktestParams:
    """Settings for the long-only ATR stop/target backtest."""

    # Stop distance below the entry price, in ATR multiples.
    atr_mult_stop: float = 2.0
    # Take-profit distance above the entry price, in ATR multiples.
    atr_mult_tp: float = 3.0
    # Number of bars after the entry bar that are still checked before a time exit.
    max_hold_bars: int = 48


def backtest_long(df: pd.DataFrame, signal_col: str, p: BacktestParams) -> pd.DataFrame:
    """Simulate one long trade per signal bar and return the trade list.

    For every bar ``i`` whose ``signal_col`` value is truthy, a trade is
    opened at the open of bar ``i + 1``. The stop is placed
    ``p.atr_mult_stop`` ATRs below the entry and the target
    ``p.atr_mult_tp`` ATRs above it, using ``atr_14`` from the *signal* bar
    so that no information from the entry bar itself is used. Bars from the
    entry bar up to ``entry_i + p.max_hold_bars`` (capped at the last row)
    are scanned in order; within a bar the stop is checked before the
    target. If neither level is touched, the trade exits at the close of the
    last scanned bar.

    Signals are evaluated independently, so trades may overlap. Signals on
    the last two rows are never traded (the loop stops at ``len(df) - 3``).
    Missing signal values are treated as "no signal".

    Args:
        df: Frame with ``open``, ``high``, ``low``, ``close``, ``atr_14``,
            ``open_dt_utc``, ``open_dt_syd`` and ``signal_col`` columns.
        signal_col: Name of the entry-signal column.
        p: Stop/target multiples and maximum holding period.

    Returns:
        One row per trade with entry/exit indices and times, entry, stop,
        target and exit prices, bars held, outcome (``'stop'``, ``'tp'`` or
        ``'time'``) and the R-multiple. When there are no trades the frame
        is empty but still carries these columns.

    Raises:
        ValueError: If ``atr_14`` is not a column of *df*.
    """
    # Require ATR to be present
    if 'atr_14' not in df.columns:
        raise ValueError('ATR column atr_14 missing; run add_features first')

    trade_columns = [
        'signal', 'entry_i', 'exit_i', 'entry_time_utc', 'entry_time_syd',
        'exit_time_utc', 'entry', 'stop', 'tp', 'exit', 'bars_held',
        'outcome', 'r_multiple',
    ]

    n = len(df)

    # Pull the columns out once; positional lookups on numpy arrays avoid
    # repeated Series indexing inside the nested loops.
    sig_series = df[signal_col]
    sig_values = sig_series.to_numpy()
    sig_present = sig_series.notna().to_numpy()
    opens = df['open'].to_numpy(dtype=float, na_value=np.nan)
    highs = df['high'].to_numpy(dtype=float, na_value=np.nan)
    lows = df['low'].to_numpy(dtype=float, na_value=np.nan)
    closes = df['close'].to_numpy(dtype=float, na_value=np.nan)
    atrs = df['atr_14'].to_numpy(dtype=float, na_value=np.nan)

    rows = []
    for i in range(n - 2):
        # bool(NaN) is True, so missing values must be excluded explicitly.
        if not (sig_present[i] and bool(sig_values[i])):
            continue

        # enter next bar open to avoid lookahead
        entry_i = i + 1
        entry = float(opens[entry_i])
        # ATR of the signal bar: the entry bar's ATR already contains that
        # bar's high/low/close, which are unknown at its open.
        atrv = float(atrs[i])
        if not np.isfinite(entry) or not np.isfinite(atrv) or atrv <= 0:
            continue

        stop = entry - p.atr_mult_stop * atrv
        tp = entry + p.atr_mult_tp * atrv

        last_i = min(n - 1, entry_i + p.max_hold_bars)
        for j in range(entry_i, last_i + 1):
            # Conservative ordering assumption for long: stop can trigger before TP within bar
            if lows[j] <= stop:
                exit_i, exit_px, outcome = j, stop, 'stop'
                break
            if highs[j] >= tp:
                exit_i, exit_px, outcome = j, tp, 'tp'
                break
        else:
            # Neither level was touched within the holding window.
            exit_i, exit_px, outcome = last_i, float(closes[last_i]), 'time'

        risk = entry - stop
        r = (exit_px - entry) / risk if risk != 0 else np.nan

        rows.append({
            'signal': signal_col,
            'entry_i': entry_i,
            'exit_i': exit_i,
            'entry_time_utc': df['open_dt_utc'].iloc[entry_i],
            'entry_time_syd': df['open_dt_syd'].iloc[entry_i],
            'exit_time_utc': df['open_dt_utc'].iloc[exit_i],
            'entry': entry,
            'stop': stop,
            'tp': tp,
            'exit': exit_px,
            'bars_held': exit_i - entry_i,
            'outcome': outcome,
            'r_multiple': r,
        })

    return pd.DataFrame(rows, columns=trade_columns)


def summarize_trades(trades: pd.DataFrame) -> pd.DataFrame:
    """Return a one-row frame of summary statistics for a trade list.

    An empty *trades* frame yields a single ``n_trades`` column set to 0.
    Otherwise the row holds the trade count, the share of trades with a
    positive R-multiple, the mean/median/10th/90th-percentile R-multiple
    and the mean number of bars held.
    """
    if trades.empty:
        return pd.DataFrame([{'n_trades': 0}])

    r_mult = trades['r_multiple']
    is_win = r_mult > 0

    stats = {
        'n_trades': int(trades.shape[0]),
        'win_rate': float(is_win.mean()),
        'avg_r': float(r_mult.mean()),
        'median_r': float(r_mult.median()),
        'p10_r': float(r_mult.quantile(0.10)),
        'p90_r': float(r_mult.quantile(0.90)),
        'avg_bars_held': float(trades['bars_held'].mean()),
    }
    return pd.DataFrame([stats])


In [None]:
# One shared parameter set so the three signals are compared on equal terms.
params = BacktestParams(atr_mult_stop=2.0, atr_mult_tp=3.0, max_hold_bars=48)

# Run the same backtest once per signal column produced by add_signals().
tr_breakout = backtest_long(df, 'sig_breakout_long', params)
tr_pullback = backtest_long(df, 'sig_pullback_long', params)
tr_squeeze = backtest_long(df, 'sig_vol_squeeze_breakout', params)

# NOTE(review): `display` is not imported in this file; presumably the
# IPython/Jupyter built-in. Confirm before running outside a notebook.
display(summarize_trades(tr_breakout))
display(summarize_trades(tr_pullback))
display(summarize_trades(tr_squeeze))


## Next steps (recommended iteration path)
1. Choose a universe of liquid symbols (e.g., top 50 by volume) and run the same backtest across all symbols.
2. Add realistic fees/slippage and exclude low-liquidity bars.
3. Add filters: higher timeframe trend, volume confirmation, avoid major news windows (if applicable).
4. Improve intrabar execution model (OHLC ambiguity): best/worst-case, or use lower timeframe for execution.
5. Track out-of-sample by time splits.
