In [None]:
# --- Imports ---
# scipy.stats.norm: CDF for Black-Scholes d2 probability
# data_manager: Binance klines + Polymarket S3 orderbook loaders
# backtest: binary token backtester (proportional / fixed sizing)
from data_manager import BinanceManager, PolymarketManager
from scipy.stats import norm

from backtest import backtest, backtest_fixed, backtest_summary  # noqa: F401

In [None]:
# --- Binance 1m klines (BTCUSDT perpetual) ---
# Pulls 1-minute OHLCV candles for the USDT-margined BTCUSDT perpetual from the
# Binance Futures API, starting 2026-01-01.
# Candles are cached as CSV under ./data/, so a re-run only requests the candles
# that are not on disk yet.
# Later cells take the spot price (S) and the realized-volatility input of the
# BS model from this frame.
binance = BinanceManager(base_dir='./data')
btc_klines = binance.fetch_klines(
    'usdm',
    'BTCUSDT',
    '1m',
    start='2026-01-01',
)

In [None]:
# --- Polymarket 15-min BTC binary option orderbook (1-min snapshots from S3) ---
# Loads Up-token orderbook snapshots from a private S3 bucket. Parquet files are
# kept in the local cache, so a re-run only downloads the windows still missing.
#
# Columns of book_df:
#   start_time / end_time               - boundaries of the 15-min window
#   up_best_bid/ask, down_best_bid/ask  - best (level-1) quotes
#   up_mid / down_mid                   - mid prices, (bid + ask) / 2
#   window_close_up_mid                 - last up mid observed before the window settles
#   resolved                            - settlement outcome: 'UP' / 'DOWN' / 'UNKNOWN'
pm = PolymarketManager(base_dir='./data')
book_df = pm.load_book_df(
    asset='BTC',
    freq='15M',
    start_date='2026-01-01',
    end_date='2026-03-31',
    log=True,
)
book_df

In [None]:
# --- Feature engineering: Black-Scholes d2 probability ---
# Estimates the fair probability of each 15-min window settling UP with the BS model.
#   S = current spot price (1m open)
#   K = strike = spot at window open (resampled & forward-filled within the window)
#   T = minutes remaining until window expiry (15 on the boundary row, 1 on the last row)
#   sigma = rolling std of 1m log returns, scaled by 1/sqrt(T) and by a factor of 2
#   d2 = (ln(S/K) - 0.5*sigma^2*T) / (sigma*sqrt(T))
#   up_token_prob = N(d2)  -- normal CDF of d2
import numpy as np

freq_minutes = 15
start_date = '2026'

# T is derived from the minute-of-hour while K comes from resample() bins; the two
# only describe the same windows when the window length divides an hour evenly.
assert 60 % freq_minutes == 0, 'freq_minutes must divide 60 for T to match the resample windows'

# Slice first, then copy: only the rows that are kept get duplicated, and
# `features` is an independent frame that is safe to add columns to.
features = btc_klines.loc[start_date:].copy()
features['S'] = features['open']

# Strike: open of the first candle in each window. The resampled series is
# labelled by window start, so index alignment on assignment fills only the
# boundary rows; the rest of each window is forward-filled on the next line.
features['K'] = features['open'].resample(f'{freq_minutes}min').first()
# A window has freq_minutes - 1 non-boundary rows. Capping the fill at that count
# keeps a stale strike from spilling into the first row of the next window when
# that window's boundary candle is missing from the data.
features['K'] = features['K'].ffill(limit=freq_minutes - 1)

features['T'] = freq_minutes - (features.index.minute % freq_minutes)
features['r_vol'] = np.log(features['open']).diff().rolling(window=freq_minutes).std()
features['adj_term'] = 1 / np.sqrt(features['T'])

S = features['S']
K = features['K']
T = features['T']
# NOTE(review): r_vol is a per-1-minute std, and with this scaling sigma*sqrt(T)
# reduces to 2*r_vol for every T, i.e. the horizon vol does not grow with the time
# remaining. The factor 2 is undocumented -- confirm both are intended.
sigma = (features['r_vol'] * features['adj_term']) * 2
d2 = (np.log(S / K) - (0.5 * sigma ** 2) * T) / (sigma * np.sqrt(T))
features['up_token_prob'] = norm.cdf(d2)
features['down_token_prob'] = 1 - features['up_token_prob']
features

In [None]:
# --- Backtest: momentum signal near expiry ---
# Entry rules (both require fewer than 4 minutes left in the window):
#   UP   token is bought when up_mid   > 0.7 (market already confident)
#   DOWN token is bought when down_mid > 0.7
#
# Arguments passed to backtest():
#   initial_balance - starting capital ($)
#   bet_pct         - fraction of balance risked per window (0.01 = 1%)
#   fee             - apply Polymarket variable fee (0.25 * (p*(1-p))^2)
#   slippage_bps    - execution slippage in basis points (5 = 0.05%)
#
# backtest_fixed() is the alternative that bets a constant dollar bet_size per signal.

# The time filter is identical for both sides, so build it once.
near_expiry = features['T'] < 4

# `&` aligns the book and kline Series on their indexes; reindex then keeps
# exactly the rows of book_df.
up_cond = ((book_df['up_mid'] > 0.7) & near_expiry).reindex(book_df.index)
down_cond = ((book_df['down_mid'] > 0.7) & near_expiry).reindex(book_df.index)

trades = backtest(
    book_df,
    up_condition=up_cond,
    down_condition=down_cond,
    initial_balance=1000,
    bet_pct=0.01,
    fee=True,
    slippage_bps=5,
)

metrics = backtest_summary(trades)
metrics

In [None]:
# --- Performance dashboard ---
# 2x2 grid of panels:
#   [0,0] Cumulative PnL   - equity curve; flat stretches = S3 data gaps
#   [0,1] Drawdown         - peak-to-trough decline in dollars
#   [1,0] PnL Distribution - per-market win/loss histogram; dashed line marks the mean
#   [1,1] Win Rate         - expanding (cumulative) win rate over time
#
# A one-line summary is printed under the figure:
#   Sharpe/Mkt = mean(pnl) / std(pnl) per market (not annualized)
import matplotlib.pyplot as plt

fig, axes = plt.subplots(2, 2, figsize=(14, 8))
idx = metrics.index
cum_pnl = metrics['cum_pnl'].values

# Panel [0,0]: equity curve, shaded blue above zero and red below.
ax = axes[0, 0]
ax.plot(idx, cum_pnl, linewidth=0.9, drawstyle='steps-post')
ax.axhline(0, color='k', linewidth=0.4)
for mask, shade in ((cum_pnl >= 0, 'C0'), (cum_pnl < 0, 'red')):
    ax.fill_between(idx, cum_pnl, 0, where=mask, alpha=0.15, color=shade, step='post')
ax.set(title='Cumulative PnL', ylabel='$')
ax.grid(True, alpha=0.3)

# Panel [0,1]: drawdown, negated so it is drawn below the zero line.
ax = axes[0, 1]
dd = metrics['drawdown'].values
ax.fill_between(idx, -dd, 0, color='red', alpha=0.4, step='post')
ax.set(title='Drawdown', ylabel='$')
ax.grid(True, alpha=0.3)

# Panel [1,0]: per-market PnL split into wins (> 0) and losses (<= 0).
ax = axes[1, 0]
pnl = metrics['total_pnl']
wins = pnl[pnl > 0]
losses = pnl[pnl <= 0]
ax.hist(wins, bins=30, alpha=0.7, color='steelblue', label=f'Win  {len(wins)}')
ax.hist(losses, bins=30, alpha=0.7, color='salmon', label=f'Loss {len(losses)}')
ax.axvline(pnl.mean(), color='k', linestyle='--', linewidth=0.8,
           label=f'Mean {pnl.mean():.2f}')
ax.set(title='PnL Distribution (per market)', xlabel='$')
ax.legend(fontsize=8)
ax.grid(True, alpha=0.3)

# Panel [1,1]: running win rate against a 50% reference line.
ax = axes[1, 1]
ax.plot(idx, metrics['win_rate'].values, linewidth=0.9, color='teal', drawstyle='steps-post')
ax.axhline(0.5, color='k', linestyle='--', linewidth=0.4)
ax.set(title='Cumulative Win Rate', ylabel='Rate')
ax.set_ylim(0, 1)
ax.grid(True, alpha=0.3)

# Same x-axis tick styling on every panel.
for ax in axes.flat:
    ax.tick_params(axis='x', rotation=30, labelsize=8)
    ax.xaxis.set_visible(True)

plt.tight_layout()
plt.show()

# Headline numbers for the whole run.
n = len(metrics)
total = metrics['total_pnl'].sum()
wr = metrics['win'].mean()
pnl_std = metrics['total_pnl'].std()
# A non-positive or NaN std (e.g. fewer than two markets) falls back to 0.
sharpe = metrics['total_pnl'].mean() / pnl_std if pnl_std > 0 else 0
max_dd = metrics['drawdown'].max()
print(f"Markets: {n} | PnL: ${total:,.2f} | Win Rate: {wr:.1%} | "
      f"Sharpe/Mkt: {sharpe:.3f} | Max DD: ${max_dd:,.2f}")