In [None]:
"""
CASCADE BACKTEST — BTC Lead-Lag Analysis
Tests whether BTC price moves predict altcoin direction.
"""
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Load all price data from our training CSVs
DATA_DIR = '/content/drive/MyDrive/renaissance-bot-training/code/data/training'

PAIRS = {
    'BTC': 'BTC-USD',
    'ETH': 'ETH-USD',
    'SOL': 'SOL-USD',
    'XRP': 'XRP-USD',
    'DOGE': 'DOGE-USD',
    'AVAX': 'AVAX-USD',
}

# Lower-cased column names that may hold the bar time, in priority order
TIMESTAMP_COLUMNS = ('timestamp', 'time', 'date', 'datetime', 'open_time')


def parse_bar_timestamps(df):
    """Return the bar times of *df* as a datetime Series.

    The first column of TIMESTAMP_COLUMNS present in *df* is used. Numeric
    columns are treated as epoch values (> 1e12 -> milliseconds, > 1e9 ->
    seconds); everything else, including ISO date strings, is handed to
    ``pd.to_datetime`` unchanged.

    Raises:
        ValueError: if *df* has none of the known time columns.
    """
    for ts_col in TIMESTAMP_COLUMNS:
        if ts_col not in df.columns:
            continue
        ts_vals = df[ts_col]
        # Only compare against the epoch thresholds when the values are
        # numbers: comparing a date string with a float raises TypeError,
        # which previously caused the whole asset to be skipped.
        if len(ts_vals) > 0 and pd.api.types.is_numeric_dtype(ts_vals):
            first = ts_vals.iloc[0]
            if first > 1e12:
                return pd.to_datetime(ts_vals, unit='ms')
            if first > 1e9:
                return pd.to_datetime(ts_vals, unit='s')
        return pd.to_datetime(ts_vals)
    raise ValueError(
        f"no timestamp column found (looked for {', '.join(TIMESTAMP_COLUMNS)})"
    )


data = {}
for name, pair in PAIRS.items():
    try:
        df = None
        # Try multiple possible file naming patterns
        for pattern in [
            f'{DATA_DIR}/{pair}.csv',
            f'{DATA_DIR}/{pair}_5m.csv',
            f'{DATA_DIR}/{pair.replace("-", "_")}_5m.csv',
            f'{DATA_DIR}/{pair.lower()}_5m.csv',
            f'{DATA_DIR}/{pair}_5min.csv',
            f'{DATA_DIR}/{pair}_5m_historical.csv',
        ]:
            try:
                df = pd.read_csv(pattern)
            except FileNotFoundError:
                continue
            print(f"Loaded {name} from {pattern.split('/')[-1]}")
            break

        if df is None:
            print(f"WARNING: No file found for {pair}")
            continue

        # Normalize columns
        df.columns = [c.lower().strip() for c in df.columns]

        # Ensure timestamp column exists and is datetime
        df['timestamp'] = parse_bar_timestamps(df)

        df = df.sort_values('timestamp').reset_index(drop=True)

        # Compute returns (bar-over-bar change of the close)
        df['return_1bar'] = df['close'].pct_change()

        data[name] = df
        print(f"  {name}: {len(df):,} rows, {df['timestamp'].min()} to {df['timestamp'].max()}")
    except Exception as e:
        # Best-effort load: report the failure and carry on with the other assets
        print(f"ERROR loading {pair}: {e}")

print(f"\nLoaded {len(data)} assets")

# Restrict every asset to the window in which all loaded assets have data
if len(data) >= 2:
    first_bars = [frame['timestamp'].min() for frame in data.values()]
    last_bars = [frame['timestamp'].max() for frame in data.values()]
    common_start = max(first_bars)   # latest first bar across assets
    common_end = min(last_bars)      # earliest last bar across assets
    print(f"Common range: {common_start} to {common_end}")

    # Keep only the bars inside [common_start, common_end] (both ends inclusive)
    for name, frame in list(data.items()):
        in_window = frame['timestamp'].between(common_start, common_end)
        data[name] = frame[in_window].reset_index(drop=True)
        print(f"  {name}: {len(data[name]):,} rows after trim")

In [None]:
"""
Find all moments where BTC moved sharply.
These are the "trigger events" that should cascade to alts.
"""

btc = data['BTC'].copy()

# Close-to-close returns over 1, 2, 3 and 6 bars (5, 10, 15 and 30 minutes)
for n_bars in (1, 2, 3, 6):
    btc[f'ret_{n_bars}bar'] = btc['close'].pct_change(n_bars)

# Trigger definitions: label -> (return column, minimum absolute move)
TRIGGERS = {
    '1bar_0.3pct': ('ret_1bar', 0.003),   # 0.3% in 5 minutes
    '1bar_0.5pct': ('ret_1bar', 0.005),   # 0.5% in 5 minutes
    '1bar_1.0pct': ('ret_1bar', 0.010),   # 1.0% in 5 minutes (rare)
    '2bar_0.4pct': ('ret_2bar', 0.004),   # 0.4% in 10 minutes
    '3bar_0.5pct': ('ret_3bar', 0.005),   # 0.5% in 15 minutes
    '6bar_0.8pct': ('ret_6bar', 0.008),   # 0.8% in 30 minutes
}

# Flag every bar whose absolute return reaches the threshold, per trigger
trigger_counts = {}
for label, (ret_col, min_move) in TRIGGERS.items():
    fired = btc[ret_col].abs().ge(min_move)
    n_fired = fired.sum()
    trigger_counts[label] = n_fired
    btc[f'trigger_{label}'] = fired
    print(f"{label}: {n_fired:,} triggers ({fired.mean()*100:.2f}% of bars)")

# Most useful trigger for Cascade: 1bar_0.3pct (sharp 5-min move)
primary_trigger = btc.loc[btc['trigger_1bar_0.3pct']].copy()
went_up = primary_trigger['ret_1bar'] > 0
primary_trigger['btc_direction'] = np.where(went_up, 'UP', 'DOWN')
direction = primary_trigger['btc_direction']
print(f"\nPrimary triggers (0.3% in 5min): {len(primary_trigger):,}")
print(f"  UP:   {(direction == 'UP').sum():,}")
print(f"  DOWN: {(direction == 'DOWN').sum():,}")

In [None]:
"""
For each BTC trigger, check if each alt followed in same direction.

"Follow-through" means: alt moved in BTC's direction within N bars.
We measure at bar +1, +2, +3, +6 (5 min to 30 min).
"""

# Align all data by timestamp
# BTC returns are computed on BTC's own consecutive bars, before joining with any alt
btc_ts = data['BTC'][['timestamp', 'close']].rename(columns={'close': 'btc_close'})
btc_ts['btc_ret_1bar'] = btc_ts['btc_close'].pct_change()

# Forward horizons as (label, number of bars after the trigger bar)
FORWARD_HORIZONS = [('t+1', 1), ('t+2', 2), ('t+3', 3), ('t+6', 6)]


def followed_btc(alt_ret, btc_dir):
    """Return True when *alt_ret* has the same sign as *btc_dir*.

    A flat (zero) or NaN alt return never counts as following.
    """
    return bool((alt_ret > 0 and btc_dir > 0) or (alt_ret < 0 and btc_dir < 0))


# results[alt] = one row per BTC trigger event with the alt's same-bar and forward returns
results = {}
for alt_name in [k for k in data.keys() if k != 'BTC']:
    alt_col = f'{alt_name}_close'
    alt = data[alt_name][['timestamp', 'close']].rename(columns={'close': alt_col})

    # Merge on timestamp (inner join — only keep bars where both have data)
    merged = pd.merge(btc_ts, alt, on='timestamp', how='inner')
    merged[f'{alt_name}_ret_1bar'] = merged[alt_col].pct_change()

    # For each BTC trigger, compute alt returns over next N bars
    trigger_mask = merged['btc_ret_1bar'].abs() >= 0.003  # 0.3% threshold
    trigger_indices = merged.index[trigger_mask].tolist()

    follow_data = []
    for idx in trigger_indices:
        btc_ret = merged.loc[idx, 'btc_ret_1bar']
        btc_dir = 1 if btc_ret > 0 else -1
        alt_cur_price = merged.loc[idx, alt_col]

        # The event time is kept so events can later be grouped by date
        row = {
            'timestamp': merged.loc[idx, 'timestamp'],
            'btc_ret': btc_ret,
            'btc_dir': btc_dir,
        }

        # Alt return from the trigger bar's close to the close N bars later
        for offset_name, offset in FORWARD_HORIZONS:
            if idx + offset < len(merged):
                alt_fwd_ret = merged.loc[idx + offset, alt_col] / alt_cur_price - 1
                row[f'{offset_name}_ret'] = alt_fwd_ret
                row[f'{offset_name}_follow'] = followed_btc(alt_fwd_ret, btc_dir)
                row[f'{offset_name}_magnitude'] = abs(alt_fwd_ret)
            else:
                # Trigger too close to the end of the data: outcome unknown
                row[f'{offset_name}_ret'] = np.nan
                row[f'{offset_name}_follow'] = np.nan
                row[f'{offset_name}_magnitude'] = np.nan

        # Also: did alt ALREADY move at t+0? (contemporaneous, not leading)
        alt_same_bar_ret = merged.loc[idx, f'{alt_name}_ret_1bar']
        row['t+0_ret'] = alt_same_bar_ret
        row['t+0_follow'] = followed_btc(alt_same_bar_ret, btc_dir)
        # The summary below reads a magnitude for every horizon, t+0 included;
        # without this column it raised KeyError on the first asset.
        row['t+0_magnitude'] = abs(alt_same_bar_ret)

        follow_data.append(row)

    follow_df = pd.DataFrame(follow_data)
    results[alt_name] = follow_df

    # Summary statistics
    print(f"\n{'='*60}")
    print(f"{alt_name} following BTC (0.3% trigger, {len(follow_df):,} events)")
    print(f"{'='*60}")
    for horizon in ['t+0', 't+1', 't+2', 't+3', 't+6']:
        col = f'{horizon}_follow'
        if col in follow_df.columns:
            hit_rate = follow_df[col].mean() * 100
            avg_mag = follow_df[f'{horizon}_magnitude'].mean() * 100
            print(f"  {horizon} (={'same bar' if horizon=='t+0' else horizon.replace('t+','')+'x5min'}): "
                  f"{hit_rate:.1f}% follow rate, "
                  f"avg move {avg_mag:.3f}%")

In [None]:
"""
Does the follow-through rate increase with BTC move size?
i.e., bigger BTC moves = more reliable cascade?
"""

print("FOLLOW-THROUGH RATE BY BTC MOVE SIZE")
print("="*80)

# Buckets of absolute BTC move: (inclusive lower bound, exclusive upper bound, label)
btc_bins = [
    (0.003, 0.005, '0.3-0.5%'),
    (0.005, 0.008, '0.5-0.8%'),
    (0.008, 0.012, '0.8-1.2%'),
    (0.012, 0.020, '1.2-2.0%'),
    (0.020, 1.000, '2.0%+'),
]

for alt_name, follow_df in results.items():
    print(f"\n{alt_name}:")
    print(f"  {'BTC Move':<12} {'Events':<8} {'t+1 HR':<10} {'t+2 HR':<10} {'t+3 HR':<10}")
    print(f"  {'-'*50}")

    for lo, hi, label in btc_bins:
        move_size = follow_df['btc_ret'].abs()
        subset = follow_df[(move_size >= lo) & (move_size < hi)]
        # Buckets with fewer than 10 events are too thin to report
        if len(subset) < 10:
            continue

        # Hit rate in percent per horizon; 0 when the column is absent
        t1, t2, t3 = (
            subset[flag].mean() * 100 if flag in subset else 0
            for flag in ('t+1_follow', 't+2_follow', 't+3_follow')
        )

        print(f"  {label:<12} {len(subset):<8} {t1:<10.1f} {t2:<10.1f} {t3:<10.1f}")

In [None]:
"""
Measure the actual lagged correlation between BTC and each alt.
This validates (or kills) our assumed correlation numbers.
"""

print("LAGGED CROSS-CORRELATION: BTC -> ALT")
print("="*80)
print("(Positive lag = BTC leads, Negative lag = ALT leads)")
print()

# correlation_results[alt][lag] = corr(BTC return at bar t, ALT return at bar t+lag)
correlation_results = {}

for alt_name in [k for k in data.keys() if k != 'BTC']:
    merged = pd.merge(
        data['BTC'][['timestamp', 'close']].rename(columns={'close': 'btc_close'}),
        data[alt_name][['timestamp', 'close']].rename(columns={'close': f'{alt_name}_close'}),
        on='timestamp', how='inner'
    )
    # Both return series keep the merged frame's index so they stay aligned bar-for-bar
    btc_ret = merged['btc_close'].pct_change()
    alt_ret = merged[f'{alt_name}_close'].pct_change()

    # Compute cross-correlation at different lags
    # lag > 0: BTC at time t vs ALT at time t+lag (BTC leads)
    # lag < 0: ALT leads BTC
    lags = range(-5, 6)  # -25min to +25min in 5-min steps
    # Series.corr pairs values by index label, so the lag is applied with
    # shift(): shift(-lag) moves ALT's return from bar t+lag onto bar t.
    # (Slicing one side and resetting only its index paired the wrong bars
    # and measured every non-zero lag one bar too far out.)
    # Bars without a partner become NaN and are ignored by corr().
    correlations = [btc_ret.corr(alt_ret.shift(-lag)) for lag in lags]

    correlation_results[alt_name] = dict(zip(lags, correlations))
    contemp_corr = correlation_results[alt_name][0]

    print(f"{alt_name}:")
    print(f"  Contemporaneous (lag=0): {contemp_corr:.4f}")

    # Find peak (largest signed correlation), ignoring lags that produced NaN
    corr_by_lag = pd.Series(correlations, index=list(lags), dtype=float)
    if corr_by_lag.notna().any():
        peak_lag = int(corr_by_lag.idxmax())
        peak_corr = corr_by_lag.max()
        print(f"  Peak correlation: {peak_corr:.4f} at lag={peak_lag} ({peak_lag*5}min)")
    else:
        # Too few overlapping bars to compute any correlation
        peak_lag = None
        peak_corr = np.nan
        print("  Peak correlation: n/a (not enough overlapping data)")

    for l, c in zip(lags, correlations):
        marker = " <<<" if l == peak_lag else ""
        print(f"    {l*5:+3d}min: {c:.4f}{marker}")
    print()

# Rolling same-bar correlation, to see whether the BTC/alt link drifts over time
print("\n\nROLLING 30-DAY CORRELATION (contemporaneous)")
print("="*80)

# 30 days of 5-minute bars: 30 * 288 = 8640
ROLLING_WINDOW_BARS = 30 * 288

for alt_name in [k for k in data.keys() if k != 'BTC']:
    btc_px = data['BTC'][['timestamp', 'close']].rename(columns={'close': 'btc_close'})
    alt_px = data[alt_name][['timestamp', 'close']].rename(columns={'close': f'{alt_name}_close'})
    # Inner join keeps only the bars present for both assets
    merged = btc_px.merge(alt_px, on='timestamp', how='inner')

    btc_ret = merged['btc_close'].pct_change()
    alt_ret = merged[f'{alt_name}_close'].pct_change()

    rolling_corr = btc_ret.rolling(window=ROLLING_WINDOW_BARS).corr(alt_ret)
    lowest = rolling_corr.min()
    highest = rolling_corr.max()
    average = rolling_corr.mean()
    latest = rolling_corr.iloc[-1]
    print(f"{alt_name}:")
    print(f"  Min: {lowest:.3f}  Max: {highest:.3f}  "
          f"Mean: {average:.3f}  Current (last 30d): {latest:.3f}")

In [None]:
"""
Simulate the actual Cascade strategy as if betting on Polymarket.

Assumptions:
  - We bet $20 per trade (fixed size for simplicity)
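  - Entry: buy shares in BTC's direction at 50 cents whenever BTC moves >0.3% in one bar
    (e.g. "DOWN" shares when BTC drops, "UP" shares when it rallies)
  - Win condition: alt has moved in BTC's direction 3 bars (15 min) after the trigger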
  - Payout: $1.00 per share if we win, $0 if we lose
  - So: win = +$20, lose = -$20 (at 50c entry)
  - No Polymarket fees (they're zero)
"""

BET_SIZE = 20.0
ENTRY_PRICE = 0.50  # Assume crowd is always at 50% when we bet (conservative)
BTC_THRESHOLD = 0.003  # 0.3% trigger

print("CASCADE P&L SIMULATION")
print("="*80)
print(f"Bet size: ${BET_SIZE:.2f} | Entry: {ENTRY_PRICE:.0%} | Threshold: {BTC_THRESHOLD:.1%}")
print()

# Profit on a winning bet: BET_SIZE buys BET_SIZE / ENTRY_PRICE shares that
# each pay $1.00, minus the stake. At a 50c entry this is +BET_SIZE.
WIN_PROFIT = BET_SIZE / ENTRY_PRICE - BET_SIZE

# BTC returns over BTC's own consecutive bars, used below to recover the
# timestamp of each trigger event
btc_events = data['BTC'][['timestamp', 'close']].copy()
btc_events['btc_ret'] = btc_events['close'].pct_change()

for alt_name, follow_df in results.items():
    print(f"{alt_name}:")
    if 't+3_follow' not in follow_df.columns:
        # No trigger events were recorded for this asset
        print("  No trigger events")
        print()
        continue

    # Use t+3 (15-minute) as resolution horizon. Events too close to the end
    # of the data have no t+3 bar (NaN): they are neither wins nor losses,
    # so they are left out instead of being counted as losses.
    resolved = follow_df['t+3_follow'].notna()
    outcomes = follow_df.loc[resolved, 't+3_follow'].astype(bool)
    total = len(outcomes)
    wins = int(outcomes.sum())
    losses = total - wins

    win_rate = wins / total if total > 0 else 0
    gross_pnl_wins = wins * WIN_PROFIT          # Win: shares*$1 - cost
    gross_pnl_losses = losses * (-BET_SIZE)     # Lose: lose entire bet
    net_pnl = gross_pnl_wins + gross_pnl_losses
    per_bet = net_pnl / total if total > 0 else 0
    roi = net_pnl / (total * BET_SIZE) * 100 if total > 0 else 0

    print(f"  Events: {total:,}  Won: {wins:,}  Lost: {losses:,}  WR: {win_rate:.1%}")
    print(f"  Net P&L: ${net_pnl:+,.2f}  Per bet: ${per_bet:+.2f}  ROI: {roi:+.1f}%")

    # Breakdown by year.
    # Recover each event's timestamp by repeating the event selection used to
    # build `results`: BTC bars that also exist for this alt (inner join) and
    # whose BTC return reaches the threshold, in chronological order.
    merged = pd.merge(btc_events, data[alt_name][['timestamp']], on='timestamp', how='inner')
    trigger_ts = merged.loc[merged['btc_ret'].abs() >= BTC_THRESHOLD, 'timestamp']
    if len(trigger_ts) == len(follow_df):
        follow_df_ts = follow_df.copy()
        follow_df_ts['year'] = trigger_ts.dt.year.to_numpy()
        follow_df_ts = follow_df_ts[resolved]
        print(f"  By year:")
        for year, group in follow_df_ts.groupby('year'):
            yr_wr = group['t+3_follow'].astype(bool).mean()
            yr_n = len(group)
            print(f"    {year}: {yr_n:,} events, {yr_wr:.1%} WR")
    else:
        # Event list and recomputed triggers disagree; do not guess the years
        print("  By year: skipped (could not match events to timestamps)")
    print()

In [None]:
"""
When does BTC move but alts DON'T follow?
Understanding failure modes is critical for risk management.
"""

print("CASCADE FAILURE ANALYSIS")
print("="*80)

for alt_name, follow_df in results.items():
    # Big BTC moves (>= 0.5%) and, among them, events the alt did not follow at t+3
    big_btc = follow_df.loc[follow_df['btc_ret'].abs() >= 0.005]
    failures = big_btc.loc[big_btc['t+3_follow'] == False]
    n_big = len(big_btc)
    n_failed = len(failures)

    print(f"\n{alt_name} \u2014 Big BTC moves (>0.5%) that alt didn't follow:")
    print(f"  Total big moves: {n_big:,}")
    if n_big > 0:
        print(f"  Failures: {n_failed:,} ({n_failed/n_big*100:.1f}%)")
    else:
        print("  No big moves")

    if n_failed > 0:
        # Split the failures: alt ran against BTC by more than 0.2%, or barely moved
        signed_move = failures['t+3_ret'] * failures['btc_dir']   # negative = against BTC
        opp_dir = (signed_move < -0.002).sum()
        no_move = (failures['t+3_ret'].abs() < 0.001).sum()
        print(f"  Alt moved OPPOSITE (>0.2%): {opp_dir} ({opp_dir/n_failed*100:.1f}%)")
        print(f"  Alt didn't move (<0.1%): {no_move} ({no_move/n_failed*100:.1f}%)")

In [None]:
"""
Final summary table with the numbers that matter for Cascade deployment.
"""

print()
print("="*80)
print("CASCADE BACKTEST SUMMARY \u2014 THE VERDICT")
print("="*80)
print()
print(f"{'Asset':<8} {'Events':<8} {'t+1 WR':<10} {'t+2 WR':<10} {'t+3 WR':<10} "
      f"{'Contemp':<10} {'P&L/bet':<10} {'DEPLOY?':<10}")
print("-"*80)

# One table row per alt: event count, win rates, same-bar correlation, verdict
for alt_name, follow_df in results.items():
    n = len(follow_df)
    # Win rate per horizon; 0 when the column is missing
    wr1, wr2, wr3 = (
        follow_df[flag].mean() if flag in follow_df else 0
        for flag in ('t+1_follow', 't+2_follow', 't+3_follow')
    )
    contemp = correlation_results.get(alt_name, {}).get(0, 0)

    # Expected P&L per $20 bet at a 50c entry: (wr - 0.5) * 40
    pnl_per = (wr3 - 0.5) * 40

    # Verdict from the t+3 win rate: above 55% YES, above 52% MAYBE, otherwise NO
    if wr3 > 0.55:
        deploy = "YES"
    elif wr3 > 0.52:
        deploy = "MAYBE"
    else:
        deploy = "NO"

    print(f"{alt_name:<8} {n:<8,} {wr1:<10.1%} {wr2:<10.1%} {wr3:<10.1%} "
          f"{contemp:<10.3f} ${pnl_per:<9.2f} {deploy:<10}")

# Legend and decision rule; an empty string prints a blank line
for legend_line in (
    "",
    "KEY:",
    "  t+1/t+2/t+3 WR = Win rate at 5/10/15 minutes after BTC trigger",
    "  Contemp = Contemporaneous correlation (same 5-min bar)",
    "  P&L/bet = Expected profit per $20 bet at 50c entry",
    "  DEPLOY = YES if t+3 WR > 55%, MAYBE if > 52%, NO otherwise",
    "",
    "If ALL alts show t+3 WR > 55%: Cascade is validated. Deploy.",
    "If SOME alts > 55%: Deploy those, skip the rest.",
    "If NO alts > 55%: Kill the Cascade strategy. It doesn't work.",
):
    print(legend_line)