In [None]:
# =============================================================================
# CELL 1: Setup, imports, mount Drive, define crash periods
# =============================================================================

import subprocess, sys

# Google Drive is mounted first: DRIVE_DIR (defined further down) lives under this mount.
from google.colab import drive
drive.mount('/content/drive')

# Install the third-party packages with the same interpreter that runs this kernel.
pip_packages = ['lightgbm', 'yfinance']
pip_command = [sys.executable, '-m', 'pip', 'install', '-q'] + pip_packages
subprocess.check_call(pip_command)

import os
import time
import json
import pickle
import requests
import numpy as np
import pandas as pd
import lightgbm as lgb
import yfinance as yf
from datetime import datetime, timedelta
from sklearn.metrics import accuracy_score, roc_auc_score

# Directories
# DRIVE_DIR sits under the mounted Drive; LOCAL_DIR is a path on the machine running
# the kernel. Both are created up front so later cells can write to them directly.
DRIVE_DIR = '/content/drive/MyDrive/renaissance-bot-training/crash_models_v3'
LOCAL_DIR = '/content/data'
os.makedirs(DRIVE_DIR, exist_ok=True)
os.makedirs(LOCAL_DIR, exist_ok=True)

# Crash period definitions: (start date, end date, display name).
CRASH_PERIODS = [
    ('2018-01-07', '2018-12-15', 'ICO Bust'),
    ('2021-11-10', '2022-11-21', 'Terra/Luna + FTX + Fed'),
    ('2025-10-06', '2026-02-28', 'Current macro crash'),
]

print('[OK] Setup complete')
print(f'Drive dir: {DRIVE_DIR}')
print(f'Local dir: {LOCAL_DIR}')
print()
print('Crash periods:')
for i, (s, e, name) in enumerate(CRASH_PERIODS, 1):
    print(f'  Crash {i}: {s} to {e} -- {name}')
print()
print(f'[OK] LightGBM version: {lgb.__version__}')
# time.gmtime() gives the current UTC time without datetime.utcnow(), which is
# deprecated since Python 3.12. The printed text is unchanged.
print(f'[OK] Current time (UTC): {time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())}')

In [None]:
# =============================================================================
# CELL 2: Download BTC and ETH 5-minute candles from Binance
# =============================================================================

# Endpoint queried by download_binance_klines() for candlestick pages.
BASE_URL = "https://data-api.binance.vision/api/v3/klines"

# Column names applied, in order, to every row of a klines response.
KLINE_COLS = [
    'open_time',
    'open',
    'high',
    'low',
    'close',
    'volume',
    'close_time',
    'quote_volume',
    'trades',
    'taker_buy_base',
    'taker_buy_quote',
    'ignore',
]

# Subset of KLINE_COLS that download_binance_klines() converts with pd.to_numeric.
NUMERIC_COLS = [
    'open',
    'high',
    'low',
    'close',
    'volume',
    'quote_volume',
    'taker_buy_base',
    'taker_buy_quote',
]


def download_binance_klines(symbol, start_date, interval='5m'):
    """Download klines from Binance data-api with pagination.

    Pages of up to 1000 candles are requested from BASE_URL, starting at
    ``start_date``. After each page ``startTime`` is moved just past the last
    candle received. The loop ends when the cursor reaches the time at which the
    download started, when the API returns an empty page, or when more than
    ``max_retries`` consecutive requests fail.

    Args:
        symbol: Value sent as the ``symbol`` query parameter (e.g. 'BTCUSDT').
        start_date: Anything ``pd.Timestamp`` accepts; converted to epoch milliseconds.
        interval: Value sent as the ``interval`` query parameter.

    Returns:
        DataFrame with the KLINE_COLS columns plus a datetime ``timestamp`` column
        derived from ``open_time``, de-duplicated on ``open_time`` and sorted by it.
        If nothing could be downloaded, an empty frame with the same columns.
    """
    start_ts = int(pd.Timestamp(start_date).timestamp() * 1000)
    # time.time() is the real epoch time. datetime.utcnow().timestamp() would
    # re-interpret the naive UTC value as *local* time, which is only correct
    # when the machine's timezone happens to be UTC.
    now_ts = int(time.time() * 1000)

    all_rows = []
    current_ts = start_ts
    total_fetched = 0
    retries = 0          # consecutive failures; reset after every successful page
    max_retries = 5

    print(f'Downloading {symbol} {interval} from {start_date} ...')

    while current_ts < now_ts:
        params = {
            'symbol': symbol,
            'interval': interval,
            'startTime': current_ts,
            'limit': 1000
        }
        try:
            resp = requests.get(BASE_URL, params=params, timeout=30)
            if resp.status_code in (451, 403):
                retries += 1
                if retries > max_retries:
                    print(f'[WARN] Too many 451/403 errors, stopping at {total_fetched} candles')
                    break
                print(f'[WARN] HTTP {resp.status_code}, retry {retries}/{max_retries}, sleeping 5s ...')
                time.sleep(5)
                continue
            resp.raise_for_status()
            data = resp.json()
            if not data:
                break

            retries = 0  # reset on success
            all_rows.extend(data)
            total_fetched += len(data)

            # Progress: print once each time the running total passes a multiple
            # of 100,000 (pages hold at most 1000 rows, hence the "< 1000" window).
            if total_fetched % 100000 < 1000:
                dt_str = pd.Timestamp(data[-1][0], unit='ms').strftime('%Y-%m-%d')
                print(f'  {total_fetched:>9,} candles ... last date: {dt_str}')

            # Advance past the last candle
            current_ts = data[-1][0] + 1
            time.sleep(0.1)

        except requests.exceptions.Timeout:
            retries += 1
            if retries > max_retries:
                print(f'[WARN] Too many timeouts, stopping at {total_fetched} candles')
                break
            print(f'[WARN] Timeout, retry {retries}/{max_retries}, sleeping 5s ...')
            time.sleep(5)
            continue
        except Exception as e:
            # Any other failure (HTTP error status, bad JSON, connection error)
            # is retried the same way as a timeout.
            retries += 1
            if retries > max_retries:
                print(f'[WARN] Error: {e}, stopping at {total_fetched} candles')
                break
            print(f'[WARN] Error: {e}, retry {retries}/{max_retries}, sleeping 5s ...')
            time.sleep(5)
            continue

    print(f'[OK] Total: {total_fetched:,} candles for {symbol}')

    if not all_rows:
        # Keep the schema identical to the non-empty case so callers that read
        # the 'timestamp' column do not fail with a KeyError on an empty download.
        empty = pd.DataFrame(columns=KLINE_COLS)
        empty['timestamp'] = pd.Series(dtype='datetime64[ns]')
        return empty

    df = pd.DataFrame(all_rows, columns=KLINE_COLS)
    for col in NUMERIC_COLS:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    df['trades'] = pd.to_numeric(df['trades'], errors='coerce').astype(int)
    df['timestamp'] = pd.to_datetime(df['open_time'], unit='ms')
    df = df.drop_duplicates(subset=['open_time']).sort_values('open_time').reset_index(drop=True)
    return df


# BTC history (5-minute default interval) from the common start date.
btc_df = download_binance_klines('BTCUSDT', '2017-09-01')
print(f'BTC shape: {btc_df.shape}, date range: {btc_df["timestamp"].min()} to {btc_df["timestamp"].max()}')

# ETH history over the same span.
eth_df = download_binance_klines('ETHUSDT', '2017-09-01')
print(f'ETH shape: {eth_df.shape}, date range: {eth_df["timestamp"].min()} to {eth_df["timestamp"].max()}')

# Write every frame twice: first under LOCAL_DIR, then under DRIVE_DIR.
for name, df in {'btc_5m_full': btc_df, 'eth_5m_full': eth_df}.items():
    csv_name = f'{name}.csv'
    local_path = os.path.join(LOCAL_DIR, csv_name)
    drive_path = os.path.join(DRIVE_DIR, csv_name)
    for target in (local_path, drive_path):
        df.to_csv(target, index=False)
    print(f'[OK] Saved {name}: {len(df):,} rows -> {drive_path}')

print()
print('[DONE] BTC and ETH candle download complete')

In [None]:
# =============================================================================
# CELL 3: Download macro data (daily) + Fear & Greed
# =============================================================================

# Yahoo Finance ticker -> short column name used for that series in macro_df.
# NOTE(review): '^IXIC' is, as far as I know, the Nasdaq Composite rather than the
# Nasdaq-100 -- confirm that the 'ndx' short name is intended.
MACRO_TICKERS = {
    '^GSPC': 'spx',
    '^VIX': 'vix',
    'DX-Y.NYB': 'dxy',
    '^IXIC': 'ndx',
    '^TNX': 'us10y',
    'GC=F': 'gold',
}

# short name -> 'Close' series; only tickers that downloaded successfully end up here.
macro_frames = {}

for ticker, short_name in MACRO_TICKERS.items():
    print(f'Downloading {ticker} ({short_name}) ...')
    try:
        data = yf.download(ticker, start='2017-01-01', progress=False)
        # If yfinance returned MultiIndex columns, keep only the first level so
        # that data['Close'] below selects a single column.
        if isinstance(data.columns, pd.MultiIndex):
            data.columns = data.columns.get_level_values(0)
        if len(data) == 0:
            print(f'  [WARN] No data for {ticker}')
            continue
        # Keep Close column renamed
        series = data['Close'].copy()
        series.name = short_name
        macro_frames[short_name] = series
        print(f'  [OK] {short_name}: {len(series)} days, {series.index.min().date()} to {series.index.max().date()}')
    except Exception as e:
        # A failed ticker is reported and skipped; the remaining tickers still run.
        print(f'  [WARN] Failed to download {ticker}: {e}')

# Merge all macro into one DataFrame
if macro_frames:
    # The series are aligned on their indexes; dates missing from one series become NaN.
    macro_df = pd.DataFrame(macro_frames)
    macro_df.index.name = 'date'
    # ffill carries the last known value forward. bfill then fills any leading gaps
    # from the first later observation, i.e. with values from after those rows.
    macro_df = macro_df.ffill().bfill()
    print(f'\n[OK] Macro DataFrame: {macro_df.shape}')
    print(macro_df.tail(3))
else:
    # Empty frame so that later cells can test len(macro_df) without a NameError.
    macro_df = pd.DataFrame()
    print('[WARN] No macro data downloaded')

# Fear & Greed Index: one request, reshaped into a two-column (date, fng_value) frame.
print('\nDownloading Fear & Greed Index ...')
try:
    fng_resp = requests.get('https://api.alternative.me/fng/?limit=0', timeout=30)
    fng_data = fng_resp.json()['data']
    fng_df = (
        pd.DataFrame(fng_data)
        .assign(
            # 'timestamp' is cast to int and read as epoch seconds
            date=lambda frame: pd.to_datetime(frame['timestamp'].astype(int), unit='s'),
            fng_value=lambda frame: pd.to_numeric(frame['value'], errors='coerce'),
        )[['date', 'fng_value']]
        .sort_values('date')
        .reset_index(drop=True)
        .drop_duplicates(subset=['date'])
    )
    print(f'[OK] Fear & Greed: {len(fng_df)} days')
except Exception as e:
    # Any failure leaves an empty frame with the expected columns.
    fng_df = pd.DataFrame(columns=['date', 'fng_value'])
    print(f'[WARN] Fear & Greed download failed: {e}')

# Save both daily tables, index included, under LOCAL_DIR first and then DRIVE_DIR.
for name, df in {'macro_daily': macro_df, 'fear_greed': fng_df}.items():
    csv_name = f'{name}.csv'
    local_path = os.path.join(LOCAL_DIR, csv_name)
    drive_path = os.path.join(DRIVE_DIR, csv_name)
    for target in (local_path, drive_path):
        df.to_csv(target)
    print(f'[OK] Saved {name} -> {drive_path}')

print()
print('[DONE] Macro and Fear & Greed download complete')

In [None]:
# =============================================================================
# CELL 4: Download Binance derivatives data (OPTIONAL)
# =============================================================================

# Host prefix for the Binance futures endpoints requested by the derivatives downloads in this cell.
FAPI_BASE = "https://fapi.binance.com"


def download_paginated(url, params_base, time_key='startTime', limit_key='limit',
                       limit=1000, max_pages=200, label='data'):
    """Generic paginated downloader for Binance futures endpoints.

    Requests ``url`` repeatedly. After each page the ``time_key`` parameter is set
    to one past the timestamp of the last record, read from the first of
    'fundingTime', 'timestamp' or 'time' present on that record.

    Pagination stops on: HTTP 451/403, any exception, an empty page, a payload
    that is not a list, records that are not dicts or carry none of the known
    timestamp keys, a page whose last timestamp repeats the previous page's, or
    after ``max_pages`` requests.

    Args:
        url: Endpoint to request.
        params_base: Query parameters sent with every request (copied, not mutated).
        time_key: Name of the query parameter used as the forward cursor.
        limit_key: Name of the page-size query parameter.
        limit: Page size sent under ``limit_key``.
        max_pages: Upper bound on the number of requests.
        label: Name used in progress and warning messages.

    Returns:
        List of all records collected before pagination stopped (possibly empty).
    """
    all_data = []
    params = dict(params_base)
    params[limit_key] = limit
    prev_last_ts = None  # last timestamp of the previously accepted page

    for page in range(max_pages):
        try:
            resp = requests.get(url, params=params, timeout=30)
            if resp.status_code in (451, 403):
                print(f'  [WARN] HTTP {resp.status_code} on page {page} for {label}, stopping')
                break
            resp.raise_for_status()
            data = resp.json()
            if not data:
                break
            if not isinstance(data, list):
                # A JSON object (e.g. an error payload) is not a page of records.
                # Extending the result with it would append its keys as strings.
                print(f'  [WARN] Unexpected response on page {page} for {label}: {data}')
                break

            # Timestamp of the last record, used as the forward cursor.
            last_row = data[-1]
            last_ts = None
            if isinstance(last_row, dict):
                for key in ('fundingTime', 'timestamp', 'time'):
                    if key in last_row:
                        last_ts = int(last_row[key])
                        break

            if last_ts is not None and last_ts == prev_last_ts:
                # The cursor did not move, so this is the same page again.
                # Stop instead of collecting duplicates until max_pages.
                break

            all_data.extend(data)

            if last_ts is None:
                # No way to build the next cursor; keep what was received.
                break
            prev_last_ts = last_ts
            params[time_key] = last_ts + 1

            time.sleep(0.15)
        except Exception as e:
            print(f'  [WARN] Error on page {page} for {label}: {e}')
            break

    print(f'  [OK] {label}: {len(all_data)} records')
    return all_data


# Each section below follows the same pattern: download raw records with
# download_paginated(), build a DataFrame with a datetime 'timestamp' column and
# numeric value columns, or fall back to an empty frame with the expected columns
# when nothing was downloaded.

# 1. Funding Rate (8h intervals, paginate from 2020-01-01)
print('Downloading funding rate ...')
funding_start = int(pd.Timestamp('2020-01-01').timestamp() * 1000)  # epoch milliseconds
funding_raw = download_paginated(
    f'{FAPI_BASE}/fapi/v1/fundingRate',
    {'symbol': 'BTCUSDT', 'startTime': funding_start},
    time_key='startTime', limit=1000, max_pages=300, label='funding_rate'
)
if funding_raw:
    funding_df = pd.DataFrame(funding_raw)
    funding_df['timestamp'] = pd.to_datetime(funding_df['fundingTime'], unit='ms')
    funding_df['fundingRate'] = pd.to_numeric(funding_df['fundingRate'], errors='coerce')
    # De-duplicate on the raw fundingTime in case consecutive pages overlap.
    funding_df = funding_df.sort_values('timestamp').drop_duplicates(subset=['fundingTime']).reset_index(drop=True)
else:
    funding_df = pd.DataFrame(columns=['timestamp', 'fundingRate'])

# NOTE(review): I believe the /futures/data/* statistics endpoints used in sections
# 2-4 only serve a limited recent window (about a month) -- confirm that a 2020
# startTime is accepted; otherwise these three frames will come back empty.

# 2. Open Interest (4h intervals)
print('Downloading open interest ...')
oi_start = int(pd.Timestamp('2020-06-01').timestamp() * 1000)
oi_raw = download_paginated(
    f'{FAPI_BASE}/futures/data/openInterestHist',
    {'symbol': 'BTCUSDT', 'period': '4h', 'startTime': oi_start},
    time_key='startTime', limit=500, max_pages=300, label='open_interest'
)
if oi_raw:
    oi_df = pd.DataFrame(oi_raw)
    oi_df['timestamp'] = pd.to_datetime(oi_df['timestamp'], unit='ms')
    # Convert only the value columns that are actually present in the response.
    for col in ['sumOpenInterest', 'sumOpenInterestValue']:
        if col in oi_df.columns:
            oi_df[col] = pd.to_numeric(oi_df[col], errors='coerce')
    oi_df = oi_df.sort_values('timestamp').reset_index(drop=True)
else:
    oi_df = pd.DataFrame(columns=['timestamp', 'sumOpenInterest', 'sumOpenInterestValue'])

# 3. Long/Short Account Ratio (1h)
print('Downloading long/short ratio ...')
ls_start = int(pd.Timestamp('2020-06-01').timestamp() * 1000)
ls_raw = download_paginated(
    f'{FAPI_BASE}/futures/data/globalLongShortAccountRatio',
    {'symbol': 'BTCUSDT', 'period': '1h', 'startTime': ls_start},
    time_key='startTime', limit=500, max_pages=300, label='long_short_ratio'
)
if ls_raw:
    ls_df = pd.DataFrame(ls_raw)
    ls_df['timestamp'] = pd.to_datetime(ls_df['timestamp'], unit='ms')
    ls_df['longShortRatio'] = pd.to_numeric(ls_df['longShortRatio'], errors='coerce')
    ls_df = ls_df.sort_values('timestamp').reset_index(drop=True)
else:
    ls_df = pd.DataFrame(columns=['timestamp', 'longShortRatio'])

# 4. Taker Buy/Sell Volume (1h)
print('Downloading taker buy/sell volume ...')
tv_start = int(pd.Timestamp('2020-06-01').timestamp() * 1000)
tv_raw = download_paginated(
    f'{FAPI_BASE}/futures/data/takeBuySellVol',
    {'symbol': 'BTCUSDT', 'period': '1h', 'startTime': tv_start},
    time_key='startTime', limit=500, max_pages=300, label='taker_volume'
)
if tv_raw:
    tv_df = pd.DataFrame(tv_raw)
    tv_df['timestamp'] = pd.to_datetime(tv_df['timestamp'], unit='ms')
    # Convert only the value columns that are actually present in the response.
    for col in ['buyVol', 'sellVol', 'buySellRatio']:
        if col in tv_df.columns:
            tv_df[col] = pd.to_numeric(tv_df[col], errors='coerce')
    tv_df = tv_df.sort_values('timestamp').reset_index(drop=True)
else:
    tv_df = pd.DataFrame(columns=['timestamp', 'buyVol', 'sellVol', 'buySellRatio'])

# Save all derivatives
# Each frame (possibly empty) is written without its index to LOCAL_DIR and DRIVE_DIR.
for name, df in [('funding_rate', funding_df), ('open_interest', oi_df),
                  ('long_short_ratio', ls_df), ('taker_volume', tv_df)]:
    local_path = os.path.join(LOCAL_DIR, f'{name}.csv')
    drive_path = os.path.join(DRIVE_DIR, f'{name}.csv')
    df.to_csv(local_path, index=False)
    df.to_csv(drive_path, index=False)
    print(f'[OK] Saved {name}: {len(df)} rows -> {drive_path}')

print()
print('[DONE] Derivatives download complete (any failures above are non-fatal)')

In [None]:
# =============================================================================
# CELL 5: Label crash periods and merge all data
# =============================================================================

# 1. Flag every BTC candle whose timestamp lies inside one of the CRASH_PERIODS
#    (both end points inclusive).
btc = btc_df.copy()
btc['is_crash'] = False
for start_str, end_str, name in CRASH_PERIODS:
    start, end = pd.Timestamp(start_str), pd.Timestamp(end_str)
    mask = btc['timestamp'].between(start, end)
    count = mask.sum()
    btc.loc[mask, 'is_crash'] = True
    print(f'Crash period {start_str} to {end_str} ({name}): {count:,} candles')

# Only the flagged candles are kept for the rest of the pipeline.
crash_data = btc.loc[btc['is_crash']].copy().reset_index(drop=True)
print(f'\nTotal crash candles: {len(crash_data):,}')
print(f'Total non-crash candles: {(~btc["is_crash"]).sum():,}')

# 2. Midnight-normalised calendar date of each candle, used as the key for daily merges
crash_data['date'] = pd.to_datetime(crash_data['timestamp'].dt.date)

# 3. Merge daily macro onto crash data by date (forward-fill for weekends)
# NOTE(review): every candle receives the macro 'Close' recorded for its own
# calendar date. If that close is only known later in the day, intraday candles
# see a value from their future -- confirm whether a one-day lag is wanted.
if len(macro_df) > 0:
    macro_merge = macro_df.copy()
    macro_merge.index = pd.to_datetime(macro_merge.index)
    # Reindex to all dates in crash data and forward-fill
    all_dates = crash_data['date'].unique()
    # The union adds crash dates that have no macro row; ffill then carries the
    # last known value onto them, and bfill covers any dates before the first macro row.
    macro_reindexed = macro_merge.reindex(
        macro_merge.index.union(pd.DatetimeIndex(all_dates))
    ).sort_index().ffill().bfill()
    # Keep exactly one row per crash date.
    macro_reindexed = macro_reindexed.loc[all_dates]
    # The rename handles the case where the index lost its name and reset_index()
    # produced a column called 'index'.
    macro_reindexed = macro_reindexed.reset_index().rename(columns={'index': 'date'})
    crash_data = crash_data.merge(macro_reindexed, on='date', how='left')
    print(f'[OK] Merged macro data: {[c for c in macro_reindexed.columns if c != "date"]}')
else:
    print('[WARN] No macro data to merge')

# 4. Merge Fear & Greed by date
if len(fng_df) > 0:
    fng_merge = fng_df.copy()
    fng_merge['date'] = pd.to_datetime(fng_merge['date'])
    crash_data = crash_data.merge(fng_merge[['date', 'fng_value']], on='date', how='left')
    # Fill dates without an index value from neighbouring rows. The fill runs over
    # the concatenated crash periods, so it can cross a period boundary.
    crash_data['fng_value'] = crash_data['fng_value'].ffill().bfill()
    print(f'[OK] Merged Fear & Greed: {crash_data["fng_value"].notna().sum():,} rows with FNG')
else:
    # Keep the column present so downstream code can rely on it.
    crash_data['fng_value'] = np.nan
    print('[WARN] No Fear & Greed data')

# Steps 5-8 attach each derivatives series with pd.merge_asof(direction='backward'):
# every candle takes the most recent record at or before its timestamp, provided
# that record is no older than the stated tolerance; otherwise the value is NaN.
# When a source frame is empty, its columns are still created (all NaN) so that
# later cells can reference them.

# 5. Merge funding rate via merge_asof (backward, 12h tolerance)
if len(funding_df) > 0:
    funding_merge = funding_df[['timestamp', 'fundingRate']].copy()
    funding_merge = funding_merge.sort_values('timestamp').reset_index(drop=True)
    # merge_asof needs both sides sorted on the key. This is the only place where
    # crash_data itself is re-sorted; steps 6-8 rely on it still being in timestamp order.
    crash_data = crash_data.sort_values('timestamp').reset_index(drop=True)
    crash_data = pd.merge_asof(
        crash_data, funding_merge,
        on='timestamp', direction='backward',
        tolerance=pd.Timedelta('12h')
    )
    print(f'[OK] Merged funding rate: {crash_data["fundingRate"].notna().sum():,} rows')
else:
    crash_data['fundingRate'] = np.nan
    print('[WARN] No funding rate data')

# 6. Merge open interest via merge_asof (backward, 6h tolerance)
if len(oi_df) > 0:
    oi_merge = oi_df[['timestamp', 'sumOpenInterest', 'sumOpenInterestValue']].copy()
    oi_merge = oi_merge.sort_values('timestamp').reset_index(drop=True)
    crash_data = pd.merge_asof(
        crash_data, oi_merge,
        on='timestamp', direction='backward',
        tolerance=pd.Timedelta('6h')
    )
    print(f'[OK] Merged open interest: {crash_data["sumOpenInterest"].notna().sum():,} rows')
else:
    crash_data['sumOpenInterest'] = np.nan
    crash_data['sumOpenInterestValue'] = np.nan
    print('[WARN] No open interest data')

# 7. Merge long/short ratio via merge_asof (backward, 2h tolerance)
if len(ls_df) > 0:
    ls_merge = ls_df[['timestamp', 'longShortRatio']].copy()
    ls_merge = ls_merge.sort_values('timestamp').reset_index(drop=True)
    crash_data = pd.merge_asof(
        crash_data, ls_merge,
        on='timestamp', direction='backward',
        tolerance=pd.Timedelta('2h')
    )
    print(f'[OK] Merged long/short ratio: {crash_data["longShortRatio"].notna().sum():,} rows')
else:
    crash_data['longShortRatio'] = np.nan
    print('[WARN] No long/short ratio data')

# 8. Merge taker volume via merge_asof (backward, 2h tolerance)
if len(tv_df) > 0:
    tv_merge = tv_df[['timestamp', 'buyVol', 'sellVol', 'buySellRatio']].copy()
    tv_merge = tv_merge.sort_values('timestamp').reset_index(drop=True)
    crash_data = pd.merge_asof(
        crash_data, tv_merge,
        on='timestamp', direction='backward',
        tolerance=pd.Timedelta('2h')
    )
    print(f'[OK] Merged taker volume: {crash_data["buyVol"].notna().sum():,} rows')
else:
    crash_data['buyVol'] = np.nan
    crash_data['sellVol'] = np.nan
    crash_data['buySellRatio'] = np.nan
    print('[WARN] No taker volume data')

# 9. Attach ETH close/volume to the candle with the identical timestamp
eth_merge = eth_df[['timestamp', 'close', 'volume']].rename(
    columns={'close': 'eth_close', 'volume': 'eth_volume'}
)
crash_data = crash_data.merge(eth_merge, on='timestamp', how='left')
print(f'[OK] Merged ETH: {crash_data["eth_close"].notna().sum():,} rows with ETH data')

# 10. Per-column share of non-null values, in percent
print(f'\nColumn overview ({len(crash_data.columns)} columns, {len(crash_data):,} rows):')
print('-' * 60)
for col in crash_data.columns:
    fill = 100 * crash_data[col].notna().mean()
    print(f'  {col:35s} fill: {fill:6.1f}%')

# 11. Write the merged dataset to DRIVE_DIR first, then to LOCAL_DIR
save_path = os.path.join(DRIVE_DIR, 'crash_dataset_raw.csv')
for target in (save_path, os.path.join(LOCAL_DIR, 'crash_dataset_raw.csv')):
    crash_data.to_csv(target, index=False)
print(f'\n[DONE] Saved crash_dataset_raw.csv: {len(crash_data):,} rows -> {save_path}')

In [None]:
# =============================================================================
# CELL 6: Feature engineering (51 features)
# =============================================================================

# Work on a time-ordered copy so crash_data itself stays untouched.
df = crash_data.copy()
df = df.sort_values('timestamp').reset_index(drop=True)

# ---- GROUP 1: BTC Price/Volume (15 features) ----
# NOTE(review): df is the concatenation of the separate CRASH_PERIODS, so every
# rolling / pct_change window that starts near the beginning of a period reaches
# back into the previous period. Confirm whether that warm-up contamination is acceptable.
print('Building Group 1: BTC Price/Volume (15 features) ...')

# Close-to-close returns over 1, 6, 12, 48 and 288 bars
df['return_1bar'] = df['close'].pct_change(1)
df['return_6bar'] = df['close'].pct_change(6)
df['return_12bar'] = df['close'].pct_change(12)
df['return_48bar'] = df['close'].pct_change(48)
df['return_288bar'] = df['close'].pct_change(288)

# Rolling standard deviation of 1-bar returns, and the short/long ratio
df['vol_12bar'] = df['return_1bar'].rolling(12).std()
df['vol_48bar'] = df['return_1bar'].rolling(48).std()
df['vol_ratio'] = df['vol_12bar'] / df['vol_48bar'].replace(0, np.nan)

# Volume relative to its 48-bar mean, and the 12-bar change of that mean
vol_mean = df['volume'].rolling(48).mean()
df['volume_surge'] = df['volume'] / vol_mean.replace(0, np.nan)
df['volume_trend'] = vol_mean.pct_change(12)

# Consecutive red candles
# Vectorised replacement for a per-row Python loop: every non-red candle starts a
# new run, and the cumulative sum of is_red inside a run is the running count of
# consecutive red candles (0 on the non-red candle itself).
is_red = (df['close'] < df['open']).astype(int)
run_id = (is_red == 0).cumsum()
streaks = is_red.groupby(run_id).cumsum()
df['consecutive_red'] = streaks / 12.0  # Normalize: 12 consecutive = 1.0

# Drawdown from 24h high
rolling_high_24h = df['high'].rolling(288).max()
df['drawdown_24h'] = (df['close'] - rolling_high_24h) / rolling_high_24h.replace(0, np.nan)

# RSI-14 normalized to [-1, 1]
# Simple rolling means of gains and losses. A zero average loss becomes NaN so the
# division cannot produce inf.
delta = df['close'].diff()
gain = delta.clip(lower=0).rolling(14).mean()
loss = (-delta.clip(upper=0)).rolling(14).mean()
rs = gain / loss.replace(0, np.nan)
rsi = 100 - (100 / (1 + rs))
df['rsi_14_norm'] = (rsi - 50) / 50  # Map 0-100 to -1 to 1

# Bollinger %B
# Position of the close inside the 20-bar mean +/- 2 std band (0 = lower, 1 = upper).
bb_mid = df['close'].rolling(20).mean()
bb_std = df['close'].rolling(20).std()
bb_upper = bb_mid + 2 * bb_std
bb_lower = bb_mid - 2 * bb_std
bb_range = (bb_upper - bb_lower).replace(0, np.nan)
df['bb_pct_b'] = (df['close'] - bb_lower) / bb_range

# VWAP distance
# Volume-weighted average close over 48 bars, and the close's relative distance from it.
vwap_num = (df['close'] * df['volume']).rolling(48).sum()
vwap_den = df['volume'].rolling(48).sum().replace(0, np.nan)
vwap = vwap_num / vwap_den
df['vwap_distance'] = (df['close'] - vwap) / vwap.replace(0, np.nan)

print('  [OK] 15 BTC features built')

# ---- GROUP 2: Daily Macro (10 features) ----
# Every source column is optional. When one is missing, its features are set to a
# constant default so the feature set keeps the same columns.
print('Building Group 2: Daily Macro (10 features) ...')

bars_per_day = 288                 # 1 day = 288 bars
sma_window = bars_per_day * 20     # 20 days of bars

if 'spx' in df.columns:
    # First spx value of each calendar date, compared with the value 288 bars earlier
    spx_first_of_day = df.groupby('date')['spx'].transform('first')
    df['spx_return_1d'] = spx_first_of_day.pct_change(bars_per_day)
    spx_sma_20 = df['spx'].rolling(sma_window).mean()
    df['spx_vs_sma'] = (df['spx'] - spx_sma_20) / spx_sma_20.replace(0, np.nan)
else:
    for _missing in ('spx_return_1d', 'spx_vs_sma'):
        df[_missing] = 0.0

if 'vix' in df.columns:
    vix = df['vix']
    df['vix_norm'] = vix / 30.0  # VIX 30 = 1.0
    df['vix_change'] = vix.pct_change(bars_per_day)
    df['vix_extreme'] = (vix > 30).astype(float)
else:
    for _missing in ('vix_norm', 'vix_change', 'vix_extreme'):
        df[_missing] = 0.0

if 'dxy' in df.columns:
    df['dxy_return_1d'] = df['dxy'].pct_change(bars_per_day)
    dxy_sma = df['dxy'].rolling(sma_window).mean()
    df['dxy_trend'] = (df['dxy'] - dxy_sma) / dxy_sma.replace(0, np.nan)
else:
    for _missing in ('dxy_return_1d', 'dxy_trend'):
        df[_missing] = 0.0

if 'us10y' in df.columns:
    us10y = df['us10y']
    df['yield_level'] = us10y / 5.0  # 5% yield = 1.0
    df['yield_change'] = us10y.pct_change(bars_per_day)
else:
    for _missing in ('yield_level', 'yield_change'):
        df[_missing] = 0.0

# 0-100 index scaled to 0-1; 0.5 is the default when the column is absent
df['fng_norm'] = df['fng_value'] / 100.0 if 'fng_value' in df.columns else 0.5

print('  [OK] 10 macro features built')

# ---- GROUP 3: Derivatives (9 features) ----
print('Building Group 3: Derivatives (9 features) ...')

# The whole group is built only if more than 100 candles carry a funding rate;
# otherwise all nine features are set to zero.
has_derivatives = df['fundingRate'].notna().sum() > 100

if has_derivatives:
    # Funding-rate z-score against its own 288-bar rolling mean/std (missing rates count as 0)
    fr = df['fundingRate'].fillna(0)
    fr_mean = fr.rolling(288).mean()
    fr_std = fr.rolling(288).std().replace(0, np.nan)
    df['funding_z'] = (fr - fr_mean) / fr_std
    df['funding_extreme_long'] = (df['funding_z'] > 2).astype(float)
    df['funding_extreme_short'] = (df['funding_z'] < -2).astype(float)

    if 'sumOpenInterest' in df.columns:
        # .ffill() replaces fillna(method='ffill'), which is deprecated in pandas 2.1+.
        oi = df['sumOpenInterest'].ffill()
        df['oi_change_1h'] = oi.pct_change(12)   # 12 bars = 1h
        df['oi_change_4h'] = oi.pct_change(48)   # 48 bars = 4h
        # Spike = absolute 1h change above its own rolling 95th percentile (288 bars)
        oi_pct = oi.pct_change(12).abs()
        oi_thresh = oi_pct.rolling(288).quantile(0.95)
        df['oi_spike'] = (oi_pct > oi_thresh).astype(float)
    else:
        df['oi_change_1h'] = 0.0
        df['oi_change_4h'] = 0.0
        df['oi_spike'] = 0.0

    if 'longShortRatio' in df.columns:
        # A missing ratio is treated as neutral (1.0)
        lsr = df['longShortRatio'].fillna(1.0)
        df['ls_ratio_norm'] = (lsr - 1.0) / 0.5  # 1.0 = neutral
        df['ls_extreme_long'] = (lsr > 2.0).astype(float)
    else:
        df['ls_ratio_norm'] = 0.0
        df['ls_extreme_long'] = 0.0

    if 'buyVol' in df.columns and 'sellVol' in df.columns:
        # (buy - sell) / (buy + sell); a zero total becomes NaN instead of dividing by zero
        buy = df['buyVol'].fillna(0)
        sell = df['sellVol'].fillna(0)
        total = (buy + sell).replace(0, np.nan)
        df['taker_imbalance'] = (buy - sell) / total
    else:
        df['taker_imbalance'] = 0.0
else:
    print('  [WARN] Insufficient derivatives data, filling with zeros')
    for feat in ['funding_z', 'funding_extreme_long', 'funding_extreme_short',
                 'oi_change_1h', 'oi_change_4h', 'oi_spike',
                 'ls_ratio_norm', 'ls_extreme_long', 'taker_imbalance']:
        df[feat] = 0.0

print('  [OK] 9 derivatives features built')

# ---- GROUP 4: Intraday Macro (11 features) - ALL ZEROS (placeholders) ----
print('Building Group 4: Intraday Macro (11 placeholders) ...')

intraday_macro_cols = [
    'spx_return_5m', 'spx_return_15m', 'spx_return_1h',
    'spx_momentum_5m', 'spx_direction_5m',
    'vix_return_5m', 'vix_return_1h', 'vix_spike_5m',
    'ndx_return_5m', 'ndx_return_1h',
    'has_intraday_macro'
]
# Add all placeholder columns in one step, in list order, each filled with 0.0.
df = df.assign(**dict.fromkeys(intraday_macro_cols, 0.0))

print('  [OK] 11 intraday macro placeholders set to 0.0')

# ---- GROUP 5: Cross-Asset (6 features) ----
print('Building Group 5: Cross-Asset (6 features) ...')

# ETH features need the merged column and more than 100 non-null ETH closes.
eth_available = 'eth_close' in df.columns and df['eth_close'].notna().sum() > 100
if eth_available:
    eth_close = df['eth_close']
    df['eth_return_1bar'] = eth_close.pct_change(1)
    df['eth_return_6bar'] = eth_close.pct_change(6)
    # BTC close divided by ETH close; the feature is its 12-bar relative change
    btc_eth_ratio = df['close'] / eth_close.replace(0, np.nan)
    df['eth_btc_ratio_change'] = btc_eth_ratio.pct_change(12)
else:
    for _missing in ('eth_return_1bar', 'eth_return_6bar', 'eth_btc_ratio_change'):
        df[_missing] = 0.0

# BTC leads: the 1-bar BTC return observed 1, 2 and 3 bars earlier
for lag in (1, 2, 3):
    df[f'btc_lead_{lag}'] = df['return_1bar'].shift(lag)

print('  [OK] 6 cross-asset features built')

# ---- LABELS (multiple horizons) ----
print('Building labels ...')

# Horizons in bars: 1 = 5 min, 2 = 10 min (PRIMARY), 6 = 30 min.
# df holds several disjoint crash periods back to back, so shift(-k) can pair a
# candle with one from a different period (or across a hole in the data). A forward
# return is therefore kept only when the row k positions ahead is exactly k
# five-minute bars later; otherwise it is NaN.
bar_interval = pd.Timedelta(minutes=5)
for horizon in (1, 2, 6):
    raw_forward = df['close'].shift(-horizon) / df['close'] - 1
    elapsed = df['timestamp'].shift(-horizon) - df['timestamp']
    df[f'forward_return_{horizon}'] = raw_forward.where(elapsed == horizon * bar_interval)

df['label_binary'] = (df['forward_return_2'] > 0).astype(int)
df['label_binary_1bar'] = (df['forward_return_1'] > 0).astype(int)
df['label_binary_6bar'] = (df['forward_return_6'] > 0).astype(int)

print('  [OK] 3 forward return horizons + 3 binary labels')

# ---- Clean up ----
print('Cleaning features ...')

FEATURE_COLS = [
    # Group 1: BTC Price/Volume (15)
    'return_1bar', 'return_6bar', 'return_12bar', 'return_48bar', 'return_288bar',
    'vol_12bar', 'vol_48bar', 'vol_ratio', 'volume_surge', 'volume_trend',
    'consecutive_red', 'drawdown_24h', 'rsi_14_norm', 'bb_pct_b', 'vwap_distance',
    # Group 2: Daily Macro (10)
    'spx_return_1d', 'spx_vs_sma', 'vix_norm', 'vix_change', 'vix_extreme',
    'dxy_return_1d', 'dxy_trend', 'yield_level', 'yield_change', 'fng_norm',
    # Group 3: Derivatives (9)
    'funding_z', 'funding_extreme_long', 'funding_extreme_short',
    'oi_change_1h', 'oi_change_4h', 'oi_spike',
    'ls_ratio_norm', 'ls_extreme_long', 'taker_imbalance',
    # Group 4: Intraday Macro (11)
    'spx_return_5m', 'spx_return_15m', 'spx_return_1h',
    'spx_momentum_5m', 'spx_direction_5m',
    'vix_return_5m', 'vix_return_1h', 'vix_spike_5m',
    'ndx_return_5m', 'ndx_return_1h',
    'has_intraday_macro',
    # Group 5: Cross-Asset (6)
    'eth_return_1bar', 'eth_return_6bar', 'eth_btc_ratio_change',
    'btc_lead_1', 'btc_lead_2', 'btc_lead_3',
]

assert len(FEATURE_COLS) == 51, f'Expected 51 features, got {len(FEATURE_COLS)}'

# Replace inf with NaN, fill NaN with 0
for col in FEATURE_COLS:
    df[col] = df[col].replace([np.inf, -np.inf], np.nan).fillna(0.0)

# Drop rows whose labels are undefined.
# The binary labels are never NaN: (NaN > 0) is False, so astype(int) turns an
# unknown forward return into a 0 / DOWN label. Dropping on the label column would
# therefore keep those rows, so drop on the forward returns instead. All three
# horizons are checked so the secondary labels are valid too.
df = df.dropna(
    subset=['forward_return_1', 'forward_return_2', 'forward_return_6']
).reset_index(drop=True)

print(f'\nFeature summary:')
print(f'  Total features: {len(FEATURE_COLS)}')
print(f'  Total rows after cleaning: {len(df):,}')
print(f'  Label distribution: UP={df["label_binary"].sum():,} / DOWN={(1-df["label_binary"]).sum():,.0f}')
print(f'  Label balance: {df["label_binary"].mean():.4f} (0.5 = perfectly balanced)')

# Save
save_path = os.path.join(DRIVE_DIR, 'crash_features.csv')
df.to_csv(save_path, index=False)
df.to_csv(os.path.join(LOCAL_DIR, 'crash_features.csv'), index=False)
print(f'\n[DONE] Saved crash_features.csv: {len(df):,} rows, {len(FEATURE_COLS)} features -> {save_path}')

In [None]:
# =============================================================================
# CELL 7: THREE-WAY TRAINING SET COMPARISON
# =============================================================================

# Split data by crash period
crash1_mask = (df['timestamp'] >= pd.Timestamp('2018-01-07')) & (df['timestamp'] <= pd.Timestamp('2018-12-15'))
crash2_mask = (df['timestamp'] >= pd.Timestamp('2021-11-10')) & (df['timestamp'] <= pd.Timestamp('2022-11-21'))
crash3_mask = (df['timestamp'] >= pd.Timestamp('2025-10-06')) & (df['timestamp'] <= pd.Timestamp('2026-02-28'))

crash1 = df[crash1_mask].copy()
crash2 = df[crash2_mask].copy()
crash3 = df[crash3_mask].copy()

print(f'Crash 1 (ICO Bust):          {len(crash1):>8,} rows')
print(f'Crash 2 (Terra/FTX/Fed):     {len(crash2):>8,} rows')
print(f'Crash 3 (Current):           {len(crash3):>8,} rows')
print()

# Crash 3 splits
c3_len = len(crash3)
c3_half = c3_len // 2
c3_70 = int(c3_len * 0.70)
c3_85 = int(c3_len * 0.85)

crash3_first_half = crash3.iloc[:c3_half]
crash3_second_half = crash3.iloc[c3_half:]

crash3_train = crash3.iloc[:c3_70]
crash3_val = crash3.iloc[c3_70:c3_85]
crash3_test = crash3.iloc[c3_85:]

# Base LightGBM params
BASE_PARAMS = {
    'objective': 'binary',
    'metric': 'binary_logloss',
    'boosting_type': 'gbdt',
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.8,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'min_child_samples': 100,
    'lambda_l1': 0.1,
    'lambda_l2': 0.1,
    'verbose': -1,
}

# Smaller model params for Config C (less data)
SMALL_PARAMS = dict(BASE_PARAMS)
SMALL_PARAMS.update({
    'num_leaves': 20,
    'min_child_samples': 50,
    'lambda_l1': 0.3,
    'lambda_l2': 0.3,
})


def _auc_or_chance(y_true, y_score, split_name):
    """Return ROC AUC, or 0.5 with a printed warning when AUC is undefined.

    roc_auc_score raises ValueError when it cannot compute a score. The 0.5
    fallback is kept so tables and JSON metadata still receive a number, but
    the warning makes it visible that the value was not measured.
    """
    try:
        auc = roc_auc_score(y_true, y_score)
    except ValueError as exc:
        print(f'    [WARN] {split_name} AUC undefined ({exc}); reporting 0.5')
        return 0.5
    # NOTE(review): some scikit-learn releases appear to return NaN (with a
    # warning) instead of raising for single-class labels -- confirm against
    # the installed version. NaN is treated like the ValueError case.
    if np.isnan(auc):
        print(f'    [WARN] {split_name} AUC undefined (NaN); reporting 0.5')
        return 0.5
    return auc


def train_and_eval(train_X, train_y, val_X, val_y, test_X, test_y,
                   params, sample_weight=None, label='model'):
    """Train a LightGBM booster with early stopping and report val/test metrics.

    Args:
        train_X, train_y: training feature matrix and binary labels.
        val_X, val_y: validation features/labels. This is the only entry in
            `valid_sets`, so early stopping monitors this split.
        test_X, test_y: features/labels scored after training; never passed
            to `lgb.train`.
        params: LightGBM parameter dict.
        sample_weight: optional per-row training weights.
        label: name printed above the metrics block.

    Feature columns must be in FEATURE_COLS order, because gain importances
    are paired with FEATURE_COLS by position.

    Returns:
        dict with keys 'model', 'val_acc', 'test_acc', 'val_auc', 'test_auc',
        'pred_std' (std of test predictions), 'n_rounds', 'test_pred' and
        'feat_imp' (top-10 list of (feature name, gain)).
    """
    train_ds = lgb.Dataset(train_X, label=train_y, weight=sample_weight)
    val_ds = lgb.Dataset(val_X, label=val_y, reference=train_ds)

    callbacks = [
        lgb.early_stopping(30),
        lgb.log_evaluation(0),  # suppress per-round output
    ]

    model = lgb.train(
        params,
        train_ds,
        num_boost_round=500,
        valid_sets=[val_ds],
        callbacks=callbacks,
    )

    val_pred = model.predict(val_X)
    test_pred = model.predict(test_X)

    # Predictions are thresholded at 0.5 to obtain hard class labels.
    val_acc = accuracy_score(val_y, (val_pred > 0.5).astype(int))
    test_acc = accuracy_score(test_y, (test_pred > 0.5).astype(int))

    val_auc = _auc_or_chance(val_y, val_pred, 'Val')
    test_auc = _auc_or_chance(test_y, test_pred, 'Test')

    pred_std = float(np.std(test_pred))
    # Fall back to the total tree count when no best iteration was recorded.
    n_rounds = model.best_iteration or model.num_trees()

    # Top 10 features by gain
    importance = model.feature_importance(importance_type='gain')
    feat_imp = sorted(zip(FEATURE_COLS, importance), key=lambda x: x[1], reverse=True)[:10]

    print(f'\n  {label}:')
    print(f'    Val acc={val_acc:.4f}  Test acc={test_acc:.4f}  Test AUC={test_auc:.4f}  Rounds={n_rounds}')
    print(f'    Pred std={pred_std:.4f}')
    print(f'    Top 10 features:')
    for fname, fval in feat_imp:
        print(f'      {fname:30s} gain={fval:.1f}')

    return {
        'model': model,
        'val_acc': val_acc,
        'test_acc': test_acc,
        'val_auc': val_auc,
        'test_auc': test_auc,
        'pred_std': pred_std,
        'n_rounds': n_rounds,
        'test_pred': test_pred,
        'feat_imp': feat_imp,
    }


results = {}


def _xy(frame):
    """Return (feature matrix, 'label_binary' vector) of `frame` as arrays."""
    return frame[FEATURE_COLS].values, frame['label_binary'].values


# ---- Config A: ALL_CRASHES ----
# Crash 1 rows are down-weighted to 0.5; Crash 2 rows keep weight 1.0.
print('=' * 60)
print('Config A: ALL_CRASHES')
print('  Train: Crash1 (weight=0.5) + Crash2 (weight=1.0)')
print('  Val:   Crash3 first half')
print('  Test:  Crash3 second half')
print('=' * 60)

train_A = pd.concat([crash1, crash2], ignore_index=True)
# Weight order matches the concat order above: crash1 rows, then crash2 rows.
weight_A = np.concatenate([
    np.full(len(crash1), 0.5),
    np.full(len(crash2), 1.0),
])

results['A'] = train_and_eval(
    *_xy(train_A),
    *_xy(crash3_first_half),
    *_xy(crash3_second_half),
    BASE_PARAMS, sample_weight=weight_A, label='ALL_CRASHES'
)
results['A']['train_size'] = len(train_A)

# ---- Config B: RECENT_ONLY ----
# Same val/test halves as Config A, but trained on Crash 2 alone, unweighted.
print('\n' + '=' * 60)
print('Config B: RECENT_ONLY')
print('  Train: Crash2 (weight=1.0)')
print('  Val:   Crash3 first half')
print('  Test:  Crash3 second half')
print('=' * 60)

results['B'] = train_and_eval(
    *_xy(crash2),
    *_xy(crash3_first_half),
    *_xy(crash3_second_half),
    BASE_PARAMS, label='RECENT_ONLY'
)
results['B']['train_size'] = len(crash2)

# ---- Config C: THIS_CRASH_ONLY ----
# Trains, validates and tests inside Crash 3, using the smaller param set.
print('\n' + '=' * 60)
print('Config C: THIS_CRASH_ONLY')
print('  Train: Crash3 first 70%')
print('  Val:   Crash3 next 15%')
print('  Test:  Crash3 last 15%')
print('=' * 60)

results['C'] = train_and_eval(
    *_xy(crash3_train),
    *_xy(crash3_val),
    *_xy(crash3_test),
    SMALL_PARAMS, label='THIS_CRASH_ONLY'
)
results['C']['train_size'] = len(crash3_train)

# ---- Comparison Table ----
# Display names for each config; also used as keys in the saved metadata.
config_names = {
    'A': 'ALL_CRASHES (18+21+25)',
    'B': 'RECENT_ONLY (21+25)',
    'C': 'THIS_CRASH_ONLY (25-26)',
}

header_cells = [
    f'{"Config":26s}', f'{"Train":>8s}', f'{"Val Acc":>9s}', f'{"Test Acc":>9s}',
    f'{"AUC":>7s}', f'{"Rnds":>6s}', f'{"PStd":>7s}',
]

print('\n\n')
print('TRAINING SET COMPARISON')
print('=' * 76)
print(' '.join(header_cells))
print('-' * 76)

# One row per config, in fixed A/B/C order.
for key in ('A', 'B', 'C'):
    r = results[key]
    row_cells = [
        f'{config_names[key]:26s}',
        f'{r["train_size"]:>8,}',
        f'{r["val_acc"]:>9.4f}',
        f'{r["test_acc"]:>9.4f}',
        f'{r["test_auc"]:>7.4f}',
        f'{r["n_rounds"]:>6d}',
        f'{r["pred_std"]:>7.4f}',
    ]
    print(' '.join(row_cells))

# Find winner: highest test accuracy; ties resolve to the earliest key in
# `results`.
# NOTE(review): the winner is selected on test accuracy, so the reported test
# score of the winner is optimistically biased. Config C is also scored on a
# different test window than A/B, so the comparison is not like-for-like --
# confirm this is acceptable.
winner_key = max(results, key=lambda name: results[name]['test_acc'])
print(f'\n>>> WINNER: {config_names[winner_key]} (test_acc={results[winner_key]["test_acc"]:.4f})')

# ---- Train winner at ALL THREE horizons ----
print('\n\n')
print('HORIZON COMPARISON (using winning config)')
print('=' * 60)

horizon_results = {}

# Winner key -> (train frame, train weights, val frame, test frame, params).
# Any key other than 'A' or 'B' falls through to the Config C setup.
_setup_by_winner = {
    'A': (train_A, weight_A, crash3_first_half, crash3_second_half, BASE_PARAMS),
    'B': (crash2, None, crash3_first_half, crash3_second_half, BASE_PARAMS),
}
_setup_config_c = (crash3_train, None, crash3_val, crash3_test, SMALL_PARAMS)
h_train, h_weight, h_val, h_test, h_params = _setup_by_winner.get(winner_key, _setup_config_c)

horizon_label_cols = [
    ('1-bar (5min)', 'label_binary_1bar'),
    ('2-bar (10min)', 'label_binary'),
    ('6-bar (30min)', 'label_binary_6bar'),
]

for horizon_name, label_col in horizon_label_cols:
    # Keep only rows that have a label for this horizon.
    train_mask = h_train[label_col].notna()
    val_mask = h_val[label_col].notna()
    test_mask = h_test[label_col].notna()

    ht = h_train.loc[train_mask]
    hv = h_val.loc[val_mask]
    hte = h_test.loc[test_mask]

    # Weights are a plain array aligned by position, so filter them with the
    # mask's underlying boolean array.
    if h_weight is None:
        hw = None
    else:
        hw = h_weight[train_mask.values]

    res = train_and_eval(
        ht[FEATURE_COLS].values, ht[label_col].values,
        hv[FEATURE_COLS].values, hv[label_col].values,
        hte[FEATURE_COLS].values, hte[label_col].values,
        h_params, sample_weight=hw, label=horizon_name
    )
    horizon_results[horizon_name] = res

print('\nHorizon comparison:')
print(' '.join([f'{"Horizon":20s}', f'{"Test Acc":>9s}', f'{"AUC":>7s}']))
print('-' * 40)
for hname, hres in horizon_results.items():
    print(f'{hname:20s} {hres["test_acc"]:>9.4f} {hres["test_auc"]:>7.4f}')

# ---- Save best model ----
# The saved model is the winner from the config comparison above, not one of
# the per-horizon retrains.
def _write_pickle(obj, path):
    """Pickle `obj` to `path`."""
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle)


def _write_json(obj, path):
    """Write `obj` to `path` as JSON with a 2-space indent."""
    with open(path, 'w') as handle:
        json.dump(obj, handle, indent=2)


def _rounded(metrics, names):
    """Return {name: metrics[name] rounded to 4 decimals} for each name."""
    return {name: round(metrics[name], 4) for name in names}


best = results[winner_key]
best_model = best['model']

# Save as pickle
pkl_path = os.path.join(DRIVE_DIR, 'crash_best_lightgbm.pkl')
_write_pickle(best_model, pkl_path)
print(f'\n[OK] Saved model pickle: {pkl_path}')

# Save as LightGBM text
txt_path = os.path.join(DRIVE_DIR, 'crash_best_lightgbm.txt')
best_model.save_model(txt_path)
print(f'[OK] Saved model text: {txt_path}')

# Save metadata
meta = {
    'winner': config_names[winner_key],
    'winner_key': winner_key,
    'feature_cols': FEATURE_COLS,
    'n_features': len(FEATURE_COLS),
    'primary_horizon': '2-bar (10min)',
    'comparison': {},
    'horizon_comparison': {},
    'trained_at': datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'),
}
# Per-config metrics, keyed by display name, in A/B/C order.
meta['comparison'].update({
    config_names[cfg]: {
        'train_size': results[cfg]['train_size'],
        **_rounded(results[cfg], ('val_acc', 'test_acc', 'val_auc', 'test_auc', 'pred_std')),
        'n_rounds': results[cfg]['n_rounds'],
    }
    for cfg in ('A', 'B', 'C')
})
# Per-horizon test metrics for the winning config.
meta['horizon_comparison'].update({
    horizon: _rounded(horizon_metrics, ('test_acc', 'test_auc'))
    for horizon, horizon_metrics in horizon_results.items()
})

meta_path = os.path.join(DRIVE_DIR, 'crash_best_meta.json')
_write_json(meta, meta_path)
print(f'[OK] Saved metadata: {meta_path}')

# Also save locally
_write_pickle(best_model, os.path.join(LOCAL_DIR, 'crash_best_lightgbm.pkl'))
best_model.save_model(os.path.join(LOCAL_DIR, 'crash_best_lightgbm.txt'))
_write_json(meta, os.path.join(LOCAL_DIR, 'crash_best_meta.json'))

print()
print('[DONE] Model training and comparison complete')

In [None]:
# =============================================================================
# CELL 8: Calibration analysis
# =============================================================================

# Use best model's test predictions
best_result = results[winner_key]
test_pred = best_result['test_pred']

# Test labels must come from the same split the winner was scored on:
# Config C used the last 15% of Crash 3, A and B used its second half.
_winner_test_frame = crash3_test if winner_key == 'C' else crash3_second_half
test_labels = _winner_test_frame['label_binary'].values

# Calibration buckets as (name, lower bound inclusive, upper bound exclusive)
# on the predicted probability.
up_buckets = [
    ('50-52% UP', 0.50, 0.52),
    ('52-55% UP', 0.52, 0.55),
    ('55-60% UP', 0.55, 0.60),
    ('60-65% UP', 0.60, 0.65),
    ('65-70% UP', 0.65, 0.70),
    ('70%+   UP', 0.70, 1.01),
]

# DOWN buckets mirror the UP ones below 0.5.
down_buckets = [
    ('50-52% DN', 0.48, 0.50),
    ('52-55% DN', 0.45, 0.48),
    ('55-60% DN', 0.40, 0.45),
    ('60-65% DN', 0.35, 0.40),
    ('65-70% DN', 0.30, 0.35),
    ('70%+   DN', 0.00, 0.30),
]

print('CALIBRATION ANALYSIS')
print('=' * 70)
print(f'{"Bucket":14s} {"Count":>7s} {"Pct":>6s} {"Accuracy":>10s} {"Avg Pred":>10s}')
print('-' * 70)

cal_rows = []

for group_no, bucket_group in enumerate((up_buckets, down_buckets)):
    if group_no > 0:
        print()  # blank line between the UP and DOWN groups
    for bucket_name, lo, hi in bucket_group:
        mask = (test_pred >= lo) & (test_pred < hi)
        count = mask.sum()
        if count == 0:
            # Empty bucket: print placeholders and record a null row.
            print(f'{bucket_name:14s} {0:>7d}   --- {"---":>10s} {"---":>10s}')
            cal_rows.append({'bucket': bucket_name, 'count': 0, 'pct': 0,
                             'accuracy': None, 'avg_pred': None})
            continue
        # Accuracy of the 0.5-thresholded prediction inside the bucket. In a
        # DOWN bucket every prediction is below 0.5, so this is the share of
        # rows labelled 0.
        acc = accuracy_score(test_labels[mask], (test_pred[mask] > 0.5).astype(int))
        avg_pred = test_pred[mask].mean()
        pct = count / len(test_pred) * 100
        print(f'{bucket_name:14s} {count:>7,} {pct:>5.1f}% {acc:>10.4f} {avg_pred:>10.4f}')
        cal_rows.append({'bucket': bucket_name, 'count': count, 'pct': pct,
                         'accuracy': acc, 'avg_pred': avg_pred})

# Summary stats
max_conf = float(test_pred.max())
min_conf = float(test_pred.min())
# "High confidence" = strictly more than 5 points away from 0.5, either side.
high_conf_mask = (test_pred > 0.55) | (test_pred < 0.45)
count_55plus = high_conf_mask.sum()
acc_55plus = (
    accuracy_score(test_labels[high_conf_mask],
                   (test_pred[high_conf_mask] > 0.5).astype(int))
    if count_55plus > 0
    else 0.0
)

print()
print('-' * 70)
print(f'Max confidence (UP):   {max_conf:.4f}')
print(f'Max confidence (DOWN): {1 - min_conf:.4f}')
print(f'Count at 55%+:         {count_55plus:,} ({count_55plus/len(test_pred)*100:.1f}% of test set)')
print(f'Accuracy at 55%+:      {acc_55plus:.4f}')

if acc_55plus > 0.52:
    print(f'Recommended threshold: 0.55 (accuracy {acc_55plus:.4f} > 0.52 baseline)')
elif count_55plus > 0:
    print(f'Recommended threshold: 0.52 (55%+ accuracy {acc_55plus:.4f} is marginal)')
else:
    print('Recommended threshold: 0.50 (no high-confidence predictions available)')

# Save calibration to the Drive directory and the local data directory.
cal_df = pd.DataFrame(cal_rows)
cal_path = os.path.join(DRIVE_DIR, 'calibration_analysis.csv')
for calibration_csv in (cal_path, os.path.join(LOCAL_DIR, 'calibration_analysis.csv')):
    cal_df.to_csv(calibration_csv, index=False)
print(f'\n[OK] Saved calibration_analysis.csv -> {cal_path}')

# Final summary
# Reports the winning config's headline metrics and the Drive locations of
# the files written by the earlier cells.
n_features = len(FEATURE_COLS)

print()
print()
print('=' * 70)
print('FINAL SUMMARY')
print('=' * 70)
print(f'Winner config:      {meta["winner"]}')
# The feature count comes from FEATURE_COLS only; the previous hard-coded
# "51 features" literal would contradict it whenever the list changes.
print(f'Features:           {n_features} (all scale-invariant)')
print(f'Primary horizon:    2-bar (10 minutes)')
print(f'Test accuracy:      {best_result["test_acc"]:.4f}')
print(f'Test AUC:           {best_result["test_auc"]:.4f}')
print(f'Prediction spread:  {best_result["pred_std"]:.4f}')
print(f'Model rounds:       {best_result["n_rounds"]}')
print()
print('Saved files:')
print(f'  Model:       {DRIVE_DIR}/crash_best_lightgbm.pkl')
print(f'  Model text:  {DRIVE_DIR}/crash_best_lightgbm.txt')
print(f'  Metadata:    {DRIVE_DIR}/crash_best_meta.json')
print(f'  Features:    {DRIVE_DIR}/crash_features.csv')
print(f'  Calibration: {DRIVE_DIR}/calibration_analysis.csv')
print()
print('[DONE] Crash regime v3 training complete')