<a href="https://colab.research.google.com/github/jadenfix/mft_crypto_research/blob/main/gpu_crypto_backtest_template.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# GPU‑Optimized Crypto ML Backtesting

This notebook ingests 1‑minute OHLCV data for **ADA, BTC, ETH, and SOL**, trains **GPU‑accelerated** models, and backtests a rolling meta‑strategy that **re‑trains every six hours (≈ 360 bars) and adjusts ensemble weights every 60 executed trades**.

> **Prerequisites**
> * NVIDIA T4 (or compatible) GPU with CUDA drivers
> * Python ≥3.10
> * The libraries installed in the next cell

In [None]:
# !pip -q install polars numpy pandas scikit-learn xgboost lightgbm catboost backtesting

In [None]:
import polars as pl, numpy as np, pandas as pd
from collections import deque
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
import lightgbm as lgb
from catboost import CatBoostRegressor
from backtesting import Backtest, Strategy

# Reproducibility
np.random.seed(42)

## Configuration – paths and constants

In [None]:
# 👉 Point each entry at the CSV file for that asset.
# Training files, keyed by lowercase asset ticker.
TRAIN_PATHS = dict(
    ada='ml_training_ada.csv',
    btc='ml_training_btc.csv',
    eth='ml_training_eth.csv',
    sol='ml_training_sol.csv',
)

# Backtest files, keyed by the same tickers as TRAIN_PATHS.
BACKTEST_PATHS = dict(
    ada='backtest_ada.csv',
    btc='backtest_btc.csv',
    eth='backtest_eth.csv',
    sol='backtest_sol.csv',
)

# Number of bars between scheduled re-fits (~6 hours).
RETRAIN_EVERY = 360
# Number of trades between dynamic ensemble-weight refreshes.
REWEIGHT_TRADES = 60
# Maximum number of rows kept in the rolling feature/label buffer.
WINDOW_LEN = 10_000
# Suffix appended to the asset ticker to form the target column name;
# adjust if your files use a different target name.
TARGET_SUFFIX = '_r_next'

## Helper functions – verification & interpolation

In [None]:
def verify_cols(df: pl.DataFrame, asset: str) -> pl.DataFrame:
    """Check that *df* has the five ``<asset>_<field>`` OHLCV columns.

    The expected names are ``<asset>_open``, ``<asset>_high``,
    ``<asset>_low``, ``<asset>_close`` and ``<asset>_volume``.

    Returns:
        The same *df* object, untouched, when every column is present.

    Raises:
        ValueError: if one or more expected columns are absent; the message
            lists the missing names in open/high/low/close/volume order.
    """
    absent = []
    for field in ('open', 'high', 'low', 'close', 'volume'):
        expected = f'{asset}_{field}'
        if expected not in df.columns:
            absent.append(expected)

    if not absent:
        return df
    raise ValueError(f"{asset}: missing columns {absent}")

def read_csv_interp(path: str, asset: str) -> pl.DataFrame:
    """Read a CSV, verify its OHLCV columns, sort it, and fill numeric gaps.

    Steps, in order:
      1. Read *path* with polars, parsing dates and inferring column types.
      2. Verify the ``<asset>_*`` OHLCV columns via ``verify_cols``.
      3. Sort by ``timestamp`` when that column exists.
      4. Linearly interpolate every numeric column, then forward-fill and
         backward-fill so leading/trailing gaps are covered too.

    Args:
        path: Location of the CSV file.
        asset: Ticker prefix of the OHLCV columns (e.g. ``'btc'``).

    Returns:
        The cleaned frame.

    Raises:
        ValueError: propagated from ``verify_cols`` when OHLCV columns are
            missing.
    """
    # infer_schema_length=None scans the whole file for type inference.
    # A value of 0 would load every column as a string, leaving no numeric
    # columns to interpolate.
    df = pl.read_csv(path, try_parse_dates=True, infer_schema_length=None)
    df = verify_cols(df, asset)
    if 'timestamp' in df.columns:
        df = df.sort('timestamp')

    num_cols = [name for name, dtype in df.schema.items() if dtype.is_numeric()]
    if num_cols:
        # interpolate() only fills interior gaps; the two fills handle nulls
        # at the end and at the start of each column respectively.
        df = df.with_columns(
            pl.col(num_cols).interpolate().forward_fill().backward_fill()
        )
    return df

## GPU‑ready model factory

In [None]:
# Keyword arguments that place each library's training on GPU 0.
# XGBoost >= 2.0 selects the device with `device=` together with the plain
# 'hist' tree method; the older `tree_method='gpu_hist'`, `predictor` and
# `gpu_id` parameters are deprecated.
XGB_GPU_KW = dict(device='cuda:0', tree_method='hist', max_bin=256)
LGB_GPU_KW = dict(device='gpu', gpu_platform_id=0, gpu_device_id=0)
CAT_GPU_KW = dict(task_type='GPU', devices='0')

def fit_models_on_gpu(X: np.ndarray, y: np.ndarray):
    """Fit the four-model ensemble on (X, y) and return the fitted estimators.

    Builds an XGBoost, a LightGBM and a CatBoost regressor configured with
    the module's GPU keyword dicts, plus a scikit-learn random forest that
    runs on the CPU, then fits each one in turn.

    Returns:
        dict with keys ``'xgb'``, ``'lgb'``, ``'cat'`` and ``'rf'`` mapping
        to the value each estimator's ``fit(X, y)`` call returns.
    """
    xgb_model = XGBRegressor(
        n_estimators=400, learning_rate=0.05, max_depth=6, random_state=42,
        **XGB_GPU_KW,
    )
    lgb_model = lgb.LGBMRegressor(
        n_estimators=400, learning_rate=0.05, max_depth=-1, random_state=42,
        **LGB_GPU_KW,
    )
    cat_model = CatBoostRegressor(
        iterations=400, learning_rate=0.05, depth=6, verbose=False,
        random_state=42, **CAT_GPU_KW,
    )
    # CPU fallback: scikit-learn's forest has no GPU backend here.
    rf_model = RandomForestRegressor(
        n_estimators=300, max_depth=None, n_jobs=-1, random_state=42,
    )

    fitted = {}
    for label, estimator in (
        ('xgb', xgb_model),
        ('lgb', lgb_model),
        ('cat', cat_model),
        ('rf', rf_model),
    ):
        fitted[label] = estimator.fit(X, y)
    return fitted

## Meta‑strategy definition – dynamic retraining & weighting

In [None]:
def build_meta_strategy(asset: str, base_models: dict, feat_cols: list[str],
                        trade_threshold: float = 0.0,
                        retrain_every: int = RETRAIN_EVERY,
                        reweight_every_trades: int = REWEIGHT_TRADES,
                        window_len: int = WINDOW_LEN,
                        weight_lookback: int = 60):
    """Build a backtesting.py ``Strategy`` subclass trading a weighted ensemble.

    On each bar the returned strategy:
      1. buffers the previous bar's feature row together with the close-to-
         close return realised on the current bar (its training label);
      2. predicts from the current bar's features with every model, blends
         the predictions with the current weights, and goes long/short when
         the blend exceeds ``+trade_threshold`` / falls below
         ``-trade_threshold``;
      3. every ``retrain_every`` bars re-fits all models on the rolling
         buffer (only once it holds more than 500 rows);
      4. after ``reweight_every_trades`` order submissions, resets the
         weights to normalised inverse mean-absolute-error over the most
         recent ``weight_lookback`` buffered rows.

    Args:
        asset: Asset ticker. Currently not used inside the strategy.
        base_models: Mapping of model name to a fitted estimator exposing
            ``predict``; used until the first scheduled re-fit.
        feat_cols: Names of the feature columns in the backtest data, in the
            order the models were trained on.
        trade_threshold: Minimum absolute blended prediction required to act.
        retrain_every: Number of bars between re-fits.
        reweight_every_trades: Number of order submissions between weight
            refreshes.
        window_len: Maximum number of rows kept in the rolling buffer.
        weight_lookback: Number of most recent buffered rows scored when
            refreshing the weights.

    Returns:
        The ``MetaVoteStrategyV2`` class (not an instance), ready to hand to
        ``Backtest``.
    """
    class MetaVoteStrategyV2(Strategy):
        """Ensemble strategy with scheduled re-fitting and re-weighting."""

        def init(self):
            self.models    = base_models
            self.weights   = {k: 1/len(base_models) for k in base_models}
            self.retrain_c = 0   # bars seen so far
            self.trade_c   = 0   # orders submitted since the last weight refresh
            self.X_buf     = deque(maxlen=window_len)
            self.y_buf     = deque(maxlen=window_len)

        def _feature_row(self, idx: int) -> np.ndarray:
            """Return the feature vector at relative bar *idx* (-1 = current)."""
            # Array access avoids rebuilding a DataFrame slice on every bar.
            return np.array([getattr(self.data, col)[idx] for col in feat_cols],
                            dtype=np.float32)

        def _pred(self):
            """Return the weighted ensemble prediction for the current bar."""
            feats = self._feature_row(-1)
            if not np.isfinite(feats).all():
                # No usable features on this bar: emit a neutral signal.
                return 0.0
            feats = feats.reshape(1, -1)
            preds = {k: m.predict(feats)[0] for k, m in self.models.items()}
            return sum(preds[k] * self.weights[k] for k in preds)

        def _update_weights(self):
            """Re-weight the models by inverse MAE on recent buffered rows."""
            if len(self.X_buf) < weight_lookback:
                return
            # X_buf and y_buf are always appended together, so the last
            # `weight_lookback` entries of each are aligned feature/label pairs.
            X_recent = np.asarray(list(self.X_buf)[-weight_lookback:], dtype=np.float32)
            y_recent = np.asarray(list(self.y_buf)[-weight_lookback:], dtype=np.float32)
            inv_mae = {}
            for k, m in self.models.items():
                y_hat = np.asarray(m.predict(X_recent)).ravel()
                # The epsilon keeps a perfect model from dividing by zero.
                inv_mae[k] = 1 / (np.mean(np.abs(y_hat - y_recent)) + 1e-8)
            total = sum(inv_mae.values())
            if not np.isfinite(total) or total <= 0:
                return  # keep the previous weights rather than store NaNs
            self.weights = {k: v / total for k, v in inv_mae.items()}

        def next(self):
            # Buffer the previous bar's features with the return realised now.
            if len(self.data) > 1:
                prev_close = self.data.Close[-2]
                label = (self.data.Close[-1] / prev_close - 1.0
                         if prev_close != 0 else np.nan)
                feat_vec = self._feature_row(-2)
                # Skip rows that would poison a later re-fit.
                if np.isfinite(label) and np.isfinite(feat_vec).all():
                    self.X_buf.append(feat_vec)
                    self.y_buf.append(label)

            # Trading decision
            z = self._pred()
            if z > trade_threshold and not self.position.is_long:
                self.position.close()
                self.buy()
                self.trade_c += 1
            elif z < -trade_threshold and not self.position.is_short:
                self.position.close()
                self.sell()
                self.trade_c += 1

            # Scheduled retrain
            self.retrain_c += 1
            if self.retrain_c % retrain_every == 0 and len(self.X_buf) > 500:
                X_new = np.asarray(list(self.X_buf), dtype=np.float32)
                y_new = np.asarray(list(self.y_buf), dtype=np.float32)
                self.models = fit_models_on_gpu(X_new, y_new)

            # Scheduled weight refresh
            if self.trade_c >= reweight_every_trades:
                self._update_weights()
                self.trade_c = 0

    return MetaVoteStrategyV2

## Load & prepare training data – interpolation + simple features

In [None]:
# Per-asset training frames and the feature column names derived for each.
data_train = {}
feature_cols = {}

for asset, path in TRAIN_PATHS.items():
    df = read_csv_interp(path, asset)
    target = f'{asset}{TARGET_SUFFIX}'

    # Create target column if not already present: the next bar's
    # close-to-close return.
    if target not in df.columns:
        df = df.with_columns(
            pl.col(f'{asset}_close').pct_change().shift(-1).alias(target)
        )

    # Simple features: percentage change of OHLCV
    feats = []
    for col in ('open', 'high', 'low', 'close', 'volume'):
        base = f'{asset}_{col}'
        df = df.with_columns(pl.col(base).pct_change().alias(f'{base}_ret'))
        feats.append(f'{base}_ret')

    model_cols = feats + [target]
    # Only the model columns matter here; nulls in unrelated columns should
    # not discard otherwise usable rows.
    df = df.drop_nulls(subset=model_cols)
    # pct_change over a zero base (e.g. a zero-volume bar) yields inf/NaN,
    # which drop_nulls does not remove, so filter those rows out explicitly.
    for name in model_cols:
        df = df.filter(pl.col(name).is_finite())

    data_train[asset] = df
    feature_cols[asset] = feats

print({k: data_train[k].shape for k in data_train})

## Train initial models (GPU‑accelerated)

In [None]:
# Fit one ensemble per asset on its prepared training frame.
asset_models = {}
for asset, df in data_train.items():
    # polars' to_numpy() takes no dtype argument, so cast afterwards.
    y = df[f'{asset}{TARGET_SUFFIX}'].to_numpy().astype(np.float32)
    X = df.select(feature_cols[asset]).to_numpy().astype(np.float32)
    print(f'Training models for {asset.upper()}:  X={X.shape}  y={y.shape}')
    asset_models[asset] = fit_models_on_gpu(X, y)

## Example backtest – choose an asset

In [None]:
asset = 'btc'  # Change as desired ('ada', 'eth', 'sol')
bt_df = read_csv_interp(BACKTEST_PATHS[asset], asset)

# Rebuild the same '<asset>_<field>_ret' features the models were trained on;
# the raw backtest file only carries the OHLCV columns. This must happen
# before the OHLCV columns are renamed below.
ohlcv = ('open', 'high', 'low', 'close', 'volume')
bt_df = bt_df.with_columns(
    [pl.col(f'{asset}_{c}').pct_change().alias(f'{asset}_{c}_ret') for c in ohlcv]
)

# Backtesting.py requires columns named Open/High/Low/Close (Volume optional).
bt_df = bt_df.rename({f'{asset}_{c}': c.capitalize() for c in ohlcv})

bt_df = bt_df.select(
    ['timestamp', 'Open', 'High', 'Low', 'Close', 'Volume'] + feature_cols[asset]
)
# Drop rows whose features are unusable: the first bar has no previous bar to
# difference against, and a zero base produces inf/NaN.
bt_df = bt_df.drop_nulls(subset=feature_cols[asset])
for name in feature_cols[asset]:
    bt_df = bt_df.filter(pl.col(name).is_finite())

# Backtesting.py needs a pandas DataFrame with datetime index
bt_pd = bt_df.to_pandas()
bt_pd.set_index('timestamp', inplace=True)

Strat = build_meta_strategy(asset, asset_models[asset], feature_cols[asset])

bt = Backtest(bt_pd, Strat, cash=1_000_000, commission=0.0015, exclusive_orders=False)
results = bt.run()
results

## Visualize equity curve

In [None]:
# Plot the `bt` backtest; this section's heading expects the equity curve.
bt.plot()

### Next steps
* Loop the backtest for all assets.
* Enrich feature engineering (technical indicators, regime filters, etc.).
* Experiment with hyper‑parameters and different ensemble combinations.

Happy trading 🚀