# Crypto Backtesting – Full ML & DL Stack

This notebook trains multiple ML/DL models (RandomForest, XGBoost, AdaBoost, MLP, HMM regime, CNN, LSTM, GRU) on minute‑level OHLCV data for multiple cryptocurrencies and evaluates each with **Backtesting.py**.

Adjust `CSV_PATHS` and hyper‑parameters to your liking, then run all cells.

In [None]:
!pip -q install pandas numpy ta backtesting scikit-learn xgboost hmmlearn tensorflow matplotlib

In [None]:
# Third-party imports for data handling, indicators, models and backtesting.
import pandas as pd
import numpy as np
from pathlib import Path
from ta.volatility import BollingerBands
from ta.trend import SMAIndicator
from ta.momentum import RSIIndicator, StochasticOscillator
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
# MLPClassifier is defined in sklearn.neural_network, not sklearn.ensemble;
# importing it from the ensemble module raises ImportError.
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score, TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from hmmlearn.hmm import GaussianHMM
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, GRU, Dense, Flatten
from tensorflow.keras.callbacks import EarlyStopping
from backtesting import Backtest, Strategy
import matplotlib.pyplot as plt

# Widen the console representation of DataFrames.
pd.set_option('display.width', 140)

## 1  Load and feature‑engineer data

In [None]:
# Ticker symbol -> location of that coin's OHLCV CSV export.
# Edit these paths to point at your own data before running the notebook.
CSV_PATHS = dict(
    BTC=Path('/Users/jadenfix/hedge-fund-in-a-box/cpp_engine/data/2024_2025/2024_to_april_2025_btc_data.csv'),
    ETH=Path('/Users/jadenfix/hedge-fund-in-a-box/cpp_engine/data/2024_2025/2024_to_april_2025_eth_data.csv'),
    SOL=Path('/Users/jadenfix/hedge-fund-in-a-box/cpp_engine/data/2024_2025/2024_to_april_2025_sol_data.csv'),
    ADA=Path('/Users/jadenfix/hedge-fund-in-a-box/cpp_engine/data/2024_2025/2024_to_april_2025_ada_data.csv'),
)

def load_crypto(path: Path) -> pd.DataFrame:
    """Read one OHLCV CSV and return it indexed by timestamp.

    The file must provide ``date_only`` and ``time_only`` text columns; they
    are joined with a single space and parsed into a ``datetime`` index.
    Rows are sorted by that index, column names are lower-cased, and only
    ``open``, ``high``, ``low``, ``close`` and ``volume`` are kept (in that
    order).

    Args:
        path: Location of the CSV file.

    Returns:
        A DataFrame with a sorted ``datetime`` index and the five OHLCV columns.
    """
    raw = pd.read_csv(path)
    stamps = pd.to_datetime(raw['date_only'] + ' ' + raw['time_only'])
    by_time = raw.assign(datetime=stamps).set_index('datetime').sort_index()
    lowered = by_time.rename(columns=str.lower)
    return lowered[['open', 'high', 'low', 'close', 'volume']]

def add_indicators(df):
    """Return a copy of ``df`` with indicator, volatility and label columns.

    The input frame is left untouched; all columns are added to a private
    copy so callers can safely reuse the raw OHLCV frame.

    Added columns:
        sma_short, sma_long: simple moving averages of ``close`` (14 / 50 bars).
        sma_ratio: ``sma_short / sma_long``.
        rsi: 14-bar RSI of ``close``.
        bb_high, bb_low: Bollinger bands (20 bars, 2 deviations).
        bb_width: band width divided by ``close``.
        stoch: 14-bar stochastic oscillator from ``high``/``low``/``close``.
        return: percentage change of ``close`` shifted by -1, i.e. the return
            of the NEXT bar. It is a prediction target, not a feature.
        volatility: 14-bar rolling standard deviation of ``close``.
        direction: sign of ``return``; zero returns take the next non-zero
            direction (back-fill).

    Args:
        df: Frame with at least ``high``, ``low`` and ``close`` columns.

    Returns:
        The augmented copy with every row containing a NaN removed (indicator
        warm-up rows and the final row, which has no next-bar return).
    """
    # Work on a copy so the caller's DataFrame is not mutated as a side effect.
    df = df.copy()
    close = df['close']

    df['sma_short'] = SMAIndicator(close, 14).sma_indicator()
    df['sma_long']  = SMAIndicator(close, 50).sma_indicator()
    df['sma_ratio'] = df['sma_short'] / df['sma_long']
    df['rsi']       = RSIIndicator(close, 14).rsi()

    bb = BollingerBands(close, 20, 2)
    df['bb_high']   = bb.bollinger_hband()
    df['bb_low']    = bb.bollinger_lband()
    df['bb_width']  = (df['bb_high'] - df['bb_low']) / close

    stoch = StochasticOscillator(df['high'], df['low'], close, 14)
    df['stoch'] = stoch.stoch()

    # shift(-1) aligns each row with the return realised on the following bar.
    df['return'] = close.pct_change().shift(-1)
    df['volatility'] = close.rolling(window=14).std()
    # Flat bars (sign 0) inherit the next non-zero direction.
    df['direction'] = np.sign(df['return']).replace(0, np.nan).bfill()
    return df.dropna()

# Load each configured coin and attach its indicator/label columns.
crypto_data = {}
for coin, path in CSV_PATHS.items():
    crypto_data[coin] = add_indicators(load_crypto(path))

## 2  Build scaled feature matrices and labels

In [None]:
# Indicator columns used as model inputs.
FEATURES = ['sma_short', 'sma_long', 'sma_ratio', 'rsi', 'bb_high', 'bb_low', 'bb_width', 'stoch', 'volatility']


def prepare_data(df):
    """Build the standardised feature matrix and label vector for one coin.

    Args:
        df: Frame containing every column in ``FEATURES`` plus ``direction``.

    Returns:
        ``(X_scaled, y)`` where ``X_scaled`` is the ``FEATURES`` columns after
        ``StandardScaler.fit_transform`` and ``y`` is the ``direction`` column.

    Note:
        The scaler is fitted on the whole frame and is not returned, so any
        data fed to a model trained on ``X_scaled`` must be standardised the
        same way by the caller.
    """
    # The original also built a renamed copy of the full frame here that was
    # never used; it has been dropped to avoid the needless copy.
    X, y = df[FEATURES], df['direction']
    X_scaled = StandardScaler().fit_transform(X)
    return X_scaled, y

# Build the (scaled features, labels) pair for every coin.
crypto_datasets = {}
for coin, data in crypto_data.items():
    crypto_datasets[coin] = prepare_data(data)

## 3  Fit classical ML models with robust cross-validation

In [None]:
def train_robust_model(X, y, model_class, **kwargs):
    """Cross-validate an estimator on time-ordered folds, then fit it on all data.

    Args:
        X: Feature matrix.
        y: Label vector aligned with ``X``.
        model_class: Estimator class; instantiated as ``model_class(**kwargs)``.
        **kwargs: Constructor arguments forwarded to ``model_class``.

    Returns:
        The estimator instance after being fitted on the full ``X``/``y``.

    The five ``TimeSeriesSplit`` fold scores (balanced accuracy) and their
    mean ± two standard deviations are printed before the final fit.
    """
    estimator = model_class(**kwargs)
    cv_scores = cross_val_score(
        estimator,
        X,
        y,
        cv=TimeSeriesSplit(n_splits=5),
        scoring='balanced_accuracy',
    )
    print(f"Cross-validation scores: {cv_scores}, Mean: {cv_scores.mean():.4f} ± {cv_scores.std()*2:.4f}")

    # Final model uses every available sample.
    estimator.fit(X, y)
    return estimator

class _SignedXGBClassifier(XGBClassifier):
    """XGBClassifier that is trained on, and predicts, -1/+1 labels.

    XGBoost's scikit-learn wrapper expects class labels encoded as
    0..n_classes-1, whereas the ``direction`` labels used here are -1/+1.
    Labels are encoded to 0/1 for fitting and decoded back on prediction so
    downstream code keeps receiving a signed signal.
    """

    def fit(self, X, y, **fit_params):
        encoded = (np.asarray(y) > 0).astype(int)  # -1 -> 0, +1 -> 1
        return super().fit(X, encoded, **fit_params)

    def predict(self, X, **predict_params):
        encoded = super().predict(X, **predict_params)
        return np.where(encoded > 0, 1.0, -1.0)  # 0 -> -1, 1 -> +1


# Train models for each cryptocurrency
crypto_models = {}
for coin, (X, y) in crypto_datasets.items():
    print(f"\nTraining models for {coin}:")
    crypto_models[coin] = {
        'RF': train_robust_model(X, y, RandomForestClassifier, n_estimators=300, max_depth=6, class_weight='balanced', random_state=42),
        # Uses the signed wrapper so the -1/+1 labels are accepted by XGBoost.
        'XGB': train_robust_model(X, y, _SignedXGBClassifier, n_estimators=250, max_depth=4, learning_rate=0.05, subsample=0.8, colsample_bytree=0.8, eval_metric='logloss', random_state=42),
        'ADB': train_robust_model(X, y, AdaBoostClassifier, n_estimators=200, learning_rate=0.5, random_state=42),
        'MLP': train_robust_model(X, y, MLPClassifier, hidden_layer_sizes=(64,32), max_iter=200, batch_size=128, solver='adam', random_state=42)
    }

## 4  HMM bull/bear regime

In [None]:
def train_hmm(df):
    """Fit a 2-state Gaussian HMM on ``df['return']`` and store a regime signal.

    The state whose fitted mean return is higher is treated as the bull
    regime (+1); the other state is the bear regime (-1). Previously the sign
    was tied to whichever state the first observation happened to fall in,
    which made the bull/bear assignment arbitrary.

    Args:
        df: Frame with a ``return`` column. A ``hmm_signal`` column holding
            +1/-1 per row is added to it in place.

    Returns:
        ``(hmm, states)``: the fitted model and the decoded state per row.

    NOTE(review): ``df['return']`` is built with ``shift(-1)`` in
    ``add_indicators``, i.e. it is the NEXT bar's return, and the states are
    decoded over the whole series at once. The signal at a bar therefore
    depends on data not available at that bar; confirm whether this
    look-ahead is acceptable before trusting HMM backtest results.
    """
    observations = df[['return']].values
    hmm = GaussianHMM(2, 'full', n_iter=150, random_state=42).fit(observations)
    states = hmm.predict(observations)

    # means_ has one row per state; column 0 is the mean of the only feature.
    bull_state = int(np.argmax(hmm.means_[:, 0]))
    df['hmm_signal'] = np.where(states == bull_state, 1, -1)
    return hmm, states

# Fit one regime model per coin; this also adds `hmm_signal` to each frame.
crypto_hmm = {}
for coin, data in crypto_data.items():
    crypto_hmm[coin] = train_hmm(data)

## 5  Deep Learning models with early stopping

In [None]:
# Number of consecutive bars fed to the sequence models.
WINDOW = 20


def prepare_dl_data(X, y):
    """Turn a feature matrix into overlapping windows with aligned labels.

    Window ``i`` holds rows ``i .. i+WINDOW-1`` of ``X`` and is paired with
    the label of its LAST row, ``y[i+WINDOW-1]``. This matches how the
    per-row models pair ``X[t]`` with ``y[t]`` and how a window ending at the
    current bar is used at prediction time. (The previous version paired each
    window with the label one row later and dropped the final window.)

    Args:
        X: Array-like of shape ``(n_rows, ...)``, ordered in time.
        y: Labels aligned with ``X``; must support slicing.

    Returns:
        ``(X_dl, y_dl)`` where ``X_dl`` has shape
        ``(n_rows - WINDOW + 1, WINDOW, ...)`` and ``y_dl`` is
        ``y[WINDOW - 1:]``. Both are empty when ``n_rows < WINDOW``.
    """
    X = np.asarray(X)
    y_dl = y[WINDOW - 1:]

    if len(X) < WINDOW:
        # Not enough rows for a single window: keep the trailing dims intact.
        return np.empty((0, WINDOW) + X.shape[1:], dtype=X.dtype), y_dl

    # sliding_window_view appends the window axis last; move it next to the
    # sample axis and copy once into a contiguous, writable array.
    windows = np.lib.stride_tricks.sliding_window_view(X, WINDOW, axis=0)
    X_dl = np.ascontiguousarray(np.moveaxis(windows, -1, 1))
    return X_dl, y_dl

def make_cnn(shape):
    """Build and compile a small 1-D convolutional network.

    Args:
        shape: Input shape passed to the first ``Conv1D`` layer.

    Returns:
        A compiled ``Sequential`` model: two ``Conv1D`` layers (32 and 16
        filters, kernel size 3, ReLU), ``Flatten``, a 16-unit ReLU ``Dense``
        layer and a single ``tanh`` output, trained with Adam on MSE loss.
    """
    stack = [
        Conv1D(32, 3, activation='relu', input_shape=shape),
        Conv1D(16, 3, activation='relu'),
        Flatten(),
        Dense(16, activation='relu'),
        Dense(1, activation='tanh'),
    ]
    net = Sequential()
    for block in stack:
        net.add(block)
    net.compile(optimizer='adam', loss='mse')
    return net

def make_rnn(layer, shape):
    """Build and compile a single-layer recurrent network.

    Args:
        layer: Recurrent layer class (called as ``layer(32, input_shape=shape)``).
        shape: Input shape passed to the recurrent layer.

    Returns:
        A compiled ``Sequential`` model: the 32-unit recurrent layer, a
        16-unit ReLU ``Dense`` layer and a single ``tanh`` output, trained
        with Adam on MSE loss.
    """
    net = Sequential()
    net.add(layer(32, input_shape=shape))
    net.add(Dense(16, activation='relu'))
    net.add(Dense(1, activation='tanh'))
    net.compile(optimizer='adam', loss='mse')
    return net

# EarlyStopping watches `val_loss`, so `fit` below must be given validation
# data; without it the monitored metric is missing and training never stops
# early.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train deep learning models for each cryptocurrency
crypto_dl_models = {}
for coin, (X, y) in crypto_datasets.items():
    X_dl, y_dl = prepare_dl_data(X, y)
    # validation_split needs array inputs, so convert the labels to NumPy.
    y_dl = np.asarray(y_dl, dtype='float32')
    input_shape = (WINDOW, X_dl.shape[2])
    crypto_dl_models[coin] = {
        'CNN': make_cnn(input_shape),
        'LSTM': make_rnn(LSTM, input_shape),
        'GRU': make_rnn(GRU, input_shape),
    }

    for model in crypto_dl_models[coin].values():
        # Hold out 20% of the samples as validation data so `val_loss`
        # exists for the early-stopping callback.
        model.fit(
            X_dl,
            y_dl,
            epochs=50,
            batch_size=128,
            verbose=0,
            validation_split=0.2,
            callbacks=[early_stopping],
        )

## 6  Backtesting strategies with comprehensive performance tracking

In [None]:
class MLStrategy(Strategy):
    """Long/short strategy driven by the sign of a model prediction.

    Subclasses set ``model``:
      * ``None``            -> use the precomputed ``hmm_signal`` column;
      * a ``tf.keras.Model`` -> predict from the last ``window`` feature rows;
      * anything else       -> call ``model.predict`` on the latest feature row.
    """

    model = None        # prediction source, see class docstring
    feats = FEATURES    # feature columns read from the backtest data
    window = WINDOW     # rows per sample for Keras models

    def init(self):
        pass

    def _predict(self):
        """Return the raw signal for the current bar (only its sign is used)."""
        mdl = self.model

        if mdl is None:
            return self.data.df['hmm_signal'].iloc[-1]

        if isinstance(mdl, tf.keras.Model):
            # Stay neutral until a full window of bars is available.
            if len(self.data) < self.window:
                return 0
            recent = self.data.df[self.feats].iloc[-self.window:].values
            batch = recent[np.newaxis]  # add the batch dimension
            return float(mdl.predict(batch, verbose=0)[0, 0])

        frame = self.data.df
        latest = [frame[col].iloc[-1] for col in self.feats]
        return mdl.predict([latest])[0]

    def next(self):
        """Flip to long on a positive signal and to short on a negative one."""
        signal = np.sign(self._predict())
        if signal > 0:
            if not self.position.is_long:
                self.position.close()
                self.buy()
        elif signal < 0:
            if not self.position.is_short:
                self.position.close()
                self.sell()

# Comprehensive backtesting results
results = {}
for coin, data in crypto_data.items():
    df_bt = data.rename(columns={'open':'Open','high':'High','low':'Low','close':'Close','volume':'Volume'}).dropna()

    # The models were fitted on standardised features (see prepare_data), but
    # the strategy reads feature values straight from this frame. Standardise
    # the feature columns here so inference sees inputs on the training scale;
    # the scaler is refitted on this coin's feature columns, as in training.
    df_bt[FEATURES] = StandardScaler().fit_transform(df_bt[FEATURES])

    # Classical ML models, then deep learning models, then the HMM regime
    # strategy (model=None makes MLStrategy read the `hmm_signal` column).
    strategies = {**crypto_models[coin], **crypto_dl_models[coin], 'HMM': None}

    results[coin] = {}
    for name, mdl in strategies.items():
        Strat = type(f'{name}Strat', (MLStrategy,), {'model': mdl})
        stats = Backtest(df_bt, Strat, cash=10_000, commission=.001, exclusive_orders=True).run()
        results[coin][name] = stats

# Print and visualize results
for coin, coin_results in results.items():
    print(f"\n{coin} Cryptocurrency Results:")
    for name, stats in coin_results.items():
        # backtesting.py reports final equity under 'Equity Final [$]';
        # there is no '_equity_final' entry, so the old lookup raised KeyError.
        print(f"{name:5s}: Return {stats['Return [%]']:.2f}%  |  Equity ${stats['Equity Final [$]']:.0f}")

# Optional: bar chart of total return for every coin/model pair.
# Each coin is drawn with its own `bar` call, so coins get distinct colours.
fig, ax = plt.subplots(figsize=(15, 10))
for coin, coin_results in results.items():
    labels = [f"{coin} - {name}" for name in coin_results]
    heights = [stats['Return [%]'] for stats in coin_results.values()]
    ax.bar(labels, heights)
ax.set_title('Model Performance Across Cryptocurrencies')
ax.set_ylabel('Return [%]')
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()