# Crypto AI Backtest (Multi-Coin)

This notebook trains an AI model on BTC then applies it to BTC, GALA and XRP. It backtests a Bollinger+AI-filtered strategy on each coin and shows performance metrics and equity curves.

**Run in Google Colab or Jupyter.** Uncomment the pip installs if needed.

In [1]:
# Install packages if running in a fresh environment
!pip install yfinance pandas numpy ta scikit-learn joblib vectorbt matplotlib


Collecting ta
  Downloading ta-0.11.0.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting vectorbt
  Downloading vectorbt-0.28.1-py3-none-any.whl.metadata (12 kB)
Collecting dateparser (from vectorbt)
  Downloading dateparser-1.2.2-py3-none-any.whl.metadata (29 kB)
Collecting schedule (from vectorbt)
  Downloading schedule-1.2.2-py3-none-any.whl.metadata (3.8 kB)
Collecting mypy_extensions (from vectorbt)
  Downloading mypy_extensions-1.1.0-py3-none-any.whl.metadata (1.1 kB)
Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets>=7.0.0->vectorbt)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Downloading vectorbt-0.28.1-py3-none-any.whl (527 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m527.7/527.7 kB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dateparser-1.2.2-py3-none-any.whl (315 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m315.5/315.5 kB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m

In [16]:
import yfinance as yf
import pandas as pd
import numpy as np
import ta
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import joblib
import vectorbt as vbt
import matplotlib.pyplot as plt

pd.options.display.max_columns = 100


## Step 1 — Download 30m OHLCV data for BTC, GALA, XRP (60 days)

In [17]:
symbols = ['BTC-USD', 'GALA-USD', 'XRP-USD']
interval = '30m'
period = '60d'

price_data = {}
for sym in symbols:
    print(f'Downloading {sym}...')
    df = yf.download(sym, period=period, interval=interval, progress=False)
    if df.empty:
        print(f'Warning: no data for {sym}')
    price_data[sym] = df.dropna()

# show samples
for s, df in price_data.items():
    print(s, 'rows:', len(df))
    display(df.head())


Downloading BTC-USD...


  df = yf.download(sym, period=period, interval=interval, progress=False)


Downloading GALA-USD...


  df = yf.download(sym, period=period, interval=interval, progress=False)


Downloading XRP-USD...


  df = yf.download(sym, period=period, interval=interval, progress=False)


BTC-USD rows: 2862


Price,Close,High,Low,Open,Volume
Ticker,BTC-USD,BTC-USD,BTC-USD,BTC-USD,BTC-USD
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2025-07-28 00:00:00+00:00,119666.90625,119684.445312,119391.039062,119443.953125,1335595008
2025-07-28 00:30:00+00:00,119329.554688,119672.867188,119202.328125,119672.867188,1386631168
2025-07-28 01:00:00+00:00,119366.0625,119515.492188,119129.507812,119252.304688,1314926592
2025-07-28 01:30:00+00:00,119043.179688,119425.992188,119019.257812,119425.992188,2064678912
2025-07-28 02:00:00+00:00,119514.328125,119514.328125,119066.179688,119066.179688,1844445184


GALA-USD rows: 2862


Price,Close,High,Low,Open,Volume
Ticker,GALA-USD,GALA-USD,GALA-USD,GALA-USD,GALA-USD
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2025-07-28 00:00:00+00:00,0.018465,0.018499,0.018343,0.018354,2641208
2025-07-28 00:30:00+00:00,0.018349,0.018482,0.018338,0.018442,2989632
2025-07-28 01:00:00+00:00,0.018145,0.01825,0.018145,0.018226,771560
2025-07-28 01:30:00+00:00,0.018074,0.018148,0.018065,0.018148,300816
2025-07-28 02:00:00+00:00,0.0183,0.0183,0.018075,0.018081,2867160


XRP-USD rows: 2862


Price,Close,High,Low,Open,Volume
Ticker,XRP-USD,XRP-USD,XRP-USD,XRP-USD,XRP-USD
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2025-07-28 00:00:00+00:00,3.273717,3.273717,3.229481,3.24068,145593344
2025-07-28 00:30:00+00:00,3.270307,3.280304,3.260545,3.273969,260034048
2025-07-28 01:00:00+00:00,3.243617,3.270296,3.243205,3.270296,127732224
2025-07-28 01:30:00+00:00,3.237179,3.243737,3.227086,3.243737,149325824
2025-07-28 02:00:00+00:00,3.258971,3.258971,3.237911,3.237911,122406400


Step-2




In [18]:
def build_features(df):
    df = df.copy()
    close = df['Close'].squeeze()  # Convert to Series if it's a DataFrame
    volume = df['Volume'].squeeze()
    open_ = df['Open'].squeeze()
    high = df['High'].squeeze()
    low = df['Low'].squeeze()

    df['rsi'] = ta.momentum.RSIIndicator(close).rsi()
    macd = ta.trend.MACD(close)
    df['macd'] = macd.macd()
    df['macd_signal'] = macd.macd_signal()
    bb = ta.volatility.BollingerBands(close)
    df['bb_mid'] = bb.bollinger_mavg()
    df['bb_high'] = bb.bollinger_hband()
    df['bb_low'] = bb.bollinger_lband()
    df['bb_width'] = (df['bb_high'] - df['bb_low']) / df['bb_mid']
    df['percent_b'] = (close - df['bb_low']) / (df['bb_high'] - df['bb_low'])
    df['volume_change'] = volume.pct_change()

    body = abs(close - open_)
    candle_range = high - low
    upper_shadow = high - np.maximum(open_, close)
    lower_shadow = np.minimum(open_, close) - low

    df['shooting_star'] = ((body <= 0.3 * candle_range) & (upper_shadow >= 2 * body) & (lower_shadow <= 0.2 * body)).astype(int)
    df['hammer'] = ((body <= 0.3 * candle_range) & (lower_shadow >= 2 * body) & (upper_shadow <= 0.2 * body)).astype(int)

    return df.dropna()

## Step 3 — Label data for training

Label definition: a future horizon of 3 bars (~90 minutes). Label = 1 if future return > 0.2% (0.002), else 0. You can tune horizon & threshold.

In [19]:
horizon = 3
label_threshold = 0.002

labeled = {}

for sym, df in price_data.items():
    print(f'Preparing data for {sym}...')

    df_feat = build_features(df)
    df_feat['future_return'] = df_feat['Close'].shift(-horizon) / df_feat['Close'] - 1
    df_feat = df_feat.dropna()
    df_feat['label'] = (df_feat['future_return'] > label_threshold).astype(int)

    labeled[sym] = df_feat

    print(sym, 'label distribution:')
    print(df_feat['label'].value_counts(normalize=True).to_string())

Preparing data for BTC-USD...
BTC-USD label distribution:
label
0    0.763682
1    0.236318
Preparing data for GALA-USD...
GALA-USD label distribution:
label
0    0.595713
1    0.404287
Preparing data for XRP-USD...
XRP-USD label distribution:
label
0    0.636682
1    0.363318


## Step 4 — Prepare features and train RandomForest

We use a modest feature set. Train on BTC and save model.

In [24]:
# 🔁 Combine labeled data from all symbols
train_df = pd.concat([labeled[sym] for sym in ['BTC-USD', 'GALA-USD', 'XRP-USD']], axis=0)

# ✅ Flatten columns if using MultiIndex
train_df.columns = ['_'.join(filter(None, col)).strip() if isinstance(col, tuple) else col for col in train_df.columns]

# ✅ Define features and labels
non_feature_cols = ['label', 'future_return', 'ai_signal']  # Exclude non-features
feature_cols = [col for col in train_df.columns if col not in non_feature_cols]

X = train_df[feature_cols].replace([np.inf, -np.inf], np.nan).fillna(0)
X = np.clip(X, -100, 100)
y = train_df['label']

# ✅ Train model
model = RandomForestClassifier(n_estimators=200, max_depth=8, random_state=42)
model.fit(X, y)

print("✅ Model trained on multiple symbols")


✅ Model trained on multiple symbols


In [28]:
results = {}

for sym in labeled:
    print(f'\n📊 Processing {sym}...')
    df = labeled[sym].copy()

    # ✅ Flatten columns
    df.columns = ['_'.join(filter(None, col)).strip() if isinstance(col, tuple) else col for col in df.columns]

    # ✅ Ensure all training features are present
    missing_feature_cols = [col for col in feature_cols if col not in df.columns]
    for col in missing_feature_cols:
        df[col] = 0  # fill missing features with zero

    # ✅ Prepare features in the same column order as training
    X_sym = df[feature_cols].copy()
    X_sym = X_sym.replace([np.inf, -np.inf], np.nan).fillna(0)
    X_sym = np.clip(X_sym, -100, 100)

    # ✅ Align
    if len(X_sym) != len(df):
        df = df.iloc[-len(X_sym):].copy()

    # ✅ Predict
    try:
        df['ai_signal'] = model.predict(X_sym)
    except Exception as e:
        print(f"❌ Prediction failed for {sym}: {e}")
        continue

    # ✅ Validate required columns
    close_col = f'Close_{sym}'
    required_cols = [close_col, 'bb_low', 'bb_high', 'bb_width', 'ai_signal', 'rsi', 'macd', 'macd_signal']
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        print(f"❌ Skipping {sym}: missing columns {missing_cols}")
        continue

    # ✅ Clean NaNs
    df = df.replace([np.inf, -np.inf], np.nan)
    df = df.dropna(subset=required_cols)

    # ✅ Volatility filter
    bb_width_ma = df['bb_width'].rolling(50).mean()
    vol_filter = df['bb_width'] < bb_width_ma

    # ✅ Entry condition: BB low + AI buy signal + volatility
    entries = (df[close_col] <= df['bb_low']) & (df['ai_signal'] == 1) & vol_filter
    entries_exec = entries.shift(1).fillna(False).infer_objects(copy=False).astype(bool)

    # ✅ Technical sell conditions
    macd_cross_down = (df['macd'].shift(1) > df['macd_signal'].shift(1)) & (df['macd'] < df['macd_signal'])
    rsi_overbought = df['rsi'] > 70

    # ✅ Smart exit: BB high or AI sell signal or technical sell
    ai_sell = df['ai_signal'] == -1
    exits_signal = (df[close_col] >= df['bb_high']) | ai_sell | macd_cross_down | rsi_overbought

    # ✅ Max holding time
    max_holding_bars = int(pd.Timedelta('3d') / pd.Timedelta('30min'))
    forced_exits = entries_exec.shift(max_holding_bars).fillna(False).infer_objects(copy=False).astype(bool)

    # ✅ Final exits
    exits_exec = exits_signal | forced_exits
    exits_exec = exits_exec.fillna(False)

    # ✅ Backtest
    pf = vbt.Portfolio.from_signals(
        close=df[close_col],
        entries=entries_exec,
        exits=exits_exec,
        init_cash=10000,
        fees=0.001,
        slippage=0.0005,
        freq='30min'
    )

    stats = pf.stats()
    print(f'✅ Stats for {sym}')
    display(stats)
    results[sym] = pf



📊 Processing BTC-USD...
✅ Stats for BTC-USD


  entries_exec = entries.shift(1).fillna(False).infer_objects(copy=False).astype(bool)
  forced_exits = entries_exec.shift(max_holding_bars).fillna(False).infer_objects(copy=False).astype(bool)


Unnamed: 0,0
Start,2025-07-28 16:30:00+00:00
End,2025-09-25 13:00:00+00:00
Period,58 days 15:00:00
Start Value,10000.0
End Value,10129.912279
Total Return [%],1.299123
Benchmark Return [%],-5.389001
Max Gross Exposure [%],100.0
Total Fees Paid,20.130062
Max Drawdown [%],0.149825



📊 Processing GALA-USD...
✅ Stats for GALA-USD


  entries_exec = entries.shift(1).fillna(False).infer_objects(copy=False).astype(bool)
  forced_exits = entries_exec.shift(max_holding_bars).fillna(False).infer_objects(copy=False).astype(bool)


Unnamed: 0,0
Start,2025-07-28 16:30:00+00:00
End,2025-09-25 13:00:00+00:00
Period,56 days 09:00:00
Start Value,10000.0
End Value,10035.497574
Total Return [%],0.354976
Benchmark Return [%],-14.899107
Max Gross Exposure [%],100.0
Total Fees Paid,118.93639
Max Drawdown [%],7.579147



📊 Processing XRP-USD...
✅ Stats for XRP-USD


  entries_exec = entries.shift(1).fillna(False).infer_objects(copy=False).astype(bool)
  forced_exits = entries_exec.shift(max_holding_bars).fillna(False).infer_objects(copy=False).astype(bool)


Unnamed: 0,0
Start,2025-07-28 16:30:00+00:00
End,2025-09-25 13:00:00+00:00
Period,53 days 12:00:00
Start Value,10000.0
End Value,10852.950095
Total Return [%],8.529501
Benchmark Return [%],-10.069862
Max Gross Exposure [%],100.0
Total Fees Paid,185.853979
Max Drawdown [%],2.934091


## Step 6 — Plot equity curves for all symbols

In [None]:
for sym, pf in results.items():
    print(f"\n🔍 Plotting {sym}...")

    raw_df = labeled[sym].copy()
    raw_df.columns = ['_'.join(filter(None, col)).strip() if isinstance(col, tuple) else col for col in raw_df.columns]

    # Use the index from portfolio wrapper or fallback to pf.close.index
    if hasattr(pf, 'wrapper') and hasattr(pf.wrapper, 'index'):
        index_to_use = pf.wrapper.index
    else:
        index_to_use = pf.close.index

    df = raw_df.reindex(index_to_use)

    close_col = f'Close_{sym}'
    if close_col not in df.columns:
        print(f"❌ Missing {close_col} in {sym}")
        continue

    # Extract trade entry and exit timestamps
    trades = pf.get_trades()
    entry_times = trades.records_readable['Entry Timestamp']
    exit_times = trades.records_readable['Exit Timestamp']

    fig = go.Figure()

    # Plot price line
    fig.add_trace(go.Scatter(
        x=df.index,
        y=df[close_col],
        mode='lines',
        name='Price',
        line=dict(color='lightgray')
    ))

    # Plot entry markers
    fig.add_trace(go.Scatter(
        x=entry_times,
        y=df.loc[entry_times, close_col],
        mode='markers',
        name='Entry',
        marker=dict(symbol='triangle-up', color='green', size=10)
    ))

    # Plot exit markers
    fig.add_trace(go.Scatter(
        x=exit_times,
        y=df.loc[exit_times, close_col],
        mode='markers',
        name='Exit',
        marker=dict(symbol='triangle-down', color='red', size=10)
    ))

    # Detect AI signal column (handle multi-column or just 'ai_signal')
    ai_signal_cols = [col for col in df.columns if col.startswith('ai_signal')]
    if ai_signal_cols:
        ai_signal_col = ai_signal_cols[0]

        # AI Buy signals (ai_signal == 1)
        fig.add_trace(go.Scatter(
            x=df[df[ai_signal_col] == 1].index,
            y=df.loc[df[ai_signal_col] == 1, close_col],
            mode='markers',
            name='AI Buy Signal',
            marker=dict(color='lime', size=6, symbol='circle')
        ))

        # AI Sell signals (ai_signal == -1)
        fig.add_trace(go.Scatter(
            x=df[df[ai_signal_col] == -1].index,
            y=df.loc[df[ai_signal_col] == -1, close_col],
            mode='markers',
            name='AI Sell Signal',
            marker=dict(color='darkred', size=6, symbol='circle')
        ))

    fig.update_layout(
        title=f'{sym} - Price, Entries/Exits & AI Signals',
        xaxis_title='Date',
        yaxis_title='Price',
        height=700,
        template='plotly_dark'
    )

    fig.show()



🔍 Plotting BTC-USD...



🔍 Plotting GALA-USD...



🔍 Plotting XRP-USD...


## Next steps

- Tune label threshold, horizon, model hyperparameters.
- Consider training on combined multi-coin dataset for better generalization.
- Add transaction costs & more realistic slippage models.
- Run walk-forward validation and cross-validation for robustness.
