# Crypto AI Backtest (Multi-Coin)

This notebook trains an AI model on BTC then applies it to BTC, GALA and XRP. It backtests a Bollinger+AI-filtered strategy on each coin and shows performance metrics and equity curves.

**Run in Google Colab or Jupyter.** Uncomment the pip installs if needed.

In [5]:
# Install packages if running in a fresh environment
!pip install yfinance pandas numpy ta scikit-learn joblib vectorbt matplotlib


Collecting ta
  Downloading ta-0.11.0.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting vectorbt
  Downloading vectorbt-0.28.1-py3-none-any.whl.metadata (12 kB)
Collecting dateparser (from vectorbt)
  Downloading dateparser-1.2.2-py3-none-any.whl.metadata (29 kB)
Collecting schedule (from vectorbt)
  Downloading schedule-1.2.2-py3-none-any.whl.metadata (3.8 kB)
Collecting mypy_extensions (from vectorbt)
  Downloading mypy_extensions-1.1.0-py3-none-any.whl.metadata (1.1 kB)
Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets>=7.0.0->vectorbt)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Downloading vectorbt-0.28.1-py3-none-any.whl (527 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m527.7/527.7 kB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dateparser-1.2.2-py3-none-any.whl (315 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m315.5/315.5 kB[0m [31m21.0 MB/s[0m eta [36m0:00:00[0m

In [7]:
import yfinance as yf
import pandas as pd
import numpy as np
import ta
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import joblib
import vectorbt as vbt
import matplotlib.pyplot as plt

pd.options.display.max_columns = 100


## Step 1 — Download 30m OHLCV data for BTC, GALA, XRP (180 days)

In [8]:
symbols = ['BTC-USD', 'GALA-USD', 'XRP-USD']
interval = '30m'
period = '60d'

price_data = {}
for sym in symbols:
    print(f'Downloading {sym}...')
    df = yf.download(sym, period=period, interval=interval, progress=False)
    if df.empty:
        print(f'Warning: no data for {sym}')
    price_data[sym] = df.dropna()

# show samples
for s, df in price_data.items():
    print(s, 'rows:', len(df))
    display(df.head())


Downloading BTC-USD...


  df = yf.download(sym, period=period, interval=interval, progress=False)


Downloading GALA-USD...


  df = yf.download(sym, period=period, interval=interval, progress=False)


Downloading XRP-USD...


  df = yf.download(sym, period=period, interval=interval, progress=False)


BTC-USD rows: 2847


Price,Close,High,Low,Open,Volume
Ticker,BTC-USD,BTC-USD,BTC-USD,BTC-USD,BTC-USD
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2025-07-25 00:00:00+00:00,118057.445312,118486.976562,118057.445312,118378.054688,250773504
2025-07-25 00:30:00+00:00,117779.953125,118062.414062,117739.976562,118062.414062,2017566720
2025-07-25 01:00:00+00:00,117441.09375,117824.523438,117433.460938,117754.445312,3003072512
2025-07-25 01:30:00+00:00,117503.882812,117708.710938,117295.992188,117480.242188,1409163264
2025-07-25 02:00:00+00:00,117197.4375,117477.929688,117192.976562,117477.929688,2977775616


GALA-USD rows: 2847


Price,Close,High,Low,Open,Volume
Ticker,GALA-USD,GALA-USD,GALA-USD,GALA-USD,GALA-USD
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2025-07-25 00:00:00+00:00,0.017164,0.017227,0.017107,0.017227,868048
2025-07-25 00:30:00+00:00,0.017079,0.017133,0.017,0.017103,599072
2025-07-25 01:00:00+00:00,0.017034,0.017106,0.017034,0.017061,3464032
2025-07-25 01:30:00+00:00,0.017262,0.017262,0.017076,0.017076,3528224
2025-07-25 02:00:00+00:00,0.017267,0.017285,0.017207,0.017266,946976


XRP-USD rows: 2847


Price,Close,High,Low,Open,Volume
Ticker,XRP-USD,XRP-USD,XRP-USD,XRP-USD,XRP-USD
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2025-07-25 00:00:00+00:00,3.131793,3.150334,3.119851,3.14431,0
2025-07-25 00:30:00+00:00,3.110566,3.131998,3.098372,3.131998,19999744
2025-07-25 01:00:00+00:00,3.116445,3.125106,3.109566,3.110578,7050240
2025-07-25 01:30:00+00:00,3.14933,3.14933,3.117744,3.117744,33296384
2025-07-25 02:00:00+00:00,3.133383,3.150634,3.130561,3.148849,7322624


In [24]:
def build_features(df):
    df = df.copy()
    close = df['Close'].squeeze()  # Convert to Series if it's a DataFrame
    volume = df['Volume'].squeeze()
    open_ = df['Open'].squeeze()
    high = df['High'].squeeze()
    low = df['Low'].squeeze()

    df['rsi'] = ta.momentum.RSIIndicator(close).rsi()
    macd = ta.trend.MACD(close)
    df['macd'] = macd.macd()
    df['macd_signal'] = macd.macd_signal()
    bb = ta.volatility.BollingerBands(close)
    df['bb_mid'] = bb.bollinger_mavg()
    df['bb_high'] = bb.bollinger_hband()
    df['bb_low'] = bb.bollinger_lband()
    df['bb_width'] = (df['bb_high'] - df['bb_low']) / df['bb_mid']
    df['percent_b'] = (close - df['bb_low']) / (df['bb_high'] - df['bb_low'])
    df['volume_change'] = volume.pct_change()

    body = abs(close - open_)
    candle_range = high - low
    upper_shadow = high - np.maximum(open_, close)
    lower_shadow = np.minimum(open_, close) - low

    df['shooting_star'] = ((body <= 0.3 * candle_range) & (upper_shadow >= 2 * body) & (lower_shadow <= 0.2 * body)).astype(int)
    df['hammer'] = ((body <= 0.3 * candle_range) & (lower_shadow >= 2 * body) & (upper_shadow <= 0.2 * body)).astype(int)

    return df.dropna()

## Step 3 — Label data for training

Label definition: a future horizon of 3 bars (~90 minutes). Label = 1 if future return > 0.2% (0.002), else 0. You can tune horizon & threshold.

In [27]:
horizon = 3
label_threshold = 0.002
labeled = {}

for sym, df in features.items():
    df2 = df.copy()
    df2['future_return'] = df2['Close'].shift(-horizon) / df2['Close'] - 1
    df2 = df2.dropna()
    df2['label'] = (df2['future_return'] > label_threshold).astype(int)
    labeled[sym] = df2
    print(sym, 'label distribution:')
    print(df2['label'].value_counts(normalize=True).to_string())

## Step 4 — Prepare features and train RandomForest

We use a modest feature set. Train on BTC and save model.

In [26]:
feature_cols = ['rsi','macd','bb_mid','bb_high','bb_low','bb_width','percent_b','volume_change','shooting_star','hammer']
X = train_df[feature_cols].fillna(0)
y = train_df['label']

# Train-test split (time-series aware: no shuffle)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

model = RandomForestClassifier(n_estimators=200, max_depth=8, random_state=42)
model.fit(X_train, y_train)

print('Training complete')
from sklearn.metrics import classification_report
print(classification_report(y_test, model.predict(X_test)))

# Save model
joblib.dump(model, 'crypto_ai_model.pkl')
print('Model saved to crypto_ai_model.pkl')


NameError: name 'train_df' is not defined

## Step 5 — Apply model to all symbols and backtest

We will generate AI signals (1=approve trade) and then backtest Bollinger entries filtered by AI approval (entry only when price <= lower band and ai==1). Exits are price >= upper band.

In [None]:
results = {}
for sym in symbols:
    print('\nProcessing', sym)
    df = labeled[sym].copy()
    # Align features
    X_sym = df[feature_cols].fillna(0)
    df['ai_signal'] = model.predict(X_sym)
    # Bollinger rule
    entries = (df['Close'] <= df['bb_low']) & (df['ai_signal'] == 1)
    exits = (df['Close'] >= df['bb_high'])
    # Shift execution to next bar
    entries_exec = entries.shift(1).fillna(False)
    exits_exec = exits.shift(1).fillna(False)

    pf = vbt.Portfolio.from_signals(df['Close'], entries_exec, exits_exec,
                                    init_cash=10000, fees=0.001, slippage=0.0005, freq='30T')
    stats = pf.stats()
    print(f'--- Stats for {sym} ---')
    display(stats)
    results[sym] = pf


## Step 6 — Plot equity curves for all symbols

In [None]:
plt.figure(figsize=(12,6))
for sym, pf in results.items():
    (pf.value() / pf.value().iloc[0]).vbt.plot(label=sym)
plt.title('Normalized Equity Curves (Bollinger + AI filter)')
plt.legend()
plt.show()

# Also show individual detailed plots if desired
for sym, pf in results.items():
    print('\nDetailed plot for', sym)
    display(pf.stats())
    pf.plot().show()


## Next steps

- Tune label threshold, horizon, model hyperparameters.
- Consider training on combined multi-coin dataset for better generalization.
- Add transaction costs & more realistic slippage models.
- Run walk-forward validation and cross-validation for robustness.
