In [18]:
import os
import pandas as pd
import numpy as np

# ============================================================
# Backtesting Pair Trading Strategy (Version 2)
# ============================================================
# This version builds trading signals (based on rolling mean/std of spread)
# and runs a backtest comparing the strategy against benchmarks:
# - Buy & Hold BTC
# - Buy & Hold ETH
# - 50/50 Portfolio
#
# Signals are cached in CSV files under "rolling_mean_cache/" so that
# they do not need to be recomputed every time.
# ============================================================


def build_signals(btc, eth, k=1, window_days=10, cache_dir="rolling_mean_cache"):
    """
    Build trading signals from the BTC-ETH close-price spread and save them to CSV.

    The spread is ``btc_close - eth_close``. A rolling mean and standard
    deviation of the spread define a band ``mean ± k * std``; a spread above
    the band yields ``"sell_btc_buy_eth"``, a spread below it yields
    ``"buy_btc_sell_eth"``, anything else yields ``"hold"``.

    The two inputs are joined on their parsed ``open_time`` so every row of
    the output pairs prices from the same timestamp; timestamps present in
    only one of the inputs are dropped.

    The file is always recomputed and overwritten, even if it already exists.

    Parameters
    ----------
    btc : DataFrame
        BTC OHLCV data (must include 'open_time' and 'close').
    eth : DataFrame
        ETH OHLCV data (must include 'open_time' and 'close').
    k : float, optional
        Standard deviation multiplier for signal thresholds.
    window_days : float, optional
        Rolling window size in days. It is converted to a bar count as
        ``60 * 24 * window_days`` (one row is treated as one minute) and
        rounded to the nearest whole bar.
    cache_dir : str, optional
        Directory to save signal CSV files (created if missing).

    Returns
    -------
    str
        Path to the saved signal file,
        ``<cache_dir>/signals_k{k}_w{window_days}.csv``.

    Raises
    ------
    pandas.errors.MergeError
        If either input contains duplicated ``open_time`` values.
    """
    os.makedirs(cache_dir, exist_ok=True)
    output_path = os.path.join(cache_dir, f"signals_k{k}_w{window_days}.csv")

    btc_side = pd.DataFrame({
        "time": pd.to_datetime(btc["open_time"]),
        "btc_close": btc["close"],
    })
    eth_side = pd.DataFrame({
        "time": pd.to_datetime(eth["open_time"]),
        "eth_close": eth["close"],
    })

    # Join on the timestamp rather than on the row index: two frames that
    # start at different times or have gaps would otherwise pair prices from
    # different minutes. one_to_one makes duplicated timestamps fail loudly
    # instead of silently multiplying rows.
    df = btc_side.merge(eth_side, on="time", how="inner", validate="one_to_one")

    # Spread
    df["spread"] = df["btc_close"] - df["eth_close"]

    # Rolling mean & std. rolling() needs an integer window, so a fractional
    # window_days (e.g. 0.5) is rounded to a whole number of bars.
    window_size = int(round(60 * 24 * window_days))
    spread_window = df["spread"].rolling(window_size)
    df["mean"] = spread_window.mean()
    df["std"] = spread_window.std()

    # Upper / lower bounds
    df["upper"] = df["mean"] + k * df["std"]
    df["lower"] = df["mean"] - k * df["std"]

    # Generate trading signals. While the window is still filling, the bounds
    # are NaN, both comparisons are False, and the row falls back to "hold".
    conditions = [
        df["spread"] > df["upper"],
        df["spread"] < df["lower"]
    ]
    choices = ["sell_btc_buy_eth", "buy_btc_sell_eth"]
    df["signal"] = np.select(conditions, choices, default="hold")

    df.to_csv(output_path, index=False)
    print(f"✅ Signals saved to {output_path}")
    return output_path


def run_backtest(btc, eth, k=1, window_days=10,
                 start_date=None, end_date=None,
                 initial_capital=1000, trade_size=200,
                 fee_rate=0.001, cache_dir="rolling_mean_cache"):
    """
    Run backtest for pair trading strategy and compare with benchmarks.

    Signals are read from ``<cache_dir>/signals_k{k}_w{window_days}.csv``.
    If that file exists it is used as is, regardless of the ``btc`` / ``eth``
    frames passed in; only when it is missing are the frames used to build it
    via ``build_signals``.

    The strategy starts 50/50 in BTC and ETH. On every row whose signal is not
    "hold" it moves up to ``trade_size`` USD (capped by what is held) from one
    asset to the other at that row's close prices, paying ``fee_rate`` on the
    amount sold.

    Parameters
    ----------
    btc : DataFrame
        BTC OHLCV data (only used when the signal file must be built).
    eth : DataFrame
        ETH OHLCV data (only used when the signal file must be built).
    k : float
        Standard deviation multiplier for signal thresholds.
    window_days : float
        Rolling window size in days.
    start_date : str or None
        Backtest start; rows with ``time >= start_date`` are kept.
    end_date : str or None
        Backtest end; rows with ``time <= end_date`` are kept. The comparison
        is on full timestamps, so a bare date such as "2025-09-01" keeps rows
        only up to 00:00 of that day.
    initial_capital : float
        Starting portfolio value in USD.
    trade_size : float
        Trade size per signal in USD.
    fee_rate : float
        Transaction fee rate.
    cache_dir : str
        Directory where signals are stored.

    Returns
    -------
    dict
        Final portfolio values for benchmarks and strategy.
    Series
        Strategy equity curve, indexed by time.
    list
        Log of executed trades as ``(time, signal, value_before_trade)``.
    DataFrame
        DataFrame with signals and spread data for the selected period.

    Raises
    ------
    ValueError
        If no signal rows fall inside the requested date range.
    """
    signal_file = os.path.join(cache_dir, f"signals_k{k}_w{window_days}.csv")

    # Build signals if not cached
    if not os.path.exists(signal_file):
        print(f"⚠️ Signal file {signal_file} not found → building...")
        build_signals(btc, eth, k, window_days, cache_dir)

    # Load signals
    df = pd.read_csv(signal_file, parse_dates=["time"])

    # Filter by date range
    if start_date:
        df = df[df["time"] >= pd.to_datetime(start_date)]
    if end_date:
        df = df[df["time"] <= pd.to_datetime(end_date)]
    df = df.reset_index(drop=True)

    # Everything below is priced off the first row, so an empty period would
    # otherwise surface as an opaque IndexError from .iloc[0].
    if df.empty:
        raise ValueError(
            f"No signal rows in {signal_file} between {start_date} and "
            f"{end_date}; cannot run a backtest on an empty period."
        )

    first_btc_price = df["btc_close"].iloc[0]
    first_eth_price = df["eth_close"].iloc[0]

    # ------------------------------
    # Benchmarks
    # ------------------------------
    # 100% BTC
    bh_btc_units = initial_capital / first_btc_price
    bh_btc_value = bh_btc_units * df["btc_close"]

    # 100% ETH
    bh_eth_units = initial_capital / first_eth_price
    bh_eth_value = bh_eth_units * df["eth_close"]

    # 50/50 portfolio
    bh_half_btc = (initial_capital / 2) / first_btc_price
    bh_half_eth = (initial_capital / 2) / first_eth_price
    bh_equal_value = bh_half_btc * df["btc_close"] + bh_half_eth * df["eth_close"]

    # ------------------------------
    # Strategy
    # ------------------------------
    # Starts from the same 50/50 split as the benchmark above.
    btc_units = (initial_capital / 2) / first_btc_price
    eth_units = (initial_capital / 2) / first_eth_price
    strat_values = []
    trade_log = []

    # Iterate the four needed columns directly: this yields the same scalars
    # as iterrows() without building a Series per row.
    rows = zip(df["time"], df["btc_close"], df["eth_close"], df["signal"])
    for time, btc_price, eth_price, signal in rows:
        # Portfolio value before any trade on this row; this is what gets logged.
        total_value = btc_units * btc_price + eth_units * eth_price

        if signal == "sell_btc_buy_eth":
            btc_value = btc_units * btc_price
            if btc_value > 0:
                sell_value = min(trade_size, btc_value)
                fee = sell_value * fee_rate
                btc_units -= sell_value / btc_price
                eth_units += (sell_value - fee) / eth_price
                trade_log.append((time, signal, total_value))

        elif signal == "buy_btc_sell_eth":
            eth_value = eth_units * eth_price
            if eth_value > 0:
                sell_value = min(trade_size, eth_value)
                fee = sell_value * fee_rate
                eth_units -= sell_value / eth_price
                btc_units += (sell_value - fee) / btc_price
                trade_log.append((time, signal, total_value))

        # Equity after the (possible) trade, net of the fee just paid.
        strat_values.append(btc_units * btc_price + eth_units * eth_price)

    strat_values = pd.Series(strat_values, index=df["time"])

    # ------------------------------
    # Results summary
    # ------------------------------
    results = {
        "BTC Buy&Hold": bh_btc_value.iloc[-1],
        "ETH Buy&Hold": bh_eth_value.iloc[-1],
        "50/50 Portfolio": bh_equal_value.iloc[-1],
        "Strategy": strat_values.iloc[-1]
    }

    print("========== 📊 Backtest ==========")
    print(f"File: {signal_file}")
    print(f"Period: {start_date} → {end_date}")
    print(f"Initial Capital: ${initial_capital}")
    for name, val in results.items():
        ret = (val / initial_capital - 1) * 100
        print(f"{name:15s}: ${val:.2f} | Return: {ret:.2f}%")
    print(f"Total Trades: {len(trade_log)}")

    return results, strat_values, trade_log, df


In [21]:
# Load data.
# NOTE(review): the loader lines below are commented out, so `btc` and `eth`
# must already exist in the kernel when this cell runs; on a fresh kernel the
# call fails unless they are loaded elsewhere. The paths are also absolute
# paths on one machine; prefer a configurable data directory.
# btc = pd.read_csv(r"C:\Users\amirs\OneDrive\Desktop\myAlgoCode\detector\BTCUSDT_1m_1000d.csv")
# eth = pd.read_csv(r"C:\Users\amirs\OneDrive\Desktop\myAlgoCode\detector\ETHUSDT_1m_1000d.csv")

# Backtest the k=2.5 / 7-day-window variant; trade_size keeps its default.
results, strat_values, trade_log, df = run_backtest(
    btc,
    eth,
    k=2.5,
    window_days=7,
    start_date="2025-01-01",
    end_date="2025-09-01",
    initial_capital=1000,
    fee_rate=0.001,
)


File: rolling_mean_cache\signals_k2.5_w7.csv
Period: 2025-01-01 → 2025-09-01
Initial Capital: $1000
BTC Buy&Hold   : $1156.21 | Return: 15.62%
ETH Buy&Hold   : $1311.57 | Return: 31.16%
50/50 Portfolio: $1233.89 | Return: 23.39%
Strategy       : $2210.08 | Return: 121.01%
Total Trades: 105


In [None]:
========== 📊 Backtest ==========
File: rolling_mean_cache\signals_k1.5_w7.csv
Period: 2025-01-01 → 2025-09-01
Initial Capital: $1000
BTC Buy&Hold   : $1156.21 | Return: 15.62%
ETH Buy&Hold   : $1311.57 | Return: 31.16%
50/50 Portfolio: $1233.89 | Return: 23.39%
Strategy       : $1927.83 | Return: 92.78%
Total Trades: 229