In [2]:
# Import the class from the Python file (module)
import pandas as pd
import matplotlib.pyplot as plt
import os
# from dotenv import load_dotenv
# from pathlib import Path
from sklearn.preprocessing import StandardScaler
import seaborn as sns
from BinanceClient import BinanceClient
import numpy as np
from typing import Final
import joblib
from BatchFeatures import BatchFeatures
from datetime import datetime, timedelta
%matplotlib widget

## Load pair DataFrame (from the local DB cache, or fetch from Binance)

In [1]:
import os
from datetime import datetime, timedelta, timezone

def interval_slug(s: str) -> str:
    """Normalise an interval label into a compact, lowercase token.

    Surrounding whitespace is trimmed, every space and forward slash is
    removed, and the remainder is lowercased (e.g. ``" 1 H "`` -> ``"1h"``).
    """
    trimmed = s.strip()
    kept = [ch for ch in trimmed if ch not in (" ", "/")]
    return "".join(kept).lower()

def make_db_name(pair: str, interval: str, weeks: int) -> str:
    """Build the DB file name ``<pair>_<interval-slug>_<weeks>weeks.db``."""
    slug = interval_slug(interval)
    return "{}_{}_{}weeks.db".format(pair, slug, weeks)

def load_or_fetch_pair_df(pair: str, interval: str, weeks: int) -> tuple[str, "pd.DataFrame"]:
    """Return the candle history for ``pair``, using a local DB file as a cache.

    The DB file name is derived from ``pair``, ``interval`` and ``weeks`` via
    ``make_db_name``. If that file already exists and yields a non-empty frame,
    the cached rows are returned. Otherwise the last ``weeks`` weeks (ending at
    the exchange's reported server time) are fetched through ``BinanceClient``,
    stored to the DB, and re-read from it.

    Parameters
    ----------
    pair : str
        Symbol passed to the client (e.g. ``"BTCUSDT"``).
    interval : str
        Candle interval handed to ``BinanceClient.set_interval``.
    weeks : int
        Length of the look-back window, in weeks.

    Returns
    -------
    tuple[str, pd.DataFrame]
        ``(db_path, df)`` where ``df`` is sorted by its index.

    Raises
    ------
    RuntimeError
        If the fetch returns no data, or the DB read-back after storing is empty.
    """
    db_path = make_db_name(pair, interval, weeks)

    print(f"[{pair}] DB: {db_path}")

    # Snapshot whether the cache file exists BEFORE constructing the client.
    # NOTE(review): BinanceClient is not visible here; if its constructor
    # creates/opens the DB file, checking afterwards would always be true and a
    # brand-new, empty DB would be queried as though it were a cache.
    db_existed = os.path.exists(db_path)

    binance_client = BinanceClient(db_path)
    binance_client.set_interval(interval)

    df = None

    if db_existed:
        df = binance_client.fetch_data_from_db(pair)
        if df is not None and not df.empty:
            print(f"[{pair}] Loaded {len(df):,} rows from DB.")
        else:
            # Treat an empty cache exactly like a missing one.
            df = None

    if df is None:
        print(f"[{pair}] No usable DB data found -> fetching from Binance...")

        # Credentials come from the environment; either may be None when unset.
        api_secret = os.getenv("BINANCE_SECRET_KEY")
        api_key = os.getenv("BINANCE_API_KEY")
        binance_client.make(api_key, api_secret)

        # Anchor the window on the exchange clock ("serverTime" is read as
        # milliseconds) rather than on the local clock.
        server_time = binance_client.get_server_time()
        end_dt = datetime.fromtimestamp(server_time["serverTime"] / 1000, tz=timezone.utc)
        start_dt = end_dt - timedelta(weeks=weeks)

        start_ms = int(start_dt.timestamp() * 1000)
        end_ms = int(end_dt.timestamp() * 1000)

        data = binance_client.fetch_data(pair, start_ms, end_ms)
        if data is None or data.empty:
            raise RuntimeError(f"[{pair}] No data returned from Binance for the requested range.")

        binance_client.store_data_to_db(pair, data)

        # Read back through the DB so both code paths return the same frame layout.
        df = binance_client.fetch_data_from_db(pair)
        if df is None or df.empty:
            raise RuntimeError(f"[{pair}] Data fetched/stored but DB load returned empty.")

        print(f"[{pair}] Fetched + stored + loaded {len(df):,} rows.")

    df = df.sort_index()
    return db_path, df


## Load BTC + ETH, then align timestamps

In [3]:
import pandas as pd

# Dataset parameters shared by every pair loaded below.
interval = "5m"
weeks = 52
pairs = ["BTCUSDT", "ETHUSDT"]

# Per-pair bookkeeping: DB file used, and the frame loaded from it.
paths = {}
dfs = {}

for p in pairs:
    db_path, df = load_or_fetch_pair_df(p, interval, weeks)
    paths[p], dfs[p] = db_path, df

# Work on copies so the frames cached in `dfs` stay untouched.
btc = dfs["BTCUSDT"].copy()
eth = dfs["ETHUSDT"].copy()

# Coerce both indexes to datetime. utc=False does not convert to UTC, so the
# two frames are assumed to already share the same timezone convention.
btc.index = pd.to_datetime(btc.index, utc=False)
eth.index = pd.to_datetime(eth.index, utc=False)

# Keep only timestamps present in BOTH frames (inner join, no forward-fill),
# prefixing columns so the two assets stay distinguishable.
btc_prefixed = btc.add_prefix("btc_")
eth_prefixed = eth.add_prefix("eth_")
joined = btc_prefixed.join(eth_prefixed, how="inner").sort_index()

print("Joined rows:", len(joined))
print("Start:", joined.index.min(), "End:", joined.index.max())
print(joined.head())


[BTCUSDT] DB: BTCUSDT_5m_52weeks.db
[BTCUSDT] Loaded 104,832 rows from DB.
[ETHUSDT] DB: ETHUSDT_5m_52weeks.db
[ETHUSDT] Loaded 104,832 rows from DB.
Joined rows: 104818
Start: 2025-01-25 16:30:00 End: 2026-01-24 15:15:00
                      btc_open   btc_high    btc_low  btc_close  btc_volume  \
timestamp                                                                     
2025-01-25 16:30:00  104786.49  104791.33  104679.35  104719.99    36.23060   
2025-01-25 16:35:00  104719.99  104815.99  104719.99  104749.46    58.02236   
2025-01-25 16:40:00  104749.46  104765.49  104599.39  104638.00    58.29234   
2025-01-25 16:45:00  104637.99  104741.76  104591.67  104671.21    48.11209   
2025-01-25 16:50:00  104671.21  104717.90  104659.52  104688.50    27.81774   

                     eth_open  eth_high  eth_low  eth_close  eth_volume  
timestamp                                                                
2025-01-25 16:30:00   3343.44   3344.95  3335.69    3339.98    906.9382  
20

## Build relative-value “spread” features

In [4]:
import numpy as np

def add_rv_features(df: pd.DataFrame, beta_window=12*24*7, z_window=12*24*7):
    """Return a copy of ``df`` extended with ETH-vs-BTC relative-value columns.

    Reads ``btc_close`` and ``eth_close`` and appends, in this order:

    - ``log_btc`` / ``log_eth``: natural log of each close.
    - ``beta``: rolling cov(log_eth, log_btc) / var(log_btc) over ``beta_window`` rows.
    - ``spread``: ``log_eth - beta * log_btc``.
    - ``z``: spread minus its rolling mean, divided by its rolling population
      std (ddof=0), both over ``z_window`` rows.
    - ``z_change``: first difference of ``z``.
    - ``corr``: rolling correlation of ``log_eth`` with ``log_btc`` over ``z_window`` rows.

    The default windows (12 * 24 * 7 = 2016 rows) equal one week of 5-minute
    bars. Rows inside the warm-up region hold NaN; the input frame is not modified.
    """
    feats = df.copy()

    feats["log_btc"] = np.log(feats["btc_close"])
    feats["log_eth"] = np.log(feats["eth_close"])
    log_btc = feats["log_btc"]
    log_eth = feats["log_eth"]

    # Slope of a rolling regression of log_eth on log_btc, expressed as cov / var.
    rolling_cov = log_eth.rolling(beta_window).cov(log_btc)
    rolling_var = log_btc.rolling(beta_window).var()
    feats["beta"] = rolling_cov / rolling_var

    feats["spread"] = log_eth - feats["beta"] * log_btc

    # Standardise the spread against its own trailing window.
    spread_window = feats["spread"].rolling(z_window)
    spread_mean = spread_window.mean()
    spread_std = spread_window.std(ddof=0)
    feats["z"] = (feats["spread"] - spread_mean) / spread_std

    # Auxiliary diagnostics.
    feats["z_change"] = feats["z"].diff()
    feats["corr"] = log_eth.rolling(z_window).corr(log_btc)

    return feats

# Compute the RV features, then discard every row containing a NaN
# (this removes the rolling-window warm-up region).
joined2 = add_rv_features(joined).dropna()
print(joined2[["beta", "spread", "z", "corr"]].tail())


                         beta     spread         z      corr
timestamp                                                   
2026-01-24 14:55:00  1.874402 -13.378103 -1.968378  0.986454
2026-01-24 15:00:00  1.874884 -13.383005 -1.967502  0.986460
2026-01-24 15:05:00  1.875392 -13.387315 -1.966387  0.986470
2026-01-24 15:10:00  1.875840 -13.392032 -1.965439  0.986475
2026-01-24 15:15:00  1.876294 -13.397841 -1.964936  0.986479


## Backtester

In [5]:
import pandas as pd
import numpy as np

def rv_switching_backtest(
    df: pd.DataFrame,
    z_col: str = "z",
    price_btc: str = "btc_close",
    price_eth: str = "eth_close",
    z_enter: float = 2.0,
    z_exit: float = 0.5,
    min_hold_bars: int = 12,          # 12 bars = 60 min on 5m
    fee_rate: float = 0.00075,        # 0.075% per side (BNB discount)
    slippage_bps: float = 0.0,        # set e.g. 1.0 for 1 bp
    initial_equity: float = 10_000.0,
):
    """
    Long-only switching relative-value backtest driven by a z-score column.

    States: USDT (flat), LONG_BTC, LONG_ETH. The full equity is always either
    flat or in exactly one asset. Every fill uses the price found in the same
    row as the signal that triggered it.

    Rules, evaluated once per row in index order:
      * In a position and ``abs(z) <= z_exit``  -> close ("Z_EXIT").
        This exit is NOT gated by ``min_hold_bars``.
      * In a position held for fewer than ``min_hold_bars`` rows -> do nothing.
      * ``z <= -z_enter`` -> target ETH; ``z >= z_enter`` -> target BTC.
        Flat -> open the target. Holding the other asset -> close it
        ("SWITCH_<held>_TO_<target>") and open the target on the same row.
      * Any position still open after the last row is closed there ("EOD").
    A NaN z satisfies none of the comparisons, so it triggers no trade.

    Costs: a buy fills at ``price * (1 + fee_rate + slip)`` and a sell at
    ``price * (1 - fee_rate - slip)``, with ``slip = slippage_bps * 1e-4``.

    Parameters
    ----------
    df : DataFrame holding ``z_col``, ``price_btc`` and ``price_eth`` columns;
        those columns are read as floats.
    z_col, price_btc, price_eth : column names for the signal and the prices.
    z_enter, z_exit : entry / exit thresholds; must satisfy
        ``z_enter > z_exit >= 0`` (checked with ``assert``).
    min_hold_bars : minimum rows in a position before a switch is considered.
    fee_rate : proportional fee charged on each side of a trade.
    slippage_bps : slippage per side, in basis points.
    initial_equity : starting equity.

    Returns
    -------
    (trades, summary)
        ``trades`` is a DataFrame with one row per closed position
        (entry_ts, exit_ts, asset, entry_price, exit_price, bars, reason,
        equity_before, equity_after, return). ``summary`` is a dict with
        trades, total_return, final_equity, avg_trade_return, win_rate and
        median_trade_return (the per-trade stats are 0.0 when there are no trades).
    """
    assert z_enter > z_exit >= 0

    slip = slippage_bps * 1e-4  # bps -> fraction
    eq = initial_equity

    state = "USDT"
    entry_eq = None
    entry_ts = None
    entry_price = None
    entry_asset = None
    bars_in_pos = 0

    records = []

    def exec_buy(price):
        # Effective buy price after fee + slippage.
        return price * (1 + fee_rate + slip)

    def exec_sell(price):
        # Effective sell price after fee + slippage.
        return price * (1 - fee_rate - slip)

    def close_position(ts, price, reason):
        nonlocal eq, state, entry_eq, entry_ts, entry_price, entry_asset, bars_in_pos
        if state == "USDT":
            return

        # Equity is fully in the asset, so the round trip scales it by the
        # ratio of the cost-adjusted exit price to the cost-adjusted entry price.
        net_mult = exec_sell(price) / exec_buy(entry_price)

        eq_after = entry_eq * net_mult
        ret = (eq_after / entry_eq) - 1.0

        records.append({
            "entry_ts": entry_ts,
            "exit_ts": ts,
            "asset": entry_asset,
            "entry_price": entry_price,
            "exit_price": price,
            "bars": bars_in_pos,
            "reason": reason,
            "equity_before": entry_eq,
            "equity_after": eq_after,
            "return": ret,
        })

        # Back to flat.
        eq = eq_after
        state = "USDT"
        entry_eq = entry_ts = entry_price = entry_asset = None
        bars_in_pos = 0

    def open_position(ts, asset, price):
        nonlocal eq, state, entry_eq, entry_ts, entry_price, entry_asset, bars_in_pos
        state = f"LONG_{asset}"
        entry_eq = eq
        entry_ts = ts
        entry_price = price
        entry_asset = asset
        bars_in_pos = 0

    # Pull the three needed columns out once: iterating plain arrays avoids
    # building a Series per row, which is what makes DataFrame.iterrows slow.
    z_values = df[z_col].to_numpy(dtype=float)
    btc_prices = df[price_btc].to_numpy(dtype=float)
    eth_prices = df[price_eth].to_numpy(dtype=float)

    for ts, z, btc_px, eth_px in zip(df.index, z_values, btc_prices, eth_prices):
        if state != "USDT":
            bars_in_pos += 1

            # Exit band is checked first and ignores min_hold_bars, so a
            # position can always be flattened once |z| has reverted.
            if abs(z) <= z_exit:
                close_position(ts, btc_px if state == "LONG_BTC" else eth_px, reason="Z_EXIT")
                continue

            # Too early to consider switching assets.
            if bars_in_pos < min_hold_bars:
                continue

        # Desired asset for this row; no entry signal means nothing to do.
        if z <= -z_enter:
            target = "ETH"
        elif z >= z_enter:
            target = "BTC"
        else:
            continue

        target_px = eth_px if target == "ETH" else btc_px

        if state == "USDT":
            open_position(ts, target, target_px)
            continue

        # Holding the other asset -> sell it, then buy the target on the same row.
        held = "BTC" if state == "LONG_BTC" else "ETH"
        if held != target:
            held_px = btc_px if held == "BTC" else eth_px
            close_position(ts, held_px, reason=f"SWITCH_{held}_TO_{target}")
            open_position(ts, target, target_px)

    # Force-close whatever is still open at the last row's price.
    if state != "USDT":
        final_px = btc_prices[-1] if state == "LONG_BTC" else eth_prices[-1]
        close_position(df.index[-1], final_px, reason="EOD")

    trades = pd.DataFrame(records)
    summary = {
        "trades": len(trades),
        "total_return": (eq / initial_equity) - 1.0,
        "final_equity": eq,
        "avg_trade_return": trades["return"].mean() if len(trades) else 0.0,
        "win_rate": (trades["return"] > 0).mean() if len(trades) else 0.0,
        "median_trade_return": trades["return"].median() if len(trades) else 0.0,
    }
    return trades, summary


## Run Backtester

In [6]:
# Baseline run on joined2 (the aligned frame with RV features, NaN rows already dropped).
baseline_params = {
    "z_enter": 2.0,
    "z_exit": 0.5,
    "min_hold_bars": 12,
    "fee_rate": 0.00075,
    "slippage_bps": 0.0,
}
trades, summary = rv_switching_backtest(joined2, **baseline_params)

# Headline numbers, why positions were closed, and the most recent trades.
print(summary)
print(trades["reason"].value_counts())
print(trades.tail())


{'trades': 39, 'total_return': np.float64(0.09576975299645629), 'final_equity': np.float64(10957.697529964562), 'avg_trade_return': np.float64(0.004519788550897488), 'win_rate': np.float64(0.46153846153846156), 'median_trade_return': np.float64(-0.0013151639603486975)}
reason
Z_EXIT    38
EOD        1
Name: count, dtype: int64
              entry_ts             exit_ts asset  entry_price  exit_price  \
34 2025-12-22 08:40:00 2025-12-23 05:10:00   BTC     89746.40    87900.01   
35 2025-12-26 00:00:00 2026-01-02 05:25:00   BTC     87251.20    88583.52   
36 2026-01-02 18:30:00 2026-01-05 10:20:00   ETH      3114.89     3175.96   
37 2026-01-12 18:00:00 2026-01-13 21:20:00   ETH      3099.72     3215.63   
38 2026-01-20 21:30:00 2026-01-24 15:15:00   ETH      2990.60     2965.39   

    bars  reason  equity_before  equity_after    return  
34   246  Z_EXIT   10585.800360  10352.473922 -0.022041  
35  2081  Z_EXIT   10352.473922  10494.801490  0.013748  
36   766  Z_EXIT   10494.801490  1

## Grid Run

In [7]:
# Parameter grid for the backtest sweep.
# Entry thresholds tried as `z_enter`.
z_enter_list = [1.5, 2.0, 2.5, 3.0]
# Exit thresholds tried as `z_exit`; the sweep skips any value >= the paired z_enter.
z_exit_list  = [0.25, 0.5, 0.75, 1.0]
# Values tried as `min_hold_bars` (on 5m bars: 30, 60, 120 and 240 minutes).
min_hold_list = [6, 12, 24, 48]


In [None]:
# Sweep every valid (z_enter, z_exit, min_hold) combination and collect one
# summary row per run.
results = []

for z_enter in z_enter_list:
    for z_exit in z_exit_list:
        # The backtester requires z_enter > z_exit, so skip invalid pairs.
        if z_exit >= z_enter:
            continue
        for min_hold in min_hold_list:
            # Bind to sweep-specific names so the module-level `trades` and
            # `summary` from the baseline run are not silently replaced by
            # the last grid combination.
            _grid_trades, grid_summary = rv_switching_backtest(
                joined2,
                z_enter=z_enter,
                z_exit=z_exit,
                min_hold_bars=min_hold,
                fee_rate=0.00075,
                slippage_bps=0.0,
            )

            results.append({
                "z_enter": z_enter,
                "z_exit": z_exit,
                "min_hold": min_hold,
                "trades": grid_summary["trades"],
                "total_return": grid_summary["total_return"],
                "avg_trade_return": grid_summary["avg_trade_return"],
                "win_rate": grid_summary["win_rate"],
            })

# Best combinations first: by total return, ties broken by average trade return.
grid = pd.DataFrame(results).sort_values(
    ["total_return", "avg_trade_return"],
    ascending=False
)

grid.head(10)
