In [1]:
import pandas as pd
import numpy as np
from binance_data_loader import BinanceDataLoader
from bab_framework import (
    FundingDataBundle, 
    compute_sharpe,
    BettingAgainstBetaParams,
    BettingAgainstBetaStrategy,
    BettingAgainstBetaWeighting,
    BABWalkForwardRunner
)
from itertools import product

In [2]:
# Data locations. These are machine-specific absolute paths; edit them here
# (one place) when running the notebook on another machine.
PRICE_DATA_DIR = "/Users/chinjieheng/Documents/data/binance_1Hdata"
FUNDING_DATA_DIR = "/Users/chinjieheng/Documents/data/binance_fundingrate_data"

loader = BinanceDataLoader(
    data_directory=PRICE_DATA_DIR,
    funding_rate_directory=FUNDING_DATA_DIR,
    timeframe='1h',
    min_records=90,
    min_volume=1e6,
    start_date="2023-01-01",
    end_date=None
)

# High-frequency price matrix (loaded with timeframe='1h' above).
price_hf = loader.get_price_matrix()

# High-frequency returns.
# The forward-fill is spelled out explicitly: it reproduces what the implicit
# (now deprecated) `fill_method='pad'` default of pct_change() did, so the
# numbers are unchanged, the FutureWarning disappears, and the result no longer
# depends on which pandas version is installed.
# NOTE(review): forward-filling means a gap in a price series shows up as 0%
# returns across the gap - confirm this is the intended treatment.
returns_df_hf = price_hf.ffill().pct_change(fill_method=None)

# Prepare Daily Data: last observed price within each calendar day.
price_daily = price_hf.resample('D').last()

# Process Funding Data (Aggregate to Daily)
funding_long = loader.get_funding_long_form()

# Pivot index level 0 into columns (presumably the symbol level - verify against
# the loader), then sum the funding rates that fall within each day.
# min_count=1 keeps a day with no funding observations as NaN instead of 0.
daily_funding = (
    funding_long['fundingRate']
    .unstack(level=0)
    .sort_index()
    .resample('D').sum(min_count=1)
)

# Get Volume Data (30d Rolling Mean for stability)
daily_volume = loader.get_volume_matrix(vol_30d=True)
# Ensure alignment with price_daily (since loader might return different index due to resampling);
# dates missing from the volume matrix are treated as zero volume.
daily_volume = daily_volume.reindex(price_daily.index).fillna(0.0)

Loading Binance data from /Users/chinjieheng/Documents/data/binance_1Hdata (timeframe=1h)...
Found 611 USDT trading pairs
Using a 720-bar rolling window for 30d volume checks
✓ BTCUSDT loaded successfully with 26622 records, avg volume: 638,118,144
Loaded 570 cryptocurrencies
Filtered 38 cryptocurrencies (insufficient data/volume)
Precomputing returns matrix (FAST numpy version)...
Building returns matrix (Memory Optimized)...
Matrix shape: (26622, 570)
Precomputed returns matrix shape: (26622, 570)
Date range: 2023-01-01 00:00:00 to 2026-01-14 05:00:00
Loading funding rate data from /Users/chinjieheng/Documents/data/binance_fundingrate_data...
Found 613 funding rate files
Loaded funding rates for 570 symbols


  returns_df_hf = price_hf.pct_change()


Building volume matrix (volume_30d) for 570 tickers over 26622 dates...


In [3]:
# Create daily returns for the main backtest heartbeat.
# The forward-fill is explicit so the result matches the legacy implicit
# `fill_method='pad'` behaviour of pct_change() without triggering its
# deprecation warning, and stays the same once pandas drops that default.
returns_daily = price_daily.ffill().pct_change(fill_method=None)

# Bundle every input the BAB framework consumes into a single object.
bundle = FundingDataBundle(
    price_df=price_daily,
    funding_df=daily_funding,
    returns_df=returns_daily,  # Daily returns for backtest heartbeat
    volume_df=daily_volume,    # Volume for filtering
    returns_df_hf=returns_df_hf,  # HF returns for beta calculation
    hf_window_multiplier=24, # 24 hours per day for 1h HF data
    min_hist_days=30,
    hf_resample_rule='D'
)

  returns_daily = price_daily.pct_change()


In [16]:
# Hyper-parameter grid: each key is a BettingAgainstBetaParams keyword argument
# and each value is the list of candidate settings to try for it.
# To add a parameter to the search, add ONE entry here - the combinations and
# the params objects below are derived from this dict automatically.
grid_choices = {
    "beta_window": [30],
    "portfolio_size_each_side": [10], # Now applies to frazzini_pedersen as well (Top K selection)
    "target_side_beta": [1.0],
    "beta_tolerance": [0.001],
    "gross_exposure_limit": [1.0], # Used by the solver method; ignored for frazzini_pedersen weighting
    "leverage_cap": [5.0], # NEW: Max Leverage for Frazzini method (allows hitting Beta=1)
    "beta_type": ["btc"],
    "tc_bps": [5],
    "use_shrinkage": [True],
    "prior_beta_window": [90],
    "min_weight": [0.0],
    "max_weight": [1.0],
    "weighting_method": ["frazzini_pedersen"],
    "max_funding_short": [0.1],
    "min_funding_long": [-0.1],
    "volatility_scaling": [True],
    "volume_filter_threshold": [0.8] # Keep top 80% of assets by volume (exclude bottom 20%)
}

# Cartesian product of all candidate lists. Each tuple holds one value per key,
# in the dict's insertion order.
param_combinations = list(product(*grid_choices.values()))

# One params object per combination. Keys are zipped back onto the values so
# every value is passed under its own name; no positional bookkeeping needed.
params_grid = [
    BettingAgainstBetaParams(**dict(zip(grid_choices, combo)))
    for combo in param_combinations
]

In [17]:
# Walk-forward configuration: 90-period train/test/step spans, candidates
# scored by Sharpe, run in 'expanding' mode.
walk_forward_config = dict(
    train_span=90,
    test_span=90,
    step_span=90,
    score_mode="sharpe",
    mode='expanding',
)
runner = BABWalkForwardRunner(bundle=bundle, params_grid=params_grid, **walk_forward_config)

# Run the walk-forward backtest and unpack its seven outputs.
walk_forward_outputs = runner.run()
(
    wf_df,
    oos_returns,
    oos_equity,
    oos_price_ret,
    oos_funding_ret,
    positions_df,
    detailed_df,
) = walk_forward_outputs

# Per-iteration summary (only when at least one iteration was produced).
cols_to_show = [
    'iteration',
    'train_start', 'train_end',
    'test_start', 'test_end',
    'is_score', 'oos_score',
    'is_sharpe', 'oos_sharpe',
]
if not wf_df.empty:
    print(wf_df[cols_to_show])

# Sharpe over the stitched out-of-sample returns, annualised with 365 periods
# per year; reported as NaN when there are fewer than two observations.
if len(oos_returns) > 1:
    combined_oos_sharpe = compute_sharpe(oos_returns, periods_per_year=365)
else:
    combined_oos_sharpe = float('nan')
print("Combined OOS Sharpe:", combined_oos_sharpe)

# Build the performance report; figures are written under "figures_bab".
report = runner.report(
    wf_df=wf_df,
    oos_returns=oos_returns,
    oos_equity=oos_equity,
    oos_price_returns=oos_price_ret,
    oos_funding_returns=oos_funding_ret,
    plot=True,
    fig_dir="figures_bab",
)

# Persist the detailed records, skipping the write when there is nothing to save.
if not detailed_df.empty:
    detailed_df.to_csv('detailed_records_bab_wf.csv', index=False)
    print(f"Saved {len(detailed_df)} detailed records to detailed_records_bab_wf.csv")

BAB Iteration 1 (Expanding): Train [0:90], Test [90:179]
BAB Iteration 2 (Expanding): Train [0:180], Test [180:269]
BAB Iteration 3 (Expanding): Train [0:270], Test [270:359]
BAB Iteration 4 (Expanding): Train [0:360], Test [360:449]
BAB Iteration 5 (Expanding): Train [0:450], Test [450:539]
BAB Iteration 6 (Expanding): Train [0:540], Test [540:629]
BAB Iteration 7 (Expanding): Train [0:630], Test [630:719]
BAB Iteration 8 (Expanding): Train [0:720], Test [720:809]
BAB Iteration 9 (Expanding): Train [0:810], Test [810:899]
BAB Iteration 10 (Expanding): Train [0:900], Test [900:989]
BAB Iteration 11 (Expanding): Train [0:990], Test [990:1079]
BAB Iteration 12 (Expanding): Train [0:1080], Test [1080:1109]
    iteration train_start  train_end test_start   test_end  is_score  \
0           1  2023-01-01 2023-04-01 2023-04-01 2023-06-29      -inf   
1           2  2023-01-01 2023-06-30 2023-06-30 2023-09-27  2.295425   
2           3  2023-01-01 2023-09-28 2023-09-28 2023-12-26  1.940985   

In [18]:
# Diagnostic: Verify BAB mechanics
# Prints sanity checks on the walk-forward outputs: beta neutrality, per-side
# beta exposure, a sample of daily records, flat-return days, and a return
# decomposition. Every section tolerates an empty backtest result.
print("=== BAB DIAGNOSTICS ===\n")

# Sections 1-3 need the per-day position records; skipped when there are none.
if not positions_df.empty:
    print("1. Beta Neutrality Check:")
    print(f"   Average Net Beta: {positions_df['net_beta'].mean():.4f}")
    print(f"   Max Net Beta: {positions_df['net_beta'].max():.4f}")
    print(f"   Min Net Beta: {positions_df['net_beta'].min():.4f}")
    
    print("\n2. Long/Short Beta Profile:")
    print(f"   Average Long Beta Exposure (should be close to +1): {positions_df['long_beta'].mean():.4f}")
    print(f"   Average Short Beta Exposure (should be close to -1): {positions_df['short_beta'].mean():.4f}")
    
    print("\n3. Sample Daily Records:")
    cols = ['date', 'long_positions', 'long_beta', 'short_positions', 'short_beta', 'net_beta', 'daily_return']
    print(positions_df[cols].head())

# 4. Check for flat days (missing data handling): count periods whose
#    out-of-sample return is exactly zero.
flat_days = (oos_returns == 0).sum()
print(f"\n4. Flat return days (missing data): {flat_days}")

# 5. Display return decomposition.
#    The price and funding lines are simple sums of per-period returns, while
#    the total is read off the final equity value, so the three figures need
#    not add up exactly.
print(f"\n5. Return decomposition:")
print(f"   Total Price Return: {oos_price_ret.sum()*100:.2f}%")
print(f"   Total Funding Return: {oos_funding_ret.sum()*100:.2f}%")
# Guard the empty case: .iloc[-1] raises IndexError on an empty equity curve,
# so report NaN instead of crashing the diagnostics cell.
total_return = oos_equity.iloc[-1] - 1 if len(oos_equity) > 0 else float('nan')
print(f"   Total Return: {total_return*100:.2f}%")

=== BAB DIAGNOSTICS ===

1. Beta Neutrality Check:
   Average Net Beta: -0.0000
   Max Net Beta: 0.0000
   Min Net Beta: -0.0000

2. Long/Short Beta Profile:
   Average Long Beta Exposure (should be close to +1): 0.9956
   Average Short Beta Exposure (should be close to -1): -0.9956

3. Sample Daily Records:
        date  long_positions  long_beta  short_positions  short_beta  \
0 2023-04-02              10        1.0               10        -1.0   
1 2023-04-03              10        1.0               10        -1.0   
2 2023-04-04              10        1.0               10        -1.0   
3 2023-04-05              10        1.0               10        -1.0   
4 2023-04-06              10        1.0               10        -1.0   

       net_beta  daily_return  
0  0.000000e+00      0.004078  
1  1.110223e-16     -0.006301  
2 -1.110223e-16     -0.007679  
3  1.110223e-16      0.022678  
4  1.110223e-16     -0.009313  

4. Flat return days (missing data): 0

5. Return decomposition:
