# Walk-Forward with backtest_framework (1d data)

This notebook shows how to run the class-based walk-forward on daily data.

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

from backtest_framework import (
    DataBundle, MomentumIDParams, MomentumIDStrategy, LongShortVolWeighting, WalkForwardRunner,
    BacktestEngine, compute_sharpe, compute_sortino_ratio, compute_calmar_ratio, compute_composite_score, select_score
)
from binance_data_loader import BinanceDataLoader



## Load data
Adjust paths/filters as needed for your 1d parquet files.

In [2]:

# Root folder that holds the local Binance dumps. Both loader directories are
# derived from it, so pointing the notebook at another machine is a one-line edit.
DATA_ROOT = Path("/Users/chinjieheng/Documents/data")

# Configure loader for daily data
data_loader = BinanceDataLoader(
    data_directory=str(DATA_ROOT / "binance_dailydata"),
    timeframe="1d",
    funding_rate_directory=str(DATA_ROOT / "binance_fundingrate_data"),
    min_records=60,
    min_volume=1e5,
    start_date="2022-09-01",
    end_date=None,
)

price = data_loader.get_price_matrix()

# Rolling volume for universe selection (20d avg, at least 10 observations required).
# Each symbol's volume series is aligned to the price index before averaging.
# NOTE(review): this reads the loader's private `_crypto_universe` mapping —
# prefer a public accessor if the loader exposes one.
volume_data = {
    symbol: data_loader._crypto_universe[symbol]['data']['volume'].reindex(price.index)
    for symbol in data_loader.get_universe()
}
volume_df = pd.DataFrame(volume_data, index=price.index)
rolling_volume_df = volume_df.rolling(window=20, min_periods=10).mean()

# BTC 90d return filter; None when BTCUSDT is not part of the loaded price matrix.
btc_90d_return = price['BTCUSDT'].pct_change(90, fill_method=None) if 'BTCUSDT' in price.columns else None


Loading Binance data from /Users/chinjieheng/Documents/data/binance_dailydata (timeframe=1d)...
Found 611 USDT trading pairs
Using a 30-bar rolling window for 30d volume checks
✓ BTCUSDT loaded successfully with 1232 records, avg volume: 14,822,734,848
Loaded 590 cryptocurrencies
Filtered 18 cryptocurrencies (insufficient data/volume)
Precomputing returns matrix (FAST numpy version)...
Building returns matrix (Memory Optimized)...
Matrix shape: (1232, 590)
Precomputed returns matrix shape: (1232, 590)
Date range: 2022-09-01 00:00:00 to 2026-01-14 00:00:00
Loading funding rate data from /Users/chinjieheng/Documents/data/binance_fundingrate_data...
Found 613 funding rate files
Loaded funding rates for 590 symbols


## Build DataBundle
Precompute the simple-return, ID, and volatility matrices for every window used in the parameter grid.

In [3]:
# Long-form funding history: MultiIndex (symbol, timestamp) with rate + interval.
funding_long = data_loader.get_funding_long_form()

# Keep only rows whose funding interval is not 1 hour.
funding_long = funding_long.loc[funding_long['fundingIntervalHours'].ne(1)]

# Pivot to wide form: one column per symbol, one row per funding-event
# timestamp (full timestamps), sorted by time.
funding_rates = funding_long['fundingRate']
funding_df = funding_rates.unstack(level=0).sort_index()

# Collapse to daily frequency by summing the funding rates of each day.
# min_count=1 leaves a day as NaN when it has no funding events at all,
# instead of turning it into a 0.0 sum.
daily_funding = funding_df.resample('D').sum(min_count=1)
daily_funding



symbol,0GUSDT,1000000BOBUSDT,1000000MOGUSDT,1000BONKUSDT,1000CATUSDT,1000CHEEMSUSDT,1000FLOKIUSDT,1000LUNCUSDT,1000PEPEUSDT,1000RATSUSDT,...,ZETAUSDT,ZILUSDT,ZKCUSDT,ZKJUSDT,ZKUSDT,ZORAUSDT,ZRCUSDT,ZROUSDT,ZRXUSDT,币安人生USDT
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-09-01,,,,,,,,,,,...,,-0.000145,,,,,,,-0.000549,
2022-09-02,,,,,,,,,,,...,,0.000093,,,,,,,-0.000184,
2022-09-03,,,,,,,,,,,...,,0.000204,,,,,,,-0.000728,
2022-09-04,,,,,,,,,,,...,,0.000265,,,,,,,-0.000143,
2022-09-05,,,,,,,,,,,...,,0.000104,,,,,,,0.000271,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2026-01-10,-0.005002,0.003313,0.00030,0.000248,0.000103,0.000300,0.000045,-0.000888,0.000132,0.003281,...,-0.000082,0.000300,-0.000174,0.00030,-0.001188,-0.000062,0.000382,0.000217,-0.001243,0.000662
2026-01-11,-0.004838,0.002789,0.00030,0.000232,0.000132,0.000300,-0.000004,-0.000711,0.000170,0.003110,...,-0.003211,0.000300,0.000063,0.00030,-0.000861,-0.000188,0.000300,0.000300,-0.003524,0.000534
2026-01-12,-0.003070,0.000892,0.00030,0.000023,0.000128,0.000234,0.000079,-0.000510,0.000183,0.003245,...,-0.000260,-0.006586,-0.001315,0.00030,-0.000441,-0.000955,0.000310,0.000161,-0.004760,0.000300
2026-01-13,-0.001164,0.002184,0.00030,0.000177,0.000195,-0.000392,0.000172,-0.000334,0.000265,0.002313,...,-0.001007,-0.002766,-0.000191,0.00030,-0.000094,-0.000292,0.000300,0.000120,-0.001423,0.000244


In [4]:
# Parameter grids (daily bars)
# Candidate lookback windows. The parameter-grid cell uses each simple window
# for both `simple_window` and `id_window`, and each vol window for `vol_window`.
simple_windows = [7,12,14]
vol_windows = [15,20,25,30,35,40,45]

# Bundle every input matrix the walk-forward runner is given.
bundle = DataBundle(
    price_df=price,
    rolling_volume_df=rolling_volume_df,
    btc_ret=btc_90d_return,  # None when BTCUSDT is missing from the price matrix
    funding_df=daily_funding,
    min_hist_days=30
)
# Precompute shared matrices
# Call order is kept as written: simple returns, then ID, then volatility.
# NOTE(review): whether the ID matrix depends on the simple returns being
# computed first is not visible from this notebook — confirm before reordering.
bundle.ensure_simple_returns(simple_windows)
bundle.ensure_id_matrix(simple_windows)
bundle.ensure_vol_matrix(vol_windows)

## Define parameter grid and strategy factory

Tweak `grid_choices` to explore different weighting methods (`vol`, `equal`, `alpha`, `alpha_over_vol`) and momentum modes (`absolute`, `relative`), plus allocation parameters.

In [5]:
from itertools import product

# Choices for cartesian grid.
# Every key must be the name of a MomentumIDParams keyword argument: each
# combination is forwarded to the constructor as **kwargs, so adding a key (or
# more values under a key) here is the only edit needed to widen the grid.
grid_choices = {
    "volume_pct": [0.2],
    "momentum_pct": [0.1],
    "momentum_mode": ["relative"],
    "weighting_method": ["vol"],
    "long_id_threshold": [-0.8],  # de-emphasize ID filter
    "short_id_threshold": [-0.8],
    "max_positions_per_side": [10],
    "max_position_cap": [0.3],
    "tc_bps": [5],  # cost in bps
    "use_btc_filter": [True],
}

# One tuple per combination, values ordered like the keys of grid_choices.
choices = list(product(*grid_choices.values()))

# Full grid: every (simple window, vol window) pair crossed with every choice
# combination. Iteration order is simple window -> vol window -> combination.
param_grid = [
    MomentumIDParams(
        simple_window=sw,
        id_window=sw,  # the ID window always follows the simple-return window
        vol_window=vw,
        min_weight=0.05,
        # Re-attach the parameter names to this combination's values.
        **dict(zip(grid_choices, combo)),
    )
    for sw in simple_windows
    for vw in vol_windows
    for combo in choices
]

print(f"Param grid size: {len(param_grid)}")

# Strategy factory closure
def make_strategy(params):
    """Return a new MomentumIDStrategy configured with ``params``.

    Handed to WalkForwardRunner as its ``strategy_factory``.
    """
    return MomentumIDStrategy(params)

weighting_model = LongShortVolWeighting()


Param grid size: 21


## Configure walk-forward
All spans are expressed in bars (one bar = one day for 1d data).

In [6]:

# Window lengths, in bars (one bar per day for this 1d dataset).
train_span = 365  # ~12 months of daily bars
test_span = 90    # e.g., ~3 months
step_span = 90    # step size (same length as test_span)

# Walk-forward runner over the precomputed bundle and the full parameter grid.
runner = WalkForwardRunner(
    periods_per_year=365,  # daily bars, one per calendar day
    bundle=bundle,
    strategy_factory=make_strategy,
    weighting_model=weighting_model,
    params_grid=param_grid,
    train_span=train_span,
    test_span=test_span,
    step_span=step_span,
    mode="expanding",
    score_mode="composite",
    n_jobs=10,  # set >1 to thread across params
)



## Run walk-forward

In [7]:
# Run the walk-forward; the runner hands back four objects, unpacked here as
# the per-iteration table, the OOS returns, the OOS equity and the positions.
wf_df, oos_returns, oos_equity, positions_df = runner.run()

# Per-iteration windows with in-sample vs out-of-sample scores and Sharpe.
summary_columns = [
    'iteration', 'train_start', 'train_end', 'test_start', 'test_end',
    'is_score', 'oos_score', 'is_sharpe', 'oos_sharpe',
]
print(wf_df[summary_columns])

# Sharpe over the combined OOS returns; NaN when there are fewer than two observations.
if len(oos_returns) > 1:
    combined_oos_sharpe = compute_sharpe(oos_returns, periods_per_year=runner.periods_per_year)
else:
    combined_oos_sharpe = float('nan')
print("Combined OOS Sharpe:", combined_oos_sharpe)

# Build the report (plots are written under "figures").
report = runner.report(
    wf_df=wf_df,
    oos_returns=oos_returns,
    oos_equity=oos_equity,
    plot=True,
    fig_dir="figures",
)

# Pull optional pieces out of the report. Missing keys give None, except the
# combined equity, which falls back to the runner's own OOS equity.
combined_equity = report.get('combined_equity', oos_equity)
iter_stats_df = report.get('iter_stats')
combined_returns_recomputed = report.get('combined_returns_recomputed')
combined_turnover = report.get('combined_turnover')


   iteration train_start  train_end test_start   test_end  is_score  \
0          1  2022-09-01 2023-09-01 2023-09-01 2023-11-29  2.135914   
1          2  2022-09-01 2023-11-30 2023-11-30 2024-02-27  1.354913   
2          3  2022-09-01 2024-02-28 2024-02-28 2024-05-27  2.055312   
3          4  2022-09-01 2024-05-28 2024-05-28 2024-08-25  2.294115   
4          5  2022-09-01 2024-08-26 2024-08-26 2024-11-23  1.934638   
5          6  2022-09-01 2024-11-24 2024-11-24 2025-02-21  1.933135   
6          7  2022-09-01 2025-02-22 2025-02-22 2025-05-22  2.278761   
7          8  2022-09-01 2025-05-23 2025-05-23 2025-08-20  2.302267   
8          9  2022-09-01 2025-08-21 2025-08-21 2025-11-18  2.619451   
9         10  2022-09-01 2025-11-19 2025-11-19 2026-01-14  2.629958   

   oos_score  is_sharpe  oos_sharpe  
0  -1.042701   1.244889   -0.764088  
1   5.185330   0.893799    2.465314  
2   6.281641   1.309817    2.481369  
3  -1.346042   1.446638   -1.066029  
4   2.791617   1.271344    1

  btc_equity = btc_prices.fillna(method="ffill") / initial_btc


## Save results (optional)

In [8]:
# Core outputs are always written.
wf_df.to_csv('walkforward_results_class.csv', index=False)
oos_returns.to_csv('walkforward_oos_returns.csv', header=False)
oos_equity.to_csv('walkforward_oos_equity.csv', header=False)

# Optional outputs: (variable name, destination file, to_csv keyword arguments).
# Each one is written only if the variable exists, is not None and is non-empty.
optional_exports = (
    ('iter_stats_df', 'walkforward_iter_stats.csv', {'index': False}),
    ('combined_turnover', 'walkforward_oos_turnover.csv', {'header': False}),
    ('positions_df', 'walkforward_positions.csv', {'index': False}),
)
for var_name, csv_name, csv_kwargs in optional_exports:
    # .get() yields None for an undefined variable, which the check below skips.
    candidate = locals().get(var_name)
    if candidate is not None and not candidate.empty:
        candidate.to_csv(csv_name, **csv_kwargs)