In [1]:
import json
import pandas as pd

# Backtest run directory to inspect, and the index of the walk-forward fold
# whose train / out-of-sample date ranges should be reported.
RUN_DIR = "outputs/backtest/binance_flow_top10_2018_2025"
FOLD = 52

# Read the run metadata explicitly as UTF-8 so decoding does not depend on the
# platform's locale default encoding.
with open(f"{RUN_DIR}/meta.json", "r", encoding="utf-8") as f:
    meta = json.load(f)

# Path of the price data plus the walk-forward schedule. The three sizes are
# coerced to int because they are used as row counts further down.
data_path = meta["data_path"]
init_train = int(meta["init_train"])
oos_window = int(meta["oos_window"])
step = int(meta["step"])

df = pd.read_csv(data_path)

# Timestamp compatibility: the Binance downloader usually writes the
# "timestamp" column either as epoch milliseconds or as ISO date strings.
if "timestamp" not in df.columns:
    raise ValueError("data中未找到timestamp列")

if pd.api.types.is_numeric_dtype(df["timestamp"]):
    # Numeric column: interpret the values as milliseconds since the epoch.
    ts = pd.to_datetime(df["timestamp"], unit="ms", utc=True)
else:
    # Non-numeric column: let pandas parse the date strings. Selecting the
    # branch by dtype, instead of catching every exception raised by the "ms"
    # attempt, keeps genuine parsing errors visible.
    ts = pd.to_datetime(df["timestamp"], utc=True)

# Strip the timezone so "dt" holds naive timestamps expressed in UTC.
df["dt"] = ts.dt.tz_convert(None)

# NOTE: this cannot reproduce exactly the dropna starting point used inside
# the backtest script (that depends on how the flow features are implemented).
# It is still good enough to locate a fold's approximate range: order the rows
# by time, keep only valid rows (non-null close), and renumber them from 0.
df = df.sort_values("dt").dropna(subset=["close"]).reset_index(drop=True)

# First out-of-sample row of the requested fold: the initial training length
# plus FOLD advances of `step` rows.
start = init_train + (FOLD * step)

# Training covers rows 0 .. start-1; the out-of-sample window is the next
# `oos_window` rows. All four bounds are inclusive row positions.
train_start = 0
train_end = start - 1
test_start = start
test_end = start + oos_window - 1

# Fail early on a negative fold or an empty training / out-of-sample window.
# Otherwise the bounds would point before row 0 (or run backwards) and the
# row lookups that follow would either fail with an opaque KeyError or report
# a meaningless range.
if FOLD < 0 or train_end < train_start or test_end < test_start:
    raise ValueError(
        f"fold={FOLD} 无效：训练或测试区间为空 "
        f"(train_end={train_end}, test_start={test_start}, test_end={test_end})"
    )

# The whole out-of-sample window must fit inside the available rows.
if test_end >= len(df):
    raise ValueError(f"fold={FOLD} 超出数据范围：test_end={test_end}, n={len(df)}")

# Echo the run configuration: one line per entry, label first and then its
# value(s), separated by single spaces.
summary = (
    ("RUN_DIR:", RUN_DIR),
    ("data_path:", data_path),
    ("n_rows:", len(df)),
    ("init_train,oos_window,step:", init_train, oos_window, step),
)
for label, *values in summary:
    print(label, *values)

print("---")
print(f"fold={FOLD}")

# For each window, show the first and last "dt" value and the inclusive row
# range those values were taken from.
windows = (
    ("TRAIN:", train_start, train_end),
    ("OOS  :", test_start, test_end),
)
for label, first_row, last_row in windows:
    print(
        label,
        df.loc[first_row, "dt"],
        "->",
        df.loc[last_row, "dt"],
        f"(rows {first_row}..{last_row})",
    )

RUN_DIR: outputs/backtest/binance_flow_top10_2018_2025
data_path: data/raw/btc_binance_BTCUSDT_1d.csv
n_rows: 2903
init_train,oos_window,step: 1500 63 21
---
fold=52
TRAIN: 2018-01-01 00:00:00 -> 2025-02-04 00:00:00 (rows 0..2591)
OOS  : 2025-02-05 00:00:00 -> 2025-04-08 00:00:00 (rows 2592..2654)
