In [None]:
import pandas as pd
import numpy as np
import glob

In [None]:
# Symbol whose kline CSVs are loaded, and the short tag used in the output
# file name further down.
ticker = 'BTCUSDT'
name = 'btc'

# Column names applied to the CSVs, which are read as header-less
# (header=None below), in file column order.
cols = [
    "timestamp",        # Open time
    "open",             # Open
    "high",             # High
    "low",              # Low
    "close",            # Close
    "volume_base",      # Volume
    "close_time",       # Close time
    "volume_quote",     # Quote asset volume
    "num_trades",       # Number of trades
    "taker_buy_base",   # Taker buy base asset volume
    "taker_buy_quote",  # Taker buy quote asset volume
    "ignore"            # Ignore
]

# Files are looked up in the current working directory and concatenated in
# lexicographic filename order (presumably chronological given the
# date-stamped names — verify if the naming scheme changes).
pattern = f"{ticker}-1s-2025-*.csv"
files = sorted(glob.glob(pattern))
print(f"{len(files)} fichiers trouvés : {[f.split('/')[-1] for f in files]}")

# pd.concat([]) would fail with an opaque "No objects to concatenate";
# fail early with a message that names the pattern instead.
if not files:
    raise FileNotFoundError(f"Aucun fichier ne correspond au motif {pattern!r}")

df = pd.concat(
    [pd.read_csv(f, header=None, names=cols) for f in files],
    ignore_index=True
)

# Types
# The open-time column is an integer epoch. Current-era values are ~1e12 in
# milliseconds and ~1e15 in microseconds, so 1e14 cleanly separates the two.
# NOTE(review): Binance spot dumps are documented as microseconds from
# 2025-01-01 onwards; parsing those with unit="ms" overflows or yields
# far-future dates. Normalising per row also tolerates a mix of both units.
raw_ts = df["timestamp"].astype("int64")
is_micro = raw_ts >= 10**14
ts_ms = raw_ts.where(~is_micro, raw_ts // 1000)
df["timestamp"] = pd.to_datetime(ts_ms, unit="ms", utc=True)

# Price and volume columns as floats, trade count as integer.
float_cols = ["open", "high", "low", "close", "volume_base", "volume_quote",
              "taker_buy_base", "taker_buy_quote"]
df[float_cols] = df[float_cols].astype(float)
df["num_trades"] = df["num_trades"].astype(int)

# Buy/Sell split: the taker-buy columns give the buy side directly; the sell
# side is whatever remains of the bar's total, in base and quote units.
eps = 1e-12  # added to denominators so zero-volume bars do not divide by zero
df["buy_qty"] = df["taker_buy_base"]
df["sell_qty"] = df["volume_base"].sub(df["buy_qty"])
df["buy_notional"] = df["taker_buy_quote"]
df["sell_notional"] = df["volume_quote"].sub(df["buy_notional"])

# OFI: (buy - sell) / (total + eps), once on quantities and once on notional.
qty_imbalance = df["buy_qty"].sub(df["sell_qty"])
notional_imbalance = df["buy_notional"].sub(df["sell_notional"])
df["ofi_qty"] = qty_imbalance.div(df["volume_base"].add(eps))
df["ofi_notional"] = notional_imbalance.div(df["volume_quote"].add(eps))

# Forward log return: log(close of the next row / close of this row).
# NOTE(review): "next row" is positional, so the return spans more than one
# second wherever the timestamps have a gap — confirm the data is contiguous.
next_close = df["close"].shift(-1)
df["r_1s_fwd"] = np.log(next_close.div(df["close"]))

# Rows without a usable forward return (at least the final row) are removed.
df = df.dropna(subset=["r_1s_fwd"])

# Calendar month taken from the timestamp column.
df["month"] = df["timestamp"].dt.month

# Quick sanity summary of the loaded data.
print(f"Total bougies  : {len(df):,}")
print(f"Période        : {df.timestamp.min()} → {df.timestamp.max()}")
print(f"Prix médian    : ${df.close.median():,.2f}")
# This figure is the mean per-bar (high - low) / close, expressed in bps.
print(f"Spread bps moy : {((df.high-df.low)/df.close).mean()*10000:.2f} bps")

# Save the result so the CSVs do not have to be reloaded every time.
# The path is built once so the confirmation message always matches the file
# actually written (the original message lacked the f-prefix and printed the
# literal "{name}").
out_path = f"{name}_1s_Jul-Dec_2025.parquet"
df.to_parquet(out_path, index=False)
print(f"Sauvegardé → {out_path}")