In [None]:
import pandas as pd
from tqdm import tqdm
import glob
import pickle
from pathlib import Path
from src.simulate_quotes import QuoteModel,FeatureBuilder

# Directory holding the recorded tick sessions, relative to the notebook's
# working directory.
base_path = "ticks"
# glob.glob yields matches in arbitrary, filesystem-dependent order. The loader
# below breaks timestamp ties with a stable sort, so the order of this list
# influences the final row order; sorting it keeps runs reproducible.
sessions = sorted(glob.glob(f"{base_path}/*-BID_ASK.parquet"))
len(sessions)

In [None]:
def load_sessions(paths):
    """Load several session Parquet files into one time-ordered DataFrame.

    Each file is read with ``pd.read_parquet`` and tagged with a ``symbol``
    column derived from its file name: the part of the stem before the first
    ``-`` (e.g. ``WHLR-2025-06-12-...`` gives ``WHLR``).

    Parameters
    ----------
    paths : iterable of str or path-like
        Parquet files to load. Every file must provide a ``time`` column,
        since the combined frame is sorted on it.

    Returns
    -------
    pandas.DataFrame
        All rows concatenated and sorted by ``time`` with a stable sort (rows
        with equal timestamps keep the order in which their files were
        given), re-indexed from 0.

    Raises
    ------
    ValueError
        If ``paths`` is empty.
    """
    paths = list(paths)  # materialise so generators can be checked and iterated
    if not paths:
        # pd.concat([]) would raise an opaque "No objects to concatenate";
        # fail early with a message that points at the likely cause.
        raise ValueError(
            "load_sessions: no session files to load - `paths` is empty "
            "(check the glob pattern and the working directory)"
        )
    frames = [
        pd.read_parquet(p).assign(symbol=Path(p).stem.split("-")[0])
        for p in paths
    ]
    return (
        pd.concat(frames)
        .sort_values("time", kind="mergesort")  # mergesort = stable tie order
        .reset_index(drop=True)
    )

def train_model(df):
    """Fit one QuoteModel from tick rows, processing each symbol separately.

    Rows are grouped by the ``symbol`` column. Inside a group every row is
    passed through a FeatureBuilder created for that group, and the resulting
    features are batched by second (``time`` floored to ``"s"``). A batch is
    handed to ``QuoteModel.update_second`` each time the second changes, and
    once more at the end of the group for the remaining features.

    Parameters
    ----------
    df : pandas.DataFrame
        Tick rows with at least ``symbol`` and ``time`` columns.

    Returns
    -------
    QuoteModel
        The model after all groups have been fed to it.
    """
    model = QuoteModel()
    by_symbol = df.groupby("symbol")
    progress = tqdm(by_symbol, total=len(by_symbol))
    # (1) Each symbol is handled in isolation.
    for symbol, ticks in progress:
        progress.set_description(f"Training QuoteModel for {symbol}")
        # Fresh builder per symbol - presumably so internal state such as
        # prev_mid is never carried over from another symbol.
        builder = FeatureBuilder()
        open_second = ticks["time"].iloc[0].floor("s")
        batch = []
        for tick in ticks.itertuples(index=False):
            tick_second = tick.time.floor("s")
            if tick_second != open_second:
                # (2) Second changed: pass on ONLY the features, then start
                # a new batch for the new second.
                model.update_second(batch)
                batch = []
                open_second = tick_second
            batch.append(builder.transform(tick))
        if not batch:
            continue
        # Flush whatever is left for the last second of this symbol.
        model.update_second(batch)
    return model

In [None]:
# 1) Ingestion: load every matched session file into one time-sorted DataFrame.
df = load_sessions(sessions)
df  # bare last expression so the notebook renders the combined frame

In [None]:
# 2) Training: feed the combined frame, symbol by symbol, into one QuoteModel.
print("Training model...")
model = train_model(df)

In [None]:
# 3) Save the trained model under a timestamped (second-resolution) file name
#    so that successive runs produce separate files.
ts = pd.Timestamp.now().strftime("%Y%m%d-%H%M%S")
model_path = Path("../models") / f"model-{ts}.pkl"
# open(..., "wb") does not create missing directories; make sure the target
# folder exists so a finished training run is not lost to FileNotFoundError.
model_path.parent.mkdir(parents=True, exist_ok=True)
with open(model_path, "wb") as f:
    pickle.dump(model, f)