In [None]:
import pandas as pd
import numpy as np

# --- Configuration ---------------------------------------------------------
# Absolute location of the pickled realised-measure panel; edit for your machine.
DATA_FILE = "/Users/ananyaparikh/Documents/Coding/DeepRGARCH/code submission/data/rv.pkl"
# Key used to select one slice of the panel's outer index level.
TICKER = ".AEX"

# --- Read the panel and normalise its date level ---------------------------
print("loading", DATA_FILE)
df_panel = pd.read_pickle(DATA_FILE)

# Parse the last index level as UTC timestamps, then drop the timezone so the
# level holds naive datetimes.
_dates_naive = pd.to_datetime(df_panel.index.levels[-1], utc=True).tz_localize(None)
df_panel.index = df_panel.index.set_levels(_dates_naive, level=-1)

print(df_panel)

# --- Slice out one ticker --------------------------------------------------
# Keep the realised-measure columns plus the closing price (no renaming is
# performed here); copy so later edits do not touch the full panel.
cols_rv = ["rv5", "bv", "medrv", "rk_parzen", "rsv"]
df = df_panel.loc[TICKER, [*cols_rv, "close_price"]].copy()

In [None]:
from mulrv import RealRECHD_2LSTM, RealRECH_2LSTM 
from pathlib import Path

def get_dataset(filepath: str, ticker: str = "SPX", rv_cols=None, *,
                split=0.5):
    """Load a pickled realised-measure panel and build train/test arrays.

    The pickle at *filepath* is read with ``pd.read_pickle`` and must contain
    a DataFrame with a MultiIndex whose last level can be parsed as dates,
    a ``close_price`` column and every column named in *rv_cols*.  Only the
    rows selected by ``df.loc[ticker]`` are used.

    Processing steps:
      1. Returns are the first difference of ``log(close_price)``; the first
         row (which has no return) is dropped.  Returns are then demeaned
         and multiplied by 100.
      2. Each realised-measure column is rescaled so that its sample mean
         equals the sample mean of the squared returns.
      3. The rows are split chronologically (in stored order) into a
         leading training part and a trailing test part.

    NOTE: the return mean and the scaling constants are computed on the
    full sample, i.e. *before* the train/test split.

    Parameters
    ----------
    filepath : path of the pickle file.
    ticker   : key of the panel slice to use.
    rv_cols  : column name or sequence of column names of the realised
               measures.  ``None`` or an empty sequence selects
               ``['rv5', 'bv', 'medrv', 'rk_parzen', 'rsv']``.
    split    : fraction in ``[0, 1]`` of the rows assigned to the training
               set (keyword-only); the remainder forms the test set.

    Returns
    -------
    (Y_train, Y_test, RV_train, RV_test) as NumPy arrays, where the ``Y``
    arrays have shape (T, 1) and the ``RV`` arrays have shape (T, K) with
    K the number of realised-measure columns.

    Raises
    ------
    ValueError : if *split* is not within ``[0, 1]``.
    """
    # A negative fraction would make ``iloc[:n_train]`` silently drop rows
    # from the end, and a fraction above one would silently give an empty
    # test set, so reject both up front.
    if not 0.0 <= split <= 1.0:
        raise ValueError(f"split must lie in [0, 1], got {split!r}")

    # Normalise the column selection to a plain list: pandas treats a tuple
    # as a single (MultiIndex-style) key rather than a list of columns.
    if rv_cols is None or len(rv_cols) == 0:
        rv_cols = ['rv5', 'bv', 'medrv', 'rk_parzen', 'rsv']
    elif isinstance(rv_cols, str):
        rv_cols = [rv_cols]
    else:
        rv_cols = list(rv_cols)

    df = pd.read_pickle(filepath)
    # Parse the date level as UTC and then strip the timezone (naive dates).
    df.index = df.index.set_levels(
        pd.to_datetime(df.index.levels[-1], utc=True).tz_localize(None),
        level=-1,
    )
    s = df.loc[ticker].copy()

    # returns -------------------------------------------------------
    s['return'] = np.log(s['close_price']).diff()
    s = s.dropna(subset=['return'])          # first row has no return
    s['return'] = 100 * (s['return'] - s['return'].mean())

    # realised vols -------------------------------------------------
    # One scaling constant per column (a Series indexed by column name);
    # the multiplication aligns on column labels.
    c_scale = (s['return'] ** 2).mean() / s[rv_cols].mean()
    s[rv_cols] = s[rv_cols] * c_scale

    # split ---------------------------------------------------------
    n_train = int(len(s) * split)
    train, test = s.iloc[:n_train], s.iloc[n_train:]

    def _to_np(dd, cols):
        # A single column name yields a 1-D array; promote it to (T, 1).
        arr = dd[cols].to_numpy()
        return arr.reshape(-1, 1) if arr.ndim == 1 else arr

    return (
        _to_np(train, 'return'),
        _to_np(test,  'return'),
        _to_np(train, rv_cols),
        _to_np(test,  rv_cols),
    )
