## Chronos_Fx-Price Finetuned Multi M


In [None]:
pip install chronos-forecasting

In [None]:
import pandas as pd
from chronos import BaseChronosPipeline

# Load the pretrained "amazon/chronos-2" checkpoint onto the CUDA device.
# NOTE(review): none of the cells visible below read this module-level
# `pipeline`; they build their own via build_base_chronos_pipeline().
# Confirm whether this load is still needed.
pipeline = BaseChronosPipeline.from_pretrained("amazon/chronos-2", device_map="cuda")


## Fine-tuning: best hyperparameters (grid search)

In [None]:

from __future__ import annotations
import io, time, math, itertools
from dataclasses import dataclass
from typing import Optional, Tuple, Dict, Callable, List

import numpy as np
import pandas as pd
import requests, certifi
from sklearn.metrics import mean_absolute_error

import torch
from chronos import BaseChronosPipeline  # chronos-forecasting>=2.0



# Currency columns that must be present in the fine-tuning CSV; each one is
# used as a separate training series.
FINETUNE_FX_COLS = [
    "AUD", "CAD", "CHF", "DKK", "GBP",
    "ISK", "JPY", "NZD", "SEK", "USD", "XDR",
]

# Location of the fine-tuning panel (read below as a semicolon-separated CSV).
NB_PANEL_URL = (
    "https://raw.githubusercontent.com/bredeespelid/"
    "Data_MasterOppgave/refs/heads/main/FineTuneData/NB1980-1999.csv"
)

@dataclass
class FTConfig:
    """Settings for one fine-tuning run and the validation that follows it.

    The first five fields are forwarded to ``pipeline.fit``; the remaining
    ones steer the walk-forward evaluation and console output.
    """

    # --- forwarded to pipeline.fit -------------------------------------
    prediction_length: int = 32   # also sets the minimum series length (2x)
    num_steps: int = 50
    learning_rate: float = 1e-5
    batch_size: int = 2
    logging_steps: int = 10
    # --- evaluation ------------------------------------------------------
    max_context: int = 2048       # most recent observations fed to the model
    min_hist_days: int = 40       # periods with less history are skipped
    m_freq: str = "M"             # pandas period frequency of the evaluation
    # --- output ----------------------------------------------------------
    verbose: bool = True          # print the panel/split summary


# Default configuration; the grid search reuses its evaluation settings.
FTCFG = FTConfig()


def download_csv_text(url: str, retries: int = 3, timeout: int = 60) -> str:
    """Fetch ``url`` over HTTP(S) and return the response body as text.

    Args:
        url: Address of the CSV file.
        retries: Maximum number of attempts. After failed attempt ``k`` the
            function sleeps ``1.5 * k`` seconds before trying again.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        The response body (``Response.text``).

    Raises:
        RuntimeError: If no attempt succeeds. The last request error is
            attached as ``__cause__`` so the original traceback is kept.
    """
    last_err = None
    for attempt in range(1, retries + 1):
        try:
            response = requests.get(url, timeout=timeout, verify=certifi.where())
            response.raise_for_status()
            return response.text
        except requests.RequestException as exc:
            # Only network/HTTP failures are retried; programming errors
            # propagate immediately instead of being masked by the retry loop.
            last_err = exc
            if attempt < retries:
                time.sleep(1.5 * attempt)
    raise RuntimeError(f"Download failed: {last_err}") from last_err


def load_finetune_fx_panel(url: str) -> pd.DataFrame:
    """Download the fine-tuning FX panel and return it indexed by date.

    The CSV is read as semicolon-separated with ``.`` as decimal mark. The
    ``ds`` column is parsed day-first into a ``DATE`` index; rows whose date
    cannot be parsed are dropped, the FX columns are coerced to numeric, and
    rows in which every FX column is missing are removed.

    Raises:
        ValueError: If ``ds`` or any of ``FINETUNE_FX_COLS`` is absent.
    """
    csv_text = download_csv_text(url)
    raw = pd.read_csv(io.StringIO(csv_text), sep=";", decimal=".", encoding="utf-8-sig")

    required = ["ds"] + FINETUNE_FX_COLS
    missing = set(required) - set(raw.columns)
    if missing:
        raise ValueError(f"Missing columns in fine-tune CSV: {missing}")

    renamed = raw[required].rename(columns={"ds": "DATE"})
    dated = renamed.assign(
        DATE=lambda frame: pd.to_datetime(frame["DATE"], dayfirst=True, errors="coerce")
    )
    panel = dated.dropna(subset=["DATE"]).sort_values("DATE").set_index("DATE")

    # Non-numeric cells become NaN rather than raising.
    panel = panel.assign(
        **{name: pd.to_numeric(panel[name], errors="coerce") for name in FINETUNE_FX_COLS}
    )
    return panel.dropna(how="all", subset=FINETUNE_FX_COLS)


def build_base_chronos_pipeline() -> BaseChronosPipeline:
    """Load a fresh ``amazon/chronos-2`` pipeline on CUDA in float16.

    Raises:
        SystemExit: If ``torch.cuda.is_available()`` reports no GPU.
    """
    if torch.cuda.is_available():
        # NOTE(review): the run log warns that `torch_dtype` is deprecated in
        # favour of `dtype`; switch once the installed library versions are
        # confirmed to accept the new keyword.
        load_options = {"device_map": "cuda", "torch_dtype": torch.float16}
        return BaseChronosPipeline.from_pretrained("amazon/chronos-2", **load_options)
    raise SystemExit("CUDA not available.")


def finetune_chronos_on_nb_panel(
    pipeline: BaseChronosPipeline,
    fx_panel: pd.DataFrame,
    ft_params: FTConfig,
) -> BaseChronosPipeline:
    """Fine-tune ``pipeline`` on every usable currency series in ``fx_panel``.

    Each column listed in ``FINETUNE_FX_COLS`` is stripped of NaNs and cast to
    float32. Series shorter than ``2 * ft_params.prediction_length`` are left
    out. The remaining series are passed to ``pipeline.fit`` without
    covariates.

    Returns:
        Whatever ``pipeline.fit`` returns (the fine-tuned pipeline).

    Raises:
        RuntimeError: If no series is long enough to train on.
    """
    min_length = ft_params.prediction_length * 2
    candidates = (
        fx_panel[col].dropna().astype(np.float32).values for col in FINETUNE_FX_COLS
    )
    train_inputs: List[Dict] = [
        {"target": values, "past_covariates": {}, "future_covariates": {}}
        for values in candidates
        if values.size >= min_length
    ]

    if not train_inputs:
        raise RuntimeError("No valid series for fine-tuning.")

    return pipeline.fit(
        inputs=train_inputs,
        prediction_length=ft_params.prediction_length,
        num_steps=ft_params.num_steps,
        learning_rate=ft_params.learning_rate,
        batch_size=ft_params.batch_size,
        logging_steps=ft_params.logging_steps,
    )


def series_daily_and_b(df_d: pd.DataFrame, col: str) -> Tuple[pd.Series, pd.Series]:
    """Return ``(business_day_series, daily_series)`` for column ``col``.

    The daily series is the column cast to float. The business-day series is
    that same series re-sampled onto a ``"B"`` calendar and forward-filled.
    Note the order: the business-day series comes first.
    """
    daily = df_d[col].astype(float)
    return daily.asfreq("B").ffill(), daily


def last_trading_day(S_b: pd.Series, start: pd.Timestamp, end: pd.Timestamp) -> Optional[pd.Timestamp]:
    """Return the last index label of ``S_b`` within ``[start, end]``, or ``None`` if there is none."""
    window = S_b.loc[start:end]
    if window.empty:
        return None
    return window.index[-1]


def build_model_chronos2_multi_priceonly(
    pipeline: BaseChronosPipeline,
    max_context: int,
) -> Callable[[np.ndarray, int], np.ndarray]:
    """Wrap ``pipeline`` as a univariate ``forecast_fn(x, H) -> median path``.

    Args:
        pipeline: Object exposing ``predict_df``.
        max_context: Only the last ``max_context`` values of the history are
            handed to the model.

    Returns:
        A callable taking a 1-D history and a horizon ``H`` and returning at
        most ``H`` median forecasts as a float array.
    """

    def _median_values(pred: pd.DataFrame) -> np.ndarray:
        # Accept the different column layouts a prediction frame may use.
        # The lookup order is significant and mirrors the supported schemas.
        frame = pred.sort_values("timestamp") if "timestamp" in pred.columns else pred
        cols = frame.columns

        for direct in ("0.5", "predictions"):
            if direct in cols:
                return np.asarray(frame[direct].to_numpy(), dtype=float)

        if "forecast" in cols and "quantile" in cols:
            # Long format: keep only the median rows.
            median_rows = frame.loc[frame["quantile"] == 0.5]
            return np.asarray(median_rows["forecast"].to_numpy(), dtype=float)

        fallback = next(
            (name for name in ("forecast", "p50", "median", "mean") if name in cols),
            None,
        )
        if fallback is None:
            raise RuntimeError("Unsupported Chronos schema.")
        return np.asarray(frame[fallback].to_numpy(), dtype=float)

    def forecast_fn(x: np.ndarray, H: int) -> np.ndarray:
        history = np.asarray(x, dtype=float).ravel()[-max_context:]
        # predict_df needs timestamps; a synthetic daily calendar is used
        # because only the ordering of the observations is supplied here.
        frame = pd.DataFrame(
            {
                "item_id": "series_1",
                "timestamp": pd.date_range("2000-01-01", periods=len(history), freq="D"),
                "target": history,
            }
        )
        with torch.inference_mode():
            pred = pipeline.predict_df(
                frame,
                prediction_length=H,
                quantile_levels=[0.5],
                id_column="item_id",
                timestamp_column="timestamp",
                target="target",
            )
        return _median_values(pred)[:H]

    return forecast_fn


def walk_forward_monthly_priceonly(
    S_b: pd.Series,
    S_d: pd.Series,
    forecast_fn: Callable[[np.ndarray, int], np.ndarray],
    m_freq: str = "M",
    min_hist_days: int = 40,
    max_context: int = 2048,
    max_horizon: int = 64,
    start_period: Optional[pd.Period] = None,
) -> pd.DataFrame:
    """Walk-forward evaluation of period-average forecasts (price history only).

    For every period ``m`` (frequency ``m_freq``) the history up to the last
    business day of the previous period (the "cut") is fed to ``forecast_fn``.
    The daily forecast path is then averaged over the business days of ``m``
    and compared with the realised business-day average.

    Args:
        S_b: Business-day series; defines the cut dates and the targets.
        S_d: Daily series used as model history.
        forecast_fn: ``(history, H) -> array`` of at least ``H`` daily forecasts.
        m_freq: Period frequency of the evaluation.
        min_hist_days: Periods with fewer history observations are skipped.
        max_context: Maximum number of trailing observations passed on.
        max_horizon: Periods needing a longer horizon are skipped.
        start_period: Optional first period to evaluate.

    Returns:
        A frame indexed by period (index name ``month``) with columns
        ``y_true`` and ``y_pred``; it is empty when nothing could be evaluated.
        ``attrs["dropped"]`` maps each skipped period to the reason.
    """
    rows = {}
    dropped = {}

    if S_b.empty:
        # Nothing to evaluate; avoid building a period range from NaT bounds.
        months = []
    else:
        first_m = pd.Period(S_b.index.min(), freq=m_freq)
        last_m = pd.Period(S_b.index.max(), freq=m_freq)
        if start_period is not None:
            first_m = max(first_m, start_period)
        months = pd.period_range(first_m, last_m, freq=m_freq)

    for m in months:
        prev_m = m - 1
        m_start, m_end = m.start_time, m.end_time

        # Cut = last business day observed in the previous period.
        prev_obs = S_b.loc[prev_m.start_time:prev_m.end_time]
        if prev_obs.empty:
            dropped[str(m)] = "no_cut"
            continue
        cut = prev_obs.index[-1]

        hist_d = S_d.loc[:cut]
        if hist_d.size < min_hist_days:
            dropped[str(m)] = "short_hist"
            continue

        idx_m_b = S_b.index[(S_b.index >= m_start) & (S_b.index <= m_end)]
        if idx_m_b.size < 1:
            dropped[str(m)] = "no_bdays"
            continue
        y_true = float(S_b.loc[idx_m_b].mean())

        # The forecast path starts the day after the cut, so the horizon must
        # span from there to the end of the target period. Sizing it by the
        # period length alone falls short whenever the previous period ends on
        # a non-business day, leaving the last business days without a forecast.
        H = (m_end.normalize() - cut.normalize()).days
        if H <= 0 or H > max_horizon:
            dropped[str(m)] = "bad_H"
            continue

        context = min(max_context, len(hist_d))
        x = hist_d.values[-context:]
        pf = forecast_fn(x, H)

        f_idx = pd.date_range(cut + pd.Timedelta(days=1), periods=H, freq="D")
        pred_daily = pd.Series(pf[:H], index=f_idx)

        pred_b = pred_daily.reindex(idx_m_b)
        if pred_b.isna().all():
            dropped[str(m)] = "no_overlap"
            continue
        y_pred = float(pred_b.dropna().mean())

        rows[str(m)] = {"month": m, "y_true": y_true, "y_pred": y_pred}

    df = pd.DataFrame.from_dict(rows, orient="index")
    if not df.empty:
        df = df.set_index("month").sort_index()
    # Keep the skip reasons available to callers instead of discarding them.
    df.attrs["dropped"] = dropped
    return df


def rmse_mae(df_eval: pd.DataFrame) -> Tuple[float, float]:
    """Return ``(rmse, mae)`` of ``y_pred`` against ``y_true``.

    Rows containing any NaN are ignored. If nothing remains, both values
    are NaN.
    """
    valid = df_eval.dropna()
    if valid.empty:
        return np.nan, np.nan

    actual, predicted = valid["y_true"], valid["y_pred"]
    residual = actual - predicted
    return (
        float(np.sqrt(np.mean(residual**2))),
        float(mean_absolute_error(actual, predicted)),
    )


# -----------------------------
# 1) Load NB panel and split train/val
# -----------------------------
fx_panel_full = load_finetune_fx_panel(NB_PANEL_URL)

# Rows dated before val_start form the training panel; rows from val_start
# onwards are held out for validation.
val_start = pd.Timestamp("1996-01-01")
train_panel = fx_panel_full.loc[:val_start - pd.Timedelta(days=1)]
val_panel   = fx_panel_full.loc[val_start:]

if FTCFG.verbose:
    print(f"NB full:  {fx_panel_full.index.min().date()} → {fx_panel_full.index.max().date()} | n={len(fx_panel_full)}")
    print(f"Train:    {train_panel.index.min().date()} → {train_panel.index.max().date()} | n={len(train_panel)}")
    print(f"Validate: {val_panel.index.min().date()} → {val_panel.index.max().date()} | n={len(val_panel)}")

# Period containing the first validation observation; passed to the
# walk-forward evaluation as its earliest period.
val_start_period = pd.Period(val_panel.index.min(), freq=FTCFG.m_freq)


# Hyper-parameter grid for fine-tuning; every combination is tried.
grid = {
    "learning_rate": [5e-6, 1e-5, 2e-5],
    "num_steps": [20, 50, 100],
    "batch_size": [2, 4],
    "prediction_length": [32],
}

# Cartesian product in the order (learning_rate, num_steps, batch_size,
# prediction_length); the loop below unpacks tuples in that same order.
param_list = list(itertools.product(
    grid["learning_rate"],
    grid["num_steps"],
    grid["batch_size"],
    grid["prediction_length"],
))

results = []

# The validation series do not depend on the hyper-parameters, so they are
# built once instead of once per grid point.
val_daily = val_panel.asfreq("D").ffill()
val_series = {
    col: series_daily_and_b(val_daily, col)
    for col in FINETUNE_FX_COLS
    if col in val_daily.columns
}

for lr, steps, bs, pl in param_list:
    # Only the four searched values vary; the rest comes from FTCFG.
    ft_params = FTConfig(
        prediction_length=pl,
        num_steps=steps,
        learning_rate=lr,
        batch_size=bs,
        logging_steps=FTCFG.logging_steps,
        max_context=FTCFG.max_context,
        min_hist_days=FTCFG.min_hist_days,
        m_freq=FTCFG.m_freq,
        verbose=False,
    )

    # Start every run from a freshly loaded base model so that runs do not
    # build on each other's fine-tuning.
    base_pipe = build_base_chronos_pipeline()
    ft_pipe = finetune_chronos_on_nb_panel(base_pipe, train_panel, ft_params)
    forecast_fn = build_model_chronos2_multi_priceonly(ft_pipe, max_context=ft_params.max_context)

    series_rmses = []
    series_maes  = []

    for col, (S_b, S_d) in val_series.items():
        df_eval = walk_forward_monthly_priceonly(
            S_b=S_b,
            S_d=S_d,
            forecast_fn=forecast_fn,
            m_freq=ft_params.m_freq,
            min_hist_days=ft_params.min_hist_days,
            max_context=ft_params.max_context,
            max_horizon=64,
            start_period=val_start_period,
        )
        r, m_ = rmse_mae(df_eval)
        # Series without any evaluable period return NaN and are left out of
        # the averages.
        if np.isfinite(r):
            series_rmses.append(r)
            series_maes.append(m_)

    avg_rmse = float(np.mean(series_rmses)) if series_rmses else np.nan
    avg_mae  = float(np.mean(series_maes)) if series_maes else np.nan

    results.append({
        "learning_rate": lr,
        "num_steps": steps,
        "batch_size": bs,
        "prediction_length": pl,
        "val_avg_rmse": avg_rmse,
        "val_avg_mae": avg_mae,
    })

    print(f"lr={lr:.0e}, steps={steps}, bs={bs} -> val_RMSE={avg_rmse:.4f}, val_MAE={avg_mae:.4f}")


# Rank the configurations by average validation RMSE (lowest first) and take
# the top row.
res_df = pd.DataFrame(results).sort_values("val_avg_rmse")
# The row is converted through a Series, so the integer settings come back as
# floats; hence the explicit int() casts below.
best = res_df.iloc[0].to_dict()

print("\nBest params (val):")
print(best)

# Winning configuration; the evaluation settings are carried over from FTCFG.
BEST_FT_PARAMS = FTConfig(
    prediction_length=int(best["prediction_length"]),
    num_steps=int(best["num_steps"]),
    learning_rate=float(best["learning_rate"]),
    batch_size=int(best["batch_size"]),
    logging_steps=FTCFG.logging_steps,
    max_context=FTCFG.max_context,
    min_hist_days=FTCFG.min_hist_days,
    m_freq=FTCFG.m_freq,
    verbose=True,
)


`torch_dtype` is deprecated! Use `dtype` instead!


NB full:  1980-12-10 → 1999-12-31 | n=4930
Train:    1980-12-10 → 1995-12-29 | n=3919
Validate: 1996-01-02 → 1999-12-31 | n=1011


`torch_dtype` is deprecated! Use `dtype` instead!
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
10,2.6523
20,1.3152


lr=5e-06, steps=20, bs=2 -> val_RMSE=1.0737, val_MAE=0.7723


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
10,2.2279
20,2.3716


lr=5e-06, steps=20, bs=4 -> val_RMSE=1.0551, val_MAE=0.7604


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
10,2.6519
20,1.3316
30,1.8109
40,2.9446
50,1.1522


lr=5e-06, steps=50, bs=2 -> val_RMSE=1.0844, val_MAE=0.7558


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
10,2.2285
20,2.418
30,1.8151
40,2.57
50,2.6494


lr=5e-06, steps=50, bs=4 -> val_RMSE=1.0798, val_MAE=0.7669


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
10,2.6518
20,1.3384
30,1.8359
40,3.0053
50,1.1664
60,2.1271
70,2.7172
80,1.8866
90,2.8219
100,1.5627


lr=5e-06, steps=100, bs=2 -> val_RMSE=1.0646, val_MAE=0.7583


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
10,2.2289
20,2.4328
30,1.8292
40,2.5843
50,2.6136
60,1.6414
70,2.3086
80,1.8444
90,2.5691
100,1.9691


lr=5e-06, steps=100, bs=4 -> val_RMSE=1.0732, val_MAE=0.7753


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
10,2.7287
20,1.4457


lr=1e-05, steps=20, bs=2 -> val_RMSE=1.0776, val_MAE=0.7890


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
10,2.2338
20,2.6184


lr=1e-05, steps=20, bs=4 -> val_RMSE=1.0787, val_MAE=0.7787


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
10,2.7293
20,1.4565
30,1.9225
40,3.139
50,1.2471


lr=1e-05, steps=50, bs=2 -> val_RMSE=1.1040, val_MAE=0.7730


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
10,2.2402
20,2.6707
30,1.8615
40,2.6144
50,2.6595


lr=1e-05, steps=50, bs=4 -> val_RMSE=1.0837, val_MAE=0.7702


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
10,2.7299
20,1.471
30,1.9753
40,3.2208
50,1.2188
60,2.2695
70,2.6416
80,1.9041
90,2.8832
100,1.711


lr=1e-05, steps=100, bs=2 -> val_RMSE=1.0675, val_MAE=0.7749


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
10,2.2434
20,2.6877
30,1.88
40,2.6602
50,2.6382
60,1.6497
70,2.3516
80,1.8774
90,2.6363
100,2.0472


lr=1e-05, steps=100, bs=4 -> val_RMSE=1.0692, val_MAE=0.7984


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
10,2.8921
20,1.9615


lr=2e-05, steps=20, bs=2 -> val_RMSE=1.1139, val_MAE=0.8323


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
10,2.287
20,2.9418


lr=2e-05, steps=20, bs=4 -> val_RMSE=1.1275, val_MAE=0.8241


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
10,2.8989
20,1.88
30,2.6117
40,3.1725
50,1.2112


lr=2e-05, steps=50, bs=2 -> val_RMSE=1.0908, val_MAE=0.7881


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
10,2.3162
20,3.0235
30,1.9049
40,2.7654
50,2.6284


lr=2e-05, steps=50, bs=4 -> val_RMSE=1.0766, val_MAE=0.7833


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
10,2.9
20,1.8851
30,2.4584
40,3.2652
50,1.2551
60,2.5909
70,2.6963
80,2.1551
90,2.646
100,1.7476


lr=2e-05, steps=100, bs=2 -> val_RMSE=1.0899, val_MAE=0.7959


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
10,2.3267
20,3.0693
30,1.8944
40,2.8011
50,2.5874
60,1.7713
70,2.6568
80,1.9756
90,2.9283
100,2.2497


lr=2e-05, steps=100, bs=4 -> val_RMSE=1.1065, val_MAE=0.8578

Best params (val):
{'learning_rate': 5e-06, 'num_steps': 20.0, 'batch_size': 4.0, 'prediction_length': 32.0, 'val_avg_rmse': 1.0551303039975972, 'val_avg_mae': 0.7603691171620345}


## Multi FX – monthly (M)

In [None]:
from __future__ import annotations
import io, time, math
from dataclasses import dataclass
from typing import Optional, Tuple, Dict, Callable, List

import numpy as np
import pandas as pd
import requests, certifi
from sklearn.metrics import mean_absolute_error

import torch
from chronos import BaseChronosPipeline


# Evaluation data: multi-currency FX panel.
MULTI_URL = (
    "https://raw.githubusercontent.com/bredeespelid/"
    "Data_MasterOppgave/refs/heads/main/EURNOK/MultiFXData.csv"
)
# Fine-tuning panel (same file as in the grid-search cell).
NB_PANEL_URL = (
    "https://raw.githubusercontent.com/bredeespelid/"
    "Data_MasterOppgave/refs/heads/main/FineTuneData/NB1980-1999.csv"
)
# Daily covariate file; must contain a "Date" column plus COV_COLS.
COV_URL = (
    "https://raw.githubusercontent.com/bredeespelid/"
    "Data_MasterOppgave/refs/heads/main/Variables/All_Variables/variables_daily.csv"
)

# Covariate columns added to every model context.
COV_COLS = ["VIX", "SP500", "Brent"]

# Currency columns required in the fine-tuning panel.
FINETUNE_FX_COLS = [
    "AUD", "CAD", "CHF", "DKK", "GBP",
    "ISK", "JPY", "NZD", "SEK", "USD", "XDR",
]


# -----------------------------
# Helpers
# -----------------------------
def download_csv_text(url: str, retries: int = 3, timeout: int = 60) -> str:
    """Fetch ``url`` over HTTP(S) and return the response body as text.

    Args:
        url: Address of the CSV file.
        retries: Maximum number of attempts. After failed attempt ``k`` the
            function sleeps ``1.5 * k`` seconds before trying again.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        The response body (``Response.text``).

    Raises:
        RuntimeError: If no attempt succeeds. The last request error is
            attached as ``__cause__`` so the original traceback is kept.
    """
    last_err = None
    for attempt in range(1, retries + 1):
        try:
            response = requests.get(url, timeout=timeout, verify=certifi.where())
            response.raise_for_status()
            return response.text
        except requests.RequestException as exc:
            # Only network/HTTP failures are retried; programming errors
            # propagate immediately instead of being masked by the retry loop.
            last_err = exc
            if attempt < retries:
                time.sleep(1.5 * attempt)
    raise RuntimeError(f"Download failed: {last_err}") from last_err


def load_multi_fx(url: str) -> pd.DataFrame:
    """Download the multi-currency CSV and return a gap-free daily panel.

    The delimiter/decimal convention is detected by trying, in order,
    ``(",", ".")``, ``(";", ".")``, ``(",", ",")`` and ``(";", ",")`` until a
    ``DATE`` column appears. Dates are parsed (unparseable rows dropped), all
    remaining columns are coerced to numeric, and the frame is re-indexed to
    every calendar day between the first and last date with forward-fill.

    Raises:
        ValueError: If no parsing attempt yields a ``DATE`` column.
    """
    text = download_csv_text(url)

    dialects = ((",", "."), (";", "."), (",", ","), (";", ","))
    for sep, decimal in dialects:
        raw = pd.read_csv(io.StringIO(text), sep=sep, encoding="utf-8-sig", decimal=decimal)
        if "DATE" in raw.columns:
            break
    else:
        raise ValueError("DATE column missing in MultiFXData.")

    indexed = (
        raw.assign(DATE=pd.to_datetime(raw["DATE"], errors="coerce"))
        .dropna(subset=["DATE"])
        .sort_values("DATE")
        .set_index("DATE")
    )
    numeric = indexed.apply(pd.to_numeric, errors="coerce")

    calendar = pd.date_range(numeric.index.min(), numeric.index.max(), freq="D")
    return numeric.reindex(calendar).ffill().rename_axis("DATE")


def load_covariates(url: str) -> pd.DataFrame:
    """Download the covariate CSV and return a gap-free daily frame.

    The file is read with pandas' default (comma) delimiter. Only ``Date``
    and the columns in ``COV_COLS`` are kept; ``Date`` becomes the ``DATE``
    index, values are coerced to numeric, and the frame is re-indexed to every
    calendar day between the first and last date with forward-fill.

    Raises:
        ValueError: If ``Date`` or any covariate column is missing.
    """
    text = download_csv_text(url)
    raw = pd.read_csv(io.StringIO(text))

    # An ordered list (not a set) keeps the output column order identical
    # from run to run; set iteration order for strings is not stable.
    wanted = ["Date", *COV_COLS]
    missing = set(wanted) - set(raw.columns)
    if missing:
        raise ValueError(f"Missing covariate columns: {missing}. Got: {list(raw.columns)}")

    df = (
        raw[wanted]
        .rename(columns={"Date": "DATE"})
        .assign(DATE=lambda x: pd.to_datetime(x["DATE"], errors="coerce"))
        .dropna(subset=["DATE"])
        .sort_values("DATE")
        .set_index("DATE")
    )
    for c in COV_COLS:
        df[c] = pd.to_numeric(df[c], errors="coerce")

    full_idx = pd.date_range(df.index.min(), df.index.max(), freq="D")
    df_d = df.reindex(full_idx).ffill()
    df_d.index.name = "DATE"
    return df_d


def load_finetune_fx_panel(url: str) -> pd.DataFrame:
    """Download the fine-tuning FX panel and return it indexed by date.

    The CSV is read as semicolon-separated with ``.`` as decimal mark. The
    ``ds`` column is parsed day-first into a ``DATE`` index; rows whose date
    cannot be parsed are dropped, the FX columns are coerced to numeric, and
    rows in which every FX column is missing are removed.

    Raises:
        ValueError: If ``ds`` or any of ``FINETUNE_FX_COLS`` is absent.
    """
    csv_text = download_csv_text(url)
    raw = pd.read_csv(io.StringIO(csv_text), sep=";", decimal=".", encoding="utf-8-sig")

    required = ["ds"] + FINETUNE_FX_COLS
    missing = set(required) - set(raw.columns)
    if missing:
        raise ValueError(f"Missing columns in NB panel: {missing}")

    renamed = raw[required].rename(columns={"ds": "DATE"})
    dated = renamed.assign(
        DATE=lambda frame: pd.to_datetime(frame["DATE"], dayfirst=True, errors="coerce")
    )
    panel = dated.dropna(subset=["DATE"]).sort_values("DATE").set_index("DATE")

    # Non-numeric cells become NaN rather than raising.
    panel = panel.assign(
        **{name: pd.to_numeric(panel[name], errors="coerce") for name in FINETUNE_FX_COLS}
    )
    return panel.dropna(how="all", subset=FINETUNE_FX_COLS)


def series_daily_and_b(df_d: pd.DataFrame, col: str) -> Tuple[pd.Series, pd.Series]:
    """Return ``(business_day_series, daily_series)`` for column ``col``.

    The daily series is the column cast to float. The business-day series is
    that same series re-sampled onto a ``"B"`` calendar and forward-filled.
    Note the order: the business-day series comes first.
    """
    daily = df_d[col].astype(float)
    return daily.asfreq("B").ffill(), daily


def last_trading_day(S_b: pd.Series, start: pd.Timestamp, end: pd.Timestamp) -> Optional[pd.Timestamp]:
    """Return the last index label of ``S_b`` within ``[start, end]``, or ``None`` if there is none."""
    window = S_b.loc[start:end]
    if window.empty:
        return None
    return window.index[-1]


def make_pastcov_context_multi(hist_wide: pd.DataFrame, series_name: str) -> pd.DataFrame:
    """Build the long-format context frame for one series.

    The result has the columns ``timestamp`` (the index of ``hist_wide``),
    ``item_id`` (``series_name`` on every row), ``target`` (the series as
    float) and one float column per entry of ``COV_COLS``, sorted by
    ``timestamp``.
    """
    columns = {
        "timestamp": hist_wide.index,
        "item_id": series_name,
        "target": hist_wide[series_name].astype(float).values,
    }
    columns.update({name: hist_wide[name].astype(float).values for name in COV_COLS})
    return pd.DataFrame(columns).sort_values("timestamp")


def build_base_chronos_pipeline() -> BaseChronosPipeline:
    """Load a fresh ``amazon/chronos-2`` pipeline on CUDA in float16.

    Raises:
        SystemExit: If ``torch.cuda.is_available()`` reports no GPU.
    """
    if torch.cuda.is_available():
        # NOTE(review): the run log warns that `torch_dtype` is deprecated in
        # favour of `dtype`; switch once the installed library versions are
        # confirmed to accept the new keyword.
        load_options = {"device_map": "cuda", "torch_dtype": torch.float16}
        return BaseChronosPipeline.from_pretrained("amazon/chronos-2", **load_options)
    raise SystemExit("CUDA not available.")


def finetune_chronos_on_nb_panel(
    pipeline: BaseChronosPipeline,
    fx_panel: pd.DataFrame,
    ft_params,
) -> BaseChronosPipeline:
    """Fine-tune ``pipeline`` on every usable currency series in ``fx_panel``.

    Args:
        pipeline: Pipeline exposing ``fit``.
        fx_panel: Frame containing the columns in ``FINETUNE_FX_COLS``.
        ft_params: Object with the attributes ``prediction_length``,
            ``num_steps``, ``learning_rate``, ``batch_size`` and
            ``logging_steps``.

    Returns:
        Whatever ``pipeline.fit`` returns (the fine-tuned pipeline).

    Raises:
        RuntimeError: If no series has at least
            ``2 * ft_params.prediction_length`` non-missing observations.
    """
    train_inputs: List[Dict] = []
    for col in FINETUNE_FX_COLS:
        series = fx_panel[col].dropna().astype(np.float32).values
        # Too short to train on: skip the series.
        if series.size < ft_params.prediction_length * 2:
            continue
        train_inputs.append(
            {"target": series, "past_covariates": {}, "future_covariates": {}}
        )

    # Fail with a clear message instead of handing an empty training set to
    # the library.
    if not train_inputs:
        raise RuntimeError("No valid series for fine-tuning.")

    pipeline = pipeline.fit(
        inputs=train_inputs,
        prediction_length=ft_params.prediction_length,
        num_steps=ft_params.num_steps,
        learning_rate=ft_params.learning_rate,
        batch_size=ft_params.batch_size,
        logging_steps=ft_params.logging_steps,
    )
    return pipeline


def build_model_chronos2_multi_pastcov(
    pipeline: BaseChronosPipeline,
    max_context: int,
) -> Callable[[pd.DataFrame, int], pd.DataFrame]:
    """Wrap ``pipeline`` as ``forecast_fn(context_df, H) -> median frame``.

    Args:
        pipeline: Object exposing ``predict_df``.
        max_context: Contexts longer than this are cut to their last
            ``max_context`` rows before prediction.

    Returns:
        A callable returning a frame with the columns ``item_id``,
        ``timestamp`` and ``y_pred`` (the median forecast).
    """

    def _median_frame(pred: pd.DataFrame) -> pd.DataFrame:
        # Accept the different column layouts a prediction frame may use.
        # The lookup order is significant and mirrors the supported schemas.
        frame = pred.sort_values("timestamp") if "timestamp" in pred.columns else pred.copy()
        cols = frame.columns

        if "0.5" in cols:
            source = "0.5"
        elif "predictions" in cols:
            source = "predictions"
        elif "forecast" in cols and "quantile" in cols:
            # Long format: keep only the median rows.
            frame = frame.loc[frame["quantile"] == 0.5]
            source = "forecast"
        else:
            source = next(
                (name for name in ("forecast", "p50", "median", "mean") if name in cols),
                None,
            )
            if source is None:
                raise RuntimeError("Unsupported Chronos schema.")

        with_median = frame.assign(y_pred=frame[source])
        return with_median[["item_id", "timestamp", "y_pred"]].copy()

    def forecast_fn(context_df: pd.DataFrame, H: int) -> pd.DataFrame:
        too_long = len(context_df) > max_context
        window = context_df.iloc[-max_context:].copy() if too_long else context_df
        with torch.inference_mode():
            pred = pipeline.predict_df(
                window,
                prediction_length=H,
                quantile_levels=[0.5],
                id_column="item_id",
                timestamp_column="timestamp",
                target="target",
                predict_batches_jointly=False,
            )
        return _median_frame(pred)

    return forecast_fn


def walk_forward_monthly_pastcov(
    S_b: pd.Series,
    DF_wide: pd.DataFrame,
    forecast_fn: Callable[[pd.DataFrame, int], pd.DataFrame],
    series_name: str,
    m_freq: str = "M",
    min_hist_days: int = 40,
    max_context: int = 2048,
    max_horizon: int = 64,
    start_period: Optional[pd.Period] = None,
) -> pd.DataFrame:
    """Walk-forward evaluation of period-average forecasts with covariates.

    For every period ``m`` (frequency ``m_freq``) the rows of ``DF_wide`` up
    to the last business day of the previous period (the "cut") are turned
    into a context frame and passed to ``forecast_fn``. The daily forecast is
    averaged over the business days of ``m`` and compared with the realised
    business-day average of ``S_b``.

    Args:
        S_b: Business-day series of the target; defines cuts and targets.
        DF_wide: Daily frame holding the target column and the covariates.
        forecast_fn: ``(context_df, H) -> frame`` with the columns
            ``item_id``, ``timestamp`` and ``y_pred``.
        series_name: Column of ``DF_wide`` to forecast.
        m_freq: Period frequency of the evaluation.
        min_hist_days: Periods with fewer history rows are skipped.
        max_context: Maximum number of trailing history rows passed on.
        max_horizon: Periods needing a longer horizon are skipped.
        start_period: Optional first period to evaluate.

    Returns:
        A frame indexed by period (index name ``month``) with columns
        ``y_true`` and ``y_pred``; it is empty when nothing could be evaluated.
        ``attrs["dropped"]`` maps each skipped period to the reason.
    """
    rows = {}
    dropped = {}

    if S_b.empty:
        # Nothing to evaluate; avoid building a period range from NaT bounds.
        months = []
    else:
        first_m = pd.Period(S_b.index.min(), freq=m_freq)
        last_m = pd.Period(S_b.index.max(), freq=m_freq)
        if start_period is not None:
            first_m = max(first_m, start_period)
        months = pd.period_range(first_m, last_m, freq=m_freq)

    for m in months:
        prev_m = m - 1
        m_start, m_end = m.start_time, m.end_time

        cut = last_trading_day(S_b, prev_m.start_time, prev_m.end_time)
        if cut is None:
            dropped[str(m)] = "no_cut"
            continue

        hist_wide = DF_wide.loc[:cut]
        if hist_wide.shape[0] < min_hist_days:
            dropped[str(m)] = "short_hist"
            continue

        idx_m_b = S_b.index[(S_b.index >= m_start) & (S_b.index <= m_end)]
        if idx_m_b.size < 1:
            dropped[str(m)] = "no_bdays"
            continue
        y_true = float(S_b.loc[idx_m_b].mean())

        # The forecast starts the day after the cut, so the horizon must span
        # from there to the end of the target period. Sizing it by the period
        # length alone falls short whenever the previous period ends on a
        # non-business day; the tail was then only covered by forward-filling
        # the last forecast value.
        H = (m_end.normalize() - cut.normalize()).days
        if H <= 0 or H > max_horizon:
            dropped[str(m)] = "bad_H"
            continue

        # Honour max_context here as well, so the context frame is not built
        # for rows that would be discarded anyway.
        if max_context > 0:
            hist_wide = hist_wide.iloc[-max_context:]

        context_df = make_pastcov_context_multi(hist_wide, series_name)
        df_pred = forecast_fn(context_df, H)
        if df_pred.empty:
            dropped[str(m)] = "no_pred"
            continue

        f_idx = pd.date_range(cut + pd.Timedelta(days=1), periods=H, freq="D")
        df_fx = df_pred[df_pred["item_id"] == series_name].copy().sort_values("timestamp")

        # Align the returned forecasts with the expected daily calendar; any
        # date the model did not return takes the previous forecast value.
        pred_daily = pd.Series(index=f_idx, dtype=float)
        tmp = df_fx.set_index("timestamp")["y_pred"]
        common_idx = tmp.index.intersection(pred_daily.index)
        pred_daily.loc[common_idx] = tmp.loc[common_idx]
        pred_daily = pred_daily.ffill()

        pred_b = pred_daily.reindex(idx_m_b)
        if pred_b.isna().all():
            dropped[str(m)] = "no_overlap"
            continue
        y_pred = float(pred_b.dropna().mean())

        rows[str(m)] = {"month": m, "y_true": y_true, "y_pred": y_pred}

    df = pd.DataFrame.from_dict(rows, orient="index")
    if not df.empty:
        df = df.set_index("month").sort_index()
    # Keep the skip reasons available to callers instead of discarding them.
    df.attrs["dropped"] = dropped
    return df


def evaluate_with_dm(eval_df: pd.DataFrame) -> Dict[str, float]:
    """Summarise forecast accuracy and compare it with a lagged-actual benchmark.

    ``eval_df`` needs the columns ``y_true`` and ``y_pred``. The result holds:

    * ``observations``, ``rmse``, ``mae`` over rows where both are present;
    * ``dir_hits`` / ``dir_total`` / ``dir_acc``: how often the sign of the
      predicted change from the previous actual matches the realised sign;
    * ``dm_stat`` / ``dm_pvalue``: a Diebold-Mariano style test on squared
      errors against the benchmark "previous actual value". The loss
      differential is model minus benchmark, so a positive statistic means
      the model's squared errors are larger. The variance uses the sample
      variance only (no autocovariance terms) and the p-value is two-sided
      under a normal approximation. Fewer than five usable rows give NaN.
    """

    def _phi(z: float) -> float:
        # Standard normal CDF expressed through the error function.
        return 0.5 * (1.0 + math.erf(z / math.sqrt(2.0)))

    def _dm(y_true, y_model, y_rw):
        aligned = pd.concat({"y": y_true, "m": y_model, "rw": y_rw}, axis=1).dropna()
        if aligned.empty or len(aligned) < 5:
            return float("nan"), float("nan")
        model_err = aligned["y"] - aligned["m"]
        bench_err = aligned["y"] - aligned["rw"]
        loss_diff = (model_err**2) - (bench_err**2)
        n = int(len(loss_diff))
        mean_diff = float(loss_diff.mean())
        gamma0 = float(np.var(loss_diff, ddof=1)) if n > 1 else 0.0
        var_of_mean = gamma0 / n
        if var_of_mean > 0:
            stat = mean_diff / math.sqrt(var_of_mean)
        else:
            stat = float("nan")
        if np.isfinite(stat):
            p_value = 2.0 * (1.0 - _phi(abs(stat)))
        else:
            p_value = float("nan")
        return stat, p_value

    frame = eval_df.copy()
    frame["err"] = frame["y_true"] - frame["y_pred"]
    valid = frame.dropna(subset=["y_true", "y_pred"]).copy()

    n_obs = int(len(valid))
    if n_obs:
        rmse = float(np.sqrt(np.mean(np.square(valid["err"]))))
        mae = float(mean_absolute_error(valid["y_true"], valid["y_pred"]))
    else:
        rmse = mae = np.nan

    # Directional accuracy relative to the previous realised value.
    valid["y_prev"] = valid["y_true"].shift(1)
    has_prev = valid["y_prev"].notna()
    previous = valid.loc[has_prev, "y_prev"]
    true_move = np.sign(valid.loc[has_prev, "y_true"] - previous)
    pred_move = np.sign(valid.loc[has_prev, "y_pred"] - previous)
    hits = int((true_move.values == pred_move.values).sum())
    total = int(has_prev.sum())
    dir_acc = (hits / total) if total else np.nan

    # Benchmark forecast: the previous row's actual value.
    dm_stat, p_val = _dm(frame["y_true"], frame["y_pred"], frame["y_true"].shift(1))

    return {
        "observations": n_obs,
        "rmse": rmse,
        "mae": mae,
        "dir_hits": hits,
        "dir_total": total,
        "dir_acc": dir_acc,
        "dm_stat": float(dm_stat) if np.isfinite(dm_stat) else np.nan,
        "dm_pvalue": float(p_val) if np.isfinite(p_val) else np.nan,
    }



# -----------------------------
# 1) Load evaluation data and covariates
# -----------------------------
df_eval_d = load_multi_fx(MULTI_URL)
cov_d = load_covariates(COV_URL)
# Inner join: keep only the dates present in both frames.
df_eval_d = df_eval_d.join(cov_d, how="inner")

# Every non-covariate column is treated as an FX series to forecast.
fx_cols = [c for c in df_eval_d.columns if c not in COV_COLS]

print(f"Eval D: {df_eval_d.index.min().date()} → {df_eval_d.index.max().date()} | n={len(df_eval_d)}")
print("Covariates:", COV_COLS)
print("FX cols:", fx_cols[:10], "...")


# -----------------------------
# 2) Fine-tune on the full NB panel
# -----------------------------
# NOTE: BEST_FT_PARAMS is not defined in this cell; it comes from the
# grid-search cell, which therefore has to run first.
fx_panel_full = load_finetune_fx_panel(NB_PANEL_URL)
ft_end = fx_panel_full.index.max()

base_pipe = build_base_chronos_pipeline()
ft_pipe = finetune_chronos_on_nb_panel(base_pipe, fx_panel_full, BEST_FT_PARAMS)

# Evaluation starts in the period after the one containing the last
# fine-tuning observation.
eval_start_period = pd.Period(ft_end, freq=BEST_FT_PARAMS.m_freq) + 1
print("Eval start period:", eval_start_period)

forecast_fn = build_model_chronos2_multi_pastcov(ft_pipe, max_context=BEST_FT_PARAMS.max_context)

# -----------------------------
# 3) Walk-forward + metrics per FX
# -----------------------------
metrics_rows = []
for col in fx_cols:
    S_b, S_d = series_daily_and_b(df_eval_d, col)

    df_m = walk_forward_monthly_pastcov(
        S_b=S_b,
        DF_wide=df_eval_d,
        forecast_fn=forecast_fn,
        series_name=col,
        m_freq=BEST_FT_PARAMS.m_freq,
        min_hist_days=BEST_FT_PARAMS.min_hist_days,
        max_context=BEST_FT_PARAMS.max_context,
        max_horizon=64,
        start_period=eval_start_period,
    )

    # Series for which no period could be evaluated are left out.
    if df_m.empty:
        continue

    m = evaluate_with_dm(df_m)
    m["series"] = col
    metrics_rows.append(m)

    print(
        f"[{col}] Obs={m['observations']}, RMSE={m['rmse']:.4f}, MAE={m['mae']:.4f}, "
        f"DirAcc={m['dir_hits']}/{m['dir_total']} ({m['dir_acc']*100:.1f}%), "
        f"DM={m['dm_stat']:.3f}, p={m['dm_pvalue']:.4f}"
    )

# One row per series, best (lowest) RMSE first; the bare expression on the
# last line displays the table in the notebook.
metrics_df = pd.DataFrame(metrics_rows).sort_values("rmse").reset_index(drop=True)
metrics_df


Eval D: 2000-01-31 → 2024-12-12 | n=9083
Covariates: ['VIX', 'SP500', 'Brent']
FX cols: ['I44', 'AUD', 'EUR', 'CAD', 'GBP', 'HKD', 'JPY', 'MYR', 'NZD', 'SGD'] ...


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
10,1.8615
20,2.3732


Eval start period: 2000-01
[I44] Obs=297, RMSE=1.4241, MAE=0.6947, DirAcc=181/296 (61.1%), DM=2.231, p=0.0257
[AUD] Obs=297, RMSE=0.0931, MAE=0.0475, DirAcc=185/296 (62.5%), DM=1.778, p=0.0754
[EUR] Obs=297, RMSE=0.1451, MAE=0.0671, DirAcc=190/296 (64.2%), DM=2.574, p=0.0100
[CAD] Obs=297, RMSE=0.1015, MAE=0.0503, DirAcc=172/296 (58.1%), DM=2.054, p=0.0400
[GBP] Obs=297, RMSE=0.2530, MAE=0.1102, DirAcc=187/296 (63.2%), DM=1.300, p=0.1936
[HKD] Obs=297, RMSE=0.0249, MAE=0.0116, DirAcc=188/296 (63.5%), DM=1.822, p=0.0685
[JPY] Obs=297, RMSE=0.1970, MAE=0.0849, DirAcc=170/296 (57.4%), DM=1.747, p=0.0806
[MYR] Obs=297, RMSE=0.0387, MAE=0.0193, DirAcc=186/296 (62.8%), DM=2.182, p=0.0291
[NZD] Obs=297, RMSE=0.0852, MAE=0.0454, DirAcc=185/296 (62.5%), DM=2.215, p=0.0267
[SGD] Obs=297, RMSE=0.1091, MAE=0.0545, DirAcc=192/296 (64.9%), DM=1.028, p=0.3040
[SEK] Obs=297, RMSE=1.4693, MAE=0.6863, DirAcc=185/296 (62.5%), DM=2.096, p=0.0361
[PLN] Obs=297, RMSE=0.0507, MAE=0.0230, DirAcc=196/296 (66.2

Unnamed: 0,observations,rmse,mae,dir_hits,dir_total,dir_acc,dm_stat,dm_pvalue,series
0,297,0.001664,0.000757,190,296,0.641892,2.094806,0.036188,IDR
1,297,0.014551,0.007051,187,296,0.631757,0.514755,0.606724,KRW
2,297,0.024873,0.011575,188,296,0.635135,1.821727,0.068496,HKD
3,297,0.038697,0.019291,186,296,0.628378,2.182229,0.029093,MYR
4,297,0.050739,0.022971,196,296,0.662162,2.087653,0.036829,PLN
5,297,0.085193,0.045367,185,296,0.625,2.215359,0.026735,NZD
6,297,0.093143,0.047529,185,296,0.625,1.778233,0.075366,AUD
7,297,0.101456,0.05032,172,296,0.581081,2.054096,0.039966,CAD
8,297,0.109117,0.054505,192,296,0.648649,1.027814,0.304038,SGD
9,297,0.145125,0.067061,190,296,0.641892,2.574438,0.01004,EUR


## Multi FX – quarterly (Q)

In [None]:

from __future__ import annotations
import io, time, math
from typing import Optional, Tuple, Dict, Callable, List

import numpy as np
import pandas as pd
import requests, certifi
from sklearn.metrics import mean_absolute_error

import torch
from chronos import BaseChronosPipeline


# -----------------------------
# URLs / columns
# -----------------------------
# Multi-currency evaluation data; loaded by load_multi_fx in the driver below.
MULTI_URL = (
    "https://raw.githubusercontent.com/bredeespelid/"
    "Data_MasterOppgave/refs/heads/main/EURNOK/MultiFXData.csv"
)
# Panel used for fine-tuning; loaded by load_finetune_fx_panel.
NB_PANEL_URL = (
    "https://raw.githubusercontent.com/bredeespelid/"
    "Data_MasterOppgave/refs/heads/main/FineTuneData/NB1980-1999.csv"
)
# Daily covariates file; loaded by load_covariates.
COV_URL = (
    "https://raw.githubusercontent.com/bredeespelid/"
    "Data_MasterOppgave/refs/heads/main/Variables/All_Variables/variables_daily.csv"
)

# Covariate columns required from COV_URL; they are added to every
# forecasting context next to the target column.
COV_COLS = ["VIX", "SP500", "Brent"]

# Columns of the fine-tuning panel that are used as training targets.
FINETUNE_FX_COLS = [
    "AUD", "CAD", "CHF", "DKK", "GBP",
    "ISK", "JPY", "NZD", "SEK", "USD", "XDR",
]

# pandas period frequency used to build the quarterly walk-forward grid.
Q_FREQ = "Q-DEC"
# Minimum number of history rows required before a quarter is forecast.
MIN_HIST_DAYS = 40
# Upper bound on the number of context rows handed to the model.
MAX_CONTEXT = 2048
# Quarters whose forecast horizon exceeds this many steps are skipped.
MAX_HORIZON = 256


# -----------------------------
# Helpers
# -----------------------------
def download_csv_text(url: str, retries: int = 3, timeout: int = 60) -> str:
    """Download ``url`` over HTTP(S) and return the response body as text.

    Args:
        url: Address of the resource to fetch.
        retries: Maximum number of attempts.
        timeout: Per-request timeout in seconds, forwarded to ``requests.get``.

    Returns:
        The decoded response body of the first successful attempt.

    Raises:
        RuntimeError: If no attempt succeeded. The last underlying error is
            attached as ``__cause__`` so the original traceback is preserved.
    """
    last_err = None
    for k in range(1, retries + 1):
        try:
            r = requests.get(url, timeout=timeout, verify=certifi.where())
            r.raise_for_status()
            return r.text
        except Exception as e:
            last_err = e
            if k < retries:
                # Linearly growing pause between attempts (1.5 s, 3 s, ...).
                time.sleep(1.5 * k)
    # Chain the cause explicitly; previously only its string form survived.
    raise RuntimeError(f"Download failed: {last_err}") from last_err


def load_multi_fx(url: str) -> pd.DataFrame:
    """Fetch the multi-FX CSV and return it as a forward-filled daily frame.

    The CSV dialect is probed by trying separator/decimal combinations in a
    fixed order until one of them yields a ``DATE`` column. Rows whose date
    cannot be parsed are dropped, every remaining column is coerced to
    numeric, and the frame is reindexed to a calendar-day range (gaps are
    forward-filled).

    Raises:
        ValueError: If no dialect produces a ``DATE`` column.
    """
    csv_text = download_csv_text(url)

    # (separator, decimal) candidates, tried in this order.
    dialects = ((",", "."), (";", "."), (",", ","), (";", ","))
    for sep, decimal in dialects:
        frame = pd.read_csv(
            io.StringIO(csv_text), sep=sep, encoding="utf-8-sig", decimal=decimal
        )
        if "DATE" in frame.columns:
            break
    else:
        raise ValueError("DATE column missing in MultiFXData.")

    frame["DATE"] = pd.to_datetime(frame["DATE"], errors="coerce")
    frame = frame.dropna(subset=["DATE"]).sort_values("DATE").set_index("DATE")

    numeric = frame.apply(pd.to_numeric, errors="coerce")
    calendar = pd.date_range(numeric.index.min(), numeric.index.max(), freq="D")
    daily = numeric.reindex(calendar).ffill()
    daily.index.name = "DATE"
    return daily


def load_covariates(url: str) -> pd.DataFrame:
    """Fetch the covariate CSV and return a forward-filled daily frame.

    Only the ``Date`` column and the columns listed in ``COV_COLS`` are kept.
    Rows with an unparseable date are dropped, covariates are coerced to
    numeric, and the result is reindexed to a calendar-day range with gaps
    forward-filled.

    Returns:
        Frame indexed by ``DATE`` whose columns follow the order of
        ``COV_COLS``.

    Raises:
        ValueError: If ``Date`` or any covariate column is missing.
    """
    text = download_csv_text(url)
    raw = pd.read_csv(io.StringIO(text))

    # Ordered (and de-duplicated) selection: picking columns from a set made
    # the output column order depend on string-hash randomisation.
    wanted = list(dict.fromkeys(["Date", *COV_COLS]))
    missing = set(wanted) - set(raw.columns)
    if missing:
        raise ValueError(f"Missing covariate columns: {missing}. Got: {list(raw.columns)}")

    df = (
        raw[wanted]
        .rename(columns={"Date": "DATE"})
        .assign(DATE=lambda x: pd.to_datetime(x["DATE"], errors="coerce"))
        .dropna(subset=["DATE"])
        .sort_values("DATE")
        .set_index("DATE")
    )
    for c in COV_COLS:
        df[c] = pd.to_numeric(df[c], errors="coerce")

    full_idx = pd.date_range(df.index.min(), df.index.max(), freq="D")
    df_d = df.reindex(full_idx).ffill()
    df_d.index.name = "DATE"
    return df_d


def load_finetune_fx_panel(url: str) -> pd.DataFrame:
    """Fetch the fine-tuning panel and return it indexed by date.

    The CSV is read with ``;`` as separator. The ``ds`` column is parsed
    day-first into the ``DATE`` index, the ``FINETUNE_FX_COLS`` columns are
    coerced to numeric, and rows where all of them are missing are removed.

    Raises:
        ValueError: If ``ds`` or any fine-tuning column is absent.
    """
    csv_text = download_csv_text(url)
    raw = pd.read_csv(io.StringIO(csv_text), sep=";", decimal=".", encoding="utf-8-sig")

    wanted = ["ds"] + FINETUNE_FX_COLS
    absent = set(wanted) - set(raw.columns)
    if absent:
        raise ValueError(f"Missing columns in NB panel: {absent}")

    panel = raw[wanted].rename(columns={"ds": "DATE"})
    panel = panel.assign(
        DATE=pd.to_datetime(panel["DATE"], dayfirst=True, errors="coerce")
    )
    panel = panel.dropna(subset=["DATE"]).sort_values("DATE").set_index("DATE")

    numeric_cols = {
        name: pd.to_numeric(panel[name], errors="coerce")
        for name in FINETUNE_FX_COLS
    }
    panel = panel.assign(**numeric_cols)
    return panel.dropna(how="all", subset=FINETUNE_FX_COLS)


def series_daily_and_b(df_d: pd.DataFrame, col: str) -> Tuple[pd.Series, pd.Series]:
    """Return ``col`` as a ``(business_day, daily)`` pair of float series.

    Note the order: the business-day version (``asfreq("B")`` followed by a
    forward fill) comes first, the float-cast input series second.
    """
    daily = df_d[col].astype(float)
    business = daily.asfreq("B")
    business = business.ffill()
    return business, daily


def last_trading_day(S_b: pd.Series, start: pd.Timestamp, end: pd.Timestamp) -> Optional[pd.Timestamp]:
    """Return the last index label of ``S_b`` within ``[start, end]``, or ``None`` if the window is empty."""
    window = S_b.loc[start:end]
    if window.empty:
        return None
    return window.index[-1]


def make_pastcov_context_multi(hist_wide: pd.DataFrame, series_name: str) -> pd.DataFrame:
    """Build the long-format context frame for one target series.

    The result has the columns ``timestamp`` (index of ``hist_wide``),
    ``item_id`` (constant ``series_name``), ``target`` (the series values as
    float) and one float column per entry in ``COV_COLS``, sorted by
    ``timestamp``.
    """
    columns = {
        "timestamp": hist_wide.index,
        "item_id": series_name,
        "target": hist_wide[series_name].astype(float).values,
    }
    columns.update(
        {name: hist_wide[name].astype(float).values for name in COV_COLS}
    )
    return pd.DataFrame(columns).sort_values("timestamp")


def build_base_chronos_pipeline() -> BaseChronosPipeline:
    """Load the pretrained ``amazon/chronos-2`` pipeline on CUDA in float16.

    Raises:
        SystemExit: If no CUDA device is available.
    """
    if torch.cuda.is_available():
        load_kwargs = {"device_map": "cuda", "torch_dtype": torch.float16}
        return BaseChronosPipeline.from_pretrained("amazon/chronos-2", **load_kwargs)
    raise SystemExit("CUDA not available.")


def finetune_chronos_on_nb_panel(
    pipeline: BaseChronosPipeline,
    fx_panel: pd.DataFrame,
    ft_params,
) -> BaseChronosPipeline:
    """Fine-tune ``pipeline`` on the columns of ``fx_panel`` listed in ``FINETUNE_FX_COLS``.

    Each column is reduced to its non-missing values as a float32 array.
    Columns shorter than twice ``ft_params.prediction_length`` are left out.
    The remaining series are passed to ``pipeline.fit`` without covariates.

    Args:
        pipeline: Pipeline exposing ``fit``.
        fx_panel: Wide frame holding the fine-tuning series.
        ft_params: Object providing ``prediction_length``, ``num_steps``,
            ``learning_rate``, ``batch_size`` and ``logging_steps``.

    Returns:
        Whatever ``pipeline.fit`` returns.
    """
    shortest_allowed = ft_params.prediction_length * 2
    cleaned = (
        fx_panel[name].dropna().astype(np.float32).values
        for name in FINETUNE_FX_COLS
    )
    train_inputs: List[Dict] = [
        {"target": values, "past_covariates": {}, "future_covariates": {}}
        for values in cleaned
        if values.size >= shortest_allowed
    ]

    return pipeline.fit(
        inputs=train_inputs,
        prediction_length=ft_params.prediction_length,
        num_steps=ft_params.num_steps,
        learning_rate=ft_params.learning_rate,
        batch_size=ft_params.batch_size,
        logging_steps=ft_params.logging_steps,
    )


def build_model_chronos2_multi_pastcov(
    pipeline: BaseChronosPipeline,
    max_context: int,
) -> Callable[[pd.DataFrame, int], pd.DataFrame]:
    """Wrap ``pipeline`` in a ``forecast_fn(context_df, H)`` closure.

    The closure keeps at most the last ``max_context`` rows of the context,
    requests the 0.5 quantile from ``pipeline.predict_df`` and returns a frame
    with the columns ``item_id``, ``timestamp`` and ``y_pred``.
    """

    def _extract_median(pred: pd.DataFrame) -> pd.DataFrame:
        """Normalise the prediction frame to ``item_id`` / ``timestamp`` / ``y_pred``."""
        out = pred.copy()
        available = out.columns
        if "timestamp" in available:
            out = out.sort_values("timestamp")

        # Work out which column carries the point forecast; several output
        # layouts are accepted, checked in this order.
        if "0.5" in available:
            source = "0.5"
        elif "predictions" in available:
            source = "predictions"
        elif "forecast" in available and "quantile" in available:
            # Long layout: keep only the median rows.
            out = out.loc[out["quantile"] == 0.5].copy()
            source = "forecast"
        else:
            fallbacks = ("forecast", "p50", "median", "mean")
            source = next((name for name in fallbacks if name in available), None)
            if source is None:
                raise RuntimeError("Unsupported Chronos schema.")

        out["y_pred"] = out[source]
        return out[["item_id", "timestamp", "y_pred"]].copy()

    def forecast_fn(context_df: pd.DataFrame, H: int) -> pd.DataFrame:
        """Forecast ``H`` steps ahead from (a suffix of) ``context_df``."""
        too_long = len(context_df) > max_context
        window = context_df.iloc[-max_context:].copy() if too_long else context_df
        with torch.inference_mode():
            raw_pred = pipeline.predict_df(
                window,
                prediction_length=H,
                quantile_levels=[0.5],
                id_column="item_id",
                timestamp_column="timestamp",
                target="target",
                predict_batches_jointly=False,
            )
        return _extract_median(raw_pred)

    return forecast_fn


def walk_forward_quarterly_pastcov(
    S_b: pd.Series,
    DF_wide: pd.DataFrame,
    forecast_fn: Callable[[pd.DataFrame, int], pd.DataFrame],
    series_name: str,
    start_period: Optional[pd.Period] = None,
) -> pd.DataFrame:
    """
    Quarterly walk-forward evaluation with past covariates.

    For every quarter between the first and last quarter covered by ``S_b``
    (not earlier than ``start_period`` when given):
      - cut = last index label of ``S_b`` inside the previous quarter
      - context = all rows of ``DF_wide`` up to and including the cut
        (target plus covariates)
      - forecast daily from the day after the cut through the last day of
        the quarter
      - aggregate the forecast to a quarterly mean over the quarter's
        ``S_b`` dates and pair it with the realised mean of ``S_b``

    A quarter is skipped when there is no cut in the previous quarter, the
    history has fewer than ``MIN_HIST_DAYS`` rows, ``S_b`` has no dates in
    the quarter, the horizon is outside ``(0, MAX_HORIZON]``, or the forecast
    is empty / does not overlap the quarter.

    Args:
        S_b: Target series; the driver passes the business-day series
            produced by ``series_daily_and_b``.
        DF_wide: Wide frame holding the target column and the covariates.
        forecast_fn: Callable ``(context_df, H)`` returning a frame with
            ``item_id``, ``timestamp`` and ``y_pred`` columns.
        series_name: Name of the target column in ``DF_wide``.
        start_period: Optional first quarter to evaluate.

    Returns:
        Frame indexed by ``quarter`` with columns ``y_true`` and ``y_pred``;
        empty when no quarter could be evaluated.
    """
    first_q = pd.Period(S_b.index.min(), freq=Q_FREQ)
    last_q = pd.Period(S_b.index.max(), freq=Q_FREQ)
    if start_period is not None:
        first_q = max(first_q, start_period)

    quarters = pd.period_range(first_q, last_q, freq=Q_FREQ)

    # `dropped` records why a quarter was skipped; it is kept for interactive
    # debugging and is not part of the return value.
    rows, dropped = {}, {}

    for q in quarters:
        prev_q = q - 1
        q_start, q_end = q.start_time, q.end_time
        prev_start, prev_end = prev_q.start_time, prev_q.end_time

        cut = last_trading_day(S_b, prev_start, prev_end)
        if cut is None:
            dropped[str(q)] = "no_cut_in_prev_q"
            continue

        hist_wide = DF_wide.loc[:cut]
        if hist_wide.shape[0] < MIN_HIST_DAYS:
            dropped[str(q)] = f"hist<{MIN_HIST_DAYS}"
            continue

        idx_q_b = S_b.index[(S_b.index >= q_start) & (S_b.index <= q_end)]
        if idx_q_b.size < 1:
            dropped[str(q)] = "no_bdays_in_q"
            continue
        y_true = float(S_b.loc[idx_q_b].mean())

        # The forecast starts on the day after `cut`, and `cut` can fall
        # before the last calendar day of the previous quarter (e.g. when
        # that quarter ends on a weekend). Counting only the days inside the
        # quarter then stops the forecast short of the quarter end, leaving
        # its last business days without a prediction. Size the horizon from
        # the cut to the quarter's final day instead.
        H = (q_end.normalize() - cut.normalize()).days
        if H <= 0 or H > MAX_HORIZON:
            dropped[str(q)] = f"horizon_invalid(H={H})"
            continue

        context_df = make_pastcov_context_multi(hist_wide, series_name)
        df_pred = forecast_fn(context_df, H)
        if df_pred.empty:
            dropped[str(q)] = "no_predictions"
            continue

        f_idx = pd.date_range(cut + pd.Timedelta(days=1), periods=H, freq="D")
        df_fx = df_pred[df_pred["item_id"] == series_name].copy().sort_values("timestamp")

        # Place the model output on the expected daily grid; days the model
        # did not return are filled with the previous forecast value.
        pred_daily = pd.Series(index=f_idx, dtype=float)
        tmp = df_fx.set_index("timestamp")["y_pred"]
        common = tmp.index.intersection(pred_daily.index)
        pred_daily.loc[common] = tmp.loc[common]
        pred_daily = pred_daily.ffill()

        # Average over the same dates that enter y_true.
        pred_b = pred_daily.reindex(idx_q_b, method=None)
        if pred_b.isna().all():
            dropped[str(q)] = "no_overlap_pred_B_days"
            continue
        y_pred = float(pred_b.dropna().mean())

        rows[str(q)] = {"quarter": q, "y_true": y_true, "y_pred": y_pred}

    df = pd.DataFrame.from_dict(rows, orient="index")
    if not df.empty:
        df = df.set_index("quarter").sort_index()
    return df


def evaluate_with_dm(eval_df: pd.DataFrame) -> Dict[str, float]:
    """Summarise forecast accuracy for a frame with ``y_true`` and ``y_pred``.

    Computes RMSE and MAE on rows where both values are present, directional
    accuracy relative to the previous realised value, and a Diebold-Mariano
    style statistic (squared-error loss, normal approximation, two-sided
    p-value) against a random-walk forecast given by the previous ``y_true``.

    Returns:
        Dict with ``observations``, ``rmse``, ``mae``, ``dir_hits``,
        ``dir_total``, ``dir_acc``, ``dm_stat`` and ``dm_pvalue``; metrics
        that cannot be computed are NaN.
    """

    def _std_normal_cdf(z: float) -> float:
        return 0.5 * (1.0 + math.erf(z / math.sqrt(2.0)))

    def _dm_against(actual, model, baseline, h: int = 1):
        # Requires at least five complete rows; otherwise both outputs are NaN.
        aligned = pd.concat({"y": actual, "m": model, "rw": baseline}, axis=1).dropna()
        if len(aligned) < 5:
            return float("nan"), float("nan")
        model_err = aligned["y"] - aligned["m"]
        base_err = aligned["y"] - aligned["rw"]
        loss_diff = (model_err**2) - (base_err**2)
        count = int(len(loss_diff))
        mean_diff = float(loss_diff.mean())
        sample_var = float(np.var(loss_diff, ddof=1)) if count > 1 else 0.0
        var_of_mean = sample_var / count
        if var_of_mean > 0:
            stat = mean_diff / math.sqrt(var_of_mean)
        else:
            stat = float("nan")
        if np.isfinite(stat):
            p_two_sided = 2.0 * (1.0 - _std_normal_cdf(abs(stat)))
        else:
            p_two_sided = float("nan")
        return stat, p_two_sided

    frame = eval_df.copy()
    frame["err"] = frame["y_true"] - frame["y_pred"]
    valid = frame.dropna(subset=["y_true", "y_pred"]).copy()

    n_obs = int(len(valid))
    if n_obs:
        rmse = float(np.sqrt(np.mean(np.square(valid["err"]))))
        mae = float(mean_absolute_error(valid["y_true"], valid["y_pred"]))
    else:
        rmse = np.nan
        mae = np.nan

    # Directional accuracy: sign of the move away from the previous actual.
    valid["y_prev"] = valid["y_true"].shift(1)
    has_prev = valid["y_prev"].notna()
    with_prev = valid.loc[has_prev]
    actual_move = np.sign(with_prev["y_true"] - with_prev["y_prev"])
    predicted_move = np.sign(with_prev["y_pred"] - with_prev["y_prev"])
    hits = int((actual_move.values == predicted_move.values).sum())
    total = int(has_prev.sum())
    dir_acc = (hits / total) if total else np.nan

    # DM vs RW
    frame["rw_pred"] = frame["y_true"].shift(1)
    dm_stat, p_val = _dm_against(frame["y_true"], frame["y_pred"], frame["rw_pred"], h=1)

    summary = {
        "observations": n_obs,
        "rmse": rmse,
        "mae": mae,
        "dir_hits": hits,
        "dir_total": total,
        "dir_acc": dir_acc,
    }
    summary["dm_stat"] = float(dm_stat) if np.isfinite(dm_stat) else np.nan
    summary["dm_pvalue"] = float(p_val) if np.isfinite(p_val) else np.nan
    return summary


# -----------------------------
# 1) Load eval data + covariates
# -----------------------------
df_eval_d = load_multi_fx(MULTI_URL)
cov_d = load_covariates(COV_URL)
# Inner join: only dates present in both the FX data and the covariates remain.
df_eval_d = df_eval_d.join(cov_d, how="inner")

# Every non-covariate column is treated as a target series.
fx_cols = [c for c in df_eval_d.columns if c not in COV_COLS]

print(f"Eval D (merged): {df_eval_d.index.min().date()} → {df_eval_d.index.max().date()} | n={len(df_eval_d)}")
print("Covariates:", COV_COLS)
print(f"Running quarterly walk-forward for {len(fx_cols)} series.")

# -----------------------------
# 2) Fine-tune on the NB panel + build the forecast function
# -----------------------------
fx_panel_full = load_finetune_fx_panel(NB_PANEL_URL)
ft_end = fx_panel_full.index.max()

base_pipe = build_base_chronos_pipeline()
# NOTE(review): BEST_FT_PARAMS is not defined in this cell; presumably it is
# left in the notebook namespace by an earlier cell — confirm before running
# this cell on its own.
ft_pipe = finetune_chronos_on_nb_panel(base_pipe, fx_panel_full, BEST_FT_PARAMS)

# Evaluation starts in the quarter after the last fine-tuning observation.
eval_start_period = pd.Period(ft_end, freq=Q_FREQ) + 1
print("Eval start quarter:", eval_start_period)

# The context cap is the smaller of MAX_CONTEXT and BEST_FT_PARAMS.max_context
# (MAX_CONTEXT alone when the attribute is missing).
forecast_fn = build_model_chronos2_multi_pastcov(
    ft_pipe,
    max_context=min(MAX_CONTEXT, getattr(BEST_FT_PARAMS, "max_context", MAX_CONTEXT))
)

# -----------------------------
# 3) Walk-forward + metrics per FX (quarterly)
# -----------------------------
metrics_rows = []

for col in fx_cols:
    # S_d (the daily series) is not used below; only S_b feeds the walk-forward.
    S_b, S_d = series_daily_and_b(df_eval_d, col)

    df_q = walk_forward_quarterly_pastcov(
        S_b=S_b,
        DF_wide=df_eval_d,
        forecast_fn=forecast_fn,
        series_name=col,
        start_period=eval_start_period,
    )

    # Series without any evaluable quarter are left out of the metrics table.
    if df_q.empty:
        continue

    m = evaluate_with_dm(df_q)
    m["series"] = col
    metrics_rows.append(m)

    print(
        f"[{col}] Obs={m['observations']}, RMSE={m['rmse']:.4f}, MAE={m['mae']:.4f}, "
        f"DirAcc={m['dir_hits']}/{m['dir_total']} ({m['dir_acc']*100:.1f}%), "
        f"DM={m['dm_stat']:.3f}, p={m['dm_pvalue']:.4f}"
    )

# One row per series, ordered by ascending RMSE.
# NOTE(review): if metrics_rows is empty the column selection below will
# presumably raise a KeyError — confirm whether that case can occur.
metrics_df_q = (
    pd.DataFrame(metrics_rows)
    .loc[:, ["series", "observations", "rmse", "mae", "dir_hits", "dir_total", "dir_acc", "dm_stat", "dm_pvalue"]]
    .sort_values("rmse")
    .reset_index(drop=True)
)

# Bare expression so the notebook displays the table as the cell result.
metrics_df_q


Eval D (merged): 2000-01-31 → 2024-12-12 | n=9083
Covariates: ['VIX', 'SP500', 'Brent']
Running quarterly walk-forward for 19 series.


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
10,1.8615
20,2.3732


Eval start quarter: 2000Q1
[I44] Obs=99, RMSE=2.2990, MAE=1.2097, DirAcc=53/98 (54.1%), DM=0.840, p=0.4007
[AUD] Obs=99, RMSE=0.1447, MAE=0.0768, DirAcc=65/98 (66.3%), DM=0.283, p=0.7768
[EUR] Obs=99, RMSE=0.2374, MAE=0.1207, DirAcc=58/98 (59.2%), DM=1.642, p=0.1006
[CAD] Obs=99, RMSE=0.1539, MAE=0.0752, DirAcc=56/98 (57.1%), DM=-0.201, p=0.8405
[GBP] Obs=99, RMSE=0.4474, MAE=0.1921, DirAcc=61/98 (62.2%), DM=0.820, p=0.4124
[HKD] Obs=99, RMSE=0.0404, MAE=0.0208, DirAcc=46/98 (46.9%), DM=0.660, p=0.5095
[JPY] Obs=99, RMSE=0.3170, MAE=0.1464, DirAcc=55/98 (56.1%), DM=0.142, p=0.8867
[MYR] Obs=99, RMSE=0.0590, MAE=0.0307, DirAcc=56/98 (57.1%), DM=-0.114, p=0.9091
[NZD] Obs=99, RMSE=0.1320, MAE=0.0730, DirAcc=59/98 (60.2%), DM=0.201, p=0.8409
[SGD] Obs=99, RMSE=0.1795, MAE=0.0909, DirAcc=53/98 (54.1%), DM=0.463, p=0.6432
[SEK] Obs=99, RMSE=2.3957, MAE=1.2634, DirAcc=62/98 (63.3%), DM=0.924, p=0.3557
[PLN] Obs=99, RMSE=0.0881, MAE=0.0427, DirAcc=63/98 (64.3%), DM=1.486, p=0.1372
[USD] Obs=9

Unnamed: 0,series,observations,rmse,mae,dir_hits,dir_total,dir_acc,dm_stat,dm_pvalue
0,IDR,99,0.002556,0.001342,68,98,0.693878,0.519881,0.603147
1,KRW,99,0.023644,0.012355,57,98,0.581633,-0.541491,0.588169
2,HKD,99,0.040384,0.020784,46,98,0.469388,0.659658,0.509473
3,MYR,99,0.058951,0.030676,56,98,0.571429,-0.114142,0.909125
4,PLN,99,0.088055,0.042714,63,98,0.642857,1.486476,0.137153
5,NZD,99,0.132022,0.072978,59,98,0.602041,0.200772,0.840877
6,AUD,99,0.144664,0.076821,65,98,0.663265,0.283435,0.776843
7,CAD,99,0.153896,0.075202,56,98,0.571429,-0.201268,0.840489
8,SGD,99,0.179479,0.0909,53,98,0.540816,0.46318,0.643235
9,EUR,99,0.237374,0.120678,58,98,0.591837,1.642129,0.100563
