In [1]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Create project directory and change working directory
!mkdir -p '/content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)'
%cd '/content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)'

# Clone source code repository if not exists
!git clone https://github.com/amazon-science/chronos-forecasting
%cd chronos-forecasting

# Install required packages
%pip install torch transformers datasets accelerate scikit-learn tqdm joblib

import os
import sys
import json
import math
import random
import warnings
from typing import Dict, Any, Optional, Tuple, List

import numpy as np
import pandas as pd
import torch
! pip install yfinance
import yfinance as yf
import statsmodels.api as sm
from datetime import datetime
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

from transformers import Trainer, TrainingArguments, EarlyStoppingCallback
from transformers.trainer_utils import IntervalStrategy
from transformers.data.data_collator import default_data_collator

from peft import LoraConfig, get_peft_model, PeftModel, TaskType

from sklearn.metrics import mean_squared_error, mean_absolute_error
from scipy.stats import f as f_dist
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")


# ========================= Basic Settings =========================
def ensure_chronos_import() -> None:
    """
    Make the ``chronos`` package importable.

    A plain import is attempted first. If it fails, every known location of the
    chronos-forecasting ``src`` directory that exists on disk and is not yet on
    ``sys.path`` is appended in turn, retrying the import after each addition.

    Raises:
        ImportError: when chronos cannot be imported from any candidate location.
    """
    def _chronos_available() -> bool:
        # Any exception raised while importing counts as "not available".
        try:
            import chronos  # noqa: F401
        except Exception:
            return False
        return True

    if _chronos_available():
        return

    search_dirs = (
        "/Users/june/Documents/University of Manchester/Data Science/ERP/Project code/3_Benchmark/5_Foundation_Models/Chronos/chronos-forecasting/src",
        "/content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos-forecasting/src",
        "/content/chronos-forecasting/src",
    )
    for src_dir in search_dirs:
        # Skip locations that do not exist or were already registered.
        if not os.path.isdir(src_dir) or src_dir in sys.path:
            continue
        sys.path.append(src_dir)
        if _chronos_available():
            return

    raise ImportError("chronos import failed. Please clone chronos-forecasting and ensure its 'src' is on sys.path.")

def set_seed(seed: int = 42) -> None:
    """
    Seed every random number generator used in the notebook.

    Covers Python's ``random``, NumPy, and PyTorch (CPU), plus all CUDA devices
    and the MPS backend when they are available. A failure while seeding MPS is
    ignored.

    Args:
        seed: Value passed to each generator.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    if not torch.backends.mps.is_available():
        return
    try:
        torch.mps.manual_seed(seed)
    except Exception:
        # Best effort only: seeding MPS must not abort the run.
        pass

def get_device() -> torch.device:
    """
    Pick the compute device: MPS first, then CUDA, falling back to CPU.

    Returns:
        The ``torch.device`` of the first available backend in that order.
    """
    backends_in_priority_order = (
        ("mps", torch.backends.mps.is_available),
        ("cuda", torch.cuda.is_available),
    )
    for backend_name, is_available in backends_in_priority_order:
        if is_available():
            return torch.device(backend_name)
    return torch.device("cpu")

Mounted at /content/drive
/content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)
fatal: destination path 'chronos-forecasting' already exists and is not an empty directory.
/content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos-forecasting
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-many

In [2]:
def annual_sharpe(rets, freq=252):
    """
    Annualised Sharpe ratio of a series of per-period (excess) returns.

    Args:
        rets: Array-like of per-period returns.
        freq: Number of periods per year used for annualisation.

    Returns:
        Annualised mean divided by annualised sample volatility (ddof=1), or 0
        when the volatility is not strictly positive.
    """
    yearly_mean = float(np.mean(rets)) * freq
    yearly_vol = float(np.std(rets, ddof=1)) * np.sqrt(freq)
    if yearly_vol > 0:
        return yearly_mean / yearly_vol
    return 0

# Load risk-free rate & calculate S&P500 Excess Sharpe

rf_file = "/content/drive/MyDrive/ERP Data/CRSP_2016_2024_top50_with_exret.csv"
try:
    rf_df = pd.read_csv(rf_file, usecols=["date", "rf"])
    rf_df["date"] = pd.to_datetime(rf_df["date"])
    rf_df = rf_df.drop_duplicates("date").set_index("date").sort_index()
    rf_series = rf_df["rf"].astype(float)

    # auto_adjust is passed explicitly: it matches the current yfinance default
    # and silences the FutureWarning about the changed default.
    px = yf.download("^GSPC", start="2016-01-01", end="2024-12-31", auto_adjust=True)["Close"]
    # Recent yfinance versions return a one-column DataFrame here. Subtracting a
    # 1-D array from its (N, 1) values would broadcast to an (N, N) matrix, so
    # reduce it to a Series before computing returns.
    if isinstance(px, pd.DataFrame):
        px = px.iloc[:, 0]
    sp_ret = px.pct_change().dropna()

    # Align the risk-free rate on the index trading days; forward-fill gaps.
    rf_align = rf_series.reindex(sp_ret.index).ffill()
    # Index-aligned subtraction; days without any risk-free observation are dropped
    # instead of turning the whole Sharpe ratio into NaN.
    sp_excess = (sp_ret - rf_align).dropna().to_numpy()
    if sp_excess.size < 2:
        raise ValueError("no overlapping dates between S&P500 returns and risk-free rate")

    SR_MKT_EX = annual_sharpe(sp_excess)
    print(f"[INFO] S&P500 Excess Sharpe (2016–24) = {SR_MKT_EX:.3f}")
except Exception as e:
    print(f"Warning: Could not load risk-free rate data: {e}")
    SR_MKT_EX = 0.5  # Use default value

def delta_sharpe(r2_zero: float, sr_base: float):
    """
    Sharpe-ratio improvement implied by a zero-based R².

    Args:
        r2_zero: Zero-based out-of-sample R² of the forecast.
        sr_base: Sharpe ratio of the baseline strategy.

    Returns:
        ``(ΔSharpe, Sharpe*)``. When ``r2_zero`` is <= 0 or >= 1 the baseline is
        returned unchanged as ``(0.0, sr_base)``; otherwise
        ``Sharpe* = sqrt(sr_base² + r2_zero) / sqrt(1 - r2_zero)``.
    """
    outside_open_unit_interval = (r2_zero <= 0) or (r2_zero >= 1)
    if outside_open_unit_interval:
        return 0.0, sr_base

    numerator = np.sqrt(sr_base ** 2 + r2_zero)
    denominator = np.sqrt(1 - r2_zero)
    improved_sharpe = numerator / denominator
    return improved_sharpe - sr_base, improved_sharpe

# Zero-based R²
def r2_zero(y_true, y_pred):
    """
    Zero-based R²: the benchmark forecast is 0 rather than the sample mean.

    Args:
        y_true: array of realised values, shape (N,)
        y_pred: array of predicted values, shape (N,)

    Returns:
        ``1 - sum((y_true - y_pred)²) / sum(y_true²)``.
    """
    residual = y_true - y_pred
    ss_residual = np.sum(residual ** 2)
    ss_total = np.sum(y_true ** 2)
    return 1 - ss_residual / ss_total

def calc_ic_daily(df, method='spearman'):
    """
    Daily cross-sectional information coefficient (IC) and summary statistics.

    Args:
        df: DataFrame that must contain ['signal_date', 'y_true', 'y_pred'].
        method: Correlation method forwarded to ``Series.corr`` (rank IC by default).

    Returns:
        ``(mean_ic, t_ic, pos_ratio, ics)`` where ``ics`` is the per-date IC series
        with undefined (NaN) dates removed, ``t_ic`` is the t-statistic of the mean
        (NaN when the IC standard deviation is not positive) and ``pos_ratio`` is
        the share of dates with a positive IC.
    """
    def _cross_section_corr(day_slice):
        return day_slice['y_pred'].corr(day_slice['y_true'], method=method)

    daily_ic = df.groupby('signal_date').apply(_cross_section_corr).dropna()

    ic_mean = daily_ic.mean()
    ic_std = daily_ic.std(ddof=1)
    if ic_std > 0:
        ic_tstat = ic_mean / (ic_std / np.sqrt(len(daily_ic)))
    else:
        ic_tstat = np.nan
    share_positive = (daily_ic > 0).mean()
    return ic_mean, ic_tstat, share_positive, daily_ic

def calc_directional_metrics(y_true, y_pred, permnos=None):
    """
    Sign-prediction accuracy, overall and split by realised direction.

    Observations whose realised value is exactly 0 are excluded.

    Args:
        y_true: Realised values.
        y_pred: Predicted values.
        permnos: Optional stock identifiers. When given, the three accuracies are
            computed per stock and then averaged (ignoring NaN) across stocks.

    Returns:
        ``(overall_acc, up_acc, down_acc)``. Without ``permnos`` a missing up/down
        class yields 0; with ``permnos`` it yields NaN for that stock, which is
        skipped by the cross-stock average.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    def _signs_without_flat(actual, forecast):
        # Keep only observations with a non-zero realised sign.
        sign_actual = np.sign(actual)
        sign_forecast = np.sign(forecast)
        keep = sign_actual != 0
        return sign_actual[keep], sign_forecast[keep]

    def _subset_accuracy(sign_actual, sign_forecast, subset, fallback):
        if np.any(subset):
            return np.mean(sign_actual[subset] == sign_forecast[subset])
        return fallback

    if permnos is None:
        sign_a, sign_f = _signs_without_flat(y_true, y_pred)
        overall_acc = np.mean(sign_a == sign_f)
        up_acc = _subset_accuracy(sign_a, sign_f, sign_a > 0, 0)
        down_acc = _subset_accuracy(sign_a, sign_f, sign_a < 0, 0)
        return overall_acc, up_acc, down_acc

    frame = pd.DataFrame({"permno": permnos, "yt": y_true, "yp": y_pred})
    per_stock_overall, per_stock_up, per_stock_down = [], [], []
    for _, stock_rows in frame.groupby("permno"):
        sign_a, sign_f = _signs_without_flat(stock_rows["yt"].values, stock_rows["yp"].values)
        if len(sign_a) == 0:
            continue
        per_stock_overall.append(np.mean(sign_a == sign_f))
        per_stock_up.append(_subset_accuracy(sign_a, sign_f, sign_a > 0, np.nan))
        per_stock_down.append(_subset_accuracy(sign_a, sign_f, sign_a < 0, np.nan))

    return (
        np.nanmean(per_stock_overall),
        np.nanmean(per_stock_up),
        np.nanmean(per_stock_down),
    )

def _segment_metrics(y_true, y_pred, permnos, prefix):
    """Regression and directional metrics for one sub-sample, with keys prefixed by ``prefix``."""
    mse = mean_squared_error(y_true, y_pred)
    dir_acc, up_acc, down_acc = calc_directional_metrics(y_true, y_pred, permnos)
    return {
        f"{prefix}_R2_zero": r2_zero(y_true, y_pred),
        f"{prefix}_RMSE": np.sqrt(mse),
        f"{prefix}_MAE": mean_absolute_error(y_true, y_pred),
        f"{prefix}_MSE": mse,
        f"{prefix}_Dir_Acc": dir_acc,
        f"{prefix}_Up_Acc": up_acc,
        f"{prefix}_Down_Acc": down_acc,
    }


def regression_metrics(y_true, y_pred, k, meta=None, permnos=None):
    """
    Regression and directional metrics, optionally broken down by market-cap group.

    Args:
        y_true: Realised values (array-like).
        y_pred: Predicted values (array-like).
        k: Number of predictors. Not used by this function; kept so the signature
            stays compatible with existing callers.
        meta: Optional mapping/DataFrame. When it contains "MKTCAP_PERCENTILE",
            metrics are also reported for observations with percentile >= 0.75
            ("Top25_*") and <= 0.25 ("Bottom25_*").
        permnos: Optional stock identifiers (array-like), forwarded to
            ``calc_directional_metrics`` for per-stock averaging.

    Returns:
        dict with zero-based R², RMSE, MAE, MSE and directional accuracies, plus
        the Top25_/Bottom25_ variants for every non-empty market-cap group.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    # Coerce to an array so boolean-mask indexing below also works for plain lists.
    permnos_arr = None if permnos is None else np.asarray(permnos)

    mse = mean_squared_error(y_true, y_pred)
    dir_acc, up_acc, down_acc = calc_directional_metrics(y_true, y_pred, permnos)

    metrics = {
        "R²_zero": r2_zero(y_true, y_pred),
        "RMSE": np.sqrt(mse),
        "MAE": mean_absolute_error(y_true, y_pred),
        "MSE": mse,
        "Directional Accuracy": dir_acc,
        "Up_Directional_Acc": up_acc,
        "Down_Directional_Acc": down_acc
    }

    if meta is not None and "MKTCAP_PERCENTILE" in meta:
        # Positional boolean masks; np.asarray makes list-valued metadata comparable.
        percentile = np.asarray(meta["MKTCAP_PERCENTILE"])
        groups = (
            ("Top25", percentile >= 0.75),
            ("Bottom25", percentile <= 0.25),
        )
        for prefix, mask in groups:
            if not np.any(mask):
                continue
            group_permnos = permnos_arr[mask] if permnos_arr is not None else None
            metrics.update(_segment_metrics(y_true[mask], y_pred[mask], group_permnos, prefix))

    return metrics

def f_statistic(y_true, y_pred, k):
    """
    F statistic and p-value for the zero-based R² of a forecast.

    Args:
        y_true: Realised values (array-like).
        y_pred: Predicted values (array-like).
        k: Number of predictors (numerator degrees of freedom).

    Returns:
        ``(F, p)`` with ``F = (R²/k) / ((1 - R²)/(n - k))`` and ``p`` the upper-tail
        probability of the F(k, n - k) distribution. ``(0.0, 1.0)`` is returned
        when the test is undefined or uninformative: ``k <= 0``, ``n <= k``,
        ``sum(y_true²) == 0`` or ``R² <= 0``. A perfect fit (``R² >= 1``) returns
        ``(inf, 0.0)``.
    """
    # Accept lists / Series as well as arrays.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    n = len(y_true)
    if (k <= 0) or (n <= k):
        return 0.0, 1.0

    tss = np.sum(y_true ** 2)
    if tss == 0:
        # R² is undefined when every realised value is zero.
        return 0.0, 1.0
    rss = np.sum((y_true - y_pred) ** 2)
    r2 = 1 - rss / tss
    if r2 <= 0:
        return 0.0, 1.0
    if r2 >= 1:
        # Avoid the division by zero in (1 - r2); the limit is F -> inf, p -> 0.
        return float("inf"), 0.0

    F = (r2 / k) / ((1 - r2) / (n - k))
    p = f_dist.sf(F, k, n - k)
    return F, p

def overall_interval_metrics_method1(y_all, yhat_all, k, permnos_all=None, meta_all=None):
    """
    Method 1: evaluate the whole interval in one pass (all samples concatenated).

    Combines ``regression_metrics`` with the F test and the implied Sharpe
    improvement relative to two baselines: cash (Sharpe 0) and the market
    (module-level ``SR_MKT_EX``).

    Returns:
        dict of metrics that can be passed directly to ``save_metrics``.
    """
    summary = regression_metrics(
        y_true=y_all, y_pred=yhat_all, k=k, meta=meta_all, permnos=permnos_all
    )

    f_value, f_pvalue = f_statistic(y_all, yhat_all, k)
    summary.update({"F_stat": f_value, "F_pvalue": f_pvalue, "N_obs": len(y_all)})

    # Sharpe improvement versus each baseline, appended as "<name>_<suffix>" keys.
    baselines = (("cash", 0), ("mkt", SR_MKT_EX))
    for suffix, baseline_sharpe in baselines:
        gain, improved = delta_sharpe(summary["R²_zero"], sr_base=baseline_sharpe)
        summary[f"ΔSharpe_{suffix}"] = gain
        summary[f"Sharpe*_{suffix}"] = improved

    return summary

def sortino_ratio(rets, freq=252):
    """
    Annualised Sortino ratio.

    Args:
        rets: Per-period returns (any array-like: list, Series or ndarray).
        freq: Number of periods per year used for annualisation.

    Returns:
        Annualised mean return divided by the annualised downside deviation, or
        ``np.inf`` when there is no negative return. The downside deviation is the
        root mean square of the negative returns only (it is averaged over the
        negative observations, not over all observations).
    """
    # Coerce so that boolean-mask indexing works for lists as well.
    rets = np.asarray(rets, dtype=float)
    downside = rets[rets < 0]
    if len(downside) == 0:
        return np.inf
    mu = rets.mean() * freq
    sigma = np.sqrt((downside ** 2).mean()) * np.sqrt(freq)
    return mu / sigma

def cvar(rets, alpha=0.95):
    """
    Conditional value at risk (expected shortfall) of a return series.

    Args:
        rets: Per-period returns (any array-like: list, Series or ndarray).
        alpha: Confidence level; the tail is the worst ``1 - alpha`` share of returns.

    Returns:
        Mean of all returns that are less than or equal to the ``1 - alpha``
        quantile (a negative number for a loss-making tail).
    """
    # Coerce so that boolean-mask indexing works for lists as well.
    rets = np.asarray(rets, dtype=float)
    q = np.quantile(rets, 1 - alpha)
    return rets[rets <= q].mean()

def save_metrics(metrics_dict, name, window, path="portfolio_metrics.csv"):
    """
    Append one row of metrics to a CSV file, creating the file if needed.

    Args:
        metrics_dict: Mapping of metric name to value.
        name: Model name, stored in the 'Model' column.
        window: Window length, stored in the 'Window' column.
        path: Destination CSV; existing content is read, extended and rewritten.
    """
    record = dict(Model=name, Window=window)
    record.update(metrics_dict)
    new_row = pd.DataFrame([record])

    if not os.path.exists(path):
        table = new_row
    else:
        previous = pd.read_csv(path)
        table = pd.concat([previous, new_row], ignore_index=True)

    table.to_csv(path, index=False)
    print(f"Metrics saved for {name}_w{window} to {path}")

# Transaction-cost grid as decimal fractions: 5, 10, 20, 30 and 40 bps.
TC_GRID = [0.0005, 0.001, 0.002, 0.003, 0.004]
# Short tag for each cost level, keyed by the cost itself.
TC_TAG = dict(zip(TC_GRID, ("tc5", "tc10", "tc20", "tc30", "tc40")))

class PortfolioBacktester:
    """Builds signal-sorted portfolios and computes their returns, turnover and risk metrics."""

    def __init__(self):
        self.results = {}

    def calc_turnover(self, w_t, r_t, w_tp1):
        """
        Turnover between the drifted previous weights and the new target weights.

        Returns the gross size of the new weights when there is no previous
        portfolio (``w_t is None``) or when the previous portfolio was wiped out
        (``1 + portfolio return`` is numerically zero).
        """
        if w_t is None:
            return np.sum(np.abs(w_tp1))

        growth = 1 + np.sum(w_t * r_t)
        if abs(growth) < 1e-8:
            return np.sum(np.abs(w_tp1))

        # Weights after the period's returns, before any rebalancing.
        drifted = w_t * (1 + r_t) / growth
        return np.sum(np.abs(w_tp1 - drifted))

    @staticmethod
    def _side_weights(n_stocks, picks, market_caps, weight_scheme, sign):
        """Weights of one leg: ``sign`` is +1.0 for the long leg and -1.0 for the short leg."""
        leg = np.zeros(n_stocks)
        if len(picks) == 0:
            return leg
        if weight_scheme == "VW":
            caps = market_caps[picks]
            total_cap = np.sum(caps)
            # With a non-positive total cap the leg is left empty.
            if total_cap > 0:
                leg[picks] = sign * (caps / total_cap)
        else:
            leg[picks] = sign / len(picks)
        return leg

    def create_portfolios_with_permno_tracking(self, signals, market_caps, permnos, top_pct=0.1, bottom_pct=0.1, weight_scheme="VW"):
        """
        Create portfolio weights based on signals, strictly tracking permno alignment.

        The stocks with the highest signals form the long leg and those with the
        lowest signals form the short leg (at least one stock each).

        weight_scheme: 'VW' value-weighted, anything else equal-weighted.

        Returns a dict with 'long_only', 'short_only' and 'long_short' entries, each
        holding 'weights' (aligned with ``permnos``), a copy of 'permnos' and the
        'selected_permnos'. The long-short weights are rescaled to a gross exposure
        of 2.0.
        """
        n_stocks = len(signals)
        n_long = max(1, int(round(n_stocks * top_pct)))
        n_short = max(1, int(round(n_stocks * bottom_pct)))

        ranking = np.argsort(signals)[::-1]  # descending by signal
        long_picks = ranking[:n_long]
        short_picks = ranking[-n_short:]

        long_weights = self._side_weights(n_stocks, long_picks, market_caps, weight_scheme, 1.0)
        short_weights = self._side_weights(n_stocks, short_picks, market_caps, weight_scheme, -1.0)

        long_selected = permnos[long_picks] if len(long_picks) > 0 else np.array([])
        short_selected = permnos[short_picks] if len(short_picks) > 0 else np.array([])

        # Combine both legs and rescale to the target gross exposure.
        gross_target = 2.0
        gross_now = np.sum(np.abs(long_weights)) + np.sum(np.abs(short_weights))
        scale = gross_target / gross_now if gross_now > 1e-8 else 0.0
        combined_weights = scale * (long_weights + short_weights)

        return {
            'long_only': {
                'weights': long_weights,
                'permnos': permnos.copy(),
                'selected_permnos': long_selected,
            },
            'short_only': {
                'weights': short_weights,
                'permnos': permnos.copy(),
                'selected_permnos': short_selected,
            },
            'long_short': {
                'weights': combined_weights,
                'permnos': permnos.copy(),
                'selected_permnos': np.concatenate([long_selected, short_selected]),
            },
        }

    def calculate_aligned_portfolio_return(self, portfolio_weights, portfolio_permnos, actual_returns, actual_permnos):
        """
        Calculate portfolio return strictly aligned by permno.

        Stocks of the portfolio without a realised return contribute a return of 0.
        Returns ``(portfolio_return, aligned_returns)``.
        """
        realised = dict(zip(actual_permnos, actual_returns))
        aligned = np.fromiter(
            (realised.get(permno, 0.0) for permno in portfolio_permnos),
            dtype=float,
            count=len(portfolio_permnos),
        )
        return np.sum(portfolio_weights * aligned), aligned

    def calculate_metrics(self, returns, turnover_series=None):
        """Calculate portfolio metrics - returns summary metrics only, not full series"""
        daily = np.array(returns)

        ann_return = np.mean(daily) * 252
        ann_vol = np.std(daily, ddof=1) * np.sqrt(252)
        if ann_vol > 0:
            sharpe_ratio = ann_return / ann_vol
        else:
            sharpe_ratio = 0

        # Drawdown measured on cumulative log wealth, converted back to a fraction.
        log_wealth = np.cumsum(np.log1p(daily))
        running_peak = np.maximum.accumulate(log_wealth)
        deepest_gap = (running_peak - log_wealth).max()

        return {
            'annual_return': ann_return,
            'annual_vol': ann_vol,
            'sharpe': sharpe_ratio,
            'max_drawdown': 1 - np.exp(-deepest_gap),
            'max_1d_loss': np.min(daily),
            'avg_turnover': np.mean(turnover_series) if turnover_series is not None else 0,
            'sortino': sortino_ratio(daily),
            'cvar95': cvar(daily, alpha=0.95)
        }

  px = yf.download("^GSPC", start="2016-01-01", end="2024-12-31")["Close"]
[*********************100%***********************]  1 of 1 completed

[INFO] S&P500 Excess Sharpe (2016–24) = 0.652



  rf_align = rf_series.reindex(sp_ret.index).fillna(method="ffill")


In [3]:
# ===== Additional: Utility functions for backtesting (local version) =====

def load_datasets(npz_path: str):
    """
    Open an ``.npz`` archive and return the lazily-loaded NumPy archive object.

    Pickled object arrays are allowed (``allow_pickle=True``), so only load
    archives from a trusted source.
    """
    archive = np.load(npz_path, allow_pickle=True)
    return archive

def get_batch_size(window: int) -> int:
    """Inference batch size for a window length, using ``get_dynamic_batch_size`` with a base of 512."""
    batch_size = get_dynamic_batch_size(window, base=512)
    return batch_size

@torch.no_grad()
def chronos_rolling_prediction(
    pipeline,
    X_data: np.ndarray,
    batch_size: int = 256,
    prediction_length: int = 1,
    num_samples: int = 10
) -> np.ndarray:
    """
    One-step-ahead point forecasts for a stack of context windows.

    Each window of ``X_data`` is forecast with ``pipeline.predict`` in batches and
    the sample forecasts of the FIRST horizon step are averaged into one value.

    Args:
        pipeline: Object exposing ``predict(context=, prediction_length=, num_samples=)``.
            The forecasts are assumed to be laid out as
            (batch, num_samples, prediction_length), which is what the Chronos
            sampling pipeline documents — confirm if another pipeline type is used.
        X_data: Context windows, one per row.
        batch_size: Number of windows sent to the pipeline per call.
        prediction_length: Forecast horizon requested from the pipeline.
        num_samples: Number of sample paths drawn per window.

    Returns:
        float32 array with one forecast per window, in input order.
    """
    preds: List[float] = []
    for i in range(0, len(X_data), batch_size):
        ctx_list = [torch.from_numpy(seq.astype(np.float32)) for seq in X_data[i:i + batch_size]]
        fr = pipeline.predict(
            context=ctx_list,
            prediction_length=prediction_length,
            num_samples=num_samples
        )
        if isinstance(fr, torch.Tensor):
            # Step t=0 of every sample path, averaged over the samples axis.
            # (Indexing [:, 0, :] would pick sample 0 and average over the horizon.)
            # .float() keeps the NumPy conversion valid for reduced-precision outputs.
            means = fr[:, :, 0].float().mean(dim=1).cpu().numpy()
        else:
            # Per-series forecasts of shape (num_samples, prediction_length).
            means = np.array([np.asarray(f)[:, 0].mean() for f in fr])
        preds.extend(means.tolist())
    return np.array(preds, dtype=np.float32)

# ========================= Dataset and time-based split =========================
class ChronosWindowDataset(Dataset):
    """
    Map-style dataset that tokenizes (context window, next value) pairs on access.

    Sample order follows the row order of ``X``/``y``. Each item holds
    ``input_ids`` and ``attention_mask`` from the tokenizer's context transform
    and ``labels`` from its label transform, with masked label positions set to
    -100.
    """
    def __init__(self, X: np.ndarray, y: np.ndarray, chronos_tokenizer, prediction_length: int = 1) -> None:
        self.tokenizer = chronos_tokenizer
        self.pred_len = int(prediction_length)
        self.X = X.astype(np.float32)
        self.y = y.astype(np.float32).reshape(-1)

    def __len__(self) -> int:
        return self.X.shape[0]

    def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
        # The tokenizer works on batches, so add a leading batch axis of size 1.
        window = torch.from_numpy(self.X[idx]).unsqueeze(0)
        token_ids, token_mask, scale = self.tokenizer.context_input_transform(window)

        # Single target value, scaled with the scale derived from the context.
        target = torch.tensor(self.y[idx: idx + 1]).unsqueeze(0)
        labels, labels_mask = self.tokenizer.label_input_transform(target, scale)
        labels[labels_mask == 0] = -100

        sample = {
            "input_ids": token_ids.squeeze(0),
            "attention_mask": token_mask.squeeze(0),
            "labels": labels.squeeze(0),
        }
        return sample


def load_npz_dataset(npz_path: str) -> Dict[str, Any]:
    """
    Load every array of an ``.npz`` archive into a plain dict.

    The archive is expected to contain X, y, meta entries for the different
    windows and splits. All arrays are read eagerly and the file handle is closed
    before returning. Pickled object arrays are allowed (``allow_pickle=True``),
    so only load archives from a trusted source.

    Args:
        npz_path: Path of the ``.npz`` file.

    Returns:
        Mapping of archive key to the loaded array.
    """
    # The context manager closes the underlying file once all arrays are materialised.
    with np.load(npz_path, allow_pickle=True) as data:
        return {k: data[k] for k in data.files}


def extract_split(data: Dict[str, Any], window: int, split: str) -> Tuple[np.ndarray, np.ndarray, pd.DataFrame]:
    """
    Extract X, y and the meta DataFrame for a given window and split (train/test).

    Args:
        data: Mapping with keys ``X_{split}_{window}``, ``y_{split}_{window}`` and,
            optionally, ``meta_{split}_{window}``.
        window: Window length used in the key names.
        split: Split name used in the key names.

    Returns:
        ``(X, y, meta)``. When no meta entry exists, ``meta`` is a DataFrame with a
        synthetic ``PERMNO`` column ``0..len(X)-1``.

    Raises:
        KeyError: if the X or y entry is missing.
    """
    X = data[f"X_{split}_{window}"]
    y = data[f"y_{split}_{window}"]
    meta_raw = data.get(f"meta_{split}_{window}")
    if meta_raw is None:
        return X, y, pd.DataFrame({"PERMNO": np.arange(len(X))})

    payload = meta_raw
    if isinstance(meta_raw, np.ndarray):
        # A pickled dict comes back wrapped in a one-element object array; unwrap it.
        # Larger arrays are tabular already — calling .item() on them would raise.
        if meta_raw.size == 1:
            payload = meta_raw.item()
    elif hasattr(meta_raw, "item"):
        payload = meta_raw.item()
    return X, y, pd.DataFrame(payload)


def time_based_val_split(X: np.ndarray, y: np.ndarray, meta: pd.DataFrame, val_ratio: float = 0.2) -> Tuple[Tuple[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray], Tuple[pd.DataFrame, pd.DataFrame]]:
    """
    Chronological train/validation split without shuffling.

    The last ``val_ratio`` share of the rows becomes the validation set.

    Returns:
        ``((X_train, y_train), (X_val, y_val), (meta_train, meta_val))`` with both
        meta frames re-indexed from 0.
    """
    cut = int(math.floor(len(X) * (1.0 - val_ratio)))
    head, tail = slice(None, cut), slice(cut, None)

    train_pair = (X[head], y[head])
    val_pair = (X[tail], y[tail])
    meta_pair = (
        meta.iloc[head].reset_index(drop=True),
        meta.iloc[tail].reset_index(drop=True),
    )
    return train_pair, val_pair, meta_pair


def get_dynamic_batch_size(window: int, base: int = 512) -> int:
    """
    Batch size as a function of window length (longer sequence, smaller batch).

    Windows up to 5 use ``base``; up to 21, 252 and 512 use 128, 64 and 32;
    anything longer uses ``max(base // 16, 16)``.
    """
    if window <= 5:
        return base
    # (inclusive upper bound of the window length, batch size)
    tiers = ((21, 128), (252, 64), (512, 32))
    for upper_bound, size in tiers:
        if window <= upper_bound:
            return size
    return max(base // 16, 16)

# ========================= Ordered Trainer + LoRA config =========================
class OrderedTrainer(Trainer):
    """Trainer whose train and eval dataloaders never shuffle, so samples keep their time order."""

    def _make_ordered_loader(self, dataset, batch_size: int, drop_last: bool) -> DataLoader:
        """Build a non-shuffling DataLoader configured from the training arguments."""
        worker_count = getattr(self.args, "dataloader_num_workers", 0)
        loader_kwargs = {
            "dataset": dataset,
            "batch_size": batch_size,
            "shuffle": False,
            "collate_fn": self.data_collator,
            "num_workers": worker_count,
            "pin_memory": getattr(self.args, "dataloader_pin_memory", False),
            "drop_last": drop_last,
        }
        # These two options are only passed when worker processes are used.
        if worker_count and worker_count > 0:
            loader_kwargs.update(persistent_workers=True, prefetch_factor=4)
        return DataLoader(**loader_kwargs)

    def get_train_dataloader(self) -> DataLoader:  # type: ignore[override]
        return self._make_ordered_loader(
            self.train_dataset,
            self.args.per_device_train_batch_size,
            getattr(self.args, "dataloader_drop_last", False),
        )

    def get_eval_dataloader(self, eval_dataset=None) -> DataLoader:  # type: ignore[override]
        # Fall back to the trainer's own eval dataset when none is supplied.
        chosen = self.eval_dataset if eval_dataset is None else eval_dataset
        return self._make_ordered_loader(chosen, self.args.per_device_eval_batch_size, False)


def build_lora_config(r: int = 8, alpha: int = 16, dropout: float = 0.05, target_modules: Optional[List[str]] = None) -> LoraConfig:
    """
    Create the LoRA configuration for sequence-to-sequence fine-tuning.

    Args:
        r: LoRA rank.
        alpha: LoRA scaling factor (``lora_alpha``).
        dropout: Dropout applied inside the LoRA layers.
        target_modules: Module names to adapt; defaults to
            ``["q", "k", "v", "o", "wi", "wo"]`` when omitted.

    Returns:
        A ``LoraConfig`` with bias training disabled.
    """
    modules = ["q", "k", "v", "o", "wi", "wo"] if target_modules is None else target_modules
    settings = dict(
        r=r,
        lora_alpha=alpha,
        lora_dropout=dropout,
        bias="none",
        task_type=TaskType.SEQ_2_SEQ_LM,
        target_modules=modules,
    )
    return LoraConfig(**settings)


def bind_strip_num_items(model):
    """
    Replace ``model.forward`` with a wrapper that drops unsupported keyword arguments.

    Keyword arguments on the block list below are always removed; of the rest,
    only those named in the original ``forward`` signature are passed through.
    Positional arguments are forwarded untouched.

    Returns:
        The same model object, with its ``forward`` attribute rebound.
    """
    import inspect

    inner_forward = model.forward
    allowed_names = set(inspect.signature(inner_forward).parameters.keys())
    blocked_names = frozenset((
        'num_items',
        'num_items_in_batch',
        'num_samples',
        'prediction_length',
        'context',
        'scales',
    ))

    def wrapped_forward(*args, **kwargs):
        passthrough = {
            key: value
            for key, value in kwargs.items()
            if key not in blocked_names and key in allowed_names
        }
        return inner_forward(*args, **passthrough)

    model.forward = wrapped_forward
    return model



import torch

def get_precision_and_optim() -> tuple[bool, bool, bool, str]:
    """
    Choose mixed-precision flags and the optimizer name for the current hardware.

    Returns:
        ``(use_bf16, use_fp16, allow_tf32, optim_name)``. Without CUDA everything
        is disabled and the optimizer is ``"adamw_torch"``. With CUDA, TF32 is
        allowed on compute capability >= 8, bf16 is used when supported (fp16
        otherwise), and ``"adamw_torch_fused"`` is selected only when the installed
        ``torch.optim.AdamW`` exposes a ``fused`` option.
    """
    import inspect

    use_bf16 = False
    use_fp16 = False
    allow_tf32 = False
    optim_name = "adamw_torch"
    if not torch.cuda.is_available():
        return use_bf16, use_fp16, allow_tf32, optim_name

    try:
        major, _ = torch.cuda.get_device_capability(0)
    except Exception:
        major = 0
    allow_tf32 = major >= 8
    try:
        use_bf16 = torch.cuda.is_bf16_supported()
    except Exception:
        use_bf16 = major >= 8
    use_fp16 = not use_bf16

    # Assigning a string can never raise, so the fused optimizer has to be probed
    # explicitly: fall back to the plain implementation when AdamW has no `fused` flag.
    try:
        fused_supported = "fused" in inspect.signature(torch.optim.AdamW.__init__).parameters
    except (TypeError, ValueError):
        fused_supported = False
    if fused_supported:
        optim_name = "adamw_torch_fused"
    return use_bf16, use_fp16, allow_tf32, optim_name


def configure_torch_backends(allow_tf32: bool) -> None:
    """
    Apply best-effort performance settings to the PyTorch backends.

    On CUDA: sets the matmul TF32 switch, enables cuDNN autotuning and enables the
    flash and memory-efficient scaled-dot-product attention kernels. The math
    kernel stays enabled as a fallback for inputs the fused kernels do not
    support. On every device the float32 matmul precision is set to "high" when
    ``allow_tf32`` is true and "highest" otherwise, so the precision setting cannot
    silently re-enable TF32.

    Settings that the installed PyTorch version does not provide are skipped.

    Args:
        allow_tf32: Whether TF32 matrix multiplication may be used.
    """
    allow_tf32 = bool(allow_tf32)
    if torch.cuda.is_available():
        try:
            torch.backends.cuda.matmul.allow_tf32 = allow_tf32
        except (AttributeError, RuntimeError):
            pass
        try:
            torch.backends.cudnn.benchmark = True
        except (AttributeError, RuntimeError):
            pass
        # The toggles live on torch.backends.cuda as enable_*_sdp functions;
        # `sdp_kernel` is a context manager and has no enable_* attributes.
        for toggle_name in ("enable_flash_sdp", "enable_mem_efficient_sdp"):
            toggle = getattr(torch.backends.cuda, toggle_name, None)
            if callable(toggle):
                try:
                    toggle(True)
                except RuntimeError:
                    pass
    try:
        torch.set_float32_matmul_precision("high" if allow_tf32 else "highest")
    except AttributeError:
        # Older PyTorch releases do not have this setting.
        pass


def create_trainer(
    model,
    train_ds: Dataset,
    val_ds: Dataset,
    output_dir: str,
    per_device_train_batch_size: int,
    per_device_eval_batch_size: int,
    learning_rate: float,
    weight_decay: float,
    num_train_epochs: int,
    patience: int,
    logging_steps: int = 200,
    eval_strategy: str = "epoch",
    save_strategy: str = "epoch",
    dataloader_num_workers: int = 0,
    data_collator=None,
    warmup_ratio: float = 0.10,
) -> Trainer:
    """
    Assemble an ``OrderedTrainer`` with early stopping and hardware-dependent settings.

    Precision flags and the optimizer come from ``get_precision_and_optim``;
    backend switches are applied through ``configure_torch_backends``. The best
    checkpoint (lowest ``eval_loss``) is reloaded at the end of training and
    gradient checkpointing is requested.

    Args:
        model: Model to train.
        train_ds / val_ds: Training and validation datasets.
        output_dir: Directory for checkpoints and logs.
        per_device_train_batch_size / per_device_eval_batch_size: Batch sizes.
        learning_rate / weight_decay / num_train_epochs / warmup_ratio: Optimisation settings.
        patience: Early-stopping patience in evaluations.
        logging_steps: Logging interval in steps.
        eval_strategy / save_strategy: Interval strategy names.
        dataloader_num_workers: Worker count; values <= 0 select 4 on CUDA and 0 otherwise.
        data_collator: Optional collator; defaults to ``default_data_collator``.

    Returns:
        The configured trainer.
    """
    use_bf16, use_fp16, allow_tf32, optim_name = get_precision_and_optim()
    configure_torch_backends(allow_tf32)

    cuda_ready = torch.cuda.is_available()
    if dataloader_num_workers > 0:
        num_workers = dataloader_num_workers
    else:
        num_workers = 4 if cuda_ready else 0
    pin_memory = True if cuda_ready else False

    # Larger evaluation accumulation on A100 devices.
    try:
        eval_accum = 64 if (cuda_ready and "a100" in torch.cuda.get_device_name(0).lower()) else 32
    except Exception:
        eval_accum = 32

    # Arguments shared by both TrainingArguments variants below.
    shared_kwargs = dict(
        output_dir=output_dir,
        per_device_train_batch_size=per_device_train_batch_size,
        per_device_eval_batch_size=per_device_eval_batch_size,
        learning_rate=learning_rate,
        weight_decay=weight_decay,
        num_train_epochs=num_train_epochs,
        warmup_ratio=warmup_ratio,
        max_grad_norm=1.0,
        logging_dir=os.path.join(output_dir, "logs"),
        logging_steps=logging_steps,
        save_strategy=IntervalStrategy(save_strategy),
        save_total_limit=1,
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        greater_is_better=False,
        report_to=[],
        remove_unused_columns=False,
        dataloader_num_workers=num_workers,
        dataloader_pin_memory=pin_memory,
        dataloader_drop_last=False,
        fp16=use_fp16,
        bf16=use_bf16,
        gradient_accumulation_steps=1,
        gradient_checkpointing=True,
        group_by_length=False,
        optim=optim_name,
        eval_accumulation_steps=eval_accum,
    )

    try:
        args = TrainingArguments(
            eval_strategy=IntervalStrategy(eval_strategy),
            tf32=allow_tf32,
            **shared_kwargs,
        )
    except TypeError:
        # Fallback signature: `evaluation_strategy` keyword and no `tf32` argument.
        args = TrainingArguments(
            evaluation_strategy=IntervalStrategy(eval_strategy),
            **shared_kwargs,
        )

    collate = data_collator or default_data_collator
    trainer = OrderedTrainer(
        model=model,
        args=args,
        train_dataset=train_ds,
        eval_dataset=val_ds,
        data_collator=collate,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=patience)],
    )

    # Best effort: declare the label column, falling back to an empty list.
    try:
        trainer.label_names = ["labels"]
    except Exception:
        try:
            trainer.label_names = []
        except Exception:
            pass

    # Best effort: not every model object supports gradient checkpointing.
    try:
        model.gradient_checkpointing_enable()
    except Exception:
        pass

    return trainer


def is_a100() -> bool:
    """
    Tell whether GPU 0 is an A100 or a comparable device.

    True when the device name contains "a100", or when the compute capability is
    at least 8 and the device has at least 35 GiB of memory. False without CUDA or
    when the device properties cannot be queried.
    """
    if not torch.cuda.is_available():
        return False

    try:
        device_name = torch.cuda.get_device_name(0).lower()
    except Exception:
        device_name = None
    if device_name is not None and "a100" in device_name:
        return True

    try:
        major, _ = torch.cuda.get_device_capability(0)
        memory_gib = torch.cuda.get_device_properties(0).total_memory / (1024**3)
    except Exception:
        return False
    return (major >= 8) and (memory_gib >= 35)


def maybe_torch_compile(model):
    """
    Optionally wrap the model with ``torch.compile``.

    Compilation is attempted only when the environment variable
    ``USE_TORCH_COMPILE`` equals "1" and ``is_a100()`` is true; otherwise, or when
    compilation raises, the model is returned unchanged.
    """
    opted_in = os.environ.get("USE_TORCH_COMPILE", "0") == "1"
    # The device check is evaluated only after the opt-in flag.
    if not opted_in or not is_a100():
        return model

    try:
        import torch._dynamo
        torch._dynamo.config.suppress_errors = True
        return torch.compile(model, mode="reduce-overhead", fullgraph=False, backend="inductor")
    except Exception:
        return model

import math

def get_colab_memory_hint(default_bs: int) -> int:
    """Scale a batch size by the memory of GPU 0.

    - No GPU: ``default_bs`` is returned unchanged
    - >= 35 GiB: x2.0
    - >= 22 GiB: x1.5
    - otherwise: x1.0 (16 GiB is assumed when the memory cannot be queried)

    The scaled value is rounded and never below 1.
    """
    if not torch.cuda.is_available():
        return default_bs

    try:
        memory_gib = torch.cuda.get_device_properties(0).total_memory / (1024**3)
    except Exception:
        memory_gib = 16

    factor = 1.0
    # (minimum memory in GiB, multiplier), largest tier first
    for min_gib, tier_factor in ((35, 2.0), (22, 1.5)):
        if memory_gib >= min_gib:
            factor = tier_factor
            break
    return int(max(1, round(default_bs * factor)))

def adapt_per_window_cfg_for_colab(cfg_dict: dict) -> dict:
    """Return per-window configs with batch sizes scaled for the current GPU.

    Args:
        cfg_dict: Mapping of window -> config mapping. 'train_bs' defaults to
            256 when missing and 'eval_bs' defaults to max(256, train_bs).

    Returns:
        `cfg_dict` itself when CUDA is unavailable or when anything raises
        during adaptation (best effort). Otherwise a new dict whose entries
        are copies of the originals with:
        - 'train_bs' = get_colab_memory_hint(train_bs)
        - 'eval_bs'  = min(1024, max(eval_bs, 2 * new train_bs))
    """
    try:
        if torch.cuda.is_available():
            adapted = {}
            for window, window_cfg in cfg_dict.items():
                train_default = int(window_cfg.get('train_bs', 256))
                eval_default = int(window_cfg.get('eval_bs', max(256, train_default)))
                scaled_train = get_colab_memory_hint(train_default)
                scaled_eval = min(1024, max(eval_default, scaled_train * 2))
                # Copy so the caller's per-window dicts are left untouched.
                adapted[window] = {**window_cfg, 'train_bs': scaled_train, 'eval_bs': scaled_eval}
            return adapted
        return cfg_dict
    except Exception:
        return cfg_dict

In [4]:
# ========================= Global Config & Tokenizer/Collator =========================
# Seed the RNGs and make the `chronos` package importable before importing from it.
set_seed(42)
ensure_chronos_import()

from chronos import BaseChronosPipeline, ChronosConfig

# Model path and base model
base_model_id = "amazon/chronos-t5-large"
# Look-back window lengths; one LoRA adapter is trained per entry.
windows = [5, 21, 252, 512]

# Data path (time order not shuffled)
default_data_path = "/content/drive/MyDrive/ERP Data/all_window_datasets_unscaled.npz"
# Resolve the ERP_DATA_PATH override first and validate the path that will
# actually be loaded. Checking only the default path rejected valid overrides
# when the default file was absent and never validated the override itself.
data_path = os.environ.get("ERP_DATA_PATH", default_data_path)
assert os.path.exists(data_path), (
    f"Dataset not found at {data_path!r}. Please ensure all_window_datasets_unscaled.npz exists on Drive "
    "or point ERP_DATA_PATH at the dataset file."
)

# Save directories
root_dir = "/content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results"
save_models_dir = os.path.join(root_dir, "models")
save_preds_dir = os.path.join(root_dir, "predictions")
# Result tables are written directly into the root results directory.
save_results_dir = root_dir
for d in [save_models_dir, save_preds_dir, save_results_dir]:
    os.makedirs(d, exist_ok=True)

# Device and base pipeline (the pipeline's config is the template for the training config)
device = get_device()
print(f"Device: {device}")
pipeline = BaseChronosPipeline.from_pretrained(
    base_model_id,
    # "mps" on Apple silicon, "auto" when CUDA is present, otherwise no device map.
    device_map=(
        "mps" if device.type == "mps"
        else "auto" if torch.cuda.is_available()
        else None
    ),
    torch_dtype=torch.float32,
)

# Training config: every field is copied from the base config except
# prediction_length, which is fixed to 1.
base_cfg: ChronosConfig = pipeline.model.config  # type: ignore
train_cfg = ChronosConfig(
    prediction_length=1,
    **{
        field_name: getattr(base_cfg, field_name)
        for field_name in (
            "tokenizer_class",
            "tokenizer_kwargs",
            "context_length",
            "n_tokens",
            "n_special_tokens",
            "pad_token_id",
            "eos_token_id",
            "use_eos_token",
            "model_type",
            "num_samples",
            "temperature",
            "top_k",
            "top_p",
        )
    },
)
train_tokenizer = train_cfg.create_tokenizer()

# Collator: right pad input_ids in batch, adjust labels length
class ChronosPadCollator:
    """Batch collator that right-pads inputs and normalises the label length.

    For each feature dict (keys "input_ids", "attention_mask", "labels", each
    indexed along dimension 0):
    - "input_ids" is right-padded with `pad_token_id` to the longest input in
      the batch;
    - "attention_mask" is cast to long and right-padded with 0;
    - "labels" is forced to `label_len_expected` entries: longer label
      sequences keep only their trailing entries, shorter ones are
      right-padded with -100.
    """

    def __init__(self, pad_token_id: int, label_len_expected: int):
        self.pad_token_id = pad_token_id
        self.label_len_expected = int(label_len_expected)

    def __call__(self, features: List[Dict[str, torch.Tensor]]) -> Dict[str, torch.Tensor]:
        right_pad = torch.nn.functional.pad
        widest = max(item["input_ids"].shape[0] for item in features)
        wanted = self.label_len_expected

        id_rows, mask_rows, label_rows = [], [], []
        for item in features:
            shortfall = widest - item["input_ids"].shape[0]
            id_rows.append(right_pad(item["input_ids"], (0, shortfall), value=self.pad_token_id))
            mask_rows.append(right_pad(item["attention_mask"].to(torch.long), (0, shortfall), value=0))

            row = item["labels"]
            have = row.shape[0]
            if have > wanted:
                # Too long: keep only the trailing `wanted` entries.
                row = row[-wanted:]
            elif have < wanted:
                # Too short: pad on the right with -100.
                row = right_pad(row, (0, wanted - have), value=-100)
            label_rows.append(row)

        def batch(rows):
            return torch.stack(rows, dim=0)

        return {
            "input_ids": batch(id_rows),
            "attention_mask": batch(mask_rows),
            "labels": batch(label_rows),
        }

# Label length is the prediction length, plus one extra position when the
# config uses an EOS token with a seq2seq model.
label_len_expected = train_cfg.prediction_length + int(
    bool(train_cfg.use_eos_token and train_cfg.model_type == "seq2seq")
)
pad_collator = ChronosPadCollator(train_tokenizer.config.pad_token_id, label_len_expected)

print(f"Loading dataset: {data_path}")
data = load_npz_dataset(data_path)

Device: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/2.84G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/142 [00:00<?, ?B/s]

Loading dataset: /content/drive/MyDrive/ERP Data/all_window_datasets_unscaled.npz


In [5]:

# ========================= Empirical hyperparameters (per window) =========================
# One entry per look-back window. `r`, `alpha` and `dropout` feed the LoRA
# config; the remaining keys are consumed by the training loop and trainer.
per_window_cfg = {
    5: {
        "epochs": 5, "patience": 2, "lr": 5e-4, "wd": 0.005, "r": 8, "alpha": 16,
        "dropout": 0.05, "warmup_ratio": 0.10, "train_bs": 512, "eval_bs": 512,
    },
    21: {
        "epochs": 5, "patience": 2, "lr": 6e-4, "wd": 0.005, "r": 8, "alpha": 16,
        "dropout": 0.05, "warmup_ratio": 0.10, "train_bs": 512, "eval_bs": 512,
    },
    252: {
        "epochs": 5, "patience": 2, "lr": 3e-4, "wd": 0.010, "r": 16, "alpha": 32,
        "dropout": 0.07, "warmup_ratio": 0.10, "train_bs": 128, "eval_bs": 256,
    },
    512: {
        "epochs": 5, "patience": 2, "lr": 2e-4, "wd": 0.010, "r": 16, "alpha": 32,
        "dropout": 0.08, "warmup_ratio": 0.10, "train_bs": 64, "eval_bs": 128,
    },
}
# Batch sizes are rescaled for the GPU that is actually attached (no-op on CPU).
per_window_cfg = adapt_per_window_cfg_for_colab(per_window_cfg)

from transformers import AutoModelForSeq2SeqLM

# When False, the loop below only trains and saves adapters; no test-set
# predictions or metrics are produced.
DO_TEST_EVAL = False

# One metrics record per window; filled only when DO_TEST_EVAL is enabled.
results_summary: List[Dict[str, Any]] = []

# Fine-tune one LoRA adapter per look-back window. Every iteration reloads the
# base weights, so the adapters are trained independently of each other.
for ws in windows:
    cfg = per_window_cfg[ws]

    # The train split is further divided into train/validation parts by
    # time_based_val_split (val_ratio=0.2). The test split is extracted on every
    # iteration but only used when DO_TEST_EVAL is enabled.
    X_train, y_train, meta_train = extract_split(data, ws, split="train")
    X_test, y_test, meta_test = extract_split(data, ws, split="test")
    (X_tr, y_tr), (X_va, y_va), _ = time_based_val_split(X_train, y_train, meta_train, val_ratio=0.2)

    # Per-window Chronos config: copies every field of the base pipeline config
    # except prediction_length, which is fixed to 1.
    base_cfg: ChronosConfig = pipeline.model.config  # type: ignore
    ws_cfg = ChronosConfig(
        tokenizer_class=base_cfg.tokenizer_class,
        tokenizer_kwargs=base_cfg.tokenizer_kwargs,
        context_length=base_cfg.context_length,
        prediction_length=1,
        n_tokens=base_cfg.n_tokens,
        n_special_tokens=base_cfg.n_special_tokens,
        pad_token_id=base_cfg.pad_token_id,
        eos_token_id=base_cfg.eos_token_id,
        use_eos_token=base_cfg.use_eos_token,
        model_type=base_cfg.model_type,
        num_samples=base_cfg.num_samples,
        temperature=base_cfg.temperature,
        top_k=base_cfg.top_k,
        top_p=base_cfg.top_p,
    )
    ws_tokenizer = ws_cfg.create_tokenizer()

    ds_tr = ChronosWindowDataset(X_tr, y_tr, ws_tokenizer, prediction_length=1)
    ds_va = ChronosWindowDataset(X_va, y_va, ws_tokenizer, prediction_length=1)

    # Fresh base model for this window, wrapped with LoRA on the listed modules.
    base = AutoModelForSeq2SeqLM.from_pretrained(base_model_id)
    base = bind_strip_num_items(base)
    lora_cfg = build_lora_config(r=cfg["r"], alpha=cfg["alpha"], dropout=cfg["dropout"], target_modules=["q","k","v","o","wi","wo"])
    peft_model = get_peft_model(base, lora_cfg)
    try:
        peft_model.print_trainable_parameters()
    except Exception:
        pass
    # Counts parameter *tensors* with requires_grad=True (a sum of booleans),
    # not individual weights; it is only used as a "nothing is trainable" check.
    num_trainable = sum(p.requires_grad for p in peft_model.parameters())
    if num_trainable == 0:
        # Retry with the gated feed-forward module names (wi_0 / wi_1).
        # NOTE(review): `base` has already been passed to get_peft_model once;
        # confirm that wrapping it a second time behaves as intended.
        fallback_targets = ["q", "k", "v", "o", "wi_0", "wi_1", "wo"]
        lora_cfg_fb = build_lora_config(r=cfg["r"], alpha=cfg["alpha"], dropout=cfg["dropout"], target_modules=fallback_targets)
        peft_model = get_peft_model(base, lora_cfg_fb)
        try:
            peft_model.print_trainable_parameters()
        except Exception:
            pass
    # Best-effort setup steps: failures are deliberately ignored.
    try:
        peft_model.enable_input_require_grads()
    except Exception:
        pass
    peft_model = maybe_torch_compile(peft_model)
    try:
        peft_model.config.use_cache = False
    except Exception:
        pass

    out_dir_ws = os.path.join(save_models_dir, f"chronos_t5_large_lora_w{ws}")
    os.makedirs(out_dir_ws, exist_ok=True)

    trainer_ws = create_trainer(
        model=peft_model,
        train_ds=ds_tr,
        val_ds=ds_va,
        output_dir=out_dir_ws,
        per_device_train_batch_size=cfg["train_bs"],
        per_device_eval_batch_size=cfg["eval_bs"],
        learning_rate=cfg["lr"],
        weight_decay=cfg["wd"],
        num_train_epochs=cfg["epochs"],
        patience=cfg["patience"],
        logging_steps=200,
        eval_strategy="epoch",
        save_strategy="epoch",
        dataloader_num_workers=0,
        data_collator=pad_collator,
    )

    train_output = trainer_ws.train()

    # Persist the adapter; the backtest cell later loads it from this directory.
    adapter_dir = os.path.join(out_dir_ws, "chronos_t5_lora_adapter"); os.makedirs(adapter_dir, exist_ok=True)
    peft_model.save_pretrained(adapter_dir)

    if DO_TEST_EVAL:
        # Reload base + saved adapter for inference and swap it into the shared
        # pipeline. This mutates the global `pipeline` object, so it keeps this
        # window's adapter after the block finishes.
        base_infer = AutoModelForSeq2SeqLM.from_pretrained(base_model_id)
        peft_infer = PeftModel.from_pretrained(base_infer, adapter_dir).to(device)
        peft_infer = maybe_torch_compile(peft_infer)
        peft_infer.eval()
        try:
            peft_infer.config.use_cache = True
        except Exception:
            pass
        pipeline.model.model = peft_infer

        @torch.no_grad()
        def batch_predict_with_pipeline(X: np.ndarray, pipeline, batch_size: int = 256, num_samples: int = 10, prediction_length: int = 1) -> np.ndarray:
            """Predict one scalar per row of X by averaging the pipeline's samples.

            X is processed in chunks of `batch_size`; each row is converted to a
            float32 tensor and used as a context for `pipeline.predict`.
            """
            preds: List[float] = []
            for i in tqdm(range(0, len(X), batch_size), desc="Batch Inference"):
                ctx_list = [torch.from_numpy(seq.astype(np.float32)) for seq in X[i:i + batch_size]]
                fr = pipeline.predict(context=ctx_list, prediction_length=prediction_length, num_samples=num_samples)
                # Tensor output is averaged over dims 1 and 2
                # (presumably samples and horizon - TODO confirm);
                # any other output is averaged element by element.
                means = fr.mean(dim=(1, 2)).cpu().numpy() if isinstance(fr, torch.Tensor) else np.array([np.array(f).mean() for f in fr])
                preds.extend(means.tolist())
            return np.array(preds, dtype=np.float32)

        y_pred = batch_predict_with_pipeline(X_test, pipeline, batch_size=cfg["eval_bs"], num_samples=10, prediction_length=1).reshape(-1)
        permnos = meta_test["PERMNO"].values if "PERMNO" in meta_test.columns else None
        # `k` passed to regression_metrics is the second dimension of X_test.
        k_features = X_test.shape[1]
        metrics = regression_metrics(y_true=y_test.reshape(-1), y_pred=y_pred.reshape(-1), k=k_features, meta=meta_test, permnos=permnos)

        # Missing PERMNO / DATE columns fall back to NaN-filled series.
        pred_df = pd.DataFrame({
            "PERMNO": meta_test.get("PERMNO", pd.Series([np.nan] * len(y_pred))),
            "DATE": meta_test.get("DATE", meta_test.get("date", pd.Series([np.nan] * len(y_pred)))),
            "y_true": y_test.reshape(-1),
            "y_pred": y_pred,
        })
        csv_path = os.path.join(save_preds_dir, f"chronos_t5_large_lora_w{ws}.csv")
        pred_df.to_csv(csv_path, index=False)

        results_summary.append({"Window": ws, **metrics, "pred_path": csv_path, "adapter_dir": adapter_dir})

metrics_csv = os.path.join(save_results_dir, "chronos_t5_large_lora_per_window_metrics.csv")
# `results_summary` is only filled when test evaluation ran. Writing it while
# empty would replace any metrics file from an earlier run with an empty CSV,
# so the write is skipped in that case.
if results_summary:
    pd.DataFrame(results_summary).to_csv(metrics_csv, index=False)
else:
    print(f"[Skip] No per-window test metrics were collected; leaving {metrics_csv} untouched.")

Using per-window empirical hyperparameters.
Using Colab-adaptive batch sizes:
window=5: train_bs=1024, eval_bs=1024
window=21: train_bs=1024, eval_bs=1024
window=252: train_bs=256, eval_bs=512
window=512: train_bs=128, eval_bs=256

===== Training LoRA for window=5 =====
trainable params: 8,650,752 || all params: 717,614,080 || trainable%: 1.2055
Start fine-tuning (single window)...


Epoch,Training Loss,Validation Loss
1,No log,3.391183
2,4.659300,3.386115
3,3.380100,3.385463
4,3.375300,3.385317
5,3.375300,3.386373


[Saved] LoRA adapter: /content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/models/chronos_t5_large_lora_w5/chronos_t5_lora_adapter

===== Training LoRA for window=21 =====
trainable params: 8,650,752 || all params: 717,614,080 || trainable%: 1.2055
Start fine-tuning (single window)...


Epoch,Training Loss,Validation Loss
1,No log,3.326613
2,4.721500,3.32128
3,3.315400,3.32062
4,3.312900,3.320474
5,3.312900,3.321683


[Saved] LoRA adapter: /content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/models/chronos_t5_large_lora_w21/chronos_t5_lora_adapter

===== Training LoRA for window=252 =====
trainable params: 17,301,504 || all params: 726,264,832 || trainable%: 2.3823
Start fine-tuning (single window)...


Epoch,Training Loss,Validation Loss
1,3.3453,3.309152
2,3.3124,3.30616
3,3.2935,3.305461
4,3.2899,3.304948
5,3.2836,3.304564


[Saved] LoRA adapter: /content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/models/chronos_t5_large_lora_w252/chronos_t5_lora_adapter

===== Training LoRA for window=512 =====
trainable params: 17,301,504 || all params: 726,264,832 || trainable%: 2.3823
Start fine-tuning (single window)...


Epoch,Training Loss,Validation Loss
1,3.2548,3.29371
2,3.1858,3.289315
3,3.2629,3.287551
4,3.233,3.28632
5,3.1393,3.286705


[Saved] LoRA adapter: /content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/models/chronos_t5_large_lora_w512/chronos_t5_lora_adapter
[Save] Metrics saved to: /content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/chronos_t5_large_lora_per_window_metrics.csv


In [5]:
def run_chronos_portfolio_backtest(start_year=2016, end_year=2024, window_sizes=None, model_names=None,
                                   npz_path="/content/drive/MyDrive/ERP Data/all_window_datasets_unscaled.npz"):
    """
    Portfolio simulation (daily prediction, next-day rebalance):
        1. For each window, load that window's fine-tuned LoRA adapter into the
           shared Chronos T5 Large pipeline; if loading fails, fall back to the
           model the pipeline held when this function was entered
        2. Daily prediction to daily signal
        3. Daily portfolio construction (T+1 rebalance, strict permno alignment)
        4. Separate summary metrics and time series data

    Args:
        start_year: First signal year to process.
        end_year: Last signal year to process (inclusive); years after 2024 are
            never processed because the loop is capped at 2025 (exclusive).
        window_sizes: Look-back windows to backtest; defaults to [5, 21, 252, 512].
        model_names: Labels written into the outputs; defaults to
            ["chronos large"]. The same pipeline is used for every label.
        npz_path: Dataset file handed to `load_datasets`.

    Returns:
        (summary_df, daily_df, backtester): per-portfolio summary metrics, the
        daily return/turnover series, and the PortfolioBacktester instance.

    Side effects:
        Replaces `pipeline.model.model` on the notebook-global `pipeline`, and
        writes CSV files (metrics, summaries, daily series, predictions) under
        the chronos_results directory. Relies on notebook globals such as
        `pipeline`, `device`, `TC_GRID` and `TC_TAG`.
    """
    if window_sizes is None:
        window_sizes = [5, 21, 252, 512]
    if model_names is None:
        model_names = ["chronos large"]

    print("Starting Daily Rebalance Portfolio Backtesting Simulation")

    backtester = PortfolioBacktester()
    datasets = load_datasets(npz_path)

    # Model inside the pipeline at entry. It is restored whenever an adapter
    # fails to load, so a failed window cannot silently reuse the adapter that
    # was loaded for the previous window.
    fallback_inner_model = getattr(getattr(pipeline, "model", None), "model", None)

    summary_results = []
    daily_series_data = []
    pred_rows = []

    WEIGHT_SCHEMES = ["VW", "EW"]

    for window in window_sizes:
        print(f"Processing window size: {window}")

        try:
            from transformers import AutoModelForSeq2SeqLM
            adapter_dir = f"/content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/models/chronos_t5_large_lora_w{window}/chronos_t5_lora_adapter"
            base_infer = AutoModelForSeq2SeqLM.from_pretrained("amazon/chronos-t5-large")
            peft_infer = PeftModel.from_pretrained(base_infer, adapter_dir).to(device).eval()
            try:
                peft_infer.config.use_cache = True
            except Exception:
                pass
            pipeline.model.model = peft_infer
            print(f"[INFO] Loaded LoRA adapter for window={window} from: {adapter_dir}")
            print(f"[INFO] Backtest inference model: LoRA (window={window})")
        except Exception as e:
            # Undo any adapter left over from an earlier window so the log
            # below matches the model that is actually used.
            if fallback_inner_model is not None:
                pipeline.model.model = fallback_inner_model
            print(f"[WARN] Could not load LoRA adapter for window={window}: {e}. Using base model.")
            print(f"[INFO] Backtest inference model: BASE (window={window})")

        X_test = datasets[f"X_test_{window}"]
        y_test = datasets[f"y_test_{window}"]
        meta_test_dict = datasets[f"meta_test_{window}"].item()
        meta_test = pd.DataFrame.from_dict(meta_test_dict)

        permnos_test = meta_test["PERMNO"].values
        meta_test["signal_date"]  = pd.to_datetime(meta_test["date"])
        meta_test["ret_date"]     = pd.to_datetime(meta_test["ret_date"])
        # Equal caps (all ones) are used when the metadata has no MKTCAP column.
        market_caps = meta_test.get("MKTCAP", np.ones(len(permnos_test)))

        meta_test['date'] = pd.to_datetime(meta_test["date"])
        dates_test = meta_test['signal_date']

        for model_name in model_names:
            for scheme in WEIGHT_SCHEMES:
                # NOTE(review): predictions are recomputed for every weighting
                # scheme; if the pipeline samples stochastically, VW and EW may
                # be built from different predictions - confirm this is intended.
                all_y_true   = []
                all_y_pred   = []
                all_permnos  = []
                all_meta     = []
                print(f"  Model: {model_name}, Scheme: {scheme}")

                portfolio_daily_data = {
                    'long_only': {'returns': [], 'turnovers': [], 'dates': []},
                    'short_only': {'returns': [], 'turnovers': [], 'dates': []},
                    'long_short': {'returns': [], 'turnovers': [], 'dates': []}
                }

                prev_portfolio_data = {'long_only': None, 'short_only': None, 'long_short': None}

                # signal_date -> per-permno signals waiting to be traded on the
                # following business day; kept across years.
                signals_buf = {}

                for year in range(start_year, min(end_year + 1, 2025)):
                    print(f"  Processing year: {year}")

                    year_mask = (dates_test.dt.year == year)
                    if not np.any(year_mask):
                        continue

                    X_year = X_test[year_mask]
                    y_year = y_test[year_mask]
                    permnos_year = permnos_test[year_mask]
                    market_caps_year = market_caps[year_mask]
                    dates_year = dates_test[year_mask]
                    ret_dates_year = meta_test.loc[year_mask, 'ret_date'].values

                    batch_size = get_batch_size(window)
                    predictions_year = chronos_rolling_prediction(
                        pipeline=pipeline,
                        X_data=X_year,
                        batch_size=batch_size,
                        prediction_length=1
                    )

                    # Despite the name, this frame holds one full year of rows.
                    df_quarter = pd.DataFrame({
                        'signal_date': dates_year,
                        'ret_date': ret_dates_year,
                        'permno': permnos_year,
                        'market_cap': market_caps_year,
                        'actual_return': y_year,
                        'prediction': predictions_year
                    })

                    # Raw predictions are stored once per window/model (on the
                    # VW pass only) so they are not duplicated for EW.
                    if scheme == 'VW':
                        df_q_save = df_quarter[['signal_date','ret_date','permno',
                                                'actual_return','prediction','market_cap']].copy()
                        df_q_save.rename(columns={'actual_return':'y_true',
                                                  'prediction':'y_pred'}, inplace=True)
                        df_q_save['model']  = model_name
                        df_q_save['window'] = window
                        pred_rows.append(df_q_save)

                    all_y_true.append(df_quarter['actual_return'].values)
                    all_y_pred.append(df_quarter['prediction'].values)
                    all_permnos.append(df_quarter['permno'].values)
                    all_meta.append(meta_test.loc[year_mask, :])

                    for signal_date, sig_grp in df_quarter.groupby('signal_date'):

                        daily_signals = (
                            sig_grp.groupby('permno')['prediction'].mean()
                                  .to_frame('prediction')
                                  .join(sig_grp.groupby('permno')['market_cap'].mean())
                        )
                        signals_buf[signal_date] = daily_signals

                        # NOTE(review): BDay(1) skips weekends but not market
                        # holidays, so the first trading day after a holiday
                        # finds no buffered signals and is skipped - confirm
                        # this is acceptable.
                        prev_date = signal_date - pd.tseries.offsets.BDay(1)
                        if prev_date not in signals_buf:
                            continue

                        sigs = signals_buf.pop(prev_date)

                        # Realised returns are looked up within the current
                        # year's frame only, so the first trading day of a year
                        # finds none (its rows carry last year's signal_date)
                        # and is skipped.
                        ret_grp = df_quarter[df_quarter['ret_date'] == signal_date]
                        if len(ret_grp) == 0:
                            continue

                        # Stocks with a signal but no realised return get 0.
                        daily_actual_returns = (
                            ret_grp.groupby('permno')['actual_return']
                                   .mean()
                                   .reindex(sigs.index, fill_value=0)
                                   .values
                        )
                        daily_permnos = sigs.index.values

                        portfolios_data = backtester.create_portfolios_with_permno_tracking(
                            signals      = sigs['prediction'].values,
                            market_caps  = sigs['market_cap'].values,
                            permnos      = daily_permnos,
                            weight_scheme= scheme
                        )

                        for portfolio_type in ['long_only', 'short_only', 'long_short']:
                            portfolio_info = portfolios_data[portfolio_type]

                            portfolio_return, aligned_returns = backtester.calculate_aligned_portfolio_return(
                                portfolio_weights=portfolio_info['weights'],
                                portfolio_permnos=portfolio_info['permnos'],
                                actual_returns=daily_actual_returns,
                                actual_permnos=daily_permnos
                            )

                            if prev_portfolio_data[portfolio_type] is not None:
                                prev_w_ser = pd.Series(
                                    prev_portfolio_data[portfolio_type]['weights'],
                                    index=prev_portfolio_data[portfolio_type]['permnos']
                                )
                                cur_w_ser = pd.Series(
                                    portfolio_info['weights'],
                                    index=portfolio_info['permnos']
                                )

                                prev_r_ser = pd.Series(
                                    prev_portfolio_data[portfolio_type]['aligned_returns'],
                                    index=prev_portfolio_data[portfolio_type]['permnos']
                                )

                                # Previous weights/returns are aligned to the
                                # current holdings; names not held before get 0.
                                aligned_prev_w = prev_w_ser.reindex(cur_w_ser.index, fill_value=0).values
                                aligned_prev_r = prev_r_ser.reindex(cur_w_ser.index, fill_value=0).values
                                aligned_cur_w = cur_w_ser.values

                                turnover = backtester.calc_turnover(
                                    w_t  = aligned_prev_w,
                                    r_t  = aligned_prev_r,
                                    w_tp1= aligned_cur_w
                                )
                            else:
                                # First portfolio: the whole position is bought.
                                turnover = np.sum(np.abs(portfolio_info['weights']))

                            portfolio_daily_data[portfolio_type]['returns'].append(portfolio_return)
                            portfolio_daily_data[portfolio_type]['turnovers'].append(turnover)
                            portfolio_daily_data[portfolio_type]['dates'].append(signal_date)

                            prev_portfolio_data[portfolio_type] = {
                                'weights'        : portfolio_info['weights'],
                                'permnos'        : portfolio_info['permnos'],
                                'aligned_returns': aligned_returns
                            }

                for portfolio_type in ['long_only', 'short_only', 'long_short']:
                    portfolio_data = portfolio_daily_data[portfolio_type]

                    if len(portfolio_data['returns']) > 0:
                        metrics = backtester.calculate_metrics(
                            returns=portfolio_data['returns'],
                            turnover_series=portfolio_data['turnovers']
                        )

                        rets = np.array(portfolio_data['returns'])
                        tovs = np.array(portfolio_data['turnovers'])

                        # Net-of-cost statistics for every transaction-cost level.
                        for tc in TC_GRID:
                            tag = TC_TAG[tc]
                            adj = rets - tovs * tc

                            ann_ret = adj.mean() * 252
                            ann_vol = adj.std(ddof=1) * np.sqrt(252)
                            sharpe  = ann_ret / ann_vol if ann_vol > 0 else 0

                            cum_adj = np.cumprod(1 + adj)
                            mdd = ((cum_adj - np.maximum.accumulate(cum_adj)) /
                                   np.maximum.accumulate(cum_adj)).min()

                            metrics[f'{tag}_annual_return'] = ann_ret
                            metrics[f'{tag}_annual_vol']    = ann_vol
                            metrics[f'{tag}_sharpe']        = sharpe
                            metrics[f'{tag}_max_drawdown']  = mdd

                        summary_results.append({
                            'scheme': scheme,
                            'model': model_name,
                            'window': window,
                            'portfolio_type': portfolio_type,
                            **metrics
                        })

                        rets_arr = np.array(portfolio_data['returns'])
                        tovs_arr = np.array(portfolio_data['turnovers'])
                        # Cumulative series are cumulative *log* returns.
                        cum_no_tc = np.log1p(rets_arr).cumsum()

                        tc_ret_dict = {}
                        tc_cum_dict = {}
                        for tc in TC_GRID:
                            tag = TC_TAG[tc]
                            r = rets_arr - tovs_arr * tc
                            tc_ret_dict[tag] = r
                            tc_cum_dict[tag] = np.log1p(r).cumsum()

                        for i, date in enumerate(portfolio_data['dates']):
                            row = {
                                'scheme'        : scheme,
                                'model'         : model_name,
                                'window'        : window,
                                'portfolio_type': portfolio_type,
                                'date'          : str(date),
                                'return'        : rets_arr[i],
                                'turnover'      : tovs_arr[i],
                                'cumulative'    : cum_no_tc[i],
                            }
                            for tag in TC_TAG.values():
                                row[f'{tag}_return']     = tc_ret_dict[tag][i]
                                row[f'{tag}_cumulative'] = tc_cum_dict[tag][i]

                            daily_series_data.append(row)

                # Prediction-quality metrics do not depend on the weighting
                # scheme, so they are computed once, on the VW pass.
                if scheme == "VW" and len(all_y_true) > 0:
                    y_all    = np.concatenate(all_y_true)
                    yhat_all = np.concatenate(all_y_pred)
                    perm_all = np.concatenate(all_permnos)
                    meta_all = pd.concat(all_meta, ignore_index=True)

                    k = X_test.shape[1]

                    m1_metrics = overall_interval_metrics_method1(
                        y_all, yhat_all, k,
                        permnos_all=perm_all,
                        meta_all=meta_all
                    )

                    full_pred_df = pd.concat(pred_rows, ignore_index=True)
                    full_pred_df['signal_date'] = pd.to_datetime(full_pred_df['signal_date'], errors='coerce')

                    cur = full_pred_df.loc[
                        (full_pred_df['window'] == window) &
                        (full_pred_df['model'] == model_name),
                        ['signal_date', 'y_true', 'y_pred']
                    ].dropna()

                    # Rank IC is only reported with at least 30 prediction rows.
                    if len(cur) >= 30:
                        mean_ic, t_ic, pos_ic, _ = calc_ic_daily(cur, method='spearman')
                    else:
                        mean_ic, t_ic, pos_ic = np.nan, np.nan, np.nan

                    m1_metrics['RankIC_mean']  = mean_ic
                    m1_metrics['RankIC_t']     = t_ic
                    m1_metrics['RankIC_pos%']  = pos_ic

                    save_metrics(m1_metrics, name=model_name, window=window,
                        path="/content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/portfolio_metrics.csv")

    summary_df = pd.DataFrame(summary_results)
    daily_df = pd.DataFrame(daily_series_data) if daily_series_data else pd.DataFrame()

    tc_columns = [c for c in summary_df.columns if c.startswith('tc')]
    summary_df[tc_columns] = summary_df[tc_columns].fillna(0.0)

    def save_split_by_scheme(df, base_filename):
        """Write the VW and EW rows of `df` to two CSV files; returns both paths."""
        if df.empty:
            print(f"Warning: DataFrame is empty, skipping save for {base_filename}")
            return None, None

        vw_df = df[df['scheme'] == 'VW']
        ew_df = df[df['scheme'] == 'EW']

        out_dir = "/content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results"
        os.makedirs(out_dir, exist_ok=True)
        vw_filename = os.path.join(out_dir, f"{base_filename}_VW.csv")
        ew_filename = os.path.join(out_dir, f"{base_filename}_EW.csv")

        vw_df.to_csv(vw_filename, index=False)
        ew_df.to_csv(ew_filename, index=False)

        print(f"VW results saved to {vw_filename}")
        print(f"EW results saved to {ew_filename}")

        return vw_filename, ew_filename

    save_split_by_scheme(summary_df, "portfolio_results_daily_rebalance")

    if not daily_df.empty:
        save_split_by_scheme(daily_df, "portfolio_daily_series")

    if pred_rows:
        out_dir = "/content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results"
        os.makedirs(out_dir, exist_ok=True)
        pred_df = pd.concat(pred_rows, ignore_index=True)
        pred_df.to_csv(os.path.join(out_dir, "predictions_daily.csv"), index=False)
        print(f"Saved {len(pred_df)} prediction rows to predictions_daily.csv")

    print(f"Generated {len(summary_results)} portfolio summary records")
    print(f"Generated {len(daily_series_data)} daily series records")

    return summary_df, daily_df, backtester

print("Starting Chronos T5-Large Portfolio Backtesting...")

# Values already defined in the notebook namespace take precedence over these defaults.
START_YEAR = globals().get("START_YEAR", 2016)
END_YEAR   = globals().get("END_YEAR", 2024)
WINDOW_SIZES = globals().get("WINDOW_SIZES", [5, 21, 252, 512])

summary_results, daily_series, backtester = run_chronos_portfolio_backtest(
    start_year=START_YEAR,
    end_year=END_YEAR,
    window_sizes=WINDOW_SIZES,
    npz_path="/content/drive/MyDrive/ERP Data/all_window_datasets_unscaled.npz"
)

print("\n" + "="*60)
# The banner previously said "T5-BASE", but this notebook backtests chronos-t5-large.
print("CHRONOS T5-LARGE PORTFOLIO BACKTESTING RESULTS")
print("="*60)

print("\nSummary Results:")
# Rounding is best effort; fall back to the raw frame if it fails.
try:
    print(summary_results.round(4))
except Exception:
    print(summary_results)

Starting Chronos T5-Large Portfolio Backtesting...
Starting Daily Rebalance Portfolio Backtesting Simulation
Processing window size: 5
[INFO] Loaded LoRA adapter for window=5 from: /content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/models/chronos_t5_large_lora_w5/chronos_t5_lora_adapter
[INFO] Backtest inference model: LoRA (window=5)
  Model: chronos large, Scheme: VW
  Processing year: 2016
  Processing year: 2017
  Processing year: 2018
  Processing year: 2019
  Processing year: 2020
  Processing year: 2021
  Processing year: 2022
  Processing year: 2023
  Processing year: 2024


  .apply(lambda g: g['y_pred'].corr(g['y_true'], method=method))


Metrics saved for chronos large_w5 to /content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/portfolio_metrics.csv
  Model: chronos large, Scheme: EW
  Processing year: 2016
  Processing year: 2017
  Processing year: 2018
  Processing year: 2019
  Processing year: 2020
  Processing year: 2021
  Processing year: 2022
  Processing year: 2023
  Processing year: 2024
Processing window size: 21
[INFO] Loaded LoRA adapter for window=21 from: /content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/models/chronos_t5_large_lora_w21/chronos_t5_lora_adapter
[INFO] Backtest inference model: LoRA (window=21)
  Model: chronos large, Scheme: VW
  Processing year: 2016
  Processing year: 2017
  Processing year: 2018
  Processing year: 2019
  Processing year: 2020
  Processing year: 2021
  Processing year: 2022
  Processing year: 2023
  Processing year: 2024


  .apply(lambda g: g['y_pred'].corr(g['y_true'], method=method))


Metrics saved for chronos large_w21 to /content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/portfolio_metrics.csv
  Model: chronos large, Scheme: EW
  Processing year: 2016
  Processing year: 2017
  Processing year: 2018
  Processing year: 2019
  Processing year: 2020
  Processing year: 2021
  Processing year: 2022
  Processing year: 2023
  Processing year: 2024
Processing window size: 252
[INFO] Loaded LoRA adapter for window=252 from: /content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/models/chronos_t5_large_lora_w252/chronos_t5_lora_adapter
[INFO] Backtest inference model: LoRA (window=252)
  Model: chronos large, Scheme: VW
  Processing year: 2016
  Processing year: 2017
  Processing year: 2018
  Processing year: 2019
  Processing year: 2020
  Processing year: 2021
  Processing year: 2022
  Processing year: 2023
  Processing year: 2024


  .apply(lambda g: g['y_pred'].corr(g['y_true'], method=method))


Metrics saved for chronos large_w252 to /content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/portfolio_metrics.csv
  Model: chronos large, Scheme: EW
  Processing year: 2016
  Processing year: 2017
  Processing year: 2018
  Processing year: 2019
  Processing year: 2020
  Processing year: 2021
  Processing year: 2022
  Processing year: 2023
  Processing year: 2024
Processing window size: 512
[INFO] Loaded LoRA adapter for window=512 from: /content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/models/chronos_t5_large_lora_w512/chronos_t5_lora_adapter
[INFO] Backtest inference model: LoRA (window=512)
  Model: chronos large, Scheme: VW
  Processing year: 2016
  Processing year: 2017
  Processing year: 2018
  Processing year: 2019
  Processing year: 2020
  Processing year: 2021
  Processing year: 2022
  Processing year: 2023
  Processing year: 2024


  .apply(lambda g: g['y_pred'].corr(g['y_true'], method=method))


Metrics saved for chronos large_w512 to /content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/portfolio_metrics.csv
  Model: chronos large, Scheme: EW
  Processing year: 2016
  Processing year: 2017
  Processing year: 2018
  Processing year: 2019
  Processing year: 2020
  Processing year: 2021
  Processing year: 2022
  Processing year: 2023
  Processing year: 2024
VW results saved to /content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/portfolio_results_daily_rebalance_VW.csv
EW results saved to /content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/portfolio_results_daily_rebalance_EW.csv
VW results saved to /content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/portfolio_daily_series_VW.csv
EW results saved to /content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/portfolio_daily_series_EW.csv
Saved 443400 prediction rows to pre

In [6]:

# ---------- Main function for the 6-factor regression (mktrf, smb, hml, rmw, cma + umd) ----------
def run_factor_regression(port_ret, factors, use_excess=True):
    """
    Regress a portfolio return series on six factor columns and summarise the fit.

    Parameters
    ----------
    port_ret : pandas Series
        Portfolio returns. It must contribute exactly one column, because the
        joined frame is relabelled as ``['ret'] + factor columns``.
    factors : pandas DataFrame
        Must contain the columns ``mktrf, smb, hml, rmw, cma, umd`` (and ``rf``
        when ``use_excess`` is False), indexed compatibly with ``port_ret``.
    use_excess : bool
        True  -> ``port_ret`` is used as the dependent variable unchanged
                 (i.e. it is treated as already being an excess return).
        False -> the ``rf`` column is subtracted from ``port_ret`` first.

    Returns
    -------
    dict
        ``N_obs``, daily/annualised alpha (x252), alpha t-stat, daily and
        annualised (x sqrt(252)) information ratio, ``R2_zero`` (as computed by
        the notebook's ``r2_zero`` helper on actual vs fitted values) and, for
        each factor, ``beta_<FAC>`` / ``t_<FAC>``.
    """
    # Keep only dates present in both inputs and drop rows with any missing value.
    merged = pd.concat([port_ret, factors], axis=1, join='inner').dropna()
    merged.columns = ['ret', *factors.columns]

    target = merged['ret'].values
    if not use_excess:
        target = target - merged['rf'].values

    # Design matrix: constant first, then the six factors in a fixed order.
    factor_cols = ['mktrf','smb','hml','rmw','cma','umd']
    design = sm.add_constant(merged[factor_cols].values)
    fit = sm.OLS(target, design).fit()

    intercept = fit.params[0]                       # daily alpha
    daily_ir = intercept / fit.resid.std(ddof=1)    # alpha per unit of residual risk
    fitted = np.asarray(fit.fittedvalues)

    summary = {
        'N_obs'            : len(target),
        'alpha_daily'      : intercept,
        'alpha_annual'     : intercept*252,
        't_alpha'          : fit.tvalues[0],
        'IR_daily'         : daily_ir,
        'IR_annual'        : daily_ir * np.sqrt(252),
        'R2_zero'          : r2_zero(target, fitted),
    }

    # Position 0 is the constant, so factor coefficients start at index 1.
    for pos, label in enumerate(('MKT','SMB','HML','RMW','CMA','UMD'), start=1):
        summary[f'beta_{label}'] = fit.params[pos]
        summary[f't_{label}']    = fit.tvalues[pos]

    return summary

# ---------- 3. Batch run (EW/VW, three portfolio types) ----------
def batch_factor_analysis(
    daily_df: pd.DataFrame,
    factors_path: str,
    scheme: str,
    tc_levels=(0, 5, 10, 20, 40),
    portfolio_types=('long_only','short_only','long_short'),
    model_filter=None,
    window_filter=None,
    gross_only=False,            # If True, only calculate tc=0
    out_dir='/content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/factor_IR_results',
):
    """
    Run the factor regression for every (model, window, portfolio_type) group
    of one weighting scheme and write the results to a CSV.

    Parameters
    ----------
    daily_df : DataFrame with at least the columns ``scheme``, ``model``,
        ``window``, ``portfolio_type``, ``date`` and the return columns
        (``return`` for tc=0, ``tc<bps>_return`` otherwise).
    factors_path : CSV with a ``date`` column plus the factor columns.
    scheme : value of ``daily_df['scheme']`` to analyse (e.g. 'VW' / 'EW').
    tc_levels : transaction-cost levels (bps) to evaluate when not gross_only.
    portfolio_types : portfolio types to include; ``None`` keeps all of them.
    model_filter, window_filter : optional iterables restricting models/windows.
    gross_only : True -> only tc=0; False -> every level in ``tc_levels``.
    out_dir : directory for ``5_factor_analysis_<scheme>_<gross|net>.csv``.

    Returns
    -------
    DataFrame with one row per (model, window, portfolio_type, tc) combination.
    It is empty (but with all expected columns) when nothing matched.
    """
    import os
    os.makedirs(out_dir, exist_ok=True)

    fac = (pd.read_csv(factors_path, parse_dates=['date'])
             .set_index('date')
             .sort_index())

    sub = daily_df[daily_df['scheme'] == scheme].copy()
    if model_filter is not None:
        sub = sub[sub['model'].isin(model_filter)]
    if window_filter is not None:
        sub = sub[sub['window'].isin(window_filter)]
    # Honour the portfolio_types argument (it used to be accepted but ignored).
    if portfolio_types is not None:
        sub = sub[sub['portfolio_type'].isin(portfolio_types)]

    tc_iter = (0,) if gross_only else tc_levels
    results = []

    for (model, win, ptype), g in sub.groupby(['model','window','portfolio_type']):
        # Parse the dates BEFORE sorting and index by the parsed column itself.
        # Passing a separate (unsorted) Series to set_index attaches its values
        # positionally, which would pair returns with the wrong dates whenever
        # sort_values changes the row order.
        g = (g.assign(date=pd.to_datetime(g['date']))
               .sort_values('date')
               .set_index('date', drop=False))

        for tc in tc_iter:
            col = 'return' if tc == 0 else f'tc{tc}_return'
            if col not in g.columns:
                continue
            port_ret = g[col]
            stats = run_factor_regression(port_ret, fac, use_excess=True)
            stats.update({
                'scheme'        : scheme,
                'model'         : model,
                'window'        : win,
                'portfolio_type': ptype,
                'tc_bps'        : tc,
            })
            results.append(stats)

    out_cols = [
        'scheme','model','window','portfolio_type','tc_bps','N_obs',
        'alpha_daily','alpha_annual','t_alpha',
        'IR_daily','IR_annual','R2_zero',
        'beta_MKT','t_MKT','beta_SMB','t_SMB',
        'beta_HML','t_HML','beta_RMW','t_RMW',
        'beta_CMA','t_CMA','beta_UMD','t_UMD'
    ]
    # Building with columns= keeps the header even when `results` is empty
    # (column selection on an empty frame would raise KeyError).
    df_out = pd.DataFrame(results, columns=out_cols)

    tag = 'gross' if gross_only else 'net'
    fname = f'5_factor_analysis_{scheme}_{tag}.csv'
    df_out.to_csv(os.path.join(out_dir, fname), index=False)
    return df_out



def run_all_factor_tests(vw_csv="/content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/portfolio_daily_series_VW.csv",
                         ew_csv="/content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/portfolio_daily_series_EW.csv",
                         factor_csv="/content/drive/MyDrive/ERP Data/5_Factors_Plus_Momentum.csv",
                         save_dir="/content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results",
                         y_is_excess=True,
                         hac_lags=5,
                         save_txt=True):
    """
    Run the factor analysis for the VW and EW daily series, gross and net.

    Parameters
    ----------
    vw_csv, ew_csv : CSV files with the value-/equal-weighted daily series.
    factor_csv : CSV with the factor returns.
    save_dir : results root; the CSVs are written to
        ``<save_dir>/factor_IR_results`` (for the default ``save_dir`` this is
        the same directory ``batch_factor_analysis`` uses by default).
    y_is_excess, hac_lags, save_txt : accepted for backward compatibility but
        currently NOT used -- ``batch_factor_analysis`` always regresses with
        ``use_excess=True`` and plain OLS statistics.

    Returns
    -------
    tuple ``(vw_gross, vw_net, ew_gross, ew_net)`` of result DataFrames.
    """
    # Previously save_dir was ignored; forward it so a custom location is honoured.
    out_dir = os.path.join(save_dir, "factor_IR_results")

    frames = []
    for scheme, csv_path in (("VW", vw_csv), ("EW", ew_csv)):
        daily = pd.read_csv(csv_path)
        # Gross first (tc=0 only), then net (all transaction-cost levels).
        for gross_only in (True, False):
            frames.append(
                batch_factor_analysis(
                    daily, factor_csv, scheme=scheme,
                    gross_only=gross_only, out_dir=out_dir)
            )

    vw_gross, vw_net, ew_gross, ew_net = frames
    return vw_gross, vw_net, ew_gross, ew_net


# Run the factor regressions for both schemes (gross and net) with default paths.
vw_gross, vw_net, ew_gross, ew_net = run_all_factor_tests()

# Input files: a CSV providing `date` and `rf` columns, and the VW / EW daily series.
rf_file = "/content/drive/MyDrive/ERP Data/CRSP_2016_2024_top50_with_exret.csv"
vw_file = "/content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/portfolio_daily_series_VW.csv"
ew_file = "/content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/portfolio_daily_series_EW.csv"

# Load only the two needed columns and parse the dates.
rf_df = pd.read_csv(rf_file, usecols=["date", "rf"]).assign(
    date=lambda frame: pd.to_datetime(frame["date"])
)
# date -> rf lookup; if a date occurs on several rows, the last row wins.
rf_dict = {day: rate for day, rate in zip(rf_df["date"], rf_df["rf"])}


def adjust_returns_with_rf_grouped(file_path, output_path, rf_lookup=None):
    """
    Add the risk-free rate to every return column of a daily portfolio series,
    recompute cumulative log returns per group, and write the result to CSV.

    Parameters
    ----------
    file_path : input CSV with columns ``date``, ``scheme``, ``model``,
        ``window``, ``portfolio_type`` and one or more columns whose name
        contains "return" (columns containing "cumul" are not treated as returns).
    output_path : destination CSV.
    rf_lookup : optional mapping ``Timestamp -> rf``. Defaults to the
        module-level ``rf_dict``. Dates missing from the mapping get rf = 0.

    Notes
    -----
    * For each return column ``X`` the column ``X.replace("return", "cumulative")``
      is (re)written as the running sum of ``log1p`` of the adjusted return,
      computed separately for every (scheme, model, window, portfolio_type) group
      in date order.
    * Rows whose ``portfolio_type`` is not one of long_only / short_only /
      long_short become NaN categories and are therefore not part of any group.
    """
    if rf_lookup is None:
        rf_lookup = rf_dict

    df = pd.read_csv(file_path)
    df["date"] = pd.to_datetime(df["date"], format='mixed', dayfirst=True)

    return_cols = [col for col in df.columns if "return" in col and "cumul" not in col]

    # Ordered categorical so the final sort lists portfolio types in this order.
    order = ["long_only", "short_only", "long_short"]
    df["portfolio_type"] = pd.Categorical(df["portfolio_type"], categories=order, ordered=True)

    group_keys = ["scheme", "model", "window", "portfolio_type"]
    df_list = []
    # observed=True: iterate only over combinations that actually occur, and
    # make the choice explicit instead of relying on the pandas default.
    for _, group in df.groupby(group_keys, sort=False, observed=True):
        group = group.sort_values("date").copy()
        # Look the rate up once per group (not once per row and column).
        rf_on_date = group["date"].map(lambda day: rf_lookup.get(day, 0))
        for col in return_cols:
            group[col] = group[col] + rf_on_date

            cum_col = col.replace("return", "cumulative")
            group[cum_col] = np.log1p(group[col]).cumsum()
        df_list.append(group)

    # pd.concat([]) raises; fall back to an empty frame with the input columns.
    df_new = pd.concat(df_list) if df_list else df.iloc[0:0]
    df_new = df_new.sort_values(["scheme", "model", "window", "portfolio_type", "date"])
    df_new.to_csv(output_path, index=False)

# Write "*_with_rf.csv" copies of the VW / EW daily series with the risk-free rate added back.
adjust_returns_with_rf_grouped(vw_file, "/content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/portfolio_daily_series_VW_with_rf.csv")
adjust_returns_with_rf_grouped(ew_file, "/content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/portfolio_daily_series_EW_with_rf.csv")

# Benchmark: cumulative log return of the S&P 500 index (^GSPC).
# auto_adjust is passed explicitly so the result does not depend on the
# installed yfinance version's default.
# NOTE(review): the recorded cell output shows a warning on this call --
# presumably the auto_adjust default change; confirm.
sp500 = yf.download("^GSPC", start="2016-01-01", end="2024-12-31", auto_adjust=True)
# Some yfinance versions return (field, ticker) MultiIndex columns even for a
# single ticker; keep only the field level so plain column names work below.
if isinstance(sp500.columns, pd.MultiIndex):
    sp500.columns = sp500.columns.get_level_values(0)
price_col = "Adj Close" if "Adj Close" in sp500.columns else "Close"
sp500["daily_return"] = sp500[price_col].pct_change().fillna(0)  # first day -> 0
sp500["cum_return"] = np.cumsum(np.log1p(sp500["daily_return"]))
sp500 = sp500[["cum_return"]]
sp500.index = pd.to_datetime(sp500.index)

# Inputs for the comparison figures: (scheme, rf-adjusted daily series CSV).
files = [
    ("VW", "/content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/portfolio_daily_series_VW_with_rf.csv"),
    ("EW", "/content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/portfolio_daily_series_EW_with_rf.csv")
]
tc_levels = [0, 5, 10, 20, 40]          # transaction-cost levels in bps (0 = gross)
windows = [5, 21, 252, 512]             # window sizes to plot
strategies = ["long_only", "short_only", "long_short"]  # one subplot per strategy

output_dir = "/content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_figures"
os.makedirs(output_dir, exist_ok=True)

# (start, end, label) spans that are shaded on every subplot.
crisis_periods = [
    (datetime(2018, 6, 1), datetime(2019, 1, 1), "US-China Trade War"),
    (datetime(2020, 2, 1), datetime(2020, 7, 1), "COVID-19"),
    (datetime(2022, 2, 1), datetime(2022, 6, 1), "Russia-Ukraine War"),
    (datetime(2023, 1, 1), datetime(2023, 4, 1), "US Bank Crisis"),
]

def plot_comparison_styled(df, scheme, tc, window):
    """
    Save one figure comparing every model's cumulative log return with the
    S&P 500 benchmark, with one subplot per entry of the global ``strategies``.

    Parameters
    ----------
    df : daily series with columns ``model``, ``window``, ``portfolio_type``,
        ``date`` and the return column for ``tc``.
    scheme : label used in the subplot titles and the file name.
    tc : transaction-cost level in bps; 0 reads ``return``, otherwise
        ``tc<tc>_return``. Models/strategies without that column are skipped.
    window : value of ``df['window']`` to plot.

    Uses the module-level ``sp500``, ``strategies``, ``crisis_periods`` and
    ``output_dir``. The figure is written to
    ``<output_dir>/<scheme>_window<window>_TC<tc>.png``.

    Note: each model's curve is shifted upwards by ``idx * 0.02`` so that
    overlapping lines stay distinguishable; the plotted level of every model
    after the first is therefore not its exact cumulative return.
    """
    # One row per strategy instead of a hard-coded 3-row grid.
    fig, axes = plt.subplots(max(len(strategies), 1), 1, figsize=(15, 12), squeeze=False)
    try:
        model_names = df["model"].unique()
        colors = plt.cm.tab10(np.linspace(0, 1, len(model_names)))

        offset_step = 0.02
        ret_col = "return" if tc == 0 else f"tc{tc}_return"
        in_window = df[df["window"] == window]

        for ax, strat in zip(axes[:, 0], strategies):
            # ^GSPC is the price index (no dividends), so it is labelled as such.
            ax.plot(sp500.index, sp500["cum_return"],
                    color="black", lw=2.5, label="S&P500 (Price Index)", zorder=10)

            strat_rows = in_window[in_window["portfolio_type"] == strat]
            for idx, model_name in enumerate(model_names):
                sub = strat_rows[strat_rows["model"] == model_name].sort_values("date")
                if sub.empty or ret_col not in sub.columns:
                    continue

                log_cum = np.cumsum(np.log1p(sub[ret_col].values))

                y_shift = idx * offset_step
                ax.plot(sub["date"], log_cum + y_shift,
                        label=f"{model_name} ({strat.replace('_',' ').title()})",
                        lw=2, color=colors[idx], alpha=0.9)

            for start, end, label in crisis_periods:
                ax.axvspan(start, end, color='grey', alpha=0.3)
                # x in data coordinates, y as a fraction of the axes height, so
                # the label sits near the top whatever the y-range is (a
                # data-space position of ylim[1]*0.92 drifts, or leaves the
                # axes, when the upper limit is <= 0).
                ax.text(start + pd.Timedelta(days=10), 0.92, label,
                        fontsize=8, color='grey',
                        transform=ax.get_xaxis_transform())
            ax.xaxis.set_major_locator(mdates.YearLocator())
            ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
            ax.set_ylabel("Cumulative log return (start = 0)")
            ax.set_title(f"{scheme} | Window={window} | Strategy={strat} | TC={tc} bps")
            ax.grid(alpha=0.3)
            ax.tick_params(axis="x", labelrotation=30)
            ax.legend(bbox_to_anchor=(1.04, 1), loc='upper left', fontsize=8)

        fig.tight_layout()
        fname = f"{scheme}_window{window}_TC{tc}.png"
        fig.savefig(os.path.join(output_dir, fname), dpi=300, bbox_inches='tight')
    finally:
        # Always release the figure; this function is called in a long loop.
        plt.close(fig)


# Generate one figure per (scheme, transaction-cost level, window).
for scheme, file_path in files:
    df = pd.read_csv(file_path)
    df["date"] = pd.to_datetime(df["date"])
    for tc in tc_levels:
        for window in windows:
            plot_comparison_styled(df, scheme, tc, window)

print(f"All figures have been generated and saved to: {output_dir}/")

# Prediction-level R²_zero per (model, window). Keep a single row per key so
# the left merge below cannot multiply portfolio rows if the metrics file
# contains repeated entries (the most recent entry wins).
metrics_df = (
    pd.read_csv("/content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/portfolio_metrics.csv")[["Model", "Window", "R²_zero"]]
    .rename(columns={"Model": "model", "Window": "window"})
    .drop_duplicates(subset=["model", "window"], keep="last")
)

# Columns this step (re)creates. They are dropped first so that re-running the
# cell on an already-updated file does not produce R²_zero_x / R²_zero_y
# merge suffixes (which would make the "R²_zero" lookup fail).
derived_cols = ["R²_zero", "ΔSharpe", "Sharpe*", "baseline"]

for fname in ["/content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/portfolio_results_daily_rebalance_VW.csv", "/content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/portfolio_results_daily_rebalance_EW.csv"]:
    df = pd.read_csv(fname)
    df = df.drop(columns=[c for c in derived_cols if c in df.columns])

    df = df.merge(metrics_df, on=["model", "window"], how="left")

    delta_vals, star_vals, baseline_vals = [], [], []
    for r2_raw, ptype in zip(df["R²_zero"], df["portfolio_type"]):
        # A missing R² (no matching metrics row) is treated as 0.
        r2 = float(r2_raw) if not pd.isna(r2_raw) else 0.0
        if ptype == "long_only":
            # Long-only portfolios are benchmarked against SR_MKT_EX.
            d_sr, sr_star = delta_sharpe(r2, SR_MKT_EX)
            baseline_vals.append(f"SPX_excess ({SR_MKT_EX:.2f})")
        else:
            # Short-only and long-short portfolios use a zero baseline.
            d_sr, sr_star = delta_sharpe(r2, 0)
            baseline_vals.append("cash (0)")
        delta_vals.append(d_sr)
        star_vals.append(sr_star)

    # Assigning whole columns keeps the header and dtypes even for an empty file.
    df["ΔSharpe"] = delta_vals
    df["Sharpe*"] = star_vals
    df["baseline"] = baseline_vals

    df.to_csv(fname, index=False)

[Saved] 5_factor_analysis_VW_gross.csv 
[Saved] 5_factor_analysis_VW_net.csv 
[Saved] 5_factor_analysis_EW_gross.csv 
[Saved] 5_factor_analysis_EW_net.csv 


  for _, group in df.groupby(["scheme", "model", "window", "portfolio_type"], sort=False):


Finish: /content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/portfolio_daily_series_VW_with_rf.csv


  for _, group in df.groupby(["scheme", "model", "window", "portfolio_type"], sort=False):
  sp500 = yf.download("^GSPC", start="2016-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Finish: /content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/portfolio_daily_series_EW_with_rf.csv





All figures have been generated and saved to: /content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_figures/
[Update] ΔSharpe has been written to /content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/portfolio_results_daily_rebalance_VW.csv
[Update] ΔSharpe has been written to /content/drive/MyDrive/chronos_t5_large_project_portfolio(FineTuning)/chronos_results/portfolio_results_daily_rebalance_EW.csv
