In [None]:
# ========== Basic Libraries ==========
import numpy as np
import pandas as pd
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import warnings
import gc
from datetime import datetime
import matplotlib.dates as mdates
import joblib  # joblib for loading scaler
warnings.filterwarnings('ignore')

# ========== Evaluation Metrics ==========
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import TimeSeriesSplit
from sklearn.pipeline import Pipeline

# ========== Statistics ==========
from scipy.stats import f as f_dist
import yfinance as yf
import statsmodels.api as sm

# ========== Hyperparameter Tuning ==========
import optuna
optuna.logging.set_verbosity(optuna.logging.WARNING)

# ========== Visualization ==========
import matplotlib.pyplot as plt

# ========== Global Configuration ==========
np.random.seed(42)
torch.manual_seed(42)

# ========== MPS Acceleration Configuration ==========
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using MPS (Metal Performance Shaders)")
elif torch.cuda.is_available():
    device = torch.device("cuda")
    print("Using CUDA")
else:
    device = torch.device("cpu")
    print("Using CPU")

torch.backends.cudnn.benchmark = True
torch.set_float32_matmul_precision('medium')


Using MPS (Metal Performance Shaders)


In [None]:
# ========== Data Loading ==========
def load_datasets(npz_path="/Users/june/Documents/University of Manchester/Data Science/ERP/Project code/1_Data_Preprocessing/all_window_datasets_scaled.npz"):
    data = np.load(npz_path, allow_pickle=True) 
    datasets = {}
    for key in data.files:
        datasets[key] = data[key]
    return datasets

def prepare_sequences(X, y, lookback):
    """Prepare sequence data for N-BEATS training"""
    if len(X) < lookback:
        return np.array([]), np.array([])
    
    X_seq, y_seq = [], []
    
    for i in range(lookback, len(X)):
        X_seq.append(X[i])
        if y is not None:
            y_seq.append(y[i])
    
    if y is None:
        return np.array(X_seq), np.array([])
    return np.array(X_seq), np.array(y_seq)


In [None]:
# ========== Core Function Definitions ==========
def r2_zero(y_true, y_pred):
    """
    Compute zero-based R² (baseline is zero).
    y_true: true values (N,)
    y_pred: predicted values (N,)
    """
    rss = np.sum((y_true - y_pred)**2)  
    tss = np.sum(y_true**2)            
    return 1 - rss / tss

def calc_ic_daily(df, method='spearman'):
    """
    Calculate daily cross-sectional RankIC.
    df: must contain ['signal_date','y_true','y_pred']
    """
    ics = (df.groupby('signal_date')
             .apply(lambda g: g['y_pred'].corr(g['y_true'], method=method))
             .dropna())
    mean_ic = ics.mean()
    std_ic  = ics.std(ddof=1)
    t_ic    = mean_ic / (std_ic / np.sqrt(len(ics))) if std_ic > 0 else np.nan
    pos_ratio = (ics > 0).mean()
    return mean_ic, t_ic, pos_ratio, ics

def annual_sharpe(rets, freq=252):
    mu = float(np.mean(rets)) * freq
    sd = float(np.std(rets, ddof=1)) * np.sqrt(freq)
    return mu / sd if sd > 0 else 0

def delta_sharpe(r2_zero: float, sr_base: float):
    """
    If r2_zero <= 0 or r2_zero >= 1, ΔSharpe = 0, Sharpe* = sr_base.
    Otherwise, compute by the original formula.
    """
    if (r2_zero <= 0) or (r2_zero >= 1):
        return 0.0, sr_base
    sr_star = np.sqrt(sr_base ** 2 + r2_zero) / np.sqrt(1 - r2_zero)
    return sr_star - sr_base, sr_star

rf_file = "/Users/june/Documents/University of Manchester/Data Science/ERP/Project code/1_Data_Preprocessing/CRSP_2016_2024_top50_with_exret.csv"
rf_df = pd.read_csv(rf_file, usecols=["date", "rf"])
rf_df["date"] = pd.to_datetime(rf_df["date"])
rf_df = rf_df.drop_duplicates("date").set_index("date").sort_index()
rf_series = rf_df["rf"].astype(float)

px = yf.download("^GSPC", start="2016-01-01", end="2024-12-31")["Close"]
sp_ret = px.pct_change().dropna()
rf_align = rf_series.reindex(sp_ret.index).fillna(method="ffill")
sp_excess = sp_ret.values - rf_align.values

SR_MKT_EX = annual_sharpe(sp_excess)
print(f"[INFO] S&P500 Excess Sharpe (2016-24) = {SR_MKT_EX:.3f}")

def calc_directional_metrics(y_true, y_pred, permnos=None):
    """
    
    - Sample-level sign prediction
    - If grouped by stock, compute Overall, Up, Down for each stock and then average
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    if permnos is None:
        s_true = np.sign(y_true)
        s_pred = np.sign(y_pred)
        mask = s_true != 0
        s_true = s_true[mask]
        s_pred = s_pred[mask]

        overall_acc = np.mean(s_true == s_pred)

        up_mask = s_true > 0
        down_mask = s_true < 0
        up_acc = np.mean(s_true[up_mask] == s_pred[up_mask]) if np.any(up_mask) else 0
        down_acc = np.mean(s_true[down_mask] == s_pred[down_mask]) if np.any(down_mask) else 0

    else:
        df = pd.DataFrame({"permno": permnos, "yt": y_true, "yp": y_pred})
        overall_accs = []
        up_accs = []
        down_accs = []

        for _, g in df.groupby("permno"):
            s_true = np.sign(g["yt"].values)
            s_pred = np.sign(g["yp"].values)
            mask = s_true != 0
            s_true = s_true[mask]
            s_pred = s_pred[mask]
            if len(s_true) == 0:
                continue
            overall_accs.append(np.mean(s_true == s_pred))

            up_mask = s_true > 0
            down_mask = s_true < 0
            up_accs.append(np.mean(s_true[up_mask] == s_pred[up_mask]) if np.any(up_mask) else np.nan)
            down_accs.append(np.mean(s_true[down_mask] == s_pred[down_mask]) if np.any(down_mask) else np.nan)

        overall_acc = np.nanmean(overall_accs)
        up_acc = np.nanmean(up_accs)
        down_acc = np.nanmean(down_accs)

    return overall_acc, up_acc, down_acc

def regression_metrics(y_true, y_pred, k, meta=None, permnos=None):
    """
    Combined regression metrics:
    - Regression metrics
    - Pointwise directional accuracy
    - Market cap group metrics
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    n = len(y_true)

    r2 = r2_zero(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)

    dir_acc, up_acc, down_acc = calc_directional_metrics(y_true, y_pred, permnos)

    metrics = {
        "R²_zero": r2,
        "RMSE": rmse,
        "MAE": mae,
        "MSE": mse,
        "Directional Accuracy": dir_acc,
        "Up_Directional_Acc": up_acc,
        "Down_Directional_Acc": down_acc
    }

    if meta is not None and "MKTCAP_PERCENTILE" in meta:
        top_mask = meta["MKTCAP_PERCENTILE"] >= 0.75
        bottom_mask = meta["MKTCAP_PERCENTILE"] <= 0.25

        if np.any(top_mask):
            yt_top = y_true[top_mask]
            yp_top = y_pred[top_mask]
            perm_top = permnos[top_mask] if permnos is not None else None
            r2_top = r2_zero(yt_top, yp_top)
            rmse_top = np.sqrt(mean_squared_error(yt_top, yp_top))
            mae_top = mean_absolute_error(yt_top, yp_top)
            mse_top = mean_squared_error(yt_top, yp_top)
            dir_top, up_top, down_top = calc_directional_metrics(yt_top, yp_top, perm_top)
            metrics.update({
                "Top25_R2_zero": r2_top,
                "Top25_RMSE": rmse_top,
                "Top25_MAE": mae_top,
                "Top25_MSE": mse_top,
                "Top25_Dir_Acc": dir_top,
                "Top25_Up_Acc": up_top,
                "Top25_Down_Acc": down_top
            })

        if np.any(bottom_mask):
            yt_bot = y_true[bottom_mask]
            yp_bot = y_pred[bottom_mask]
            perm_bot = permnos[bottom_mask] if permnos is not None else None
            r2_bot = r2_zero(yt_bot, yp_bot)
            rmse_bot = np.sqrt(mean_squared_error(yt_bot, yp_bot))
            mae_bot = mean_absolute_error(yt_bot, yp_bot)
            mse_bot = mean_squared_error(yt_bot, yp_bot)
            dir_bot, up_bot, down_bot = calc_directional_metrics(yt_bot, yp_bot, perm_bot)
            metrics.update({
                "Bottom25_R2_zero": r2_bot,
                "Bottom25_RMSE": rmse_bot,
                "Bottom25_MAE": mae_bot,
                "Bottom25_MSE": mse_bot,
                "Bottom25_Dir_Acc": dir_bot,
                "Bottom25_Up_Acc": up_bot,
                "Bottom25_Down_Acc": down_bot
            })

    return metrics

def f_statistic(y_true, y_pred, k):
    """Return F statistic and corresponding p-value."""
    n   = len(y_true)
    rss = np.sum((y_true - y_pred) ** 2)
    tss = np.sum(y_true ** 2)
    r2  = 1 - rss / tss
    if (r2 <= 0) or (n <= k):
        return 0.0, 1.0
    F = (r2 / k) / ((1 - r2) / (n - k))
    p = f_dist.sf(F, k, n - k)
    return F, p

def overall_interval_metrics_method1(y_all, yhat_all, k, permnos_all=None, meta_all=None):
    """
    Method 1: Compute metrics for the entire interval at once (all samples from 2016-2024 concatenated).
    Returns a dict that can be passed to save_metrics().
    """
    base = regression_metrics(
        y_true=y_all, 
        y_pred=yhat_all, 
        k=k, 
        meta=meta_all, 
        permnos=permnos_all
    )
    F, p = f_statistic(y_all, yhat_all, k)
    base["F_stat"]     = F
    base["F_pvalue"]   = p
    base["N_obs"] = len(y_all)
    
    delta_cash, sr_star_cash = delta_sharpe(base["R²_zero"], sr_base=0)
    base["ΔSharpe_cash"]      = delta_cash
    base["Sharpe*_cash"]      = sr_star_cash

    delta_mkt , sr_star_mkt  = delta_sharpe(base["R²_zero"], sr_base=SR_MKT_EX)
    base["ΔSharpe_mkt"]       = delta_mkt
    base["Sharpe*_mkt"]       = sr_star_mkt
    
    return base

def sortino_ratio(rets, freq=252):
    """Compute Sortino Ratio."""
    downside = rets[rets < 0]
    if len(downside) == 0:
        return np.inf
    mu = rets.mean() * freq
    sigma = np.sqrt((downside ** 2).mean()) * np.sqrt(freq)
    return mu / sigma

def cvar(rets, alpha=0.95):
    """Compute CVaR."""
    q = np.quantile(rets, 1 - alpha)
    return rets[rets <= q].mean()

def save_predictions(model_name, window_size, y_true, y_pred, permnos, path="predictions/"):
    os.makedirs(path, exist_ok=True)
    
    df = pd.DataFrame({
        "PERMNO": permnos,
        "y_true": y_true,
        "y_pred": y_pred
    })

    filename = f"{model_name}_w{window_size}.csv"
    df.to_csv(os.path.join(path, filename), index=False)
    print(f"[Save] {filename}")

def save_metrics(metrics_dict, name, window, path="results.csv"):
    """Save evaluation metrics."""
    row = pd.DataFrame([metrics_dict])
    row.insert(0, "Model", name)
    row.insert(1, "Window", window)

    if os.path.exists(path):
        df = pd.read_csv(path)
        df = df[~((df["Model"] == name) & (df["Window"] == window))]
        df = pd.concat([df, row], ignore_index=True)
        df.to_csv(path, index=False)
        print(f"[Update] Metrics updated for {name} w={window}")
    else:
        row.to_csv(path, index=False)
        print(f"[Create] New metrics file created with {name} w={window}")

def get_quarter_periods(start_year=2015, end_year=2024):
    """Generate quarterly periods."""
    quarters = []
    for year in range(start_year, end_year + 1):
        for q in range(1, 5):
            quarters.append((year, q))
    return quarters

def load_y_scaler(window, scaler_dir="/Users/june/Documents/University of Manchester/Data Science/ERP/Project code/1_Data_Preprocessing/"):
    """
    Load the y-scaler for the given window.
    
    Args:
        window: window size (5, 21, 252, 512)
        scaler_dir: directory where scaler files are stored
    
    Returns:
        The loaded scaler object, or None if loading fails.
    """
    scaler_path = os.path.join(scaler_dir, f"scaler_y_window_{window}.pkl")
    try:
        scaler = joblib.load(scaler_path)
        print(f"[Load] Y scaler loaded for window {window}: {scaler_path}")
        return scaler
    except Exception as e:
        print(f"[Error] Failed to load Y scaler for window {window}: {e}")
        return None

def inverse_transform_predictions(predictions, scaler):
    """
    Inverse transform predictions using the provided scaler.
    
    Args:
        predictions: standardized predictions (numpy array)
        scaler: scaler object for inverse transformation
    
    Returns:
        Inverse transformed predictions
    """
    if scaler is None:
        print("[Warning] No scaler provided, returning original predictions")
        return predictions
    
    try:
        if predictions.ndim == 1:
            predictions_2d = predictions.reshape(-1, 1)
        else:
            predictions_2d = predictions
        
        inversed = scaler.inverse_transform(predictions_2d).flatten()
        print(f"[Info] Inverse transformed {len(predictions)} predictions")
        return inversed
    except Exception as e:
        print(f"[Error] Failed to inverse transform predictions: {e}")
        return predictions


[*********************100%***********************]  1 of 1 completed

[INFO] S&P500 Excess Sharpe (2016–24) = 0.652





In [None]:
# N-BEATS model architecture
class NBeatsBlock(nn.Module):
    def __init__(self, input_size, theta_size, basis_size, layers, layer_size):
        super().__init__()
        self.layers = nn.ModuleList([nn.Linear(input_size, layer_size)] + 
                                   [nn.Linear(layer_size, layer_size) for _ in range(layers-1)])
        self.basis_parameters = nn.Linear(layer_size, theta_size)
        self.input_size = input_size
        self.theta_size = theta_size
        self.basis_size = basis_size
        
    def forward(self, x):
        for layer in self.layers:
            x = torch.relu(layer(x))
        theta = self.basis_parameters(x)
        backcast = theta[:, :self.input_size]
        forecast = theta[:, self.input_size:self.input_size+1]  # predict 1 step
        return backcast, forecast

class NBeatsNet(nn.Module):
    def __init__(self, input_size, stacks=2, blocks_per_stack=2, layers=4, layer_size=128):
        super().__init__()
        self.input_size = input_size
        self.stacks = nn.ModuleList()
        for _ in range(stacks):
            stack = nn.ModuleList()
            for _ in range(blocks_per_stack):
                stack.append(NBeatsBlock(input_size, input_size + 1, input_size, layers, layer_size))
            self.stacks.append(stack)
    
    def forward(self, x):
        residual = x
        forecast = 0
        for stack in self.stacks:
            for block in stack:
                backcast, block_forecast = block(residual)
                residual = residual - backcast
                forecast = forecast + block_forecast
        return forecast

def train_step(model, criterion, optimizer, X_batch, y_batch):
    model.train()
    optimizer.zero_grad()
    predictions = model(X_batch).squeeze()
    loss = criterion(predictions, y_batch)
    loss.backward()
    optimizer.step()
    return loss.item()

if device.type == "cuda":
    train_step = torch.compile(train_step)


In [None]:
# ========== Hyperparameter Tuning ==========

TUNED_MODELS = ["NBEATS"]  # List of models for tuning

def tune_nbeats_with_optuna(X, y, window, n_trials=10):
    """N-BEATS hyperparameter tuning using TimeSeriesSplit"""
    if len(y) < 100:
        return {
            'stacks': 2, 
            'blocks_per_stack': 2, 
            'layers': 2, 
            'layer_size': 128, 
            'learning_rate': 0.001, 
            'batch_size': 32,
            'max_epochs': 25,
            'warm_start_epochs': 15
        }
    
    print(f"    [Hyperparameter Tuning] Running Optuna optimization for window={window}")
    print(f"    [Device Setting] Using CPU for hyperparameter tuning")
    
    tuning_device = torch.device("cpu")
    
    tscv = TimeSeriesSplit(n_splits=3)
    features_per_timestep = X.shape[1] // window
    
    def objective(trial):
        try:
            stacks = trial.suggest_int("stacks", 1, 3)
            blocks_per_stack = trial.suggest_int("blocks_per_stack", 1, 3)
            layers = 2
            layer_size = 128
            lr = trial.suggest_float("lr", 1e-5, 5e-4, log=True)
            batch_size = trial.suggest_categorical("batch_size", [32, 64])
            
            cv_scores = []
            for train_idx, val_idx in tscv.split(X):
                X_tr, X_val = X[train_idx], X[val_idx]
                y_tr, y_val = y[train_idx], y[val_idx]
                
                if len(X_tr) < 50 or len(X_val) < 10:
                    continue
                
                lookback = min(20, len(X_tr)//4)
                X_seq_tr, y_seq_tr = prepare_sequences(X_tr, y_tr, lookback)
                X_seq_val, y_seq_val = prepare_sequences(X_val, y_val, lookback)
                
                if len(X_seq_tr) == 0 or len(X_seq_val) == 0:
                    continue
                
                X_tensor_tr = torch.FloatTensor(X_seq_tr).to(tuning_device)
                y_tensor_tr = torch.FloatTensor(y_seq_tr).to(tuning_device)
                X_tensor_val = torch.FloatTensor(X_seq_val).to(tuning_device)
                y_tensor_val = torch.FloatTensor(y_seq_val).to(tuning_device)
                
                input_size = X_seq_tr.shape[-1]
                model = NBeatsNet(input_size, stacks, blocks_per_stack, layers, layer_size).to(tuning_device)
                criterion = nn.MSELoss()
                optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-5)
                
                dataset = TensorDataset(X_tensor_tr, y_tensor_tr)
                dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, pin_memory=False, num_workers=0)
                
                model.train()
                for epoch in range(5):
                    for X_batch, y_batch in dataloader:
                        train_step(model, criterion, optimizer, X_batch, y_batch)
                
                model.eval()
                with torch.no_grad():
                    val_pred = model(X_tensor_val).squeeze().cpu().numpy()
                    mse = mean_squared_error(y_seq_val, val_pred)
                    cv_scores.append(mse)
                
                del model, optimizer, criterion, X_tensor_tr, y_tensor_tr, X_tensor_val, y_tensor_val
                if tuning_device.type == 'cuda':
                    torch.cuda.empty_cache()
                elif tuning_device.type == 'mps':
                    torch.mps.empty_cache()
            
            return np.mean(cv_scores) if cv_scores else float('inf')
        except Exception as e:
            return float('inf')
    
    study = optuna.create_study(direction="minimize", sampler=optuna.samplers.TPESampler(seed=42))
    study.optimize(objective, n_trials=n_trials)
    
    print(f"    [Tuning Completed] Switching back to {device} for model training")
    
    if study.best_trial is None:
        return {
            'stacks': 2, 
            'blocks_per_stack': 2, 
            'layers':2, 
            'layer_size': 128, 
            'learning_rate': 0.001, 
            'batch_size': 32,
            'max_epochs': 25,
            'warm_start_epochs': 15
        }
    
    best_params = study.best_params.copy()
    best_params['learning_rate'] = best_params.pop('lr')
    best_params['max_epochs'] = 25
    best_params['warm_start_epochs'] = 15
    best_params['layers'] = 2
    best_params['layer_size'] = 128  
    
    print(f"    [Optuna] NBEATS best_params={best_params}")
    return best_params


In [None]:
def train_nbeats_model(X_train, y_train, X_test, y_test, best_params, max_epochs=50, window_size=None):
    """Train N-BEATS model and return aligned test data, support inverse y scaling"""
    try:
        lookback = min(20, len(X_train)//4)
        X_seq_train, y_seq_train = prepare_sequences(X_train, y_train, lookback)
        X_seq_test, y_seq_test = prepare_sequences(X_test, y_test, lookback)
        
        if len(X_seq_train) == 0:
            return None, None, None
        
        X_tensor_train = torch.FloatTensor(X_seq_train).to(device)
        y_tensor_train = torch.FloatTensor(y_seq_train).to(device)
        X_tensor_test = torch.FloatTensor(X_seq_test).to(device)
        
        input_size = X_seq_train.shape[-1]
        model = NBeatsNet(
            input_size=input_size,
            stacks=best_params['stacks'],
            blocks_per_stack=best_params['blocks_per_stack'],
            layers=best_params['layers'],
            layer_size=best_params['layer_size']
        ).to(device)
        
        criterion = nn.MSELoss()
        lr_key = 'learning_rate' if 'learning_rate' in best_params else 'lr'
        optimizer = optim.AdamW(model.parameters(), lr=best_params[lr_key], weight_decay=1e-5)
        
        dataset = TensorDataset(X_tensor_train, y_tensor_train)
        pin_memory = device.type == "cuda"
        dataloader = DataLoader(
            dataset, 
            batch_size=best_params['batch_size'], 
            shuffle=True, 
            pin_memory=pin_memory,
            num_workers=0
        )
        
        model.train()
        for epoch in range(max_epochs):
            epoch_loss = 0
            for X_batch, y_batch in dataloader:
                loss = train_step(model, criterion, optimizer, X_batch, y_batch)
                epoch_loss += loss
            
            if epoch > 10 and epoch_loss < 1e-6:
                break
        
        model.eval()
        with torch.no_grad():
            if len(X_seq_test) > 0:
                y_pred_tensor = model(X_tensor_test).squeeze()
                y_pred = y_pred_tensor.cpu().numpy()
                
                if window_size is not None:
                    y_scaler = load_y_scaler(window_size)
                    if y_scaler is not None:
                        y_pred = inverse_transform_predictions(y_pred, y_scaler)
                
                if len(y_pred) < len(y_test):
                    y_test_aligned = y_test[-len(y_pred):]
                    if window_size is not None:
                        y_scaler = load_y_scaler(window_size)
                        if y_scaler is not None:
                            y_test_aligned = inverse_transform_predictions(y_test_aligned, y_scaler)
                    return model, y_pred, y_test_aligned
                else:
                    y_pred = y_pred[:len(y_test)]
                    y_test_copy = y_test.copy()
                    if window_size is not None:
                        y_scaler = load_y_scaler(window_size)
                        if y_scaler is not None:
                            y_test_copy = inverse_transform_predictions(y_test_copy, y_scaler)
                    return model, y_pred, y_test_copy
            else:
                return model, np.array([]), np.array([])
        
    except Exception as e:
        print(f"Training failed: {e}")
        return None, None, None

def get_model(name: str, best_params=None):
    """Get default model parameters"""
    if name == "NBEATS":
        if best_params:
            return best_params
        else:
            return {
                'stacks': 2, 
                'blocks_per_stack': 2, 
                'layers': 2, 
                'layer_size': 128, 
                'learning_rate': 0.001, 
                'batch_size': 32,
                'max_epochs': 25,
                'warm_start_epochs': 15
            }
    
    raise ValueError(f"Unexpected model: {name}")

def save_model(fitted_model, model_name, window_size, path="models/"):
    """Save trained model as .pth file"""
    os.makedirs(path, exist_ok=True)
    filename = f"{model_name}_w{window_size}.pth"
    filepath = os.path.join(path, filename)
    
    if hasattr(fitted_model, 'state_dict'):
        torch.save({
            'state_dict': fitted_model.state_dict(),
            'model_name': model_name,
            'window_size': window_size
        }, filepath)
    else:
        torch.save(fitted_model, filepath)
    print(f"[Save] Model saved to {filename}")

def load_model(model_name, window_size, path="models/"):
    """Load saved model"""
    filename = f"{model_name}_w{window_size}.pth"
    filepath = os.path.join(path, filename)
    if os.path.exists(filepath):
        return torch.load(filepath, map_location=device)
    return None


In [None]:
def train_and_evaluate(model_name, window_size,
                       X_train, y_train, X_test, y_test,
                       permnos_train, permnos_test, meta=None, shared_params=None):
    """Train and evaluate N-BEATS model"""
    
    if model_name in TUNED_MODELS:
        if shared_params is None:
            print(f"[Hyperparameter Tuning] Running Optuna optimization for window={window_size}")
            best_params = tune_nbeats_with_optuna(X_train, y_train, window_size, n_trials=10)
            print(f"[Optuna] {model_name} best_params={best_params}")
            model_params = get_model(model_name, best_params)
        else:
            print(f"[Shared Parameters] Using optimized params from window=5 for window={window_size}")
            model_params = get_model(model_name, shared_params)
    else:
        model_params = get_model(model_name)

    fitted_model, y_pred, y_test_aligned = train_nbeats_model(X_train, y_train, X_test, y_test, model_params, window_size=window_size)
    
    if fitted_model is None or y_pred is None or y_test_aligned is None:
        print(f"[Skip Model] {model_name} failed to fit. Skipping.")
        return None

    if len(y_pred) == 0:
        print(f"[Skip Model] {model_name} no valid predictions. Skipping.")
        return None

    if len(y_test_aligned) < len(y_test):
        offset = len(y_test) - len(y_test_aligned)
        meta_aligned = meta.iloc[offset:] if meta is not None else None
        permnos_test_aligned = permnos_test[offset:] if permnos_test is not None else None
    else:
        meta_aligned = meta
        permnos_test_aligned = permnos_test

    k = X_test.shape[1]
    metrics = regression_metrics(y_test_aligned, y_pred, k, meta=meta_aligned, permnos=permnos_test_aligned)

    save_model(fitted_model, model_name, window_size)
    save_metrics(metrics, model_name, window_size)
    save_predictions(model_name, window_size, y_test_aligned, y_pred, permnos_test_aligned)

    print(f"Completed {model_name} w={window_size}: MSE={metrics['MSE']:.6f}, Dir_Acc={metrics['Directional Accuracy']:.4f}")
    
    if device.type == 'mps':
        torch.mps.empty_cache()
    elif device.type == 'cuda':
        torch.cuda.empty_cache()
    
    if shared_params is None and model_name in TUNED_MODELS:
        return metrics, best_params
    else:
        return metrics, None


In [None]:
class NBeatsWrapper:
    """N-BEATS model wrapper, supports extended training for seasonal mode"""
    
    def __init__(self, input_size, seq_len, stacks=2, blocks_per_stack=2, nlayers=4, 
                 hidden=128, learning_rate=0.001, batch_size=32, max_epochs=50,
                 warm_start_epochs=10, training_device=None, window_size=None):
        self.input_size = input_size
        self.seq_len = seq_len
        self.stacks = stacks
        self.blocks_per_stack = blocks_per_stack
        self.nlayers = nlayers
        self.hidden = hidden
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.max_epochs = max_epochs
        self.warm_start_epochs = warm_start_epochs
        self.training_device = training_device or device
        self.window_size = window_size  # Store window size for inverse scaling
        self.model = None
        self.optimizer = None
        self.criterion = nn.MSELoss()
        self.is_fitted = False
        
    def _create_model(self):
        """Create N-BEATS model"""
        self.model = NBeatsNet(
            input_size=self.input_size,
            stacks=self.stacks,
            blocks_per_stack=self.blocks_per_stack,
            layers=self.nlayers,
            layer_size=self.hidden
        ).to(self.training_device)
        
        self.optimizer = optim.AdamW(
            self.model.parameters(), 
            lr=self.learning_rate, 
            weight_decay=1e-5
        )
    
    def fit(self, X, y, validation_split=0.1):
        """Train the model"""
        if self.model is None:
            self._create_model()
            
        lookback = min(20, len(X)//4)
        X_seq, y_seq = prepare_sequences(X, y, lookback)
        
        if len(X_seq) == 0:
            return self
            
        split_idx = int(len(X_seq) * (1 - validation_split))
        X_train_seq, X_val_seq = X_seq[:split_idx], X_seq[split_idx:]
        y_train_seq, y_val_seq = y_seq[:split_idx], y_seq[split_idx:]
        
        X_tensor = torch.FloatTensor(X_train_seq).to(self.training_device)
        y_tensor = torch.FloatTensor(y_train_seq).to(self.training_device)
        
        dataset = TensorDataset(X_tensor, y_tensor)
        pin_memory = self.training_device.type == "cuda"
        dataloader = DataLoader(
            dataset, 
            batch_size=self.batch_size, 
            shuffle=True, 
            pin_memory=pin_memory,
            num_workers=0
        )
        
        self.model.train()
        best_loss = float('inf')
        patience = 5
        patience_counter = 0
        
        for epoch in range(self.max_epochs):
            epoch_loss = 0
            for X_batch, y_batch in dataloader:
                loss = train_step(self.model, self.criterion, self.optimizer, X_batch, y_batch)
                epoch_loss += loss
            
            avg_loss = epoch_loss / len(dataloader)
            
            if len(X_val_seq) > 0:
                val_loss = self._validate(X_val_seq, y_val_seq)
                if val_loss < best_loss:
                    best_loss = val_loss
                    patience_counter = 0
                else:
                    patience_counter += 1
                    
                if patience_counter >= patience:
                    break
            
            if epoch > 10 and avg_loss < 1e-6:
                break
        
        self.is_fitted = True
        return self
    
    def partial_fit(self, X, y, validation_split=0.1, extra_epochs=None):
        """Incremental training (warm-start)"""
        if self.model is None:
            return self.fit(X, y, validation_split)
            
        epochs = extra_epochs or self.warm_start_epochs
        
        lookback = min(20, len(X)//4)
        X_seq, y_seq = prepare_sequences(X, y, lookback)
        
        if len(X_seq) == 0:
            return self
            
        split_idx = int(len(X_seq) * (1 - validation_split))
        X_train_seq = X_seq[:split_idx]
        y_train_seq = y_seq[:split_idx]
        
        X_tensor = torch.FloatTensor(X_train_seq).to(self.training_device)
        y_tensor = torch.FloatTensor(y_train_seq).to(self.training_device)
        
        dataset = TensorDataset(X_tensor, y_tensor)
        pin_memory = self.training_device.type == "cuda"
        dataloader = DataLoader(
            dataset, 
            batch_size=self.batch_size, 
            shuffle=True, 
            pin_memory=pin_memory,
            num_workers=0
        )
        
        self.model.train()
        for epoch in range(epochs):
            for X_batch, y_batch in dataloader:
                train_step(self.model, self.criterion, self.optimizer, X_batch, y_batch)
        
        self.is_fitted = True
        return self
    
    def _validate(self, X_val, y_val):
        """Validate the model"""
        self.model.eval()
        with torch.no_grad():
            X_val_tensor = torch.FloatTensor(X_val).to(self.training_device)
            y_val_tensor = torch.FloatTensor(y_val).to(self.training_device)
            pred = self.model(X_val_tensor).squeeze()
            loss = self.criterion(pred, y_val_tensor)
            return loss.item()
    
    def predict(self, X):
        """Make predictions"""
        if self.model is None or not self.is_fitted:
            raise ValueError("Model not fitted yet")
            
        lookback = min(20, len(X)//4)
        X_seq, _ = prepare_sequences(X, None, lookback)
        
        if len(X_seq) == 0:
            return np.array([])
            
        self.model.eval()
        with torch.no_grad():
            X_tensor = torch.FloatTensor(X_seq).to(self.training_device)
            pred = self.model(X_tensor).squeeze()
            predictions = pred.cpu().numpy()
            
            # Inverse scaling for y
            if self.window_size is not None:
                y_scaler = load_y_scaler(self.window_size)
                if y_scaler is not None:
                    predictions = inverse_transform_predictions(predictions, y_scaler)
            
            return predictions

def save_nbeats_model(model, window, year, quarter, path="models/"):
    """Save N-BEATS quarterly model as .pth file"""
    os.makedirs(path, exist_ok=True)
    filename = f"NBEATS_w{window}_{year}Q{quarter}.pth"
    filepath = os.path.join(path, filename)
    
    save_dict = {
        'state_dict': model.model.state_dict() if model.model is not None else None,
        'hyper_params': {
            'input_size': model.input_size,
            'seq_len': model.seq_len,
            'stacks': model.stacks,
            'blocks_per_stack': model.blocks_per_stack,
            'nlayers': model.nlayers,
            'hidden': model.hidden,
            'learning_rate': model.learning_rate,
            'batch_size': model.batch_size,
            'max_epochs': model.max_epochs,
            'warm_start_epochs': model.warm_start_epochs,
            'window_size': model.window_size  # Store window_size
        },
        'is_fitted': model.is_fitted
    }
    
    torch.save(save_dict, filepath)
    print(f"[Save] N-BEATS model saved: {filename}")

def load_nbeats_model(window, year, quarter, training_device=None, fallback_hp=None, path="models/"):
    """Load N-BEATS quarterly model"""
    filename = f"NBEATS_w{window}_{year}Q{quarter}.pth"
    filepath = os.path.join(path, filename)
    
    if os.path.exists(filepath):
        try:
            ckpt = torch.load(filepath, map_location=training_device or device)
            
            hp = ckpt['hyper_params']
            model = NBeatsWrapper(
                input_size=hp['input_size'],
                seq_len=hp['seq_len'],
                stacks=hp['stacks'],
                blocks_per_stack=hp['blocks_per_stack'],
                nlayers=hp['nlayers'],
                hidden=hp['hidden'],
                learning_rate=hp['learning_rate'],
                batch_size=hp['batch_size'],
                max_epochs=hp['max_epochs'],
                warm_start_epochs=hp['warm_start_epochs'],
                training_device=training_device or device,
                window_size=hp.get('window_size', window)
            )
            
            if ckpt['state_dict'] is not None:
                model._create_model()
                model.model.load_state_dict(ckpt['state_dict'])
                model.is_fitted = ckpt.get('is_fitted', True)
            
            print(f"[Load] N-BEATS model loaded: {filename}")
            return model
        except Exception as e:
            print(f"[Error] Failed to load {filename}: {e}")
            return None
    
    return None

def get_train_device():
    """Prefer CUDA, then MPS, then CPU"""
    if torch.cuda.is_available():
        return torch.device("cuda")
    elif torch.backends.mps.is_available():
        return torch.device("mps")
    else:
        return torch.device("cpu")

def train_nbeats_models_expanding_quarterly(
    start_year: int = 2015,
    end_year: int = 2024,
    window_sizes: list[int] | None = None,
    npz_path: str = "/Users/june/Documents/University of Manchester/Data Science/ERP/Project code/1_Data_Preprocessing/all_window_datasets.npz",
    n_trials_optuna: int = 10
):
    """
    Mimic LSTM/Autoformer logic:
    - Scan local files before training, skip if model exists
    - Initial tuning window: if start_year Q4 model exists, load hyperparameters, else tune with Optuna
    - Expand training set by quarter, optionally re-tune, warm-start
    """
    if window_sizes is None:
        window_sizes = [5, 21, 252, 512]

    data = load_datasets(npz_path)
    print(f"=== N-BEATS quarterly expanding training {start_year}-{end_year} ===")

    best_params_cache: dict[str, dict] = {}
    TUNED_WINDOW = 5
    TUNE_QUARTERS = {(2020, 4)}

    for window in window_sizes:
        train_device = get_train_device()
        print(f"\n>> Window = {window}")
        X_train_init = data[f"X_train_{window}"].copy()
        y_train_init = data[f"y_train_{window}"].copy()
        X_test_full  = data[f"X_test_{window}"]
        y_test_full  = data[f"y_test_{window}"]
        meta_test    = pd.DataFrame.from_dict(data[f"meta_test_{window}"].item())
        meta_test["ret_date"] = pd.to_datetime(meta_test["ret_date"])

        cache_key = f"NBEATS_w{window}"
        if window == TUNED_WINDOW:
            tuned_model = load_nbeats_model(
                window, start_year, 4,
                training_device=train_device,
                fallback_hp=None
            )
            if tuned_model is not None:
                hp = {
                    "stacks": tuned_model.stacks,
                    "blocks_per_stack": tuned_model.blocks_per_stack,
                    "layers": tuned_model.nlayers,
                    "layer_size": tuned_model.hidden,
                    "learning_rate": tuned_model.learning_rate,
                    "batch_size": tuned_model.batch_size,
                    "max_epochs": tuned_model.max_epochs,
                    "warm_start_epochs": tuned_model.warm_start_epochs,
                }
                print(f"[Skip-Optuna] hyper-params loaded from existing {start_year}Q4 model")
            else:
                print("    Optuna tuning on initial window…")
                hp = tune_nbeats_with_optuna(
                    X_train_init, y_train_init,
                    window, n_trials=n_trials_optuna
                )
        else:
            hp = None
        best_params_cache[cache_key] = hp

        nbeats_model = None
        for year, quarter in get_quarter_periods(start_year, end_year):
            if (year == start_year and quarter < 4) or (year == end_year and quarter > 3):
                continue

            existing = load_nbeats_model(
                window, year, quarter,
                training_device=train_device,
                fallback_hp=best_params_cache.get(cache_key)
            )
            if existing is not None:
                print(f"[Skip] Model already trained for window={window}, {year}Q{quarter}")
                continue

            print(f"[Window {window}] {year}Q{quarter}")

            if not (year == start_year and quarter == 4):
                py, pq = (year, quarter-1) if quarter > 1 else (year-1, 4)
                mask_prev = (
                    (meta_test["ret_date"].dt.year == py) &
                    (meta_test["ret_date"].dt.quarter == pq)
                )
                if mask_prev.any():
                    X_prev = X_test_full[mask_prev]
                    y_prev = y_test_full[mask_prev]
                    X_train_init = np.vstack([X_train_init, X_prev])
                    y_train_init = np.hstack([y_train_init, y_prev])
                    print(f"    +{mask_prev.sum()} obs from {py}Q{pq} -> train_size={len(y_train_init)}")

            hp = best_params_cache.get(cache_key)
            if hp is None:
                hp = best_params_cache[f"NBEATS_w{TUNED_WINDOW}"]
            if (year, quarter) in TUNE_QUARTERS and window == TUNED_WINDOW:
                print("    Re-tuning via Optuna…")
                hp = tune_nbeats_with_optuna(
                    X_train_init, y_train_init,
                    window, n_trials=n_trials_optuna
                )
                best_params_cache[cache_key] = hp

            model_prev = None
            if not (year == start_year and quarter == 4):
                py, pq = (year, quarter-1) if quarter > 1 else (year-1, 4)
                model_prev = load_nbeats_model(
                    window, py, pq,
                    training_device=train_device,
                    fallback_hp=hp
                )

            if model_prev is not None:
                print("    Warm-start …")
                model_prev.partial_fit(
                    X_train_init, y_train_init,
                    validation_split=0.1,
                    extra_epochs=hp.get("warm_start_epochs", 10)
                )
                nbeats_model = model_prev
            else:
                print("    Cold-start …")
                nbeats_model = NBeatsWrapper(
                    input_size         = X_train_init.shape[1],
                    seq_len            = window,
                    stacks             = hp["stacks"],
                    blocks_per_stack   = hp["blocks_per_stack"],
                    nlayers            = hp["layers"],
                    hidden             = hp["layer_size"],
                    learning_rate      = hp["learning_rate"],
                    batch_size         = hp["batch_size"],
                    max_epochs         = hp["max_epochs"],
                    warm_start_epochs  = hp.get("warm_start_epochs", 10),
                    training_device    = train_device,
                    window_size        = window
                )
                nbeats_model.fit(X_train_init, y_train_init, validation_split=0.1)

            save_nbeats_model(nbeats_model, window, year, quarter)
            gc.collect()
            if torch.backends.mps.is_available():
                torch.mps.empty_cache()
            elif torch.cuda.is_available():
                torch.cuda.empty_cache()

    print("=== All N-BEATS quarterly models trained ===")

In [None]:
# Portfolio core class
# Transaction cost settings
TC_GRID = [0.0005, 0.001, 0.002, 0.003, 0.004]  # 5, 10, 20, 30, 40 bps
TC_TAG  = {
    0.0005: "tc5",
    0.001:  "tc10", 
    0.002:  "tc20",
    0.003:  "tc30",
    0.004:  "tc40"
}

class PortfolioBacktester:
    def __init__(self):
        self.results = {}
        
    def calc_turnover(self, w_t, r_t, w_tp1):
        """Calculate turnover using the standard formula"""
        if w_t is None:
            return np.sum(np.abs(w_tp1))
        
        gross_ret = np.sum(w_t * r_t)
        if abs(1 + gross_ret) < 1e-8:
            return np.sum(np.abs(w_tp1))
        
        passive_weight = w_t * (1 + r_t) / (1 + gross_ret)
        turnover = np.sum(np.abs(w_tp1 - passive_weight))
        return turnover
    
    def create_portfolios_with_permno_tracking(self, signals, market_caps, permnos, top_pct=0.1, bottom_pct=0.1, weight_scheme="VW"):
        """
        Create portfolio weights based on signals, strictly tracking permno alignment.
        weight_scheme: 'VW' for value-weighted, 'EW' for equal-weighted
        """
        n_stocks = len(signals)
        top_n    = max(1, int(round(n_stocks * top_pct)))
        bottom_n = max(1, int(round(n_stocks * bottom_pct)))
        
        sorted_idx = np.argsort(signals)[::-1]
        
        top_idx = sorted_idx[:top_n]
        bottom_idx = sorted_idx[-bottom_n:]
        
        portfolio_data = {}
        
        long_weights = np.zeros(n_stocks)
        if len(top_idx) > 0:
            if weight_scheme == "VW":
                top_market_caps = market_caps[top_idx]
                if np.sum(top_market_caps) > 0:
                    long_weights[top_idx] = top_market_caps / np.sum(top_market_caps)
            else:
                long_weights[top_idx] = 1.0 / len(top_idx)
        
        portfolio_data['long_only'] = {
            'weights': long_weights,
            'permnos': permnos.copy(),
            'selected_permnos': permnos[top_idx] if len(top_idx) > 0 else np.array([])
        }
        
        short_weights = np.zeros(n_stocks)
        if len(bottom_idx) > 0:
            if weight_scheme == "VW":
                bottom_market_caps = market_caps[bottom_idx]
                if np.sum(bottom_market_caps) > 0:
                    short_weights[bottom_idx] = -bottom_market_caps / np.sum(bottom_market_caps)
            else:
                short_weights[bottom_idx] = -1.0 / len(bottom_idx)
        
        portfolio_data['short_only'] = {
            'weights': short_weights,
            'permnos': permnos.copy(),
            'selected_permnos': permnos[bottom_idx] if len(bottom_idx) > 0 else np.array([])
        }
        
        ls_raw = long_weights + short_weights

        gross_target = 2.0
        current_gross = np.sum(np.abs(long_weights)) + np.sum(np.abs(short_weights))
        scale = gross_target / current_gross if current_gross > 1e-8 else 0.0
        ls_weights = scale * ls_raw

        ls_selected_permnos = np.concatenate([
            permnos[top_idx] if len(top_idx) > 0 else np.array([]),
            permnos[bottom_idx] if len(bottom_idx) > 0 else np.array([])
        ])

        portfolio_data['long_short'] = {
            'weights': ls_weights,
            'permnos': permnos.copy(),
            'selected_permnos': ls_selected_permnos
        }

        return portfolio_data
    
    def calculate_aligned_portfolio_return(self, portfolio_weights, portfolio_permnos, actual_returns, actual_permnos):
        """Calculate portfolio return strictly aligned by permno"""
        aligned_returns = np.zeros(len(portfolio_permnos))
        
        return_dict = dict(zip(actual_permnos, actual_returns))
        
        for i, permno in enumerate(portfolio_permnos):
            if permno in return_dict:
                aligned_returns[i] = return_dict[permno]
        
        portfolio_return = np.sum(portfolio_weights * aligned_returns)
        return portfolio_return, aligned_returns

    def calculate_metrics(self, returns, turnover_series=None):
        """Calculate portfolio metrics - only returns summary metrics, not full series"""
        returns = np.array(returns)
        
        annual_return = np.mean(returns) * 252
        annual_vol = np.std(returns, ddof=1) * np.sqrt(252)
        sharpe = annual_return / annual_vol if annual_vol > 0 else 0
        
        log_cum = np.cumsum(np.log1p(returns))
        peak_log = np.maximum.accumulate(log_cum)
        dd_log = peak_log - log_cum
        max_drawdown = 1 - np.exp(-dd_log.max()) 
        max_1d_loss = np.min(returns) 
        
        avg_turnover = np.mean(turnover_series) if turnover_series is not None else 0
        
        sortino = sortino_ratio(returns)
        cvar95  = cvar(returns, alpha=0.95)

        result = {
            'annual_return': annual_return,
            'annual_vol': annual_vol,
            'sharpe': sharpe,
            'max_drawdown': max_drawdown,
            'max_1d_loss': max_1d_loss,
            'avg_turnover': avg_turnover,
            'sortino': sortino,
            'cvar95': cvar95
        }
        
        return result

In [None]:

def run_portfolio_simulation_daily_rebalance(start_year=2016, end_year=2024, window_sizes=None, model_names=None,
                                           npz_path="/Users/june/Documents/University of Manchester/Data Science/ERP/Project code/1_Data_Preprocessing/all_window_datasets_scaled.npz"):
    """
Portfolio simulation (daily prediction, next-day rebalancing):
    1. Load quarterly models (trained with quarterly expanding window)
    2. Daily prediction to daily signals
    3. Daily portfolio construction (T+1 rebalancing, strict permno alignment)
    4. Separate summary metrics and time series data
    """
    if window_sizes is None:
        window_sizes = [5, 21, 252, 512]
    if model_names is None:
        model_names = ["NBEATS"]
    
    print("Starting daily rebalance portfolio backtesting simulation")
    
    backtester = PortfolioBacktester()
    datasets = load_datasets(npz_path)
    
    summary_results = []
    daily_series_data = []
    pred_rows = []
    
    WEIGHT_SCHEMES = ["VW", "EW"]
    
    y_scalers = {}
    for window in window_sizes:
        y_scalers[window] = load_y_scaler(window)
    
    for window in window_sizes:
        print(f"Processing window size: {window}")
        
        X_test = datasets[f"X_test_{window}"]
        y_test = datasets[f"y_test_{window}"]
        input_size = X_test.shape[1]
        meta_test_dict = datasets[f"meta_test_{window}"].item()
        meta_test = pd.DataFrame.from_dict(meta_test_dict)
        
        permnos_test = meta_test["PERMNO"].values
        meta_test["signal_date"]  = pd.to_datetime(meta_test["date"])
        meta_test["ret_date"]     = pd.to_datetime(meta_test["ret_date"])
        market_caps = meta_test.get("MKTCAP", np.ones(len(permnos_test)))
        
        meta_test['date'] = pd.to_datetime(meta_test["date"])
        dates_test = meta_test['signal_date']
        
        for model_name in model_names:
            for scheme in WEIGHT_SCHEMES:
                all_y_true   = []
                all_y_pred   = []
                all_permnos  = []
                all_meta     = []
                print(f"  Model: {model_name}, Scheme: {scheme}")
                
                portfolio_daily_data = {
                    'long_only': {'returns': [], 'turnovers': [], 'dates': []},
                    'short_only': {'returns': [], 'turnovers': [], 'dates': []},
                    'long_short': {'returns': [], 'turnovers': [], 'dates': []}
                }
                
                prev_portfolio_data = {'long_only': None, 'short_only': None, 'long_short': None}
                
                signals_buf = {}
                
                for year in range(start_year, min(end_year + 1, 2025)):
                    for quarter in range(1, 5):
                        # Determine model file year and quarter (T+1 logic: use previous quarter's model to predict current quarter)
                        if quarter == 1:
                            model_file_year, model_file_quarter = year - 1, 4
                        else:
                            model_file_year, model_file_quarter = year, quarter - 1
                            
                        model = load_nbeats_model(
                            window=window,
                            year=model_file_year,
                            quarter=model_file_quarter,
                            training_device=device
                        )
                        
                        if model is None:
                            print(f"      Skip: Model file not found for {model_file_year}Q{model_file_quarter}")
                            continue
                        
                        # Filter data for the current quarter
                        quarter_mask = (
                            (dates_test.dt.year == year) & 
                            (dates_test.dt.quarter == quarter)
                        )
                        if not np.any(quarter_mask):
                            continue
                        
                        X_quarter = X_test[quarter_mask]
                        y_quarter = y_test[quarter_mask]
                        permnos_quarter = permnos_test[quarter_mask]
                        market_caps_quarter = market_caps[quarter_mask]
                        dates_quarter = dates_test[quarter_mask]
                        ret_dates_quarter = meta_test.loc[quarter_mask, 'ret_date'].values
                        
                        preds = model.predict(X_quarter)
                        
                        # ========== y inverse standardization ==========
                        y_scaler = y_scalers.get(window)
                        if y_scaler is not None:
                            preds = inverse_transform_predictions(preds, y_scaler)
                            y_quarter = inverse_transform_predictions(y_quarter, y_scaler)
                        else:
                            print(f"[Warning] No Y scaler found for window {window}, using original scale")
                        # ================================

                        # Align by date (daily prediction)
                        if len(preds) < len(y_quarter):
                            gap = len(y_quarter) - len(preds)
                            dates_quarter       = dates_quarter[gap:]
                            ret_dates_quarter   = ret_dates_quarter[gap:]
                            permnos_quarter     = permnos_quarter[gap:]
                            market_caps_quarter = market_caps_quarter[gap:]
                            y_quarter           = y_quarter[gap:]

                        assert len(preds) == len(y_quarter)

                        df_quarter = pd.DataFrame({
                            'signal_date': dates_quarter,
                            'ret_date'   : ret_dates_quarter,
                            'permno'     : permnos_quarter,
                            'market_cap' : market_caps_quarter,
                            'actual_return': y_quarter,
                            'prediction'   : preds
                        })
                        
                        # Only collect in the first scheme to avoid duplication
                        if scheme == 'VW':
                            df_q_save = df_quarter[['signal_date','ret_date','permno',
                                                    'actual_return','prediction','market_cap']].copy()
                            df_q_save.rename(columns={'actual_return':'y_true',
                                                      'prediction':'y_pred'}, inplace=True)
                            df_q_save['model']  = model_name
                            df_q_save['window'] = window
                            pred_rows.append(df_q_save)
                        
                        all_y_true.append(df_quarter['actual_return'].values)
                        all_y_pred.append(df_quarter['prediction'].values)
                        all_permnos.append(df_quarter['permno'].values)
                        meta_quarter = meta_test.loc[quarter_mask].copy()
                        if len(preds) < len(meta_quarter):
                            gap = len(meta_quarter) - len(preds)
                            meta_quarter = meta_quarter.iloc[gap:]        

                        all_meta.append(meta_quarter)  

                        # Loop by date (T+1 rebalancing logic)
                        for signal_date, sig_grp in df_quarter.groupby('signal_date'):
                            # (1) Today's signals: just calculate and store in buffer, do not rebalance yet
                            daily_signals = (
                                sig_grp.groupby('permno')['prediction'].mean()
                                      .to_frame('prediction')
                                      .join(sig_grp.groupby('permno')['market_cap'].mean())
                            )
                            signals_buf[signal_date] = daily_signals

                            # (2) Only use yesterday's signals to rebalance today
                            prev_date = signal_date - pd.tseries.offsets.BDay(1)
                            if prev_date not in signals_buf:
                                continue

                            sigs = signals_buf.pop(prev_date)
                            if prev_date in signals_buf:
                                del signals_buf[prev_date]

                            # (3) Use today's actual returns for settlement (ret_date == signal_date)
                            ret_grp = df_quarter[df_quarter['ret_date'] == signal_date]
                            if len(ret_grp) == 0:
                                continue

                            daily_actual_returns = (
                                ret_grp.groupby('permno')['actual_return']
                                       .mean()
                                       .reindex(sigs.index, fill_value=0)
                                       .values
                            )
                            daily_permnos = sigs.index.values

                            # (4) Generate 3 sets of weights
                            portfolios_data = backtester.create_portfolios_with_permno_tracking(
                                signals      = sigs['prediction'].values,
                                market_caps  = sigs['market_cap'].values,
                                permnos      = daily_permnos,
                                weight_scheme= scheme
                            )
                            
                            for portfolio_type in ['long_only', 'short_only', 'long_short']:
                                portfolio_info = portfolios_data[portfolio_type]
                                
                                # Calculate strictly aligned portfolio return
                                portfolio_return, aligned_returns = backtester.calculate_aligned_portfolio_return(
                                    portfolio_weights=portfolio_info['weights'],
                                    portfolio_permnos=portfolio_info['permnos'],
                                    actual_returns=daily_actual_returns,
                                    actual_permnos=daily_permnos
                                )
                                
                                if prev_portfolio_data[portfolio_type] is not None:
                                    prev_w_ser = pd.Series(
                                        prev_portfolio_data[portfolio_type]['weights'],
                                        index=prev_portfolio_data[portfolio_type]['permnos']
                                    )
                                    cur_w_ser = pd.Series(
                                        portfolio_info['weights'],
                                        index=portfolio_info['permnos']
                                    )

                                    prev_r_ser = pd.Series(
                                        prev_portfolio_data[portfolio_type]['aligned_returns'],
                                        index=prev_portfolio_data[portfolio_type]['permnos']
                                    )

                                    aligned_prev_w = prev_w_ser.reindex(cur_w_ser.index, fill_value=0).values
                                    aligned_prev_r = prev_r_ser.reindex(cur_w_ser.index, fill_value=0).values

                                    aligned_cur_w = cur_w_ser.values

                                    turnover = backtester.calc_turnover(
                                        w_t  = aligned_prev_w,
                                        r_t  = aligned_prev_r,
                                        w_tp1= aligned_cur_w
                                    )
                                else:
                                    turnover = np.sum(np.abs(portfolio_info['weights']))
                                
                                portfolio_daily_data[portfolio_type]['returns'].append(portfolio_return)
                                portfolio_daily_data[portfolio_type]['turnovers'].append(turnover)
                                portfolio_daily_data[portfolio_type]['dates'].append(signal_date)
                                
                                prev_portfolio_data[portfolio_type] = {
                                    'weights'        : portfolio_info['weights'],
                                    'permnos'        : portfolio_info['permnos'],
                                    'aligned_returns': aligned_returns      
                                }
                
                # Calculate final metrics and store results
                for portfolio_type in ['long_only', 'short_only', 'long_short']:
                    portfolio_data = portfolio_daily_data[portfolio_type]
                    
                    if len(portfolio_data['returns']) > 0:
                        metrics = backtester.calculate_metrics(
                            returns=portfolio_data['returns'],
                            turnover_series=portfolio_data['turnovers']
                        )
                        
                        rets = np.array(portfolio_data['returns'])
                        tovs = np.array(portfolio_data['turnovers'])

                        for tc in TC_GRID:
                            tag = TC_TAG[tc]
                            adj = rets - tovs * tc

                            ann_ret = adj.mean() * 252
                            ann_vol = adj.std(ddof=1) * np.sqrt(252)
                            sharpe  = ann_ret / ann_vol if ann_vol > 0 else 0

                            cum_adj = np.cumprod(1 + adj)
                            mdd = ((cum_adj - np.maximum.accumulate(cum_adj)) /
                                   np.maximum.accumulate(cum_adj)).min()

                            metrics[f'{tag}_annual_return'] = ann_ret
                            metrics[f'{tag}_annual_vol']    = ann_vol
                            metrics[f'{tag}_sharpe']        = sharpe
                            metrics[f'{tag}_max_drawdown']  = mdd
                        
                        summary_results.append({
                            'scheme': scheme,
                            'model': model_name,
                            'window': window,
                            'portfolio_type': portfolio_type,
                            **metrics
                        })
                        
                        rets_arr = np.array(portfolio_data['returns'])
                        tovs_arr = np.array(portfolio_data['turnovers'])
                        cum_no_tc = np.log1p(rets_arr).cumsum()

                        tc_ret_dict = {}
                        tc_cum_dict = {}
                        for tc in TC_GRID:
                            tag = TC_TAG[tc]
                            r = rets_arr - tovs_arr * tc
                            tc_ret_dict[tag] = r
                            tc_cum_dict[tag] = np.log1p(r).cumsum()

                        for i, date in enumerate(portfolio_data['dates']):
                            row = {
                                'scheme'        : scheme,
                                'model'         : model_name,
                                'window'        : window,
                                'portfolio_type': portfolio_type,
                                'date'          : str(date),
                                'return'        : rets_arr[i],
                                'turnover'      : tovs_arr[i],
                                'cumulative'    : cum_no_tc[i],
                            }
                            for tag in TC_TAG.values():
                                row[f'{tag}_return']     = tc_ret_dict[tag][i]
                                row[f'{tag}_cumulative'] = tc_cum_dict[tag][i]

                            daily_series_data.append(row)

                # After processing all portfolio_types for the current scheme, calculate overall regression metrics
                if scheme == "VW" and len(all_y_true) > 0:
                    y_all    = np.concatenate(all_y_true)
                    yhat_all = np.concatenate(all_y_pred)
                    perm_all = np.concatenate(all_permnos)
                    meta_all = pd.concat(all_meta, ignore_index=True)

                    k = X_test.shape[1]

                    m1_metrics = overall_interval_metrics_method1(
                        y_all, yhat_all, k,
                        permnos_all=perm_all,
                        meta_all=meta_all
                    )

                    # Calculate cross-sectional RankIC
                    full_pred_df = pd.concat(pred_rows, ignore_index=True)
                    mean_ic, t_ic, pos_ic, _ = calc_ic_daily(full_pred_df, method='spearman')
                    m1_metrics['RankIC_mean']  = mean_ic
                    m1_metrics['RankIC_t']     = t_ic
                    m1_metrics['RankIC_pos%']  = pos_ic

                    save_metrics(m1_metrics, name=model_name, window=window,
                        path="portfolio_metrics.csv")

    # Create DataFrame and ensure all TC metrics exist
    summary_df = pd.DataFrame(summary_results)
    daily_df = pd.DataFrame(daily_series_data) if daily_series_data else pd.DataFrame()
    
    tc_columns = [c for c in summary_df.columns if c.startswith('tc')]
    summary_df[tc_columns] = summary_df[tc_columns].fillna(0.0)
    
    def save_split_by_scheme(df, base_filename):
        """Helper function to save files split by scheme"""
        if df.empty:
            print(f"Warning: DataFrame is empty, skipping save for {base_filename}")
            return None, None
            
        vw_df = df[df['scheme'] == 'VW']
        ew_df = df[df['scheme'] == 'EW']
        
        vw_filename = f"{base_filename}_VW.csv"
        ew_filename = f"{base_filename}_EW.csv"
        
        vw_df.to_csv(vw_filename, index=False)
        ew_df.to_csv(ew_filename, index=False)
        
        print(f"VW results saved to {vw_filename}")
        print(f"EW results saved to {ew_filename}")
        
        return vw_filename, ew_filename
    
    save_split_by_scheme(summary_df, "portfolio_results_daily_rebalance")
    
    if not daily_df.empty:
        save_split_by_scheme(daily_df, "portfolio_daily_series")
    
    if pred_rows:
        pred_df = pd.concat(pred_rows, ignore_index=True)
        pred_df.to_csv("predictions_daily.csv", index=False)
        print(f"Saved {len(pred_df)} prediction rows to predictions_daily.csv")
    
    print(f"Generated {len(summary_results)} portfolio summary records")
    print(f"Generated {len(daily_series_data)} daily series records")
    
    return summary_df, daily_df, backtester


In [24]:
train_nbeats_models_expanding_quarterly(
    start_year=2015,
    end_year=2024,
    window_sizes=[5, 21, 252, 512],
    n_trials_optuna=10
)

=== N-BEATS quarterly expanding training 2015-2024 ===

>> Window = 5
    Optuna tuning on initial window…
    [Hyperparameter Tuning] Running Optuna optimization for window=5
    [Device Setting] Using CPU for hyperparameter tuning
    [Tuning Completed] Switching back to mps for model training
    [Optuna] NBEATS best_params={'stacks': 1, 'blocks_per_stack': 1, 'batch_size': 64, 'learning_rate': 0.0002962151658830348, 'max_epochs': 25, 'warm_start_epochs': 15, 'layers': 2, 'layer_size': 128}
[Window 5] 2015Q4
    Cold-start …
[Save] N-BEATS model saved: NBEATS_w5_2015Q4.pth
[Window 5] 2016Q1
[Load] N-BEATS model loaded: NBEATS_w5_2015Q4.pth
    Warm-start …
[Save] N-BEATS model saved: NBEATS_w5_2016Q1.pth
[Window 5] 2016Q2
    +2956 obs from 2016Q1 -> train_size=199876
[Load] N-BEATS model loaded: NBEATS_w5_2016Q1.pth
    Warm-start …
[Save] N-BEATS model saved: NBEATS_w5_2016Q2.pth
[Window 5] 2016Q3
    +3170 obs from 2016Q2 -> train_size=203046
[Load] N-BEATS model loaded: NBEATS_w

In [25]:
run_portfolio_simulation_daily_rebalance()

Starting Daily Rebalance Portfolio Backtesting Simulation
[Load] Y scaler loaded for window 5: /Users/june/Documents/University of Manchester/Data Science/ERP/Project code/1_Data_Preprocessing/scaler_y_window_5.pkl
[Load] Y scaler loaded for window 21: /Users/june/Documents/University of Manchester/Data Science/ERP/Project code/1_Data_Preprocessing/scaler_y_window_21.pkl
[Load] Y scaler loaded for window 252: /Users/june/Documents/University of Manchester/Data Science/ERP/Project code/1_Data_Preprocessing/scaler_y_window_252.pkl
[Load] Y scaler loaded for window 512: /Users/june/Documents/University of Manchester/Data Science/ERP/Project code/1_Data_Preprocessing/scaler_y_window_512.pkl
Processing window size: 5
  Model: NBEATS, Scheme: VW
[Load] N-BEATS model loaded: NBEATS_w5_2015Q4.pth
[Load] Y scaler loaded for window 5: /Users/june/Documents/University of Manchester/Data Science/ERP/Project code/1_Data_Preprocessing/scaler_y_window_5.pkl
[Info] Inverse transformed 2986 predictions

(   scheme   model  window portfolio_type  annual_return  annual_vol    sharpe  \
 0      VW  NBEATS       5      long_only       0.200589    0.195665  1.025167   
 1      VW  NBEATS       5     short_only      -0.091117    0.193805 -0.470147   
 2      VW  NBEATS       5     long_short       0.109472    0.195500  0.559961   
 3      EW  NBEATS       5      long_only       0.190501    0.201918  0.943457   
 4      EW  NBEATS       5     short_only      -0.055643    0.210143 -0.264788   
 5      EW  NBEATS       5     long_short       0.134858    0.180195  0.748400   
 6      VW  NBEATS      21      long_only       0.130978    0.211384  0.619619   
 7      VW  NBEATS      21     short_only      -0.150597    0.214384 -0.702462   
 8      VW  NBEATS      21     long_short      -0.019619    0.205308 -0.095559   
 9      EW  NBEATS      21      long_only       0.105022    0.223125  0.470686   
 10     EW  NBEATS      21     short_only      -0.174878    0.227892 -0.767372   
 11     EW  NBEA

In [None]:

def run_factor_regression(port_ret, factors, use_excess=True):
    """
    Main function for 5-factor regression.
    """
    df = pd.concat([port_ret, factors], axis=1, join='inner').dropna()
    df.columns = ['ret'] + list(factors.columns)
    
    if use_excess:
        y = df['ret'].values
    else:
        y = df['ret'].values - df['rf'].values
    
    X = df[['mktrf','smb','hml','rmw','cma','umd']].values
    X = sm.add_constant(X)
    
    model = sm.OLS(y, X)
    res = model.fit()
    alpha = res.params[0]          # daily alpha
    resid_std = res.resid.std(ddof=1)

    ir_daily = alpha / resid_std          # daily IR
    ir_annual = ir_daily * np.sqrt(252)   # annualized IR

    y_hat = np.asarray(res.fittedvalues)
    
    out = {
        'N_obs'            : len(y),
        'alpha_daily'      : alpha,
        'alpha_annual'     : alpha*252,      
        't_alpha'          : res.tvalues[0],
        'IR_daily'         : ir_daily,
        'IR_annual'        : ir_annual,
        'R2_zero'          : r2_zero(y, y_hat),
    }
    
    factor_names = ['MKT','SMB','HML','RMW','CMA','UMD']
    for i, fac in enumerate(factor_names, start=1):
        out[f'beta_{fac}'] = res.params[i]
        out[f't_{fac}']    = res.tvalues[i]
    
    return out

def batch_factor_analysis(
    daily_df: pd.DataFrame,
    factors_path: str,
    scheme: str,
    tc_levels=(0, 5, 10, 20, 40),
    portfolio_types=('long_only','short_only','long_short'),
    model_filter=None,
    window_filter=None,
    gross_only=False,            
    out_dir='factor_IR_results',
):
    """
    Generate a CSV containing IR results.
    If gross_only=True, only tc=0 is calculated; if False, all tc_levels are included.
    """
    import os
    os.makedirs(out_dir, exist_ok=True)

    fac = (pd.read_csv(factors_path, parse_dates=['date'])
             .set_index('date')
             .sort_index())

    sub = daily_df[daily_df['scheme'] == scheme].copy()
    if model_filter is not None:
        sub = sub[sub['model'].isin(model_filter)]
    if window_filter is not None:
        sub = sub[sub['window'].isin(window_filter)]

    tc_iter = (0,) if gross_only else tc_levels
    results = []

    for (model, win, ptype), g in sub.groupby(['model','window','portfolio_type']):
        g = g.sort_values('date').set_index(pd.to_datetime(g['date']))

        for tc in tc_iter:
            col = 'return' if tc == 0 else f'tc{tc}_return'
            if col not in g.columns:
                continue
            port_ret = g[col]
            stats = run_factor_regression(port_ret, fac, use_excess=True)
            stats.update({
                'scheme'        : scheme,
                'model'         : model,
                'window'        : win,
                'portfolio_type': ptype,
                'tc_bps'        : tc,
            })
            results.append(stats)

    df_out = pd.DataFrame(results)[[
        'scheme','model','window','portfolio_type','tc_bps','N_obs',
        'alpha_daily','alpha_annual','t_alpha',
        'IR_daily','IR_annual','R2_zero',
        'beta_MKT','t_MKT','beta_SMB','t_SMB',
        'beta_HML','t_HML','beta_RMW','t_RMW',
        'beta_CMA','t_CMA','beta_UMD','t_UMD'
    ]]

    tag = 'gross' if gross_only else 'net'
    fname = f'5_factor_analysis_{scheme}_{tag}.csv'
    df_out.to_csv(os.path.join(out_dir, fname), index=False)
    print(f'[Saved] {fname}')
    return df_out



def run_all_factor_tests(vw_csv="portfolio_daily_series_VW.csv",
                         ew_csv="portfolio_daily_series_EW.csv",
                         factor_csv="/Users/june/Documents/University of Manchester/Data Science/ERP/Project code/1_Data_Preprocessing/5_Factors_Plus_Momentum.csv",
                         save_dir="results",
                         y_is_excess=True,
                         hac_lags=5,
                         save_txt=True):
    vw_df = pd.read_csv(vw_csv)
    ew_df = pd.read_csv(ew_csv)

    vw_gross = batch_factor_analysis(
        vw_df, factor_csv, scheme='VW', gross_only=True)
    vw_net   = batch_factor_analysis(
        vw_df, factor_csv, scheme='VW', gross_only=False)

    ew_gross = batch_factor_analysis(
        ew_df, factor_csv, scheme='EW', gross_only=True)
    ew_net   = batch_factor_analysis(
        ew_df, factor_csv, scheme='EW', gross_only=False)

    return vw_gross, vw_net, ew_gross, ew_net
    

vw_gross, vw_net, ew_gross, ew_net = run_all_factor_tests()

[Saved] 5_factor_analysis_VW_gross.csv 
[Saved] 5_factor_analysis_VW_net.csv 
[Saved] 5_factor_analysis_EW_gross.csv 
[Saved] 5_factor_analysis_EW_net.csv 


In [None]:
# === File paths ===
rf_file = "/Users/june/Documents/University of Manchester/Data Science/ERP/Project code/1_Data_Preprocessing/CRSP_2016_2024_top50_with_exret.csv"
vw_file = "portfolio_daily_series_VW.csv"
ew_file = "portfolio_daily_series_EW.csv"

# === Load risk-free rate data ===

rf_df = pd.read_csv(rf_file, usecols=["date", "rf"])
rf_df["date"] = pd.to_datetime(rf_df["date"])
rf_dict = dict(zip(rf_df["date"], rf_df["rf"]))


def adjust_returns_with_rf_grouped(file_path, output_path):
    df = pd.read_csv(file_path)
    # Parse date with flexible format
    df["date"] = pd.to_datetime(df["date"], format='mixed', dayfirst=True)

    # Find all return columns (exclude cumulative)
    return_cols = [col for col in df.columns if "return" in col and "cumul" not in col]

    # Set portfolio_type order to avoid groupby sorting issues
    order = ["long_only", "short_only", "long_short"]
    df["portfolio_type"] = pd.Categorical(df["portfolio_type"], categories=order, ordered=True)

    df_list = []
    for _, group in df.groupby(["scheme", "model", "window", "portfolio_type"], sort=False):
        group = group.sort_values("date").copy()
        for col in return_cols:
            group[col] = group.apply(lambda row: row[col] + rf_dict.get(row["date"], 0), axis=1)
            cum_col = col.replace("return", "cumulative")
            group[cum_col] = np.log1p(group[col]).cumsum()
        df_list.append(group)

    df_new = pd.concat(df_list).sort_values(["scheme", "model", "window", "portfolio_type", "date"])
    df_new.to_csv(output_path, index=False)
    print(f"Finished: {output_path}")

adjust_returns_with_rf_grouped(vw_file, "portfolio_daily_series_VW_with_rf.csv")
adjust_returns_with_rf_grouped(ew_file, "portfolio_daily_series_EW_with_rf.csv")


Finish: portfolio_daily_series_VW_with_rf.csv
Finish: portfolio_daily_series_EW_with_rf.csv


In [None]:

# Download S&P500 (2016-2024)
sp500 = yf.download("^GSPC", start="2016-01-01", end="2024-12-31")
price_col = "Adj Close" if "Adj Close" in sp500.columns else "Close"
sp500["daily_return"] = sp500[price_col].pct_change().fillna(0)
# Cumulative log return (as in the paper)
sp500["cum_return"] = np.cumsum(np.log1p(sp500["daily_return"]))
sp500 = sp500[["cum_return"]]
sp500.index = pd.to_datetime(sp500.index)

# Configuration
files = [
    ("VW", "portfolio_daily_series_VW_with_rf.csv"),
    ("EW", "portfolio_daily_series_EW_with_rf.csv")
]
tc_levels = [0, 5, 10, 20, 40]      # Transaction cost (bps)
windows = [5, 21, 252, 512]         # Window sizes
strategies = ["long_only", "short_only", "long_short"]

output_dir = "Baseline_Portfolio"
os.makedirs(output_dir, exist_ok=True)

# Economic event periods (for shading)
crisis_periods = [
    (datetime(2018, 6, 1), datetime(2019, 1, 1), "US-China Trade War"),
    (datetime(2020, 2, 1), datetime(2020, 7, 1), "COVID-19"),
    (datetime(2022, 2, 1), datetime(2022, 6, 1), "Russia-Ukraine War"),
    (datetime(2023, 1, 1), datetime(2023, 4, 1), "US Bank Crisis"),
]

def plot_comparison_styled(df, scheme, tc, window):
    plt.figure(figsize=(15, 12))
    model_names = df["model"].unique()
    colors = plt.cm.tab10(np.linspace(0, 1, len(model_names)))

    offset_step = 0.02

    for i, strat in enumerate(strategies, 1):
        ax = plt.subplot(3, 1, i)

        plt.plot(sp500.index, sp500["cum_return"],
                 color="black", lw=2.5, label="S&P500 (Total Return)", zorder=10)

        for idx, model_name in enumerate(model_names):
            sub = df[(df["window"] == window) &
                     (df["portfolio_type"] == strat) &
                     (df["model"] == model_name)].sort_values("date")
            if sub.empty:
                continue

            if tc == 0:
                ret_col = "return"          # Raw excess return
            else:
                ret_col = f"tc{tc}_return"  # Return with transaction cost

            if ret_col not in sub.columns:
                continue

            log_cum = np.cumsum(np.log1p(sub[ret_col].values))

            y_shift = idx * offset_step
            plt.plot(sub["date"], log_cum + y_shift,
                     label=f"{model_name} ({strat.replace('_',' ').title()})",
                     lw=2, color=colors[idx], alpha=0.9)

        for start, end, label in crisis_periods:
            ax.axvspan(start, end, color='grey', alpha=0.3)
            ax.text(start + pd.Timedelta(days=10),
                    ax.get_ylim()[1]*0.92, label, fontsize=8, color='grey')
        ax.xaxis.set_major_locator(mdates.YearLocator())
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
        ax.set_ylabel("Cumulative log return (start = 0)")
        ax.set_title(f"{scheme} | Window={window} | Strategy={strat} | TC={tc} bps")
        ax.grid(alpha=0.3)
        plt.xticks(rotation=30)
        plt.legend(bbox_to_anchor=(1.04, 1), loc='upper left', fontsize=8)

    plt.tight_layout()
    fname = f"{scheme}_window{window}_TC{tc}_logreturn_offset.png"
    plt.savefig(os.path.join(output_dir, fname), dpi=300, bbox_inches='tight')
    plt.close()

# Main loop to generate all figures
for scheme, file_path in files:
    df = pd.read_csv(file_path)
    df["date"] = pd.to_datetime(df["date"])
    for tc in tc_levels:
        for window in windows:
            plot_comparison_styled(df, scheme, tc, window)

print(f"All figures have been generated and saved to: {output_dir}/")


[*********************100%***********************]  1 of 1 completed


All figures have been generated and saved to: Baseline_Portfolio/


In [None]:

# Load R²_zero from portfolio_metrics.csv
metrics_df = pd.read_csv("portfolio_metrics.csv")[["Model", "Window", "R²_zero"]]
metrics_df.rename(columns={"Model": "model", "Window": "window"}, inplace=True)

# Process VW/EW files
for fname in ["portfolio_results_daily_rebalance_VW.csv", "portfolio_results_daily_rebalance_EW.csv"]:
    df = pd.read_csv(fname)

    # Merge R²_zero by model and window
    df = df.merge(metrics_df, on=["model", "window"], how="left")

    rows = []
    for _, row in df.iterrows():
        r2 = float(row["R²_zero"]) if not pd.isna(row["R²_zero"]) else 0.0
        if row["portfolio_type"] == "long_only":
            d_sr, sr_star = delta_sharpe(r2, SR_MKT_EX)
            row["ΔSharpe"]  = d_sr
            row["Sharpe*"]  = sr_star
            row["baseline"] = f"SPX_excess ({SR_MKT_EX:.2f})"
        else:
            d_sr, sr_star = delta_sharpe(r2, 0)
            row["ΔSharpe"]  = d_sr
            row["Sharpe*"]  = sr_star
            row["baseline"] = "cash (0)"
        rows.append(row)

    pd.DataFrame(rows).to_csv(fname, index=False)
    print(f"[Update] ΔSharpe has been written to {fname}")

[Update] ΔSharpe has been written to portfolio_results_daily_rebalance_VW.csv
[Update] ΔSharpe has been written to portfolio_results_daily_rebalance_EW.csv


In [None]:
import pandas as pd
import numpy as np
from math import sqrt

PRED_PATH = "predictions_daily.csv"
METRICS_PATH = "portfolio_metrics.csv"
TREAT_CONSTANT_DAY_AS_ZERO = False
MIN_DAYS_FOR_STATS = 1

def _day_ic(g):
    if g["y_pred"].nunique(dropna=True) <= 1 or g["y_true"].nunique(dropna=True) <= 1:
        return 0.0 if TREAT_CONSTANT_DAY_AS_ZERO else np.nan
    return g["y_pred"].corr(g["y_true"], method="spearman")

def rankic_stats(df_group):
    ics = (df_group.groupby("signal_date", observed=True).apply(_day_ic).dropna())
    n = int(ics.shape[0])
    if n < MIN_DAYS_FOR_STATS:
        return pd.Series({"RankIC_mean": np.nan, "RankIC_t": np.nan, "RankIC_pos%": np.nan, "N_days": n})
    mean_ic = float(ics.mean())
    std_ic  = float(ics.std(ddof=1))
    t_ic    = mean_ic / (std_ic / np.sqrt(n)) if std_ic > 0 else np.nan
    pos_pct = float((ics > 0).mean())
    return pd.Series({"RankIC_mean": mean_ic, "RankIC_t": t_ic, "RankIC_pos%": pos_pct, "N_days": n})

# Read data and calculate RankIC
pred = pd.read_csv(PRED_PATH)
pred["signal_date"] = pd.to_datetime(pred["signal_date"], errors="coerce")
pred = pred.dropna(subset=["signal_date", "y_true", "y_pred", "model", "window"])
pred["window"] = pd.to_numeric(pred["window"], errors="coerce").astype("Int64")

rankic_df = (pred.groupby(["model", "window"], dropna=False)
                .apply(rankic_stats)
                .reset_index()
                .rename(columns={"model":"Model","window":"Window"}))

# Merge: keep new RankIC columns, add _old suffix to original metrics columns
metrics = pd.read_csv(METRICS_PATH)
metrics["Window"] = pd.to_numeric(metrics["Window"], errors="coerce").astype("Int64")

merged = metrics.merge(rankic_df, on=["Model","Window"], how="left", suffixes=("_old",""))

# Drop old columns (with _old suffix)
to_drop = [c for c in merged.columns if c.endswith("_old")]
merged = merged.drop(columns=to_drop)

# Save and overwrite
merged.to_csv(METRICS_PATH, index=False)
print("[OK] Overwrote portfolio_metrics.csv with new RankIC")


  ics = (df_group.groupby("signal_date", observed=True).apply(_day_ic).dropna())
  ics = (df_group.groupby("signal_date", observed=True).apply(_day_ic).dropna())
  ics = (df_group.groupby("signal_date", observed=True).apply(_day_ic).dropna())


[OK] Overwrote portfolio_metrics.csv with new RankIC )


  ics = (df_group.groupby("signal_date", observed=True).apply(_day_ic).dropna())
  .apply(rankic_stats)
