In [3]:
import sys
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import time
import random
import os
def set_seed(seed=42):
    """Set all random seeds for reproducibility"""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    if torch.backends.mps.is_available():
        torch.mps.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    os.environ['PYTHONHASHSEED'] = str(seed)

set_seed(42)

sys.path.append("src")

from chronos import BaseChronosPipeline 

pipeline = BaseChronosPipeline.from_pretrained(
    "amazon/chronos-t5-base",
    device_map="mps",
    torch_dtype=torch.float32
)

try:
    pipeline.model = torch.compile(pipeline.model, backend="mps")
    print("torch.compile optimization enabled")
except:
    print("torch.compile not available, continuing with standard mode")

data = np.load("/Users/june/Documents/University of Manchester/Data Science/ERP/Project code/1_Data_Preprocessing/all_window_datasets_unscaled.npz", 
               allow_pickle=True)

window_sizes = [5, 21, 252, 512]
results = {}

def get_batch_size(window_size, base_batch=256):
    """
    Dynamically calculate batch size based on window size.
    Larger window sizes require smaller batch sizes to avoid memory issues.
    """
    if window_size <= 5:
        return base_batch
    elif window_size <= 21:
        return base_batch // 1  
    elif window_size <= 252:
        return base_batch // 4  
    elif window_size <= 512:
        return base_batch // 4  
    else:
        return max(base_batch // (window_size // 10), 64)

print("Dynamic batch size configuration:")
for ws in window_sizes:
    batch_size = get_batch_size(ws)
    print(f"  Window {ws}: batch size = {batch_size}")

print(f"Device: {pipeline.model.device}")

! torch.compile not available, continuing with standard mode
Dynamic batch size configuration:
  Window 5: batch size = 256
  Window 21: batch size = 256
  Window 252: batch size = 64
  Window 512: batch size = 64
Device: mps:0


In [4]:
def r2_zero(y_true, y_pred):
    """
    Calculate zero-based R² (baseline is 0).
    y_true: true values array (N,)
    y_pred: predicted values array (N,)
    """
    rss = np.sum((y_true - y_pred)**2)  
    tss = np.sum(y_true**2)            
    return 1 - rss / tss

def calc_directional_metrics(y_true, y_pred, permnos=None):
    """
    Calculate directional accuracy metrics.
    If permnos is provided, calculate metrics per group and average.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    if permnos is None:
        s_true = np.sign(y_true)
        s_pred = np.sign(y_pred)
        mask = s_true != 0
        s_true = s_true[mask]
        s_pred = s_pred[mask]

        overall_acc = np.mean(s_true == s_pred)

        up_mask = s_true > 0
        down_mask = s_true < 0
        up_acc = np.mean(s_true[up_mask] == s_pred[up_mask]) if np.any(up_mask) else 0
        down_acc = np.mean(s_true[down_mask] == s_pred[down_mask]) if np.any(down_mask) else 0

    else:
        df = pd.DataFrame({"permno": permnos, "yt": y_true, "yp": y_pred})
        overall_accs = []
        up_accs = []
        down_accs = []

        for _, g in df.groupby("permno"):
            s_true = np.sign(g["yt"].values)
            s_pred = np.sign(g["yp"].values)
            mask = s_true != 0
            s_true = s_true[mask]
            s_pred = s_pred[mask]
            if len(s_true) == 0:
                continue
            overall_accs.append(np.mean(s_true == s_pred))

            up_mask = s_true > 0
            down_mask = s_true < 0
            up_accs.append(np.mean(s_true[up_mask] == s_pred[up_mask]) if np.any(up_mask) else np.nan)
            down_accs.append(np.mean(s_true[down_mask] == s_pred[down_mask]) if np.any(down_mask) else np.nan)

        overall_acc = np.nanmean(overall_accs)
        up_acc = np.nanmean(up_accs)
        down_acc = np.nanmean(down_accs)

    return overall_acc, up_acc, down_acc

def calculate_metrics(y_true, y_pred, k=1, meta=None, permnos=None):
    """
    Calculate regression and directional metrics.
    If meta is provided, also calculate metrics for market cap groups.
    """
    from sklearn.metrics import mean_squared_error, mean_absolute_error
    
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    n = len(y_true)

    r2 = r2_zero(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)  

    dir_acc, up_acc, down_acc = calc_directional_metrics(y_true, y_pred, permnos)

    metrics = {
        "R²": r2,
        "MAE": mae,
        "MSE": mse,
        "RMSE": rmse,  
        "Directional Accuracy": dir_acc,
        "Up_Directional_Acc": up_acc,
        "Down_Directional_Acc": down_acc
    }

    if meta is not None and "MKTCAP_PERCENTILE" in meta.columns:
        top_mask = meta["MKTCAP_PERCENTILE"] >= 0.75
        bottom_mask = meta["MKTCAP_PERCENTILE"] <= 0.25

        if np.any(top_mask):
            yt_top = y_true[top_mask]
            yp_top = y_pred[top_mask]
            perm_top = permnos[top_mask] if permnos is not None else None
            r2_top = r2_zero(yt_top, yp_top)
            mae_top = mean_absolute_error(yt_top, yp_top)
            mse_top = mean_squared_error(yt_top, yp_top)
            rmse_top = np.sqrt(mse_top)  
            dir_top, up_top, down_top = calc_directional_metrics(yt_top, yp_top, perm_top)
            metrics.update({
                "Top25_R2": r2_top,
                "Top25_MAE": mae_top,
                "Top25_MSE": mse_top,
                "Top25_RMSE": rmse_top,  
                "Top25_Dir_Acc": dir_top,
                "Top25_Up_Acc": up_top,
                "Top25_Down_Acc": down_top
            })

        if np.any(bottom_mask):
            yt_bot = y_true[bottom_mask]
            yp_bot = y_pred[bottom_mask]
            perm_bot = permnos[bottom_mask] if permnos is not None else None
            r2_bot = r2_zero(yt_bot, yp_bot)
            mae_bot = mean_absolute_error(yt_bot, yp_bot)
            mse_bot = mean_squared_error(yt_bot, yp_bot)
            rmse_bot = np.sqrt(mse_bot)  
            dir_bot, up_bot, down_bot = calc_directional_metrics(yt_bot, yp_bot, perm_bot)
            metrics.update({
                "Bottom25_R2": r2_bot,
                "Bottom25_MAE": mae_bot,
                "Bottom25_MSE": mse_bot,
                "Bottom25_RMSE": rmse_bot,  
                "Bottom25_Dir_Acc": dir_bot,
                "Bottom25_Up_Acc": up_bot,
                "Bottom25_Down_Acc": down_bot
            })

    return metrics

def batch_predict(X_test_tensor, pipeline, batch_size=4096):
    """
    Batch inference for faster prediction.
    """
    all_predictions = []
    
    with torch.no_grad():
        for i in tqdm(range(0, len(X_test_tensor), batch_size), desc="Batch Inference"):
            batch_end = min(i + batch_size, len(X_test_tensor))
            batch_ctx = X_test_tensor[i:batch_end]
            context_list = [seq for seq in batch_ctx]
            forecasts = pipeline.predict(
                context=context_list,
                prediction_length=1,
                num_samples=10
            )
            batch_means = np.array([f[0].mean() for f in forecasts])
            all_predictions.extend(batch_means.ravel())
    
    return np.array(all_predictions)

for window_size in window_sizes:
    print(f"\n=== Processing Window Size: {window_size} ===")
    start_time = time.time()
    
    X_test = data[f"X_test_{window_size}"]
    y_test = data[f"y_test_{window_size}"]
    meta_test = pd.DataFrame(data[f"meta_test_{window_size}"].item())
    
    current_batch_size = get_batch_size(window_size)
    
    print(f"Test samples: {len(X_test):,}")
    print(f"Sequence length: {X_test.shape[1]}")
    print(f"Batch size for window {window_size}: {current_batch_size}")
    
    print("Converting data to Tensor...")
    X_test_tensor = torch.from_numpy(X_test.astype(np.float32))
    
    print("Starting batch inference...")
    all_predictions = batch_predict(X_test_tensor, pipeline, current_batch_size)
    
    print("Calculating evaluation metrics...")
    metrics = calculate_metrics(y_test, all_predictions, k=window_size, meta=meta_test, permnos=meta_test["PERMNO"].values)
    
    results[window_size] = {
        'predictions': all_predictions,
        'true_values': y_test,
        'metrics': metrics,
        'meta': meta_test
    }
    
    end_time = time.time()
    elapsed_time = end_time - start_time
    samples_per_second = len(X_test) / elapsed_time
    
    print(f"\n  Window {window_size} processing completed:")
    print(f"   Total time: {elapsed_time:.2f} seconds")
    print(f"   Processing speed: {samples_per_second:.0f} samples/second")
    print(f"   Estimated speedup vs sequential: {samples_per_second/100:.1f}x")
    
    print(f"\n Window {window_size} evaluation metrics:")
    main_metrics = ["R²", "MAE", "MSE", "Directional Accuracy", "Up_Directional_Acc", "Down_Directional_Acc"]
    for metric_name in main_metrics:
        if metric_name in metrics:
            value = metrics[metric_name]
            print(f"   {metric_name}: {value:.4f}" if value is not None else f"   {metric_name}: N/A")
    
    del X_test_tensor
    torch.mps.empty_cache() if torch.backends.mps.is_available() else None



=== Processing Window Size: 5 ===
Test samples: 110,850
Sequence length: 5
Batch size for window 5: 256
Converting data to Tensor...
Starting batch inference...


Batch Inference: 100%|██████████| 434/434 [02:18<00:00,  3.13it/s]


Calculating evaluation metrics...

  Window 5 processing completed:
   Total time: 139.52 seconds
   Processing speed: 794 samples/second
   Estimated speedup vs sequential: 7.9x

 Window 5 evaluation metrics:
   R²: -0.6636
   MAE: 0.0145
   MSE: 0.0004
   Directional Accuracy: 0.4984
   Up_Directional_Acc: 0.5885
   Down_Directional_Acc: 0.3989

=== Processing Window Size: 21 ===
Test samples: 110,850
Sequence length: 21
Batch size for window 21: 256
Converting data to Tensor...
Starting batch inference...


Batch Inference: 100%|██████████| 434/434 [06:01<00:00,  1.20it/s]


Calculating evaluation metrics...

  Window 21 processing completed:
   Total time: 362.44 seconds
   Processing speed: 306 samples/second
   Estimated speedup vs sequential: 3.1x

 Window 21 evaluation metrics:
   R²: -0.1389
   MAE: 0.0126
   MSE: 0.0003
   Directional Accuracy: 0.5002
   Up_Directional_Acc: 0.5904
   Down_Directional_Acc: 0.3994

=== Processing Window Size: 252 ===
Test samples: 110,850
Sequence length: 252
Batch size for window 252: 64
Converting data to Tensor...
Starting batch inference...


Batch Inference: 100%|██████████| 1733/1733 [1:05:04<00:00,  2.25s/it]


Calculating evaluation metrics...

  Window 252 processing completed:
   Total time: 3905.06 seconds
   Processing speed: 28 samples/second
   Estimated speedup vs sequential: 0.3x

 Window 252 evaluation metrics:
   R²: -0.0239
   MAE: 0.0120
   MSE: 0.0003
   Directional Accuracy: 0.4984
   Up_Directional_Acc: 0.4651
   Down_Directional_Acc: 0.5313

=== Processing Window Size: 512 ===
Test samples: 110,850
Sequence length: 512
Batch size for window 512: 64
Converting data to Tensor...
Starting batch inference...


Batch Inference: 100%|██████████| 1733/1733 [2:10:05<00:00,  4.50s/it] 


Calculating evaluation metrics...

  Window 512 processing completed:
   Total time: 7806.34 seconds
   Processing speed: 14 samples/second
   Estimated speedup vs sequential: 0.1x

 Window 512 evaluation metrics:
   R²: -0.0264
   MAE: 0.0120
   MSE: 0.0003
   Directional Accuracy: 0.4943
   Up_Directional_Acc: 0.3742
   Down_Directional_Acc: 0.6221


In [5]:
# Save results
import os
import joblib

# Create directories for saving results
os.makedirs("chronos_t5_base_results", exist_ok=True)
os.makedirs("chronos_t5_base_predictions", exist_ok=True)

# Save the complete results to a pkl file
results_dict = {
    'window_sizes': window_sizes,
    'results': results,
    'model_name': 'Chronos-T5-Base'
}
joblib.dump(results_dict, "chronos_t5_base_results/results_base.pkl")
print("[Save] results_base.pkl saved successfully (base)")

# Save evaluation metrics
metrics_df = pd.DataFrame([
    {**{"Window": window_size}, **results[window_size]["metrics"]}
    for window_size in window_sizes
])
metrics_df.to_csv("chronos_t5_base_results/chronos_t5_base_metrics.csv", index=False)

# Save prediction results
for window_size in window_sizes:
    df = pd.DataFrame({
        "PERMNO": results[window_size]["meta"]["PERMNO"],
        "y_true": results[window_size]["true_values"],
        "y_pred": results[window_size]["predictions"]
    })
    df.to_csv(f"chronos_t5_base_predictions/chronos_t5_base_w{window_size}.csv", index=False)


[Save] results_base.pkl saved successfully (base)


In [6]:
# Load and display saved results
def load_results(file_path="chronos_t5_base_results/results_base.pkl"):
    """Load saved results"""
    loaded_results = joblib.load(file_path)
    print(f"\n=== {loaded_results['model_name']} Results ===")
    print(f"Window sizes: {loaded_results['window_sizes']}")
    
    for window_size in loaded_results['window_sizes']:
        print(f"\nMetrics for window {window_size}:")
        metrics = loaded_results['results'][window_size]['metrics']
        main_metrics = ["R²", "MAE", "MSE", "Directional Accuracy", "Up_Directional_Acc", "Down_Directional_Acc"]
        for metric_name in main_metrics:
            if metric_name in metrics:
                value = metrics[metric_name]
                print(f"{metric_name}: {value:.4f}" if value is not None else f"{metric_name}: N/A")
    
    return loaded_results

loaded_results = load_results()



=== Chronos-T5-Base Results ===
Window sizes: [5, 21, 252, 512]

Metrics for window 5:
R²: -0.6636
MAE: 0.0145
MSE: 0.0004
Directional Accuracy: 0.4984
Up_Directional_Acc: 0.5885
Down_Directional_Acc: 0.3989

Metrics for window 21:
R²: -0.1389
MAE: 0.0126
MSE: 0.0003
Directional Accuracy: 0.5002
Up_Directional_Acc: 0.5904
Down_Directional_Acc: 0.3994

Metrics for window 252:
R²: -0.0239
MAE: 0.0120
MSE: 0.0003
Directional Accuracy: 0.4984
Up_Directional_Acc: 0.4651
Down_Directional_Acc: 0.5313

Metrics for window 512:
R²: -0.0264
MAE: 0.0120
MSE: 0.0003
Directional Accuracy: 0.4943
Up_Directional_Acc: 0.3742
Down_Directional_Acc: 0.6221
