In [1]:
# ============================================================================
# Chronos T5 Large - Google Colab T4 GPU Optimized Version
# ============================================================================

# Cell 1: Environment setup and model loading
from google.colab import drive
drive.mount('/content/drive')

!mkdir -p /content/drive/MyDrive/chronos_t5_large_project

%cd /content/drive/MyDrive/chronos_t5_large_project

!git clone https://github.com/amazon-science/chronos-forecasting
%cd chronos-forecasting

%pip install torch
%pip install transformers
%pip install datasets
%pip install accelerate
%pip install scikit-learn
%pip install tqdm
%pip install joblib

import torch
print("CUDA is available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("Current CUDA device:", torch.cuda.current_device())
    print("Device name:", torch.cuda.get_device_name(0))

Mounted at /content/drive
/content/drive/MyDrive/chronos_t5_large_project
Cloning into 'chronos-forecasting'...
remote: Enumerating objects: 500, done.[K
remote: Counting objects: 100% (28/28), done.[K
remote: Compressing objects: 100% (26/26), done.[K
remote: Total 500 (delta 22), reused 2 (delta 2), pack-reused 472 (from 3)[K
Receiving objects: 100% (500/500), 1.03 MiB | 7.08 MiB/s, done.
Resolving deltas: 100% (217/217), done.
/content/drive/MyDrive/chronos_t5_large_project/chronos-forecasting
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-c

In [2]:
import sys
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import time
import random
import os
def set_seed(seed=42):
    """Set all random seeds for reproducibility"""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Set random seed for Mac MPS device
    if torch.backends.mps.is_available():
        torch.mps.manual_seed(seed)
    # Ensure deterministic behavior
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    os.environ['PYTHONHASHSEED'] = str(seed)

set_seed(42)

# Add local path for importing modules from src
sys.path.append("src")

from chronos import BaseChronosPipeline
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

try:
    pipeline = BaseChronosPipeline.from_pretrained(
        "amazon/chronos-t5-large",
        device_map="auto" if torch.cuda.is_available() else "cpu",
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
    )
    print(f"Successfully loaded Chronos T5 Large model on {device}")

    if hasattr(pipeline.model, 'device'):
        print(f"Model device: {pipeline.model.device}")

except Exception as e:
    print(f"Failed to load model: {e}")
    raise

Using device: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/2.84G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/142 [00:00<?, ?B/s]

Successfully loaded Chronos T5 Large model on cuda
Model device: cuda:0


In [3]:
import os
data_path = "/content/drive/MyDrive/ERP Data/all_window_datasets_unscaled.npz"
if os.path.exists(data_path):
    data = np.load(data_path, allow_pickle=True)
    print("Data loaded successfully!")
else:
    print(f"Data file not found: {data_path}")
    print("Please make sure all_window_datasets.npz is uploaded to the 'ERP Data' folder in your Google Drive.")

window_sizes = [5, 21, 252, 512]
results = {}

def get_batch_size(window_size):
    """
    Batch size configuration optimized for T4 GPU (16GB).
    """
    if window_size <= 5:
        return 2048  # 4x increase
    elif window_size <= 21:
        return 512   # 2x increase
    elif window_size <= 252:
        return 64   # 4x increase
    elif window_size <= 512:
        return 24   # 4x increase
    else:
        return 32

print("T4 GPU optimized batch size configuration:")
for ws in window_sizes:
    batch_size = get_batch_size(ws)
    print(f"  Window {ws}: batch size = {batch_size}")

Data loaded successfully!
T4 GPU optimized batch size configuration:
  Window 5: batch size = 2048
  Window 21: batch size = 512
  Window 252: batch size = 64
  Window 512: batch size = 24


In [4]:
# ============================================================================
# Cell 2: Prediction (T4 GPU optimized version)
# ============================================================================

def r2_zero(y_true, y_pred):
    """
    Compute R² (zero-based, baseline is 0)
    y_true: true values array (N,)
    y_pred: predicted values array (N,)
    """
    rss = np.sum((y_true - y_pred)**2)
    tss = np.sum(y_true**2)
    return 1 - rss / tss

def calc_directional_metrics(y_true, y_pred, permnos=None):
    """Compute directional accuracy metrics (consistent with TimesFM)"""
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    if permnos is None:
        s_true = np.sign(y_true)
        s_pred = np.sign(y_pred)
        mask = s_true != 0
        s_true = s_true[mask]
        s_pred = s_pred[mask]
        overall_acc = np.mean(s_true == s_pred)
        up_mask = s_true > 0
        down_mask = s_true < 0
        up_acc = np.mean(s_true[up_mask] == s_pred[up_mask]) if np.any(up_mask) else 0
        down_acc = np.mean(s_true[down_mask] == s_pred[down_mask]) if np.any(down_mask) else 0
    else:
        df = pd.DataFrame({"permno": permnos, "yt": y_true, "yp": y_pred})
        overall_accs = []
        up_accs = []
        down_accs = []
        for _, g in df.groupby("permno"):
            s_true = np.sign(g["yt"].values)
            s_pred = np.sign(g["yp"].values)
            mask = s_true != 0
            s_true = s_true[mask]
            s_pred = s_pred[mask]
            if len(s_true) == 0:
                continue
            overall_accs.append(np.mean(s_true == s_pred))
            up_mask = s_true > 0
            down_mask = s_true < 0
            up_accs.append(np.mean(s_true[up_mask] == s_pred[up_mask]) if np.any(up_mask) else np.nan)
            down_accs.append(np.mean(s_true[down_mask] == s_pred[down_mask]) if np.any(down_mask) else np.nan)
        overall_acc = np.nanmean(overall_accs)
        up_acc = np.nanmean(up_accs)
        down_acc = np.nanmean(down_accs)
    return overall_acc, up_acc, down_acc

def calculate_metrics(y_true, y_pred, permnos=None, meta=None):
    """Compute evaluation metrics (consistent with TimesFM)"""
    from sklearn.metrics import mean_squared_error, mean_absolute_error
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    r2 = r2_zero(y_true, y_pred)
    dir_acc, up_acc, down_acc = calc_directional_metrics(y_true, y_pred, permnos)

    metrics = {
        "R2": r2,
        "MAE": mae,
        "MSE": mse,
        "Directional Accuracy": dir_acc,
        "Up_Directional_Acc": up_acc,
        "Down_Directional_Acc": down_acc
    }

    # Market cap group metrics
    if meta is not None and "MKTCAP_PERCENTILE" in meta:
        top_mask = meta["MKTCAP_PERCENTILE"] >= 0.75
        bottom_mask = meta["MKTCAP_PERCENTILE"] <= 0.25

        if np.any(top_mask):
            yt_top = y_true[top_mask]
            yp_top = y_pred[top_mask]
            perm_top = permnos[top_mask] if permnos is not None else None
            r2_top = r2_zero(yt_top, yp_top)
            mae_top = mean_absolute_error(yt_top, yp_top)
            mse_top = mean_squared_error(yt_top, yp_top)
            dir_top, up_top, down_top = calc_directional_metrics(yt_top, yp_top, perm_top)
            metrics.update({
                "Top25_R2": r2_top,
                "Top25_MAE": mae_top,
                "Top25_MSE": mse_top,
                "Top25_Dir_Acc": dir_top,
                "Top25_Up_Acc": up_top,
                "Top25_Down_Acc": down_top
            })

        if np.any(bottom_mask):
            yt_bot = y_true[bottom_mask]
            yp_bot = y_pred[bottom_mask]
            perm_bot = permnos[bottom_mask] if permnos is not None else None
            r2_bot = r2_zero(yt_bot, yp_bot)
            mae_bot = mean_absolute_error(yt_bot, yp_bot)
            mse_bot = mean_squared_error(yt_bot, yp_bot)
            dir_bot, up_bot, down_bot = calc_directional_metrics(yt_bot, yp_bot, perm_bot)
            metrics.update({
                "Bottom25_R2": r2_bot,
                "Bottom25_MAE": mae_bot,
                "Bottom25_MSE": mse_bot,
                "Bottom25_Dir_Acc": dir_bot,
                "Bottom25_Up_Acc": up_bot,
                "Bottom25_Down_Acc": down_bot
            })

    return metrics


In [5]:
def batch_predict_gpu_optimized(X_test_tensor, pipeline, batch_size=512):
    """
    T4 GPU optimized batch inference function.
    Input remains on CPU, pipeline handles token_ids transfer internally.
    """
    all_predictions = []
    original_batch_size = batch_size

    print(f"Starting GPU optimized inference with batch size: {batch_size}")

    with torch.no_grad():
        for i in tqdm(range(0, len(X_test_tensor), batch_size), desc="GPU Batch Inference"):
            try:
                batch_end = min(i + batch_size, len(X_test_tensor))
                batch_ctx = X_test_tensor[i:batch_end]

                forecasts = pipeline.predict(
                    context=batch_ctx,
                    prediction_length=1,
                    num_samples=10
                )

                predictions = np.array([float(f[0].mean()) for f in forecasts])
                all_predictions.extend(predictions)

                if i % (batch_size * 10) == 0 and torch.cuda.is_available():
                    torch.cuda.empty_cache()

            except RuntimeError as e:
                if "out of memory" in str(e).lower():
                    print(f"\nOOM detected. Reducing batch size from {batch_size} to {batch_size // 2}")
                    torch.cuda.empty_cache()
                    batch_size = max(batch_size // 2, 1)

                    batch_end = min(i + batch_size, len(X_test_tensor))
                    batch_ctx = X_test_tensor[i:batch_end]

                    _, mean = pipeline.predict_quantiles(
                        context=batch_ctx,
                        prediction_length=1,
                        quantile_levels=[0.5]
                    )
                    predictions = mean.cpu().numpy().ravel()
                    all_predictions.extend(predictions)
                else:
                    raise e

    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    print(f"\nInference completed. Final batch size: {batch_size} (original: {original_batch_size})")
    return np.array(all_predictions)

!mkdir -p /content/drive/MyDrive/chronos_t5_large_project/chronos_t5_results
!mkdir -p /content/drive/MyDrive/chronos_t5_large_project/chronos_t5_predictions

print("Starting T4 GPU optimized prediction...")
print(f"Data file contains keys: {list(data.keys())}")

for window_size in window_sizes:
    print(f"\n=== Processing Window Size: {window_size} ===")
    start_time = time.time()

    X_test = data[f"X_test_{window_size}"]
    y_test = data[f"y_test_{window_size}"]
    meta_test = pd.DataFrame(data[f"meta_test_{window_size}"].item())

    current_batch_size = get_batch_size(window_size)

    print(f"Test samples: {len(X_test):,}")
    print(f"Sequence length: {X_test.shape[1]}")
    print(f"Optimized batch size: {current_batch_size}")

    X_test_tensor = torch.from_numpy(X_test.astype(np.float32))
    all_predictions = batch_predict_gpu_optimized(X_test_tensor, pipeline, current_batch_size)

    permnos_test = meta_test["PERMNO"].values
    metrics = calculate_metrics(y_test, all_predictions, permnos_test, meta_test)

    print("\n=== Directional Sanity Check ===")
    print("Pos ratio (y_test):", (y_test > 0).mean())
    print("Neg ratio (y_test):", (y_test < 0).mean())
    sign_pred = np.sign(all_predictions)
    print("Pred +1 ratio:", (sign_pred > 0).mean())
    print("Pred -1 ratio:", (sign_pred < 0).mean())

    from sklearn.metrics import confusion_matrix
    conf = confusion_matrix(np.sign(y_test), sign_pred, labels=[1, -1])
    print("      Pred+  Pred-")
    print("+1 |", conf[0])
    print("-1 |", conf[1])

    results[window_size] = {
        'predictions': all_predictions,
        'true_values': y_test,
        'metrics': metrics,
        'meta': meta_test
    }

    end_time = time.time()
    elapsed_time = end_time - start_time
    samples_per_second = len(X_test) / elapsed_time

    print(f"\nWindow {window_size} processing completed:")
    print(f"Total time: {elapsed_time:.2f} seconds")
    print(f"Processing speed: {samples_per_second:.0f} samples/second")

    print(f"\nMetrics for window {window_size}:")
    for metric_name, value in metrics.items():
        print(f"{metric_name}: {value:.4f}")

    results_df = pd.DataFrame({
        'PERMNO': meta_test['PERMNO'],
        'y_true': y_test,
        'y_pred': all_predictions
    })
    results_df.to_csv(f'/content/drive/MyDrive/chronos_t5_large_project/chronos_t5_predictions/chronos_t5_large_w{window_size}.csv', index=False)
    print(f"Predictions saved to chronos_t5_large_w{window_size}.csv")

    del X_test_tensor
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

print("\n=== All predictions completed! ===")

Starting T4 GPU optimized prediction...
Data file contains keys: ['X_train_5', 'y_train_5', 'meta_train_5', 'market_caps_train_5', 'X_test_5', 'y_test_5', 'meta_test_5', 'market_caps_test_5', 'X_train_21', 'y_train_21', 'meta_train_21', 'market_caps_train_21', 'X_test_21', 'y_test_21', 'meta_test_21', 'market_caps_test_21', 'X_train_252', 'y_train_252', 'meta_train_252', 'market_caps_train_252', 'X_test_252', 'y_test_252', 'meta_test_252', 'market_caps_test_252', 'X_train_512', 'y_train_512', 'meta_train_512', 'market_caps_train_512', 'X_test_512', 'y_test_512', 'meta_test_512', 'market_caps_test_512']

=== Processing Window Size: 5 ===
Test samples: 110,850
Sequence length: 5
Optimized batch size: 2048
Starting GPU optimized inference with batch size: 2048


GPU Batch Inference: 100%|██████████| 55/55 [00:41<00:00,  1.33it/s]



Inference completed. Final batch size: 2048 (original: 2048)

=== Directional Sanity Check ===
Pos ratio (y_test): 0.5225259359494813
Neg ratio (y_test): 0.47667117726657643
Pred +1 ratio: 0.6125755525484889
Pred -1 ratio: 0.38742444745151106
      Pred+  Pred-
+1 | [35149 22773]
-1 | [32702 20137]

Window 5 processing completed:
Total time: 43.83 seconds
Processing speed: 2529 samples/second

Metrics for window 5:
R2: -0.5388
MAE: 0.0143
MSE: 0.0004
Directional Accuracy: 0.4991
Up_Directional_Acc: 0.6065
Down_Directional_Acc: 0.3806
Top25_R2: -0.5422
Top25_MAE: 0.0124
Top25_MSE: 0.0003
Top25_Dir_Acc: 0.4213
Top25_Up_Acc: 0.5836
Top25_Down_Acc: 0.3161
Bottom25_R2: -0.5395
Bottom25_MAE: 0.0187
Bottom25_MSE: 0.0007
Bottom25_Dir_Acc: 0.4992
Bottom25_Up_Acc: 0.5923
Bottom25_Down_Acc: 0.3913
Predictions saved to chronos_t5_large_w5.csv

=== Processing Window Size: 21 ===
Test samples: 110,850
Sequence length: 21
Optimized batch size: 512
Starting GPU optimized inference with batch size: 51

GPU Batch Inference: 100%|██████████| 217/217 [01:25<00:00,  2.53it/s]



Inference completed. Final batch size: 512 (original: 512)

=== Directional Sanity Check ===
Pos ratio (y_test): 0.5225259359494813
Neg ratio (y_test): 0.47667117726657643
Pred +1 ratio: 0.6544609833107803
Pred -1 ratio: 0.34553901668921966
      Pred+  Pred-
+1 | [37670 20252]
-1 | [34823 18016]

Window 21 processing completed:
Total time: 87.75 seconds
Processing speed: 1263 samples/second

Metrics for window 21:
R2: -0.1211
MAE: 0.0125
MSE: 0.0003
Directional Accuracy: 0.5028
Up_Directional_Acc: 0.6495
Down_Directional_Acc: 0.3402
Top25_R2: -0.1217
Top25_MAE: 0.0108
Top25_MSE: 0.0002
Top25_Dir_Acc: 0.4607
Top25_Up_Acc: 0.6417
Top25_Down_Acc: 0.2952
Bottom25_R2: -0.1180
Bottom25_MAE: 0.0163
Bottom25_MSE: 0.0005
Bottom25_Dir_Acc: 0.5016
Bottom25_Up_Acc: 0.6144
Bottom25_Down_Acc: 0.3693
Predictions saved to chronos_t5_large_w21.csv

=== Processing Window Size: 252 ===
Test samples: 110,850
Sequence length: 252
Optimized batch size: 64
Starting GPU optimized inference with batch size: 

GPU Batch Inference: 100%|██████████| 1733/1733 [13:32<00:00,  2.13it/s]



Inference completed. Final batch size: 64 (original: 64)

=== Directional Sanity Check ===
Pos ratio (y_test): 0.5225259359494813
Neg ratio (y_test): 0.47667117726657643
Pred +1 ratio: 0.6190617952187641
Pred -1 ratio: 0.3809382047812359
      Pred+  Pred-
+1 | [35778 22144]
-1 | [32787 20052]

Window 252 processing completed:
Total time: 819.23 seconds
Processing speed: 135 samples/second

Metrics for window 252:
R2: -0.0182
MAE: 0.0120
MSE: 0.0003
Directional Accuracy: 0.5041
Up_Directional_Acc: 0.6161
Down_Directional_Acc: 0.3778
Top25_R2: -0.0177
Top25_MAE: 0.0103
Top25_MSE: 0.0002
Top25_Dir_Acc: 0.5610
Top25_Up_Acc: 0.6154
Top25_Down_Acc: 0.4577
Bottom25_R2: -0.0189
Bottom25_MAE: 0.0156
Bottom25_MSE: 0.0005
Bottom25_Dir_Acc: 0.5062
Bottom25_Up_Acc: 0.5588
Bottom25_Down_Acc: 0.4436
Predictions saved to chronos_t5_large_w252.csv

=== Processing Window Size: 512 ===
Test samples: 110,850
Sequence length: 512
Optimized batch size: 24
Starting GPU optimized inference with batch size: 

GPU Batch Inference: 100%|██████████| 4619/4619 [31:12<00:00,  2.47it/s]



Inference completed. Final batch size: 24 (original: 24)

=== Directional Sanity Check ===
Pos ratio (y_test): 0.5225259359494813
Neg ratio (y_test): 0.47667117726657643
Pred +1 ratio: 0.5976003608479927
Pred -1 ratio: 0.4023996391520072
      Pred+  Pred-
+1 | [34638 23284]
-1 | [31554 21285]

Window 512 processing completed:
Total time: 1880.45 seconds
Processing speed: 59 samples/second

Metrics for window 512:
R2: -0.0167
MAE: 0.0119
MSE: 0.0003
Directional Accuracy: 0.5049
Up_Directional_Acc: 0.5962
Down_Directional_Acc: 0.4010
Top25_R2: -0.0153
Top25_MAE: 0.0103
Top25_MSE: 0.0002
Top25_Dir_Acc: 0.5015
Top25_Up_Acc: 0.5837
Top25_Down_Acc: 0.4213
Bottom25_R2: -0.0189
Bottom25_MAE: 0.0155
Bottom25_MSE: 0.0005
Bottom25_Dir_Acc: 0.5043
Bottom25_Up_Acc: 0.5441
Bottom25_Down_Acc: 0.4501
Predictions saved to chronos_t5_large_w512.csv

=== All predictions completed! ===


In [6]:
# ============================================================================
# Cell 3: Save results and generate visualizations
# ============================================================================
import joblib

results_dict = {
    'window_sizes': window_sizes,
    'results': results,
    'model_name': 'Chronos-T5-Large',
    'model_version': 'T4_GPU_Optimized',
    'model_params': {
        'device': device,
        'torch_dtype': 'float16' if torch.cuda.is_available() else 'float32',
        'batch_sizes': {ws: get_batch_size(ws) for ws in window_sizes},
        'gpu_optimized': True
    }
}
joblib.dump(results_dict, "/content/drive/MyDrive/chronos_t5_large_project/chronos_t5_results/results_large.pkl")
print("Results have been saved to results_large.pkl")

metrics_df = pd.DataFrame([
    {**{"Window": window_size}, **results[window_size]["metrics"]}
    for window_size in window_sizes
])
metrics_df.to_csv("/content/drive/MyDrive/chronos_t5_large_project/chronos_t5_results/chronos_t5_large_metrics.csv", index=False)
print("Metrics have been saved to chronos_t5_large_metrics.csv")

print("\n=== Metrics Summary (Large) ===")
print(metrics_df.round(4))
print("\n=== All results (Large) have been saved to Google Drive ===")


Results saved to results_large.pkl
Metrics saved to chronos_t5_large_metrics.csv

=== Metrics Summary (Large) ===
   Window      R2     MAE     MSE  Directional Accuracy  Up_Directional_Acc  \
0       5 -0.5388  0.0143  0.0004                0.4991              0.6065   
1      21 -0.1211  0.0125  0.0003                0.5028              0.6495   
2     252 -0.0182  0.0120  0.0003                0.5041              0.6161   
3     512 -0.0167  0.0119  0.0003                0.5049              0.5962   

   Down_Directional_Acc  Top25_R2  Top25_MAE  Top25_MSE  Top25_Dir_Acc  \
0                0.3806   -0.5422     0.0124     0.0003         0.4213   
1                0.3402   -0.1217     0.0108     0.0002         0.4607   
2                0.3778   -0.0177     0.0103     0.0002         0.5610   
3                0.4010   -0.0153     0.0103     0.0002         0.5015   

   Top25_Up_Acc  Top25_Down_Acc  Bottom25_R2  Bottom25_MAE  Bottom25_MSE  \
0        0.5836          0.3161      -0.539