In [None]:
# ============================================================================
# Uni2TS Small - Google Colab T4 GPU
# ============================================================================

# Step 1: Mount Google Drive and set up environment
from google.colab import drive
drive.mount('/content/drive')

!mkdir -p /content/drive/MyDrive/uni2ts_small_project

%cd /content/drive/MyDrive/uni2ts_small_project

!git clone https://github.com/SalesforceAIResearch/uni2ts
%cd uni2ts

%pip install torch transformers scikit-learn tqdm joblib gluonts lightning pytorch-lightning jaxtyping hydra-core

import torch
print("CUDA is available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("Current CUDA device:", torch.cuda.current_device())
    print("Device name:", torch.cuda.get_device_name(0))


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/uni2ts_small_project
fatal: destination path 'uni2ts' already exists and is not an empty directory.
/content/drive/MyDrive/uni2ts_small_project/uni2ts
CUDA is available: True
Current CUDA device: 0
Device name: Tesla T4


In [None]:
import sys
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import time
import os
import joblib
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from gluonts.dataset.common import ListDataset
sys.path.append("src")
from uni2ts.model.moirai import MoiraiForecast, MoiraiModule
import random

# Seed every random number generator used in this notebook (NumPy, PyTorch
# CPU/CUDA, and the stdlib `random` module).
np.random.seed(42)
torch.manual_seed(42)
random.seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    torch.cuda.manual_seed_all(42)

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

MODEL_NAME = "Salesforce/moirai-1.1-R-small"
PRED_LEN = 1
PATCH_SIZE = "auto"

print(f"Model: {MODEL_NAME}")
print(f"Prediction length: {PRED_LEN}")

data_path = "/content/drive/MyDrive/ERP Data/all_window_datasets_unscaled.npz"
if not os.path.exists(data_path):
    # Fail here, with the real file name, instead of printing and letting a
    # later cell crash with an unrelated NameError on `data`.
    raise FileNotFoundError(
        f"Data file not found: {data_path}\n"
        f"Please ensure {os.path.basename(data_path)} is uploaded to Google Drive 'ERP Data' folder"
    )

# SECURITY NOTE: allow_pickle=True lets the archive run arbitrary code while
# it is unpickled. It is needed because the metadata entries are read back
# with `.item()` (object arrays), so only load archives from a trusted source.
data = np.load(data_path, allow_pickle=True)
print("Data loaded successfully!")

# Context window lengths evaluated below; `results` is filled per window.
window_sizes = [5, 21, 252, 512]
results = {}

def get_batch_size(window_size):
    """
    Look up the inference batch size for a given context window length.

    Shorter windows get larger batches: up to 21 steps -> 4096,
    up to 252 -> 512, up to 512 -> 256, anything longer -> 4.
    """
    # (inclusive upper bound on window_size, batch size), in ascending order
    tiers = (
        (5, 4096),
        (21, 4096),
        (252, 512),
        (512, 256),
    )
    for upper_bound, size in tiers:
        if window_size <= upper_bound:
            return size
    # Windows longer than every tier fall back to a very small batch.
    return 4

# Echo the batch size chosen for each configured window so the run log
# records the configuration that was actually used.
print("T4 GPU optimized batch size configuration:")
for ws in window_sizes:
    batch_size = get_batch_size(ws)
    print(f"  Window {ws}: batch size = {batch_size}")


PyTorch version: 2.6.0+cu124
CUDA available: True
Using device: cuda
Model: Salesforce/moirai-1.1-R-small
Prediction length: 1
Data loaded successfully!
T4 GPU optimized batch size configuration:
  Window 5: batch size = 4096
  Window 21: batch size = 4096
  Window 252: batch size = 512
  Window 512: batch size = 256


In [None]:
# ============================================================================
# Evaluation metrics (R², error, and directional-accuracy helpers)
# ============================================================================

def r2_zero(y_true, y_pred):
    """
    Calculate R² (zero-based, baseline is 0)

    The score is 1 - sum((y_true - y_pred)^2) / sum(y_true^2), i.e. the
    prediction is compared against always predicting 0 rather than the mean.

    y_true: true values, array-like (N,)
    y_pred: predicted values, array-like (N,)

    Returns float('-inf') when sum(y_true^2) is 0, matching the convention
    used by r2_traditional for a zero denominator.
    """
    # Accept lists/Series as well as ndarrays; the dtype is left untouched.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    rss = np.sum((y_true - y_pred) ** 2)
    tss = np.sum(y_true ** 2)
    # Guard the division: an all-zero target would otherwise produce a
    # divide-by-zero RuntimeWarning and an inf/nan result.
    return 1 - rss / tss if tss != 0 else float('-inf')

def calc_directional_metrics(y_true, y_pred, permnos=None):
    """Calculate directional accuracy metrics.

    A prediction is a "hit" when sign(y_pred) equals sign(y_true). Samples
    whose true value is exactly 0 have no direction and are ignored.

    Args:
        y_true: true values, array-like (N,).
        y_pred: predicted values, array-like (N,).
        permnos: optional group labels, array-like (N,). When given, the three
            accuracies are computed per group and then averaged across groups
            (NaN-aware), so every group carries equal weight.

    Returns:
        Tuple ``(overall_acc, up_acc, down_acc)``:
        overall hit rate, hit rate where the truth is positive, and hit rate
        where the truth is negative.

        Without ``permnos``: a missing up/down class yields 0, and
        ``overall_acc`` is NaN when no sample has a non-zero truth.
        With ``permnos``: a class that is missing in every group yields NaN,
        and all three values are NaN when no group has a non-zero truth.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    if permnos is None:
        rates = _directional_hit_rates(y_true, y_pred, missing_class_value=0)
        # No directional sample at all: report NaN explicitly instead of
        # letting np.mean warn about an empty slice.
        return rates if rates is not None else (np.nan, 0, 0)

    df = pd.DataFrame({"permno": permnos, "yt": y_true, "yp": y_pred})
    per_group = []
    for _, g in df.groupby("permno"):
        rates = _directional_hit_rates(g["yt"].values, g["yp"].values, missing_class_value=np.nan)
        if rates is not None:  # groups without any non-zero truth are skipped
            per_group.append(rates)

    if not per_group:
        return np.nan, np.nan, np.nan

    table = np.asarray(per_group, dtype=float)  # columns: overall, up, down
    return tuple(_nanmean_or_nan(table[:, col]) for col in range(3))


def _directional_hit_rates(y_true, y_pred, missing_class_value):
    """Return (overall, up, down) sign-hit rates for one sample set, or None if no truth is non-zero."""
    s_true = np.sign(y_true)
    s_pred = np.sign(y_pred)
    keep = s_true != 0
    s_true = s_true[keep]
    s_pred = s_pred[keep]
    if s_true.size == 0:
        return None

    hits = s_true == s_pred
    up_mask = s_true > 0
    down_mask = s_true < 0
    up_acc = np.mean(hits[up_mask]) if np.any(up_mask) else missing_class_value
    down_acc = np.mean(hits[down_mask]) if np.any(down_mask) else missing_class_value
    return np.mean(hits), up_acc, down_acc


def _nanmean_or_nan(values):
    """NaN-ignoring mean that returns NaN (without a RuntimeWarning) when every entry is NaN."""
    return np.nanmean(values) if np.any(~np.isnan(values)) else np.nan

def r2_traditional(y_true, y_pred):
    """
    Conventional coefficient of determination: the baseline is the mean of
    ``y_true`` rather than zero.

    Returns float('-inf') when the total sum of squares around the mean is 0.
    """
    residuals = y_true - y_pred
    centered = y_true - y_true.mean()
    rss = np.sum(residuals**2)
    tss = np.sum(centered**2)
    if tss == 0:
        # Constant target: the ratio is undefined, report the worst score.
        return float('-inf')
    return 1 - rss / tss

def calculate_metrics(y_true, y_pred, permnos=None, meta=None):
    """Calculate evaluation metrics.

    Args:
        y_true: true values, array-like (N,).
        y_pred: predicted values, array-like (N,).
        permnos: optional group labels, array-like (N,), forwarded to
            calc_directional_metrics for per-group directional accuracy.
        meta: optional mapping/DataFrame. When it contains a
            "MKTCAP_PERCENTILE" entry of length N, the same metrics are also
            reported for rows with percentile >= 0.75 ("Top25_*") and
            <= 0.25 ("Bottom25_*"). A subgroup with no rows is omitted.

    Returns:
        dict mapping metric name to value. Overall keys: "R2_zero",
        "R2_traditional", "MAE", "MSE", "Directional Accuracy",
        "Up_Directional_Acc", "Down_Directional_Acc". Subgroup keys use the
        prefix plus "R2_zero", "R2_traditional", "MAE", "MSE", "Dir_Acc",
        "Up_Acc", "Down_Acc".
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    # Boolean-mask indexing below needs an ndarray; a plain list would fail.
    if permnos is not None:
        permnos = np.asarray(permnos)

    metrics = _metric_block(
        y_true, y_pred, permnos, prefix="",
        directional_keys=("Directional Accuracy", "Up_Directional_Acc", "Down_Directional_Acc"),
    )

    # Market cap group metrics
    if meta is not None and "MKTCAP_PERCENTILE" in meta:
        percentile = np.asarray(meta["MKTCAP_PERCENTILE"])
        subgroups = (
            ("Top25_", percentile >= 0.75),
            ("Bottom25_", percentile <= 0.25),
        )
        for prefix, mask in subgroups:
            if not np.any(mask):
                continue
            perm_subset = permnos[mask] if permnos is not None else None
            metrics.update(_metric_block(y_true[mask], y_pred[mask], perm_subset, prefix=prefix))

    return metrics


def _metric_block(y_true, y_pred, permnos, prefix,
                  directional_keys=("Dir_Acc", "Up_Acc", "Down_Acc")):
    """Compute the seven metrics for one sample set and return them keyed with `prefix`."""
    dir_acc, up_acc, down_acc = calc_directional_metrics(y_true, y_pred, permnos)
    dir_key, up_key, down_key = directional_keys
    # Insertion order is kept identical for every block so that downstream
    # DataFrame columns come out in a stable order.
    return {
        f"{prefix}R2_zero": r2_zero(y_true, y_pred),
        f"{prefix}R2_traditional": r2_traditional(y_true, y_pred),
        f"{prefix}MAE": mean_absolute_error(y_true, y_pred),
        f"{prefix}MSE": mean_squared_error(y_true, y_pred),
        f"{prefix}{dir_key}": dir_acc,
        f"{prefix}{up_key}": up_acc,
        f"{prefix}{down_key}": down_acc,
    }


In [None]:
def batch_predict_uni2ts(X_test, predictor, batch_size):
    """T4 GPU optimized batch inference function.

    Feeds ``X_test`` to ``predictor`` in chunks of at most ``batch_size``
    windows and stores one value per window: element 0 of the forecast's 0.5
    quantile.

    When a chunk raises a RuntimeError whose message contains
    "out of memory", the chunk size is halved and the *same* position is
    retried. The read position only advances after a chunk succeeds, so every
    window is predicted exactly once no matter how often the size shrinks.
    (Stepping with a fixed ``range(0, n, batch_size)`` would skip the tail of
    every chunk once the size had been reduced.)

    Args:
        X_test: sequence of windows supporting ``len()`` and slicing; each
            window is flattened into a 1-D target series.
        predictor: object whose ``predict(dataset)`` yields forecasts that
            expose ``quantile(q)``.
        batch_size: initial number of windows per chunk; must be >= 1.

    Returns:
        np.ndarray of shape ``(len(X_test),)`` with one prediction per window.

    Raises:
        ValueError: if ``batch_size`` < 1, or the predictor returns a
            different number of forecasts than windows in a chunk.
        RuntimeError: for non-OOM errors, or for OOM at a chunk size of 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    num_samples = len(X_test)
    all_predictions = np.zeros(num_samples)
    original_batch_size = batch_size

    print(f"Starting GPU optimized inference with batch size: {batch_size}")

    cursor = 0        # index of the first window that has not been predicted yet
    chunks_done = 0   # successful chunks, used to pace cache clearing
    # The bar counts samples (not batches) because the batch size may change.
    with tqdm(total=num_samples, desc="GPU Batch Inference") as progress:
        while cursor < num_samples:
            stop = min(cursor + batch_size, num_samples)
            try:
                chunk_predictions = _predict_chunk(X_test[cursor:stop], predictor, cursor)
            except RuntimeError as e:
                # Re-raise anything that is not OOM, and OOM that can no
                # longer be mitigated by shrinking the chunk.
                if "out of memory" not in str(e).lower() or batch_size <= 1:
                    raise
                print(f"\nOOM detected. Reducing batch size from {batch_size} to {batch_size//2}")
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
                batch_size = max(batch_size // 2, 1)
                continue  # retry from the same cursor with the smaller chunk

            expected = stop - cursor
            if len(chunk_predictions) != expected:
                # A silent mismatch would misalign predictions and targets.
                raise ValueError(
                    f"Predictor returned {len(chunk_predictions)} forecasts for {expected} windows"
                )
            all_predictions[cursor:stop] = chunk_predictions

            # Release cached GPU blocks on the first chunk and every 10th after.
            if chunks_done % 10 == 0 and torch.cuda.is_available():
                torch.cuda.empty_cache()
            chunks_done += 1

            progress.update(expected)
            cursor = stop

    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    print(f"\nInference completed. Final batch size: {batch_size} (original: {original_batch_size})")
    return all_predictions


def _predict_chunk(chunk, predictor, offset):
    """Forecast one chunk of windows and return a list with the median first-step value per window."""
    # Prepare GluonTS data format. The start date is a fixed placeholder and
    # item ids carry the global sample index (offset + position in chunk).
    entries = [
        {
            "target": window.flatten().tolist(),
            "start": pd.Timestamp("2000-01-01"),
            "item_id": f"item_{offset + j}",
        }
        for j, window in enumerate(chunk)
    ]
    dataset = ListDataset(entries, freq="D")
    return [forecast.quantile(0.5)[0] for forecast in predictor.predict(dataset)]

# Output folders on Drive: metrics/pickles go to uni2ts_results, per-window
# prediction CSVs go to uni2ts_predictions.
!mkdir -p /content/drive/MyDrive/uni2ts_small_project/uni2ts_results
!mkdir -p /content/drive/MyDrive/uni2ts_small_project/uni2ts_predictions

print("Starting T4 GPU optimized prediction...")
print(f"Data file contains keys: {list(data.keys())}")

# One full pass per context window length: load the split, build a model for
# that context length, predict the test set, run diagnostics on a training
# subset, compute metrics, and save predictions.
for window_size in window_sizes:
    print(f"\n=== Processing Window Size: {window_size} ===")
    start_time = time.time()

    # The archive stores one X/y train/test split per window size.
    X_train = data[f"X_train_{window_size}"]
    y_train = data[f"y_train_{window_size}"]
    X_test = data[f"X_test_{window_size}"]
    y_test = data[f"y_test_{window_size}"]
    # `.item()` unwraps a 0-d object array; presumably a dict of columns that
    # includes "PERMNO" (used below) -- TODO confirm the stored schema.
    meta_test = pd.DataFrame(data[f"meta_test_{window_size}"].item())

    current_batch_size = get_batch_size(window_size)

    print(f"Test samples: {len(X_test):,}")
    print("X_test shape:", X_test.shape)
    print(f"Optimized batch size: {current_batch_size}")

    # The model is rebuilt for every window because context_length is a
    # constructor argument; the pretrained weights are loaded each time.
    print("Initializing Uni2TS model...")
    model = MoiraiForecast(
        module=MoiraiModule.from_pretrained(MODEL_NAME),
        prediction_length=PRED_LEN,
        context_length=window_size,
        patch_size=PATCH_SIZE,
        num_samples=100,  # presumably sample paths drawn per forecast -- confirm against uni2ts docs
        target_dim=1,
        feat_dynamic_real_dim=0,
        past_feat_dynamic_real_dim=0,
    )

    # Fall back to CPU if moving to the selected device fails. Note that this
    # rebinds the notebook-level `device`, so later windows also use CPU.
    try:
        model = model.to(device)
        print(f"Model loaded on: {device}")
    except Exception as e:
        print(f"Failed to load on {device}, using CPU: {str(e)}")
        model = model.to("cpu")
        device = "cpu"

    predictor = model.create_predictor(batch_size=current_batch_size)

    print("Starting batch inference...")
    all_predictions = batch_predict_uni2ts(X_test, predictor, current_batch_size)

    # Compare the scale of targets and predictions as a quick sanity check.
    print("\n=== Quick value scale check ===")
    print("y_true mean/std/min/max:", f"{y_test.mean():.6f}", f"{y_test.std():.6f}", f"{y_test.min():.6f}", f"{y_test.max():.6f}")
    print("y_pred mean/std/min/max:", f"{all_predictions.mean():.6f}", f"{all_predictions.std():.6f}", f"{all_predictions.min():.6f}", f"{all_predictions.max():.6f}")

    # Diagnostics on up to 5000 randomly chosen training windows (sampled
    # without replacement). The statistics are only reported and stored; the
    # test predictions are not modified by them.
    print("\n=== Correlation analysis (based on training subset) ===")
    print(f"Running inference on training subset for correlation analysis...")
    calib_size = min(len(X_train), 5000)
    calib_indices = np.random.choice(len(X_train), calib_size, replace=False)
    X_calib = X_train[calib_indices]
    y_calib_true = y_train[calib_indices]

    # Batch size is capped at 512 for this diagnostic pass.
    calib_predictions = batch_predict_uni2ts(X_calib, predictor, min(current_batch_size, 512))

    print(f"Analysis set statistics:")
    print(f"  Set size: {len(y_calib_true)}")
    print(f"  y_true: mean={np.mean(y_calib_true):.6f}, std={np.std(y_calib_true):.6f}")
    print(f"  y_pred: mean={np.mean(calib_predictions):.6f}, std={np.std(calib_predictions):.6f}")

    correlation = np.corrcoef(y_calib_true, calib_predictions)[0, 1]
    print(f"  Correlation coefficient: {correlation:.6f}")

    # Ratio of true to predicted standard deviation; inf when the predictions
    # are (numerically) constant.
    y_pred_std = np.std(calib_predictions)
    y_true_std = np.std(y_calib_true)
    std_ratio = y_true_std / y_pred_std if y_pred_std > 1e-12 else float('inf')
    print(f"  Std ratio (true/pred): {std_ratio:.6f}")

    mean_diff = np.mean(y_calib_true) - np.mean(calib_predictions)
    print(f"  Mean difference (true - pred): {mean_diff:.6f}")

    # This is the zero-baseline R² (r2_zero), not the mean-baseline variant.
    r2_train_subset = r2_zero(y_calib_true, calib_predictions)
    print(f"  R² on training subset: {r2_train_subset:.6f}")

    print("\nCalculating evaluation metrics...")
    permnos_test = meta_test["PERMNO"].values
    # Best effort: a metrics failure is reported and leaves `metrics` empty so
    # predictions for this window are still saved below.
    try:
        metrics = calculate_metrics(y_test, all_predictions, permnos_test, meta_test)
    except Exception as err:
        print(f" calculate_metrics failed: {err}")
        metrics = {}

    print("\n=== Directional Sanity Check ===")
    print("Pos ratio (y_test):", (y_test > 0).mean())
    print("Neg ratio (y_test):", (y_test < 0).mean())
    sign_pred = np.sign(all_predictions)
    print("Pred +1 ratio:", (sign_pred > 0).mean())
    print("Pred -1 ratio:", (sign_pred < 0).mean())

    # NOTE(review): this import runs on every loop iteration; it belongs with
    # the other imports at the top of the notebook.
    from sklearn.metrics import confusion_matrix
    # labels=[1, -1] restricts the matrix to up/down; samples whose true or
    # predicted sign is 0 are not counted.
    conf = confusion_matrix(np.sign(y_test), sign_pred, labels=[1, -1])
    print("      Pred+  Pred-")
    print("+1 |", conf[0])
    print("-1 |", conf[1])

    # Keep everything needed by the summary/plot cell, keyed by window size.
    results[window_size] = {
        'predictions': all_predictions,
        'true_values': y_test,
        'metrics': metrics,
        'meta': meta_test,
        'analysis_stats': {
            'correlation': correlation,
            'std_ratio': std_ratio,
            'mean_diff': mean_diff,
            'r2_train_subset': r2_train_subset
        }
    }

    # NOTE(review): the timer started before data loading and model creation
    # and also spans the training-subset inference and metrics, so the
    # samples/second figure understates pure test-set inference throughput.
    end_time = time.time()
    elapsed_time = end_time - start_time
    samples_per_second = len(X_test) / elapsed_time

    print(f"\nWindow {window_size} processing completed:")
    print(f"Total time: {elapsed_time:.2f} seconds")
    print(f"Processing speed: {samples_per_second:.0f} samples/second")

    print(f"\nMetrics for window {window_size}:")
    if metrics:
        for metric_name, value in metrics.items():
            print(f"{metric_name}: {value:.4f}")
    else:
        print("  (metrics is empty, skipped)")

    # Persist per-sample predictions so they survive a Colab disconnect.
    results_df = pd.DataFrame({
        'PERMNO': meta_test['PERMNO'],
        'y_true': y_test,
        'y_pred': all_predictions
    })
    results_df.to_csv(f'/content/drive/MyDrive/uni2ts_small_project/uni2ts_predictions/uni2ts_small_w{window_size}.csv', index=False)
    print(f"Predictions saved to uni2ts_small_w{window_size}.csv")

    # Drop references to the model before the next window builds a new one,
    # then release cached GPU memory.
    try:
        del model, predictor
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    except Exception as e:
        print(f"Warning: Memory cleanup failed: {str(e)}")

print("\n=== All predictions completed! ===")


In [None]:
# ============================================================================
# Save results and generate visualizations
# ============================================================================

# Bundle raw per-window results with the run configuration and persist them.
results_dict = {
    'window_sizes': window_sizes,
    'results': results,
    'model_name': 'Uni2TS-Small',
    'model_version': 'T4_GPU_Optimized_NoCalibration',
    'model_params': {
        'device': device,
        'batch_sizes': {ws: get_batch_size(ws) for ws in window_sizes},
        'gpu_optimized': True,
        'calibration_applied': False
    }
}
joblib.dump(results_dict, "/content/drive/MyDrive/uni2ts_small_project/uni2ts_results/results.pkl")
print("Results saved to results.pkl")

# Summarise every window that finished AND has a non-empty metrics dict.
# The membership test keeps this cell usable when the prediction loop stopped
# early and some window sizes never made it into `results`.
valid_results = {
    ws: results[ws]
    for ws in window_sizes
    if ws in results and results[ws]['metrics']
}

if valid_results:
    metrics_df = pd.DataFrame([
        {**{"Window": window_size}, **valid_results[window_size]["metrics"]}
        for window_size in valid_results.keys()
    ])
    metrics_df.to_csv("/content/drive/MyDrive/uni2ts_small_project/uni2ts_results/uni2ts_small_metrics.csv", index=False)
    print("Metrics saved to uni2ts_small_metrics.csv")

    print("\n=== Metrics Summary ===")
    print(metrics_df.round(4))

    # Column name in metrics_df -> panel title. The R² entries must use the
    # keys the metrics dict actually contains ("R2_zero", "R2_traditional");
    # a key such as "R²" matches no column and would silently drop the panel.
    metric_names = {
        "R2_zero": "R² (zero baseline)",
        "R2_traditional": "R² (mean baseline)",
        "MAE": "Mean Absolute Error (MAE)",
        "MSE": "Mean Squared Error (MSE)",
        "Directional Accuracy": "Directional Accuracy",
        "Up_Directional_Acc": "Up Directional Accuracy",
        "Down_Directional_Acc": "Down Directional Accuracy"
    }
    metrics_to_plot = list(metric_names)

    available_metrics = [m for m in metrics_to_plot if m in metrics_df.columns]

    if available_metrics:
        # Two panels per row; the row count follows the number of metrics so
        # none is cut off. squeeze=False always returns a 2-D axes array.
        n_cols = 2
        n_rows = -(-len(available_metrics) // n_cols)  # ceiling division
        fig, axs = plt.subplots(n_rows, n_cols, figsize=(14, 4 * n_rows), squeeze=False)
        axs = axs.flatten()

        for ax, metric in zip(axs, available_metrics):
            ax.plot(metrics_df["Window"], metrics_df[metric],
                    marker='o', linestyle='-', linewidth=2, color='#1f77b4')

            ax.set_title(metric_names[metric], fontsize=14, weight="bold")
            ax.set_xlabel("Window Size", fontsize=12)
            ax.set_ylabel(metric_names[metric], fontsize=12)
            ax.set_xticks(list(valid_results.keys()))
            ax.grid(True, linestyle='--', alpha=0.6)

        # Hide the unused panel when the metric count is odd.
        for ax in axs[len(available_metrics):]:
            ax.set_visible(False)

        plt.tight_layout()
        plt.savefig("/content/drive/MyDrive/uni2ts_small_project/uni2ts_results/uni2ts_small_metrics.png", dpi=300, bbox_inches='tight')
        plt.show()
else:
    print("No valid metrics found, skipping visualization")

print("\n=== All results saved to Google Drive! ===")
print("Files saved to: /content/drive/MyDrive/uni2ts_small_project/")
