In [None]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Create project directory
!mkdir -p /content/drive/MyDrive/timesfm1.0_project

# Change to project directory
%cd /content/drive/MyDrive/timesfm1.0_project
!git clone https://github.com/google-research/timesfm
%cd timesfm

# Install required dependencies
%pip install timesfm
%pip install scikit-learn
%pip install pandas
%pip install numpy
%pip install matplotlib
%pip install tqdm
%pip install joblib

# Check if CUDA is available
import torch
print("CUDA is available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("Current CUDA device:", torch.cuda.current_device())
    print("Device name:", torch.cuda.get_device_name(0))
else:
    print("Using CPU for computations")


Mounted at /content/drive
/content/drive/MyDrive/timesfm1.0_project
Cloning into 'timesfm'...
remote: Enumerating objects: 933, done.[K
remote: Counting objects: 100% (437/437), done.[K
remote: Compressing objects: 100% (175/175), done.[K
remote: Total 933 (delta 334), reused 262 (delta 262), pack-reused 496 (from 2)[K
Receiving objects: 100% (933/933), 2.25 MiB | 11.79 MiB/s, done.
Resolving deltas: 100% (496/496), done.
/content/drive/MyDrive/timesfm1.0_project/timesfm
Collecting timesfm
  Downloading timesfm-1.3.0-py3-none-any.whl.metadata (15 kB)
Collecting einshape>=1.0.0 (from timesfm)
  Downloading einshape-1.0-py3-none-any.whl.metadata (706 bytes)
Collecting utilsforecast>=0.1.10 (from timesfm)
  Downloading utilsforecast-0.2.12-py3-none-any.whl.metadata (7.6 kB)
Collecting InquirerPy==0.3.4 (from huggingface_hub[cli]>=0.23.0->timesfm)
  Downloading InquirerPy-0.3.4-py3-none-any.whl.metadata (8.1 kB)
Collecting pfzy<0.4.0,>=0.3.1 (from InquirerPy==0.3.4->huggingface_hub[cli

In [None]:
import sys
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import timesfm
import os
import joblib
import random

# Fix the seed of every RNG in play (stdlib, NumPy, PyTorch CPU and CUDA)
# so that repeated runs of the notebook are comparable.
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

# Query CUDA availability once and reuse the answer below.
cuda_ready = torch.cuda.is_available()
if cuda_ready:
    torch.cuda.manual_seed(42)
    torch.cuda.manual_seed_all(42)

# Report the runtime environment and remember the selected device name.
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_ready}")
device = "cuda" if cuda_ready else "cpu"
print(f"Using device: {device}")

def _load_timesfm(backend, per_core_batch_size):
    """Build the TimesFM model from the `google/timesfm-1.0-200m-pytorch` checkpoint.

    backend: "gpu" or "cpu", forwarded to TimesFmHparams.
    per_core_batch_size: batch size forwarded to TimesFmHparams.

    The model is configured with horizon_len=1 (one-step-ahead forecasts).
    Returns the constructed `timesfm.TimesFm` instance; any exception raised
    while building or loading the checkpoint propagates to the caller.
    """
    return timesfm.TimesFm(
        hparams=timesfm.TimesFmHparams(
            backend=backend,
            per_core_batch_size=per_core_batch_size,
            horizon_len=1,
        ),
        checkpoint=timesfm.TimesFmCheckpoint(
            huggingface_repo_id="google/timesfm-1.0-200m-pytorch"
        )
    )


# Prefer the GPU when one is visible; otherwise start on the CPU directly.
backend_type = "gpu" if torch.cuda.is_available() else "cpu"
try:
    tfm = _load_timesfm(backend_type, per_core_batch_size=512)
    print(f"Successfully initialized TimesFM model using {backend_type}")
except Exception as e:
    # Best-effort fallback: retry on the CPU with a smaller batch size.
    # The message names the backend that actually failed (the first attempt
    # may itself have been a CPU attempt).
    print(f"{backend_type.upper()} initialization failed, error: {str(e)}")
    print("Trying CPU initialization...")
    # Keep `backend_type` in sync with the backend the model really uses.
    backend_type = "cpu"
    tfm = _load_timesfm(backend_type, per_core_batch_size=256)
    print("CPU initialization successful")

print("TimesFM model loaded successfully!")

# Load the pre-built window datasets (one X/y/meta triple per window size).
data_path = "/content/drive/MyDrive/ERP Data/all_window_datasets_unscaled.npz"
if os.path.exists(data_path):
    # NOTE(review): allow_pickle=True lets np.load unpickle object arrays
    # (the meta_* entries are read with .item() later on). Unpickling can
    # execute arbitrary code, so only load archives from a trusted source.
    data = np.load(data_path, allow_pickle=True)
    print("Data loaded successfully!")
else:
    # Bind `data` explicitly so the downstream "data is None" guard fires
    # instead of silently reusing a stale value from an earlier run.
    data = None
    print(f"Data file not found: {data_path}")
    # Name the file that is actually being looked for.
    print(f"Please ensure {os.path.basename(data_path)} is uploaded to Google Drive 'ERP Data' folder")

# Look-back window lengths to evaluate, and the per-window result store
# filled in by the prediction cell.
window_sizes = [5, 21, 252, 512]
results = {}


 See https://github.com/google-research/timesfm/blob/master/README.md for updated APIs.
Loaded PyTorch TimesFM, likely because python version is 3.11.13 (main, Jun  4 2025, 08:57:29) [GCC 11.4.0].
PyTorch version: 2.6.0+cu124
CUDA available: True
Using device: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

.gitattributes: 0.00B [00:00, ?B/s]

README.md: 0.00B [00:00, ?B/s]

torch_model.ckpt:   0%|          | 0.00/814M [00:00<?, ?B/s]

Successfully initialized TimesFM model using gpu
TimesFM model loaded successfully!
Data loaded successfully!


In [None]:
# Evaluation functions (consistent with Linear Models)
def r2_zero(y_true, y_pred):
    """
    Compute zero-based R² (the baseline is the constant prediction 0):

        R² = 1 - sum((y_true - y_pred)^2) / sum(y_true^2)

    y_true: array-like of true values (N,)
    y_pred: array-like of predicted values (N,)

    Returns a float scalar. When sum(y_true^2) is 0 the ratio is undefined:
    the result is nan if the residual sum is also 0 and -inf otherwise
    (no RuntimeWarning is emitted for this degenerate case).
    """
    # Accept lists/tuples as well as arrays, like calc_directional_metrics does.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    rss = np.sum((y_true - y_pred) ** 2)
    tss = np.sum(y_true ** 2)
    # The zero-denominator outcome is documented above, so silence the
    # floating-point warnings it would otherwise trigger.
    with np.errstate(divide="ignore", invalid="ignore"):
        return 1 - rss / tss

def calc_directional_metrics(y_true, y_pred, permnos=None):
    """
    Directional (sign) accuracy of predictions (consistent with Linear Models).

    Samples whose true value has sign 0 are dropped before scoring.

    - permnos is None: accuracies are computed over all remaining samples at
      once; the up/down accuracy is 0 when there is no up/down sample.
    - permnos given: accuracies are computed per stock (group) and then
      averaged with nanmean; a stock without up/down samples contributes nan
      to that average, and a stock with no non-zero true sign is skipped.

    Returns (overall_acc, up_acc, down_acc).
    """
    truth = np.asarray(y_true)
    guess = np.asarray(y_pred)

    def _nonzero_signs(t, p):
        """Signs of t and p, restricted to positions where sign(t) != 0."""
        sign_t = np.sign(t)
        sign_p = np.sign(p)
        keep = sign_t != 0
        return sign_t[keep], sign_p[keep]

    def _hit_rate(sign_t, sign_p, selector, when_empty):
        """Share of matching signs inside `selector`, or `when_empty` if none selected."""
        if np.any(selector):
            return np.mean(sign_t[selector] == sign_p[selector])
        return when_empty

    if permnos is not None:
        frame = pd.DataFrame({"permno": permnos, "yt": truth, "yp": guess})
        per_stock = []  # one (overall, up, down) tuple per scored stock

        for _, stock_rows in frame.groupby("permno"):
            sign_t, sign_p = _nonzero_signs(stock_rows["yt"].values, stock_rows["yp"].values)
            if len(sign_t) == 0:
                continue
            per_stock.append((
                np.mean(sign_t == sign_p),
                _hit_rate(sign_t, sign_p, sign_t > 0, np.nan),
                _hit_rate(sign_t, sign_p, sign_t < 0, np.nan),
            ))

        overall_acc = np.nanmean([scores[0] for scores in per_stock])
        up_acc = np.nanmean([scores[1] for scores in per_stock])
        down_acc = np.nanmean([scores[2] for scores in per_stock])
        return overall_acc, up_acc, down_acc

    # Sample-level scoring without grouping.
    sign_t, sign_p = _nonzero_signs(truth, guess)
    overall_acc = np.mean(sign_t == sign_p)
    up_acc = _hit_rate(sign_t, sign_p, sign_t > 0, 0)
    down_acc = _hit_rate(sign_t, sign_p, sign_t < 0, 0)
    return overall_acc, up_acc, down_acc

def calculate_metrics(y_true, y_pred, permnos=None, meta=None):
    """Compute evaluation metrics (consistent with Linear Models)

    y_true:  array-like of true values (N,)
    y_pred:  array-like of predicted values (N,)
    permnos: optional array-like of stock identifiers (N,); when given,
             directional accuracies are averaged per stock.
    meta:    optional mapping/DataFrame; when it contains the key
             "MKTCAP_PERCENTILE", the same metrics are additionally reported
             for samples with percentile >= 0.75 (keys prefixed "Top25_") and
             <= 0.25 (keys prefixed "Bottom25_"). A group with no samples is
             omitted from the result.

    Returns a dict mapping metric name to value.
    """
    from sklearn.metrics import mean_squared_error, mean_absolute_error

    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    # Coerce so the boolean-mask indexing below also works for plain lists.
    permnos = None if permnos is None else np.asarray(permnos)

    def _score(yt, yp, perm):
        """Return (r2, mae, mse, dir_acc, up_acc, down_acc) for one set of samples."""
        r2 = r2_zero(yt, yp)
        mae = mean_absolute_error(yt, yp)
        mse = mean_squared_error(yt, yp)
        dir_acc, up_acc, down_acc = calc_directional_metrics(yt, yp, perm)
        return r2, mae, mse, dir_acc, up_acc, down_acc

    r2, mae, mse, dir_acc, up_acc, down_acc = _score(y_true, y_pred, permnos)

    metrics = {
        "R²": r2,
        "MAE": mae,
        "MSE": mse,
        "Directional Accuracy": dir_acc,
        "Up_Directional_Acc": up_acc,
        "Down_Directional_Acc": down_acc
    }

    # Market cap group metrics
    if meta is not None and "MKTCAP_PERCENTILE" in meta:
        # Positional boolean masks, independent of any pandas index on `meta`.
        percentile = np.asarray(meta["MKTCAP_PERCENTILE"])
        groups = (
            ("Top25", percentile >= 0.75),
            ("Bottom25", percentile <= 0.25),
        )

        for prefix, mask in groups:
            if not np.any(mask):
                continue
            perm_group = permnos[mask] if permnos is not None else None
            g_r2, g_mae, g_mse, g_dir, g_up, g_down = _score(
                y_true[mask], y_pred[mask], perm_group
            )
            metrics.update({
                f"{prefix}_R2": g_r2,
                f"{prefix}_MAE": g_mae,
                f"{prefix}_MSE": g_mse,
                f"{prefix}_Dir_Acc": g_dir,
                f"{prefix}_Up_Acc": g_up,
                f"{prefix}_Down_Acc": g_down
            })

    return metrics


In [None]:

# Imported once here instead of on every iteration of the window loop.
from sklearn.metrics import confusion_matrix

# Create output directories (pure-Python equivalent of `mkdir -p`)
results_dir = "/content/drive/MyDrive/timesfm1.0_project/timesfm1.0_results"
predictions_dir = "/content/drive/MyDrive/timesfm1.0_project/timesfm1.0_predictions"
os.makedirs(results_dir, exist_ok=True)
os.makedirs(predictions_dir, exist_ok=True)

# Check if data is loaded
if 'data' not in locals() or data is None:
    raise NameError("Data not properly loaded! Please run the data loading cell above and ensure the data file exists.")

print("Starting prediction...")
print(f"Data file contains keys: {list(data.keys())}")

for window_size in window_sizes:
    print(f"\n=== Processing Window Size: {window_size} ===")

    # Test split for this look-back window; meta is stored as a pickled
    # object inside the archive, hence the .item() unwrapping.
    X_test = data[f"X_test_{window_size}"]
    y_test = data[f"y_test_{window_size}"]
    meta_test = pd.DataFrame(data[f"meta_test_{window_size}"].item())

    n_samples = len(X_test)
    print(f"Test samples: {n_samples}")
    print(f"Input sequence length: {X_test.shape[1]}")

    all_predictions = []
    # Number of samples whose forecast failed and was replaced by 0.0.
    n_zero_filled = 0

    batch_size = 1024
    num_batches = (n_samples + batch_size - 1) // batch_size  # ceil division

    for batch_idx in tqdm(range(num_batches), desc="Predicting batches"):
        start_idx = batch_idx * batch_size
        end_idx = min(start_idx + batch_size, n_samples)

        try:
            # One context sequence per sample; freq=0 for every series.
            batch_sequences = list(X_test[start_idx:end_idx])
            freq_batch = [0] * len(batch_sequences)

            point_forecast, _ = tfm.forecast(
                batch_sequences,
                freq=freq_batch,
            )

            # Keep only the first forecast step of each series.
            batch_predictions = point_forecast[:, 0].tolist()

        except Exception as e:
            print(f"Warning: Batch prediction failed for batch {batch_idx}, falling back to individual predictions. Error: {str(e)}")
            batch_predictions = []
            for i in range(start_idx, end_idx):
                try:
                    sequence = X_test[i].reshape(1, -1)
                    point_forecast, _ = tfm.forecast(sequence, freq=[0])
                    batch_predictions.append(point_forecast[0][0])
                except Exception as sample_error:
                    # Best-effort: keep the output aligned with y_test by
                    # substituting 0.0, but count and report the failure
                    # instead of hiding it. `Exception` (not a bare except)
                    # lets KeyboardInterrupt still stop the run.
                    n_zero_filled += 1
                    if n_zero_filled <= 5:
                        print(f"Warning: prediction failed for sample {i}, using 0.0. Error: {str(sample_error)}")
                    batch_predictions.append(0.0)

        all_predictions.extend(batch_predictions)

        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        if batch_idx % 10 == 0:
            # end_idx is already clamped to the number of samples.
            processed = end_idx
            print(f"Processed: {processed}/{len(X_test)} samples ({processed/len(X_test)*100:.1f}%)")

    all_predictions = np.array(all_predictions)

    print(f"Predictions completed: {len(all_predictions)} samples")
    if n_zero_filled:
        print(f"Warning: {n_zero_filled} samples could not be forecast and were filled with 0.0")

    permnos_test = meta_test["PERMNO"].values
    metrics = calculate_metrics(y_test, all_predictions, permnos_test, meta_test)

    # Compare the sign distribution of targets and predictions.
    print("\n=== Directional Sanity Check ===")
    print("Pos ratio (y_test):", (y_test > 0).mean())
    print("Neg ratio (y_test):", (y_test < 0).mean())
    sign_pred = np.sign(all_predictions)
    print("Pred +1 ratio:", (sign_pred > 0).mean())
    print("Pred -1 ratio:", (sign_pred < 0).mean())

    # Rows: true sign (+1, -1); columns: predicted sign (+1, -1).
    conf = confusion_matrix(np.sign(y_test), sign_pred, labels=[1, -1])
    print("      Pred+  Pred-")
    print("+1 |", conf[0])
    print("-1 |", conf[1])

    results[window_size] = {
        'predictions': all_predictions,
        'true_values': y_test,
        'metrics': metrics,
        'meta': meta_test
    }

    print(f"\nMetrics for window {window_size}:")
    for metric_name, value in metrics.items():
        print(f"{metric_name}: {value:.4f}")

    # Persist per-sample predictions for this window.
    results_df = pd.DataFrame({
        'PERMNO': meta_test['PERMNO'],
        'y_true': y_test,
        'y_pred': all_predictions
    })
    results_df.to_csv(f'{predictions_dir}/timesfm1.0_w{window_size}.csv', index=False)
    print(f"Predictions saved to timesfm1.0_w{window_size}.csv")

print("\n=== All predictions completed! ===")


Starting prediction...
Data file contains keys: ['X_train_5', 'y_train_5', 'meta_train_5', 'market_caps_train_5', 'X_test_5', 'y_test_5', 'meta_test_5', 'market_caps_test_5', 'X_train_21', 'y_train_21', 'meta_train_21', 'market_caps_train_21', 'X_test_21', 'y_test_21', 'meta_test_21', 'market_caps_test_21', 'X_train_252', 'y_train_252', 'meta_train_252', 'market_caps_train_252', 'X_test_252', 'y_test_252', 'meta_test_252', 'market_caps_test_252', 'X_train_512', 'y_train_512', 'meta_train_512', 'market_caps_train_512', 'X_test_512', 'y_test_512', 'meta_test_512', 'market_caps_test_512']

=== Processing Window Size: 5 ===
Test samples: 110850
Input sequence length: 5


Predicting batches:   1%|          | 1/109 [00:03<05:40,  3.16s/it]

Processed: 1024/110850 samples (0.9%)


Predicting batches:  10%|█         | 11/109 [00:20<02:54,  1.78s/it]

Processed: 11264/110850 samples (10.2%)


Predicting batches:  19%|█▉        | 21/109 [00:38<02:39,  1.81s/it]

Processed: 21504/110850 samples (19.4%)


Predicting batches:  28%|██▊       | 31/109 [00:57<02:26,  1.88s/it]

Processed: 31744/110850 samples (28.6%)


Predicting batches:  38%|███▊      | 41/109 [01:16<02:13,  1.97s/it]

Processed: 41984/110850 samples (37.9%)


Predicting batches:  47%|████▋     | 51/109 [01:37<01:57,  2.03s/it]

Processed: 52224/110850 samples (47.1%)


Predicting batches:  56%|█████▌    | 61/109 [01:56<01:33,  1.95s/it]

Processed: 62464/110850 samples (56.4%)


Predicting batches:  65%|██████▌   | 71/109 [02:16<01:13,  1.92s/it]

Processed: 72704/110850 samples (65.6%)


Predicting batches:  74%|███████▍  | 81/109 [02:35<00:54,  1.95s/it]

Processed: 82944/110850 samples (74.8%)


Predicting batches:  83%|████████▎ | 91/109 [02:55<00:35,  1.96s/it]

Processed: 93184/110850 samples (84.1%)


Predicting batches:  93%|█████████▎| 101/109 [03:14<00:15,  1.96s/it]

Processed: 103424/110850 samples (93.3%)


Predicting batches: 100%|██████████| 109/109 [03:29<00:00,  1.92s/it]


Predictions completed: 110850 samples

=== Directional Sanity Check ===
Pos ratio (y_test): 0.5225259359494813
Neg ratio (y_test): 0.47667117726657643
Pred +1 ratio: 0.4636084799278304
Pred -1 ratio: 0.5363915200721696
      Pred+  Pred-
+1 | [26460 31462]
-1 | [24892 27947]

Metrics for window 5:
R²: -0.3359
MAE: 0.0137
MSE: 0.0004
Directional Accuracy: 0.4912
Up_Directional_Acc: 0.4561
Down_Directional_Acc: 0.5282
Top25_R2: -0.3412
Top25_MAE: 0.0119
Top25_MSE: 0.0003
Top25_Dir_Acc: 0.4942
Top25_Up_Acc: 0.4581
Top25_Down_Acc: 0.5133
Bottom25_R2: -0.3259
Bottom25_MAE: 0.0178
Bottom25_MSE: 0.0006
Bottom25_Dir_Acc: 0.4911
Bottom25_Up_Acc: 0.4324
Bottom25_Down_Acc: 0.5460
Predictions saved to timesfm1.0_w5.csv

=== Processing Window Size: 21 ===
Test samples: 110850
Input sequence length: 21


Predicting batches:   1%|          | 1/109 [00:01<03:31,  1.96s/it]

Processed: 1024/110850 samples (0.9%)


Predicting batches:  10%|█         | 11/109 [00:21<03:12,  1.96s/it]

Processed: 11264/110850 samples (10.2%)


Predicting batches:  19%|█▉        | 21/109 [00:41<02:53,  1.97s/it]

Processed: 21504/110850 samples (19.4%)


Predicting batches:  28%|██▊       | 31/109 [01:00<02:32,  1.95s/it]

Processed: 31744/110850 samples (28.6%)


Predicting batches:  38%|███▊      | 41/109 [01:20<02:11,  1.94s/it]

Processed: 41984/110850 samples (37.9%)


Predicting batches:  47%|████▋     | 51/109 [01:39<01:52,  1.94s/it]

Processed: 52224/110850 samples (47.1%)


Predicting batches:  56%|█████▌    | 61/109 [01:58<01:33,  1.95s/it]

Processed: 62464/110850 samples (56.4%)


Predicting batches:  65%|██████▌   | 71/109 [02:18<01:14,  1.96s/it]

Processed: 72704/110850 samples (65.6%)


Predicting batches:  74%|███████▍  | 81/109 [02:38<00:54,  1.95s/it]

Processed: 82944/110850 samples (74.8%)


Predicting batches:  83%|████████▎ | 91/109 [02:57<00:35,  1.95s/it]

Processed: 93184/110850 samples (84.1%)


Predicting batches:  93%|█████████▎| 101/109 [03:17<00:15,  1.96s/it]

Processed: 103424/110850 samples (93.3%)


Predicting batches: 100%|██████████| 109/109 [03:31<00:00,  1.94s/it]


Predictions completed: 110850 samples

=== Directional Sanity Check ===
Pos ratio (y_test): 0.5225259359494813
Neg ratio (y_test): 0.47667117726657643
Pred +1 ratio: 0.47686062246278754
Pred -1 ratio: 0.5231393775372124
      Pred+  Pred-
+1 | [27289 30633]
-1 | [25529 27310]

Metrics for window 21:
R²: -0.1422
MAE: 0.0127
MSE: 0.0003
Directional Accuracy: 0.4929
Up_Directional_Acc: 0.4702
Down_Directional_Acc: 0.5158
Top25_R2: -0.1386
Top25_MAE: 0.0109
Top25_MSE: 0.0002
Top25_Dir_Acc: 0.4939
Top25_Up_Acc: 0.4334
Top25_Down_Acc: 0.5171
Bottom25_R2: -0.1401
Bottom25_MAE: 0.0165
Bottom25_MSE: 0.0005
Bottom25_Dir_Acc: 0.4927
Bottom25_Up_Acc: 0.4172
Bottom25_Down_Acc: 0.5694
Predictions saved to timesfm1.0_w21.csv

=== Processing Window Size: 252 ===
Test samples: 110850
Input sequence length: 252


Predicting batches:   1%|          | 1/109 [00:01<03:35,  1.99s/it]

Processed: 1024/110850 samples (0.9%)


Predicting batches:  10%|█         | 11/109 [00:22<03:20,  2.04s/it]

Processed: 11264/110850 samples (10.2%)


Predicting batches:  19%|█▉        | 21/109 [00:42<02:59,  2.04s/it]

Processed: 21504/110850 samples (19.4%)


Predicting batches:  28%|██▊       | 31/109 [01:02<02:35,  2.00s/it]

Processed: 31744/110850 samples (28.6%)


Predicting batches:  38%|███▊      | 41/109 [01:22<02:15,  1.99s/it]

Processed: 41984/110850 samples (37.9%)


Predicting batches:  47%|████▋     | 51/109 [01:42<01:56,  2.01s/it]

Processed: 52224/110850 samples (47.1%)


Predicting batches:  56%|█████▌    | 61/109 [02:02<01:36,  2.02s/it]

Processed: 62464/110850 samples (56.4%)


Predicting batches:  65%|██████▌   | 71/109 [02:23<01:16,  2.01s/it]

Processed: 72704/110850 samples (65.6%)


Predicting batches:  74%|███████▍  | 81/109 [02:43<00:56,  2.01s/it]

Processed: 82944/110850 samples (74.8%)


Predicting batches:  83%|████████▎ | 91/109 [03:03<00:35,  2.00s/it]

Processed: 93184/110850 samples (84.1%)


Predicting batches:  93%|█████████▎| 101/109 [03:23<00:16,  2.00s/it]

Processed: 103424/110850 samples (93.3%)


Predicting batches: 100%|██████████| 109/109 [03:38<00:00,  2.00s/it]


Predictions completed: 110850 samples

=== Directional Sanity Check ===
Pos ratio (y_test): 0.5225259359494813
Neg ratio (y_test): 0.47667117726657643
Pred +1 ratio: 0.5669102390617953
Pred -1 ratio: 0.4330897609382048
      Pred+  Pred-
+1 | [32653 25269]
-1 | [30128 22711]

Metrics for window 252:
R²: -0.0239
MAE: 0.0120
MSE: 0.0003
Directional Accuracy: 0.4998
Up_Directional_Acc: 0.5616
Down_Directional_Acc: 0.4276
Top25_R2: -0.0237
Top25_MAE: 0.0103
Top25_MSE: 0.0002
Top25_Dir_Acc: 0.4903
Top25_Up_Acc: 0.5893
Top25_Down_Acc: 0.4159
Bottom25_R2: -0.0256
Bottom25_MAE: 0.0156
Bottom25_MSE: 0.0005
Bottom25_Dir_Acc: 0.5053
Bottom25_Up_Acc: 0.4745
Bottom25_Down_Acc: 0.5265
Predictions saved to timesfm1.0_w252.csv

=== Processing Window Size: 512 ===
Test samples: 110850
Input sequence length: 512


Predicting batches:   1%|          | 1/109 [00:01<03:34,  1.99s/it]

Processed: 1024/110850 samples (0.9%)


Predicting batches:  10%|█         | 11/109 [00:22<03:20,  2.04s/it]

Processed: 11264/110850 samples (10.2%)


Predicting batches:  19%|█▉        | 21/109 [00:43<03:04,  2.09s/it]

Processed: 21504/110850 samples (19.4%)


Predicting batches:  28%|██▊       | 31/109 [01:03<02:35,  1.99s/it]

Processed: 31744/110850 samples (28.6%)


Predicting batches:  38%|███▊      | 41/109 [01:22<02:13,  1.96s/it]

Processed: 41984/110850 samples (37.9%)


Predicting batches:  47%|████▋     | 51/109 [01:42<01:55,  1.99s/it]

Processed: 52224/110850 samples (47.1%)


Predicting batches:  56%|█████▌    | 61/109 [02:02<01:37,  2.03s/it]

Processed: 62464/110850 samples (56.4%)


Predicting batches:  65%|██████▌   | 71/109 [02:23<01:16,  2.02s/it]

Processed: 72704/110850 samples (65.6%)


Predicting batches:  74%|███████▍  | 81/109 [02:42<00:55,  1.99s/it]

Processed: 82944/110850 samples (74.8%)


Predicting batches:  83%|████████▎ | 91/109 [03:02<00:35,  1.99s/it]

Processed: 93184/110850 samples (84.1%)


Predicting batches:  93%|█████████▎| 101/109 [03:22<00:15,  2.00s/it]

Processed: 103424/110850 samples (93.3%)


Predicting batches: 100%|██████████| 109/109 [03:37<00:00,  2.00s/it]


Predictions completed: 110850 samples

=== Directional Sanity Check ===
Pos ratio (y_test): 0.5225259359494813
Neg ratio (y_test): 0.47667117726657643
Pred +1 ratio: 0.5566621560667568
Pred -1 ratio: 0.4433378439332431
      Pred+  Pred-
+1 | [32116 25806]
-1 | [29529 23310]

Metrics for window 512:
R²: -0.0190
MAE: 0.0119
MSE: 0.0003
Directional Accuracy: 0.5004
Up_Directional_Acc: 0.5520
Down_Directional_Acc: 0.4384
Top25_R2: -0.0195
Top25_MAE: 0.0103
Top25_MSE: 0.0002
Top25_Dir_Acc: 0.4433
Top25_Up_Acc: 0.5585
Top25_Down_Acc: 0.3667
Bottom25_R2: -0.0197
Bottom25_MAE: 0.0155
Bottom25_MSE: 0.0005
Bottom25_Dir_Acc: 0.5056
Bottom25_Up_Acc: 0.4485
Bottom25_Down_Acc: 0.5496
Predictions saved to timesfm1.0_w512.csv

=== All predictions completed! ===


In [None]:
# Save results to pkl file
# Make sure the output directory exists even if this cell is run on its own.
os.makedirs("/content/drive/MyDrive/timesfm1.0_project/timesfm1.0_results", exist_ok=True)

# Record the settings of the model that was actually loaded. Re-deriving them
# from torch.cuda.is_available() misreports the run when the first
# initialization attempt succeeded on CPU (batch size 512, not 256) or when
# a GPU attempt fell back to CPU.
# NOTE(review): presumably the TimesFm object exposes `backend` and
# `per_core_batch_size` attributes mirroring its hparams - TODO confirm.
# If it does not, the previous CUDA-based values are used unchanged.
results_dict = {
    'window_sizes': window_sizes,
    'results': results,
    'model_name': 'TimesFM',
    'model_version': '1.0',
    'model_params': {
        'backend': getattr(tfm, 'backend', 'gpu' if torch.cuda.is_available() else 'cpu'),
        'per_core_batch_size': getattr(tfm, 'per_core_batch_size', 512 if torch.cuda.is_available() else 256),
        'horizon_len': 1,
        'model_id': 'google/timesfm-1.0-200m-pytorch',
        'batch_size_used': 1024
    }
}
joblib.dump(results_dict, "/content/drive/MyDrive/timesfm1.0_project/timesfm1.0_results/results.pkl")
print("Results saved to results.pkl")

# Save evaluation metrics: one row per window size, one column per metric
metrics_df = pd.DataFrame([
    {**{"Window": window_size}, **results[window_size]["metrics"]}
    for window_size in window_sizes
])
metrics_df.to_csv("/content/drive/MyDrive/timesfm1.0_project/timesfm1.0_results/timesfm1.0_metrics.csv", index=False)
print("Metrics saved to timesfm1.0_metrics.csv")

print("\n=== Metrics Summary ===")
print(metrics_df.round(4))


Results saved to results.pkl
Metrics saved to timesfm1.0_metrics.csv

=== Metrics Summary ===
   Window      R²     MAE     MSE  Directional Accuracy  Up_Directional_Acc  \
0       5 -0.3359  0.0137  0.0004                0.4912              0.4561   
1      21 -0.1422  0.0127  0.0003                0.4929              0.4702   
2     252 -0.0239  0.0120  0.0003                0.4998              0.5616   
3     512 -0.0190  0.0119  0.0003                0.5004              0.5520   

   Down_Directional_Acc  Top25_R2  Top25_MAE  Top25_MSE  Top25_Dir_Acc  \
0                0.5282   -0.3412     0.0119     0.0003         0.4942   
1                0.5158   -0.1386     0.0109     0.0002         0.4939   
2                0.4276   -0.0237     0.0103     0.0002         0.4903   
3                0.4384   -0.0195     0.0103     0.0002         0.4433   

   Top25_Up_Acc  Top25_Down_Acc  Bottom25_R2  Bottom25_MAE  Bottom25_MSE  \
0        0.4581          0.5133      -0.3259        0.0178     