In [1]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
model = InformerForPrediction.from_pretrained(
    "huggingface/informer-tourism-monthly"
)



config.json:   0%|          | 0.00/1.20k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/404k [00:00<?, ?B/s]

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from transformers import InformerConfig, InformerForPrediction

from data.snp500 import snp500_daily
from utils.metrics import metric

# Initialize pretrained Informer model
model = InformerForPrediction.from_pretrained("huggingface/informer-tourism-monthly")

# Print model configuration details
print("Model Configuration:")
print(f"Context length: {model.config.context_length}")
print(f"Prediction length: {model.config.prediction_length}")
print(f"Lags sequence: {model.config.lags_sequence}")
print(f"Number of time features: {model.config.num_time_features}")

# Calculate required context window based on model's config
max_lag = max(model.config.lags_sequence) if model.config.lags_sequence else 0
CONTEXT_WINDOW = max_lag + model.config.context_length
PREDICTION_WINDOW = model.config.prediction_length

print(f"\nRequired dimensions:")
print(f"Context window: {CONTEXT_WINDOW} (includes max lag: {max_lag} + context length: {model.config.context_length})")
print(f"Prediction window: {PREDICTION_WINDOW}")

# Prepare the data - using raw Close prices
sp500_d = snp500_daily.reset_index()[["Date", "Close"]]

def generate_forecast(context_data):
    """Generate and process forecast using Informer"""
    context_length = model.config.context_length
    prediction_length = model.config.prediction_length
    num_features = model.config.num_time_features
    input_size = model.config.input_size
    max_lag = max(model.config.lags_sequence)
    required_length = context_length + max_lag

    context = np.asarray(context_data).flatten()
    if len(context) < required_length:
        num_repeats = (required_length - len(context)) // len(context) + 1
        padded_data = np.tile(context, num_repeats)
        context = padded_data[-required_length:]
    else:
        context = context[-required_length:]

    context_mean = np.mean(context)
    context_std = np.std(context)
    context_scaled = (context - context_mean) / context_std

    past_values = torch.tensor(context_scaled).float().unsqueeze(0)

    timesteps = np.arange(required_length)
    time_feat1 = (timesteps % 365) / 365.0
    time_feat2 = np.cos(2 * np.pi * timesteps / 365.0)
    time_features = np.stack([time_feat1, time_feat2], axis=-1)
    past_time_features = torch.tensor(time_features).float().unsqueeze(0)

    future_timesteps = np.arange(prediction_length) + len(timesteps)
    future_feat1 = (future_timesteps % 365) / 365.0
    future_feat2 = np.cos(2 * np.pi * future_timesteps / 365.0)
    future_time_features = np.stack([future_feat1, future_feat2], axis=-1)
    future_time_features = torch.tensor(future_time_features).float().unsqueeze(0)

    # Verify dimensions
    if past_time_features.shape[-1] != num_features:
        raise ValueError(f"Mismatch in past time features: expected {num_features}, got {past_time_features.shape[-1]}")
    if future_time_features.shape[-1] != num_features:
        raise ValueError(f"Mismatch in future time features: expected {num_features}, got {future_time_features.shape[-1]}")

    outputs = model.generate(
        past_values=past_values,
        past_time_features=past_time_features,
        future_time_features=future_time_features,
        past_observed_mask=None,
        static_categorical_features=None,
        static_real_features=None,
    )

    forecast_samples = outputs.sequences.squeeze().detach().numpy()
    forecast_samples = forecast_samples * context_std + context_mean

    low, median, high = np.quantile(forecast_samples, [0.1, 0.5, 0.9], axis=0)
    return low, median, high

def calculate_metrics(actual, predicted, insample=None):
    actual = np.array(actual).flatten()
    predicted = np.array(predicted).flatten()

    mae, mse, rmse, mape, mspe = metric(predicted, actual)

    smape = 200 * np.mean(
        np.abs(predicted - actual) / (np.abs(predicted) + np.abs(actual))
    )

    if insample is not None:
        naive_forecast = insample[:-1]
        naive_target = insample[1:]
        naive_mae = np.mean(np.abs(naive_target - naive_forecast))
        mase = mae / naive_mae if naive_mae != 0 else np.nan
    else:
        mase = np.nan

    metrics = {
        "MAE": mae,
        "MSE": mse,
        "RMSE": rmse,
        "MAPE": mape * 100,
        "SMAPE": smape,
        "MASE": mase if not np.isnan(mase) else None,
    }

    return metrics

def plot_forecast(data, context_window, prediction_window, median_forecast, low_forecast, high_forecast, title):
    plt.figure(figsize=(12, 6))

    last_context_point = data["Close"].iloc[-prediction_window - 1]

    median_forecast = np.insert(median_forecast, 0, last_context_point)
    low_forecast = np.insert(low_forecast, 0, last_context_point)
    high_forecast = np.insert(high_forecast, 0, last_context_point)

    forecast_dates = pd.concat(
        [
            data["Date"].iloc[-(prediction_window + 1) : -prediction_window],
            data["Date"].iloc[-prediction_window:],
        ]
    )

    actual_prices = data["Close"].iloc[-prediction_window:].values
    actual_prices = np.insert(actual_prices, 0, last_context_point)

    plt.plot(
        data["Date"].iloc[-(context_window + prediction_window) : -prediction_window],
        data["Close"].iloc[-(context_window + prediction_window) : -prediction_window],
        color="royalblue",
        label="Historical Data (Context)",
    )

    plt.plot(forecast_dates, actual_prices, color="green", label="Actual Prices")
    plt.plot(forecast_dates, median_forecast, color="tomato", label="Median Forecast")
    plt.fill_between(
        forecast_dates,
        low_forecast,
        high_forecast,
        color="tomato",
        alpha=0.3,
        label="80% Prediction Interval",
    )

    plt.xlabel("Date")
    plt.ylabel("S&P 500 Price")
    plt.title(title)
    plt.xticks(rotation=45)
    plt.legend()
    plt.grid()
    plt.show()

# Generate full period forecast
context = sp500_d["Close"].values[-CONTEXT_WINDOW:]
low, median, high = generate_forecast(context)

# Calculate and print metrics for full period
actual_prices = sp500_d["Close"].values[-PREDICTION_WINDOW:]
insample_data = sp500_d["Close"].values[
    -CONTEXT_WINDOW - PREDICTION_WINDOW : -PREDICTION_WINDOW
]
metrics = calculate_metrics(actual_prices, median, insample=insample_data)

print("\nFull Period Metrics:")
for metric_name, value in metrics.items():
    if value is not None:
        print(f"{metric_name}: {value:.4f}")
    else:
        print(f"{metric_name}: N/A")

# Plot full period forecast
plot_forecast(
    sp500_d,
    CONTEXT_WINDOW,
    PREDICTION_WINDOW,
    median,
    low,
    high,
    "S&P 500 Price Prediction with Informer",
)


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Model Configuration:
Context length: 24
Prediction length: 24
Lags sequence: [1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 23, 24, 25, 35, 36, 37]
Number of time features: 2

Required dimensions:
Context window: 61 (includes max lag: 37 + context length: 24)
Prediction window: 24


RuntimeError: mat1 and mat2 shapes cannot be multiplied (24x20 and 22x32)