In [None]:
# ======================================================================
# ViT forecasting template (GPT-OSS style; no quantization, loads once)
# ======================================================================

# --- Imports ---
import math
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

import torch
import torch.nn as nn
from PIL import Image

from transformers import AutoImageProcessor, ViTForImageClassification
import torchvision.transforms as T

# --- Global results dict + logger ---
# Module-level store mapping dataset name -> metrics dict. It is the default
# `results_dict` argument of vit_prediction, so every forecast run in this
# notebook accumulates its metrics here unless another dict is passed.
RESULTS = {}

def log_simple_result(results_dict, dataset_name, horizon, mae, rmse, mape, r2):
    """Store one dataset's rounded forecast metrics in ``results_dict``.

    The entry is keyed by ``dataset_name``; an existing entry under the same
    name is replaced. MAE, RMSE and MAPE are rounded to 2 decimals and R² to
    4 decimals. ``results_dict`` is mutated in place and nothing is returned.
    """
    two_decimal_metrics = {"MAE": mae, "RMSE": rmse, "MAPE (%)": mape}

    entry = {"horizon": horizon}
    for label, value in two_decimal_metrics.items():
        entry[label] = round(value, 2)
    entry["R²"] = round(r2, 4)

    results_dict[dataset_name] = entry

# --- Load ViT once ---
MODEL_ID = "google/vit-base-patch16-224"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
SEED = 42  # seeds the random initialisation of the regression head below

# NOTE(review): `processor` is loaded here but is not referenced by any other
# code visible in this notebook; it is kept in case later cells rely on it.
processor = AutoImageProcessor.from_pretrained(MODEL_ID)
vit = ViTForImageClassification.from_pretrained(
    MODEL_ID,
    num_labels=1,                 # we output a single scalar
    ignore_mismatched_sizes=True  # swap the head cleanly
)
# Replace/ensure a 1-dim head; keep it simple and predictable.
# nn.Linear draws its initial weights from the global torch RNG, so the seed
# must be fixed *before* the layer is created; seeding later (e.g. inside the
# forecasting function) cannot make the head reproducible across kernel
# restarts. No training step is visible in this notebook, so these initial
# weights are presumably what produces every forecast.
in_features = vit.classifier.in_features
torch.manual_seed(SEED)
vit.classifier = nn.Linear(in_features, 1)
vit.eval().to(DEVICE)

# A window of 16 * 16 = 256 values is laid out as a 16x16 grid and then
# upscaled to the 224x224 input size used for the ViT.
IMAGE_SIZE = 16
PATCH_WINDOW = IMAGE_SIZE * IMAGE_SIZE
resize = T.Resize((224, 224), antialias=True)

# --- Forecasting function ---
def _forecast_metrics(y_true, y_pred):
    """Return ``(mae, rmse, mape, r2)`` for two equal-length 1-D arrays.

    MAPE is averaged only over points whose observed value is finite and
    non-zero; a single zero in ``y_true`` would otherwise turn the whole
    metric into ``inf``. If no such point exists, MAPE is ``inf``.
    R² is ``nan`` when ``y_true`` is constant (zero total variance).
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    mae = mean_absolute_error(y_true, y_pred)
    rmse = math.sqrt(mean_squared_error(y_true, y_pred))

    usable = np.isfinite(y_true) & (y_true != 0)
    if usable.any():
        pct_err = np.abs((y_true[usable] - y_pred[usable]) / y_true[usable])
        mape = float(np.mean(pct_err) * 100.0)
    else:
        mape = float("inf")

    ss_res = np.sum((y_true - y_pred) ** 2)
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
    r2 = 1 - ss_res / ss_tot if ss_tot != 0 else float("nan")
    return mae, rmse, mape, r2


def vit_prediction(
    dataset,
    horizon=12,
    frequency="M",
    dataset_name=None,
    results_dict=RESULTS,
    seed=42,
    title=None,
    ylabel=None,
    save_path="ViT_forecast.png",
):
    """Roll out a ``horizon``-step ViT forecast, score it, log it and plot it.

    Parameters
    ----------
    dataset : callable
        Called as ``dataset(horizon=horizon)``; must return
        ``(train_df, test_df)``, each with at least the columns ``'ds'`` and
        ``'y'``. ``test_df`` must contain exactly ``horizon`` rows.
    horizon : int
        Number of steps to forecast.
    frequency : str
        Accepted for interface compatibility; not read by this function.
    dataset_name : str or None
        Key used in ``results_dict``. Defaults to ``dataset.__name__``.
    results_dict : dict
        Mutated in place with the metrics; defaults to the global ``RESULTS``.
    seed : int
        Passed to ``torch.manual_seed`` before inference.
    title, ylabel : str or None
        Plot title / y-axis label. When ``None`` a generic label built from
        the dataset name is used, so the figure is not mislabelled when the
        function is run on different datasets.
    save_path : str or None
        Where the figure is written (300 dpi). ``None`` skips saving. The
        default path is overwritten by every call.

    Returns
    -------
    dict
        ``results_dict`` (the same object that was passed in).

    Raises
    ------
    ValueError
        If ``test_df`` does not have exactly ``horizon`` rows.

    Notes
    -----
    The model sees the last 256 min-max-normalised training values as a 16x16
    image. Its output is passed through a sigmoid, so after inverse scaling
    every prediction lies strictly between the training minimum and maximum.
    """

    # 1) Load data
    train_df, test_df = dataset(horizon=horizon)
    y_train = train_df["y"].values.astype(float)
    y_test = test_df["y"].values.astype(float)

    # Fail fast: the rollout yields exactly `horizon` predictions, so a test
    # set of any other length cannot be scored against them.
    if len(y_test) != horizon:
        raise ValueError(
            f"dataset returned {len(y_test)} test rows but horizon={horizon}; "
            "they must match"
        )

    # 2) Normalize on TRAIN only (avoid leakage)
    scaler = MinMaxScaler(feature_range=(0, 1))
    y_train_norm = scaler.fit_transform(y_train.reshape(-1, 1)).flatten()

    # Build initial 256-length window from tail of train. Short series are
    # left-padded with zeros, i.e. with the training minimum in scaled space.
    if len(y_train_norm) < PATCH_WINDOW:
        window = np.pad(y_train_norm, (PATCH_WINDOW - len(y_train_norm), 0), mode="constant")
    else:
        window = y_train_norm[-PATCH_WINDOW:].copy()

    # 3) Helper: predict one step from a 256-length window
    # NOTE(review): the forward pass below runs under no_grad on a model that
    # the setup cell puts in eval mode, so this seed presumably does not
    # influence the forecast; it is kept for backward compatibility.
    torch.manual_seed(seed)

    def predict_next_point_01(input_seq_256: np.ndarray) -> float:
        """Map one 256-value window to a single prediction in (0, 1)."""
        img = input_seq_256.reshape(IMAGE_SIZE, IMAGE_SIZE)
        # to 1x3xHxW tensor: add batch + channel dims, copy into 3 channels
        tensor_img = torch.tensor(img, dtype=torch.float32).unsqueeze(0).unsqueeze(0)
        tensor_img = tensor_img.repeat(1, 3, 1, 1)
        tensor_img = resize(tensor_img).to(DEVICE)

        with torch.no_grad():
            out = vit(tensor_img)
            # map to [0,1] to be safe (the head is linear)
            val01 = torch.sigmoid(out.logits.squeeze()).item()
        return float(val01)

    # 4) Rollout horizon steps, feeding each prediction back into the window
    preds_norm = []
    cur = window.copy()
    for _ in range(horizon):
        nxt = predict_next_point_01(cur)
        preds_norm.append(nxt)
        # slide window: drop the oldest value, append the new prediction
        cur = np.roll(cur, -1)
        cur[-1] = nxt

    # 5) Inverse scale predictions to original scale (using TRAIN scaler)
    y_pred = scaler.inverse_transform(np.array(preds_norm).reshape(-1, 1)).flatten()

    # 6) Metrics vs test set
    mae, rmse, mape, r2 = _forecast_metrics(y_test, y_pred)

    # 7) Log
    name = dataset_name or getattr(dataset, "__name__", "unnamed_dataset")
    log_simple_result(results_dict, name, horizon, mae, rmse, mape, r2)

    # 8) Plot
    history_color = '#1b9e77'   # teal
    observed_color = '#d95f02'  # muted orange
    forecast_color = '#7570b3'  # muted purple

    plot_title = title if title is not None else f"ViT Forecast for {name} (H = {horizon})"
    plot_ylabel = ylabel if ylabel is not None else "Value"

    plt.style.use('seaborn-v0_8-whitegrid')
    fig, ax = plt.subplots(figsize=(10, 5))

    ax.plot(train_df['ds'], y_train, label="Historical Data",
            color=history_color, linewidth=2)
    ax.plot(test_df['ds'], y_test, label="Observed Future",
            color=observed_color, linewidth=2)
    ax.plot(test_df['ds'], y_pred, label="Model Forecast",
            color=forecast_color, linewidth=2, linestyle='--')

    ax.set_title(plot_title, fontsize=14, fontweight='bold', pad=15)
    ax.set_xlabel("Date", fontsize=12)
    ax.set_ylabel(plot_ylabel, fontsize=12)

    ax.legend(fontsize=11, frameon=True, loc='upper left')
    ax.tick_params(axis='x', rotation=30)
    ax.margins(x=0.02)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    fig.tight_layout()
    if save_path:
        fig.savefig(save_path, dpi=300)
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

    return results_dict

# --- Dataset loader (AirPassengers; Aileen Nielsen CSV like your snippet) ---
def load_air_passengers(horizon=12, unique_id="AP1"):
    """Download the AirPassengers CSV and split off the last ``horizon`` rows.

    The file's two columns are renamed to ``ds`` (parsed with ``%Y-%m``) and
    ``y``.

    Returns
    -------
    (train_df, test_df)
        Both with columns ``['unique_id', 'ds', 'y']``. ``test_df`` holds the
        final ``horizon`` rows (the whole frame if ``horizon`` exceeds its
        length); ``horizon=0`` yields an empty ``test_df``.

    Raises
    ------
    ValueError
        If ``horizon`` is negative.
    """
    if horizon < 0:
        raise ValueError(f"horizon must be non-negative, got {horizon}")

    url = "https://raw.githubusercontent.com/AileenNielsen/TimeSeriesAnalysisWithPython/master/data/AirPassengers.csv"
    df = pd.read_csv(url, header=0)
    df.columns = ["ds", "y"]
    df["ds"] = pd.to_datetime(df["ds"], format="%Y-%m")

    # Split on an explicit row index. Slicing with `-horizon` breaks for
    # horizon == 0 because -0 == 0: the train set would come out empty and
    # the test set would be the entire frame.
    split = max(len(df) - horizon, 0)
    train_df = df.iloc[:split].copy()
    test_df = df.iloc[split:].copy()
    train_df["unique_id"] = unique_id
    test_df["unique_id"] = unique_id
    return train_df[["unique_id", "ds", "y"]], test_df[["unique_id", "ds", "y"]]

# --- Example run ---
# When this cell is executed as the main module the guard passes, so the
# example forecast runs as part of the cell.
if __name__ == "__main__":
    # Hold out and forecast the last 24 rows of AirPassengers. `frequency` is
    # accepted by vit_prediction but not read in its body.
    vit_prediction(load_air_passengers, horizon=24, frequency="M")
    # vit_prediction logged into the global RESULTS (its default results_dict).
    print(RESULTS)

In [None]:
def load_temperature(horizon=24, unique_id="AP1"):
    """Load the ``Temperature`` column of Rainfall_data.csv as a series.

    ``ds`` is assembled from the ``Year``/``Month``/``Day`` columns; ``y`` is
    ``Temperature`` coerced to numeric, with non-numeric rows dropped. The
    mean of ``y`` is printed.

    Returns
    -------
    (train_df, test_df)
        Both with columns ``['unique_id', 'ds', 'y']``. ``test_df`` holds the
        final ``horizon`` rows (the whole frame if ``horizon`` exceeds its
        length); ``horizon=0`` yields an empty ``test_df``.

    Raises
    ------
    ValueError
        If ``horizon`` is negative.
    """
    if horizon < 0:
        raise ValueError(f"horizon must be non-negative, got {horizon}")

    # Load the dataset
    df = pd.read_csv("https://raw.githubusercontent.com/ahamed14051/time-series-dataset/refs/heads/main/Rainfall_data.csv")

    # Create timestamp from Year, Month, Day
    df['ds'] = pd.to_datetime(df[['Year', 'Month', 'Day']])

    # Use Temperature column as 'y'
    df['y'] = pd.to_numeric(df['Temperature'], errors='coerce')
    df = df.dropna(subset=['y'])

    print('Mean Temperature:', df['y'].mean())

    # Train/test split on an explicit row index. Slicing with `-horizon`
    # breaks for horizon == 0 because -0 == 0 (empty train, full test).
    split = max(len(df) - horizon, 0)
    train_df = df.iloc[:split].copy()
    test_df = df.iloc[split:].copy()

    # Add unique_id
    train_df['unique_id'] = unique_id
    test_df['unique_id'] = unique_id

    return train_df[['unique_id', 'ds', 'y']], test_df[['unique_id', 'ds', 'y']]

# Forecast the last 24 rows of the temperature series. `frequency` is accepted
# by vit_prediction but not read in its body. As the cell's final expression,
# the returned results dict is what the notebook displays.
vit_prediction(load_temperature, horizon=24, frequency="M")

In [None]:
def load_air_sunsopts(horizon=24, unique_id="AP1"):
    """Load Sunspots.csv as a series and split off the last ``horizon`` rows.

    ``Date`` becomes ``ds`` and ``Monthly Mean Total Sunspot Number`` becomes
    ``y``. The frame's shape and the mean of ``y`` are printed.

    Returns
    -------
    (train_df, test_df)
        Both with columns ``['unique_id', 'ds', 'y']``. ``test_df`` holds the
        final ``horizon`` rows (the whole frame if ``horizon`` exceeds its
        length); ``horizon=0`` yields an empty ``test_df``.

    Raises
    ------
    ValueError
        If ``horizon`` is negative.
    """
    if horizon < 0:
        raise ValueError(f"horizon must be non-negative, got {horizon}")

    url = "https://raw.githubusercontent.com/ahamed14051/time-series-dataset/refs/heads/main/Sunspots.csv"
    df = pd.read_csv(url)
    # Drop the saved index column if present; tolerate its absence the same
    # way the other loaders in this notebook guard their 'Unnamed' columns.
    df = df.drop(columns=['Unnamed: 0'], errors='ignore')
    print(df.shape)

    # Standardize column names
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.rename(columns={"Date": "ds", "Monthly Mean Total Sunspot Number": "y"})
    print('Mean: ', df['y'].mean())

    # Train/test split on an explicit row index. Slicing with `-horizon`
    # breaks for horizon == 0 because -0 == 0 (empty train, full test).
    split = max(len(df) - horizon, 0)
    train_df = df.iloc[:split].copy()
    test_df = df.iloc[split:].copy()

    # Add unique_id
    train_df['unique_id'] = unique_id
    test_df['unique_id'] = unique_id

    return train_df[['unique_id', 'ds', 'y']], test_df[['unique_id', 'ds', 'y']]

# Forecast the last 120 rows of the sunspot series. `frequency` is accepted by
# vit_prediction but not read in its body. As the cell's final expression, the
# returned results dict is what the notebook displays.
vit_prediction(load_air_sunsopts, horizon=120, frequency="M")

In [None]:
def load_temp(horizon=24, unique_id="AP1"):
    """Load temp.csv as a series and split off the last ``horizon`` rows.

    ``Date`` becomes ``ds`` and ``temp`` becomes ``y``. Rows whose date or
    value cannot be parsed are dropped. The mean of ``y`` is printed.

    Returns
    -------
    (train_df, test_df)
        Both with columns ``['unique_id', 'ds', 'y']``. ``test_df`` holds the
        final ``horizon`` rows (the whole frame if ``horizon`` exceeds its
        length); ``horizon=0`` yields an empty ``test_df``.

    Raises
    ------
    ValueError
        If ``horizon`` is negative.
    """
    if horizon < 0:
        raise ValueError(f"horizon must be non-negative, got {horizon}")

    df = pd.read_csv("https://raw.githubusercontent.com/ahamed14051/time-series-dataset/refs/heads/main/temp.csv")

    # Remove the unnamed column if the file has one
    df = df.drop(columns=['Unnamed: 2'], errors='ignore')

    # Parse date safely
    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

    # Drop rows with invalid dates (like the description text)
    df = df.dropna(subset=['Date'])

    # Rename columns
    df = df.rename(columns={"Date": "ds", "temp": "y"})

    # Convert 'y' to numeric, coercing errors to NaN and dropping them
    df['y'] = pd.to_numeric(df['y'], errors='coerce')
    df = df.dropna(subset=['y'])

    print('Mean:', df['y'].mean())

    # Train/test split on an explicit row index. Slicing with `-horizon`
    # breaks for horizon == 0 because -0 == 0 (empty train, full test).
    split = max(len(df) - horizon, 0)
    train_df = df.iloc[:split].copy()
    test_df = df.iloc[split:].copy()

    # Add unique_id
    train_df['unique_id'] = unique_id
    test_df['unique_id'] = unique_id

    return train_df[['unique_id', 'ds', 'y']], test_df[['unique_id', 'ds', 'y']]

# Forecast the last 240 rows of the temp.csv series. `frequency` is accepted by
# vit_prediction but not read in its body. As the cell's final expression, the
# returned results dict is what the notebook displays.
vit_prediction(load_temp, frequency='D', horizon=240)

In [None]:
def load_temperature(horizon=24, unique_id="AP1"):
    """Load the ``Temperature`` column of Rainfall_data.csv as a series.

    NOTE(review): a function with this exact name is already defined in an
    earlier cell of this notebook; this definition shadows it. Consider
    keeping only one of the two.

    ``ds`` is assembled from the ``Year``/``Month``/``Day`` columns; ``y`` is
    ``Temperature`` coerced to numeric, with non-numeric rows dropped. The
    mean of ``y`` is printed.

    Returns
    -------
    (train_df, test_df)
        Both with columns ``['unique_id', 'ds', 'y']``. ``test_df`` holds the
        final ``horizon`` rows (the whole frame if ``horizon`` exceeds its
        length); ``horizon=0`` yields an empty ``test_df``.

    Raises
    ------
    ValueError
        If ``horizon`` is negative.
    """
    if horizon < 0:
        raise ValueError(f"horizon must be non-negative, got {horizon}")

    # Load the dataset
    df = pd.read_csv("https://raw.githubusercontent.com/ahamed14051/time-series-dataset/refs/heads/main/Rainfall_data.csv")

    # Create timestamp from Year, Month, Day
    df['ds'] = pd.to_datetime(df[['Year', 'Month', 'Day']])

    # Use Temperature column as 'y'
    df['y'] = pd.to_numeric(df['Temperature'], errors='coerce')
    df = df.dropna(subset=['y'])

    print('Mean Temperature:', df['y'].mean())

    # Train/test split on an explicit row index. Slicing with `-horizon`
    # breaks for horizon == 0 because -0 == 0 (empty train, full test).
    split = max(len(df) - horizon, 0)
    train_df = df.iloc[:split].copy()
    test_df = df.iloc[split:].copy()

    # Add unique_id
    train_df['unique_id'] = unique_id
    test_df['unique_id'] = unique_id

    return train_df[['unique_id', 'ds', 'y']], test_df[['unique_id', 'ds', 'y']]

# Forecast the last 24 rows of the temperature series. vit_prediction keys its
# log entry by the loader's __name__ when no dataset_name is given, so this
# run replaces any earlier "load_temperature" entry in RESULTS.
# `frequency` is accepted by vit_prediction but not read in its body.
vit_prediction(load_temperature, frequency='M', horizon=24)

In [None]:
def load_precipitation(horizon=24, unique_id="AP1"):
    """Load the ``Precipitation`` column of Rainfall_data.csv as a series.

    ``ds`` is assembled from the ``Year``/``Month``/``Day`` columns; ``y`` is
    ``Precipitation`` coerced to numeric, with non-numeric rows dropped. The
    mean of ``y`` is printed.

    Returns
    -------
    (train_df, test_df)
        Both with columns ``['unique_id', 'ds', 'y']``. ``test_df`` holds the
        final ``horizon`` rows (the whole frame if ``horizon`` exceeds its
        length); ``horizon=0`` yields an empty ``test_df``.

    Raises
    ------
    ValueError
        If ``horizon`` is negative.
    """
    if horizon < 0:
        raise ValueError(f"horizon must be non-negative, got {horizon}")

    # Load the dataset
    df = pd.read_csv("https://raw.githubusercontent.com/ahamed14051/time-series-dataset/refs/heads/main/Rainfall_data.csv")

    # Create timestamp from Year, Month, Day
    df['ds'] = pd.to_datetime(df[['Year', 'Month', 'Day']])

    # Use Precipitation column as 'y'
    df['y'] = pd.to_numeric(df['Precipitation'], errors='coerce')
    df = df.dropna(subset=['y'])

    print('Mean Precipitation:', df['y'].mean())

    # Train/test split on an explicit row index. Slicing with `-horizon`
    # breaks for horizon == 0 because -0 == 0 (empty train, full test).
    split = max(len(df) - horizon, 0)
    train_df = df.iloc[:split].copy()
    test_df = df.iloc[split:].copy()

    # Add unique_id
    train_df['unique_id'] = unique_id
    test_df['unique_id'] = unique_id

    return train_df[['unique_id', 'ds', 'y']], test_df[['unique_id', 'ds', 'y']]

# Forecast the last 24 rows of the precipitation series. `frequency` is
# accepted by vit_prediction but not read in its body. As the cell's final
# expression, the returned results dict is what the notebook displays.
vit_prediction(load_precipitation, frequency='M', horizon=24)

In [None]:
def load_humidity(horizon=24, unique_id="AP1"):
    """Load the ``Specific Humidity`` column of Rainfall_data.csv as a series.

    ``ds`` is assembled from the ``Year``/``Month``/``Day`` columns; ``y`` is
    ``Specific Humidity`` coerced to numeric, with non-numeric rows dropped.
    The mean of ``y`` is printed.

    Returns
    -------
    (train_df, test_df)
        Both with columns ``['unique_id', 'ds', 'y']``. ``test_df`` holds the
        final ``horizon`` rows (the whole frame if ``horizon`` exceeds its
        length); ``horizon=0`` yields an empty ``test_df``.

    Raises
    ------
    ValueError
        If ``horizon`` is negative.
    """
    if horizon < 0:
        raise ValueError(f"horizon must be non-negative, got {horizon}")

    # Load the dataset
    df = pd.read_csv("https://raw.githubusercontent.com/ahamed14051/time-series-dataset/refs/heads/main/Rainfall_data.csv")

    # Create timestamp from Year, Month, Day
    df['ds'] = pd.to_datetime(df[['Year', 'Month', 'Day']])

    # Use Specific Humidity column as 'y'
    df['y'] = pd.to_numeric(df['Specific Humidity'], errors='coerce')
    df = df.dropna(subset=['y'])

    print('Mean Specific Humidity:', df['y'].mean())

    # Train/test split on an explicit row index. Slicing with `-horizon`
    # breaks for horizon == 0 because -0 == 0 (empty train, full test).
    split = max(len(df) - horizon, 0)
    train_df = df.iloc[:split].copy()
    test_df = df.iloc[split:].copy()

    # Add unique_id
    train_df['unique_id'] = unique_id
    test_df['unique_id'] = unique_id

    return train_df[['unique_id', 'ds', 'y']], test_df[['unique_id', 'ds', 'y']]

# Forecast the last 24 rows of the specific-humidity series. `frequency` is
# accepted by vit_prediction but not read in its body. As the cell's final
# expression, the returned results dict is what the notebook displays.
vit_prediction(load_humidity, frequency='M', horizon=24)

In [None]:
def load_relative_humidity(horizon=24, unique_id="AP1"):
    """Load the ``Relative Humidity`` column of Rainfall_data.csv as a series.

    ``ds`` is assembled from the ``Year``/``Month``/``Day`` columns; ``y`` is
    ``Relative Humidity`` coerced to numeric, with non-numeric rows dropped.
    The mean of ``y`` is printed.

    Returns
    -------
    (train_df, test_df)
        Both with columns ``['unique_id', 'ds', 'y']``. ``test_df`` holds the
        final ``horizon`` rows (the whole frame if ``horizon`` exceeds its
        length); ``horizon=0`` yields an empty ``test_df``.

    Raises
    ------
    ValueError
        If ``horizon`` is negative.
    """
    if horizon < 0:
        raise ValueError(f"horizon must be non-negative, got {horizon}")

    # Load the dataset
    df = pd.read_csv("https://raw.githubusercontent.com/ahamed14051/time-series-dataset/refs/heads/main/Rainfall_data.csv")

    # Create timestamp from Year, Month, Day
    df['ds'] = pd.to_datetime(df[['Year', 'Month', 'Day']])

    # Use Relative Humidity column as 'y'
    df['y'] = pd.to_numeric(df['Relative Humidity'], errors='coerce')
    df = df.dropna(subset=['y'])

    print('Mean Relative Humidity:', df['y'].mean())

    # Train/test split on an explicit row index. Slicing with `-horizon`
    # breaks for horizon == 0 because -0 == 0 (empty train, full test).
    split = max(len(df) - horizon, 0)
    train_df = df.iloc[:split].copy()
    test_df = df.iloc[split:].copy()

    # Add unique_id
    train_df['unique_id'] = unique_id
    test_df['unique_id'] = unique_id

    return train_df[['unique_id', 'ds', 'y']], test_df[['unique_id', 'ds', 'y']]

# Forecast the last 24 rows of the relative-humidity series. `frequency` is
# accepted by vit_prediction but not read in its body. As the cell's final
# expression, the returned results dict is what the notebook displays.
vit_prediction(load_relative_humidity, frequency='M', horizon=24)

In [None]:
def load_birth(horizon=24, unique_id="AP1"):
    """Load daily-total-female-births.csv and split off the last ``horizon`` rows.

    ``Date`` becomes ``ds`` and ``Births`` becomes ``y``. Rows whose date or
    value cannot be parsed are dropped. The mean of ``y`` and the cleaned
    frame's shape are printed.

    Returns
    -------
    (train_df, test_df)
        Both with columns ``['unique_id', 'ds', 'y']``. ``test_df`` holds the
        final ``horizon`` rows (the whole frame if ``horizon`` exceeds its
        length); ``horizon=0`` yields an empty ``test_df``.

    Raises
    ------
    ValueError
        If ``horizon`` is negative.
    """
    if horizon < 0:
        raise ValueError(f"horizon must be non-negative, got {horizon}")

    df = pd.read_csv("https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-total-female-births.csv")

    # Remove the unnamed column if the file has one
    df = df.drop(columns=['Unnamed: 2'], errors='ignore')

    # Parse date safely
    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

    # Drop rows with invalid dates (like the description text)
    df = df.dropna(subset=['Date'])

    # Rename columns
    df = df.rename(columns={"Date": "ds", "Births": "y"})

    # Convert 'y' to numeric, coercing errors to NaN and dropping them
    df['y'] = pd.to_numeric(df['y'], errors='coerce')
    df = df.dropna(subset=['y'])

    print('Mean:', df['y'].mean())
    print(df.shape)

    # Train/test split on an explicit row index. Slicing with `-horizon`
    # breaks for horizon == 0 because -0 == 0 (empty train, full test).
    split = max(len(df) - horizon, 0)
    train_df = df.iloc[:split].copy()
    test_df = df.iloc[split:].copy()

    # Add unique_id
    train_df['unique_id'] = unique_id
    test_df['unique_id'] = unique_id

    return train_df[['unique_id', 'ds', 'y']], test_df[['unique_id', 'ds', 'y']]

# Forecast the last 24 rows of the births series.
# NOTE(review): the source file is named "daily-total-female-births", yet
# frequency='M' is passed. This has no effect today because vit_prediction
# never reads `frequency` in its body -- confirm the intended value.
vit_prediction(load_birth, frequency='M', horizon=24)

In [None]:
def load_store(horizon=24, unique_id="AP1"):
    """Load store.csv for store 0 / product 0 and split off the last ``horizon`` rows.

    Only rows with ``store == 0`` and ``product == 0`` are kept. ``Date``
    becomes ``ds`` and ``number_sold`` becomes ``y``. Rows whose date or value
    cannot be parsed are dropped. The mean of ``y`` and the cleaned frame's
    shape are printed.

    Returns
    -------
    (train_df, test_df)
        Both with columns ``['unique_id', 'ds', 'y']``. ``test_df`` holds the
        final ``horizon`` rows (the whole frame if ``horizon`` exceeds its
        length); ``horizon=0`` yields an empty ``test_df``.

    Raises
    ------
    ValueError
        If ``horizon`` is negative.
    """
    if horizon < 0:
        raise ValueError(f"horizon must be non-negative, got {horizon}")

    df = pd.read_csv("https://raw.githubusercontent.com/ahamed14051/time-series-dataset/refs/heads/main/store.csv")
    # Keep a single series: the first store / first product combination
    df = df[(df['store'] == 0) & (df['product'] == 0)]
    df = df.drop(columns=['store', 'product'])

    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

    # Drop rows with invalid dates (like the description text)
    df = df.dropna(subset=['Date'])

    # Rename columns
    df = df.rename(columns={"Date": "ds", "number_sold": "y"})

    # Convert 'y' to numeric, coercing errors to NaN and dropping them
    df['y'] = pd.to_numeric(df['y'], errors='coerce')
    df = df.dropna(subset=['y'])

    print('Mean:', df['y'].mean())
    print(df.shape)

    # Train/test split on an explicit row index. Slicing with `-horizon`
    # breaks for horizon == 0 because -0 == 0 (empty train, full test).
    split = max(len(df) - horizon, 0)
    train_df = df.iloc[:split].copy()
    test_df = df.iloc[split:].copy()

    # Add unique_id
    train_df['unique_id'] = unique_id
    test_df['unique_id'] = unique_id

    return train_df[['unique_id', 'ds', 'y']], test_df[['unique_id', 'ds', 'y']]

# Forecast the last 24 rows of the store-0 / product-0 sales series.
# `frequency` is accepted by vit_prediction but not read in its body. As the
# cell's final expression, the returned results dict is what the notebook displays.
vit_prediction(load_store, frequency='M', horizon=24)

In [None]:
def load_hospitality(horizon=24, unique_id="AP1"):
    """Load HospitalityEmployees.csv and split off the last ``horizon`` rows.

    ``Date`` becomes ``ds`` and ``Employees`` becomes ``y``. Rows whose date
    or value cannot be parsed are dropped. The mean of ``y`` and the cleaned
    frame's shape are printed.

    Returns
    -------
    (train_df, test_df)
        Both with columns ``['unique_id', 'ds', 'y']``. ``test_df`` holds the
        final ``horizon`` rows (the whole frame if ``horizon`` exceeds its
        length); ``horizon=0`` yields an empty ``test_df``.

    Raises
    ------
    ValueError
        If ``horizon`` is negative.
    """
    if horizon < 0:
        raise ValueError(f"horizon must be non-negative, got {horizon}")

    df = pd.read_csv("https://raw.githubusercontent.com/ahamed14051/time-series-dataset/refs/heads/main/HospitalityEmployees.csv")

    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

    # Drop rows with invalid dates (like the description text)
    df = df.dropna(subset=['Date'])

    # Rename columns
    df = df.rename(columns={"Date": "ds", "Employees": "y"})

    # Convert 'y' to numeric, coercing errors to NaN and dropping them
    df['y'] = pd.to_numeric(df['y'], errors='coerce')
    df = df.dropna(subset=['y'])

    print('Mean:', df['y'].mean())
    print(df.shape)

    # Train/test split on an explicit row index. Slicing with `-horizon`
    # breaks for horizon == 0 because -0 == 0 (empty train, full test).
    split = max(len(df) - horizon, 0)
    train_df = df.iloc[:split].copy()
    test_df = df.iloc[split:].copy()

    # Add unique_id
    train_df['unique_id'] = unique_id
    test_df['unique_id'] = unique_id

    return train_df[['unique_id', 'ds', 'y']], test_df[['unique_id', 'ds', 'y']]

# Forecast the last 24 rows of the hospitality-employees series. `frequency`
# is accepted by vit_prediction but not read in its body. As the cell's final
# expression, the returned results dict is what the notebook displays.
vit_prediction(load_hospitality, frequency='M', horizon=24)

In [None]:
# RESULTS maps dataset name -> metrics dict, so the DataFrame has one column
# per dataset; transposing gives one row per dataset and one column per metric.
vit_results = pd.DataFrame(RESULTS).T
# Persist the table; index=True keeps the dataset names as the first CSV column.
vit_results.to_csv('vit_results.csv', index=True)
# Final expression of the cell: rendered as the cell output.
vit_results