In [2]:
import nnts
import nnts.data
import nnts.plotting
import nnts.torch.preprocessing
import nnts.torch.models
import nnts.metrics
import nnts.torch.datasets
import nnts.loggers
import nnts.datasets
import nnts.torch.utils
import nnts.torch.trainers
import nnts.metrics
import nnts.torch

import torch
import pandas as pd
from pydantic import BaseModel
import wandb
from tsfm_public.models.tinytimemixer import TinyTimeMixerForPrediction
from tsfm_public.toolkit.get_model import get_model
from transformers import EarlyStoppingCallback, Trainer, TrainingArguments
import tempfile

INFO:p-40210:t-7916067904:config.py:<module>:PyTorch version 2.4.0 available.
INFO:p-40210:t-7916067904:config.py:<module>:Polars version 1.11.0 available.


In [3]:
# Dataset names available to this notebook. A later cell selects an entry by
# its position in this list, so the order of the active entries must not change.
DATASET_NAMES = [
    "bitcoin", "car_parts", "cif_2016", "covid_deaths",
    # disabled: "dominick", "electricity_hourly", "electricity_weekly"
    "fred_md", "hospital",
    # disabled: "kaggle_web_traffic", "kdd_cup"
    "m1_monthly", "m1_quarterly", "m1_yearly",
    "m3_monthly", "m3_quarterly", "m3_yearly",
    "m4_daily", "m4_hourly", "m4_monthly", "m4_quarterly", "m4_weekly",
    "m4_yearly",  # fails on date exceeding permitted range
    "nn5_daily", "nn5_weekly",
    "pedestrian_counts", "rideshare", "saugeen_river_flow",
    "solar_10_minutes", "solar_weekly", "sunspot", "temperature_rain",
    "tourism_monthly", "tourism_quarterly", "tourism_yearly",
    "traffic_hourly", "traffic_weekly",
    "us_births", "vehicle_trips", "weather",
    "australian_electricity_demand",
]

In [4]:
# TTM configuration.
# TTM_MODEL_PATH defaults to the Granite-R2 release; point it at a different
# TTM release to load another checkpoint.
TTM_MODEL_PATH = "ibm-granite/granite-timeseries-ttm-r2"
model_name = "ttm"

# Default window sizes, in time steps.
PREDICTION_LENGTH = 96
CONTEXT_LENGTH = 512
CONTEXT_LENGTH_ITEM = 0

In [38]:
# Pick the dataset by its position in DATASET_NAMES (the recorded run printed
# "m4_hourly" for index 13), then load its data frame and metadata.
# NOTE(review): the index shifts if entries in DATASET_NAMES are enabled or
# disabled; re-check the printed name after editing that list.
dataset_name = DATASET_NAMES[13]
print(dataset_name)
df, metadata = nnts.datasets.load_dataset(dataset_name)

m4_hourly


In [39]:
# Build the train/test dataloaders. Both the dataset options and the loader
# factory use the context and prediction lengths from the dataset metadata.
dataset_options = dict(
    context_length=metadata.context_length,
    prediction_length=metadata.prediction_length,
    conts=[],
)
trn_dl, test_dl = nnts.torch.utils.create_dataloaders(
    df,
    nnts.datasets.split_test_train_last_horizon,
    metadata.context_length,
    metadata.prediction_length,
    Dataset=nnts.torch.datasets.TimeseriesDataset,
    Sampler=nnts.torch.datasets.TimeSeriesSampler,
    dataset_options=dataset_options,
)

In [40]:
# Request a TTM checkpoint for the dataset's context and prediction lengths.
# The checkpoint that gets loaded can use different lengths than requested:
# the recorded log for this run reports context_length = 180 and
# prediction_length = 60, so always read the lengths back from the model config.
zeroshot_model = get_model(
    TTM_MODEL_PATH,
    context_length=metadata.context_length,
    prediction_length=metadata.prediction_length,
    force_return="zeropad",
    prefer_longer_context=True,
    prefer_l1_loss=True,
    freq_prefix_tuning=False,
    freq=None,
)

INFO:p-40210:t-7916067904:get_model.py:get_model:Loading model from: ibm-granite/granite-timeseries-ttm-r2
INFO:p-40210:t-7916067904:get_model.py:get_model:Model loaded successfully from ibm-granite/granite-timeseries-ttm-r2, revision = 180-60-ft-l1-r2.1.
INFO:p-40210:t-7916067904:get_model.py:get_model:[TTM] context_length = 180, prediction_length = 60


In [41]:
# Context length of the checkpoint that was actually loaded (180 in the
# recorded run).
zeroshot_model.config.context_length

180

In [42]:
# Run settings: random seed and evaluation batch size.
SEED, batch_size = 42, 64

# Fresh scratch directory; tempfile.mkdtemp() does not remove it automatically.
temp_dir = tempfile.mkdtemp()

In [43]:
# Display the metadata of the loaded dataset (lengths, frequency, seasonality).
metadata

Metadata(filename='m4_hourly_dataset.tsf', dataset='m4_hourly', context_length=210, prediction_length=48, freq='1H', seasonality=24, url='https://zenodo.org/records/4656589/files/m4_hourly_dataset.zip', context_lengths=[210, 96, 60], multivariate=False)

In [44]:
# Trainer wrapped around the zero-shot model. Results go to the scratch
# directory and external experiment reporting is switched off.
zeroshot_args = TrainingArguments(
    output_dir=temp_dir,
    per_device_eval_batch_size=batch_size,
    seed=SEED,
    report_to="none",
)
zeroshot_trainer = Trainer(model=zeroshot_model, args=zeroshot_args)

In [52]:
# Display the dataset metadata again for reference before building the
# evaluation datasets.
metadata

Metadata(filename='m4_hourly_dataset.tsf', dataset='m4_hourly', context_length=210, prediction_length=48, freq='1H', seasonality=24, url='https://zenodo.org/records/4656589/files/m4_hourly_dataset.zip', context_lengths=[210, 96, 60], multivariate=False)

In [56]:
# Frequency string -> integer token. Dataset items look their frequency up
# here and fall back to 0 (the "oov" bucket) when it is not listed.
DEFAULT_FREQUENCY_MAPPING = {
    # fallback bucket
    "oov": 0,
    # minute-level resolutions
    "min": 1, "2min": 2, "5min": 3, "10min": 4, "15min": 5, "30min": 6,
    # hourly ("1H" kept for compatibility)
    "h": 7, "1H": 7,
    # daily ("d" kept for compatibility)
    "d": 8, "D": 8,
    # weekly
    "W": 9,
}

class Dataset:
    """Dataset wrapper that left-pads every history window with zeros.

    Each item of ``ds`` must expose a ``data`` tensor. Its first
    ``metadata.context_length`` rows are used as the history and the remaining
    rows as the forecast target. The history is right-aligned inside a zero
    tensor of ``fixed_context_length`` rows, and a boolean mask marks which rows
    hold real observations.

    Args:
        ds: Indexable, sized collection whose items have a ``data`` tensor
            (assumed to be laid out as ``(time, channels)`` -- TODO confirm).
        metadata: Object providing ``context_length`` and ``freq``.
        fixed_context_length: Number of time steps every ``past_values`` tensor
            is padded to. Defaults to 512, the value that used to be hard-coded;
            pass the loaded model's context length to match the model.

    Raises:
        ValueError: If ``fixed_context_length`` is smaller than 1.
    """

    def __init__(self, ds, metadata, fixed_context_length=512):
        if fixed_context_length < 1:
            raise ValueError(
                f"fixed_context_length must be >= 1, got {fixed_context_length}"
            )
        self.ds = ds
        self.metadata = metadata
        self.fixed_context_length = fixed_context_length

    def __getitem__(self, idx):
        """Return the padded history, target, observation mask and frequency token."""
        window = self.ds[idx].data
        split = self.metadata.context_length
        future_values = window[split:]

        # A history longer than the padded length cannot be stored in full;
        # keep the most recent steps instead of failing on a shape mismatch.
        past_values = window[:split][-self.fixed_context_length :]

        # Index by start position rather than a negative length so that an
        # empty history (where "-0:" would select everything) is handled too.
        first_observed = self.fixed_context_length - past_values.shape[0]

        # Mirror the trailing (channel) dimensions of the history instead of
        # assuming a single channel.
        padded_shape = (self.fixed_context_length, *past_values.shape[1:])
        zero_padded_data = torch.zeros(padded_shape)
        zero_padded_data[first_observed:] = past_values
        past_observed_mask = torch.zeros(padded_shape, dtype=torch.bool)
        past_observed_mask[first_observed:] = True

        return {
            "past_values": zero_padded_data,
            "future_values": future_values,
            "past_observed_mask": past_observed_mask,
            "freq_token": torch.tensor(
                # Frequencies missing from the mapping fall back to token 0.
                DEFAULT_FREQUENCY_MAPPING.get(self.metadata.freq, 0),
                dtype=torch.int32,
            ),
        }

    def __len__(self):
        """Number of windows in the wrapped dataset."""
        return len(self.ds)


class TruncatedDataset:
    """Dataset wrapper that hands the model only the most recent history.

    For every item of ``ds`` the last ``metadata.prediction_length`` rows of
    ``item.data`` form the target, and the ``model_context_length`` rows
    directly before them form the history. The observation mask is all ones
    because no padding is introduced.
    """

    def __init__(self, ds, metadata, model_context_length):
        self.ds = ds
        self.metadata = metadata
        self.model_context_length = model_context_length

    def __len__(self):
        return len(self.ds)

    def __getitem__(self, idx):
        series = self.ds[idx].data
        horizon = self.metadata.prediction_length
        # Negative offsets count back from the end of the window.
        history_start = -(self.model_context_length + horizon)

        history = series[history_start:-horizon]
        target = series[-horizon:]
        observed = torch.ones(self.model_context_length, 1).bool()
        token = DEFAULT_FREQUENCY_MAPPING.get(self.metadata.freq, 0)

        return {
            "past_values": history,
            "future_values": target,
            "past_observed_mask": observed,
            "freq_token": torch.tensor(token, dtype=torch.int32),
        }

In [57]:
# The loaded checkpoint's context length can differ from the dataset's.
# If the model accepts less history than the dataset provides, truncate each
# window to the most recent steps. Otherwise zero-pad the history up to the
# model's context length.
model_context_length = zeroshot_model.config.context_length
if model_context_length < metadata.context_length:
    test_data = TruncatedDataset(test_dl.dataset, metadata, model_context_length)
else:
    test_data = Dataset(test_dl.dataset, metadata)
    # Pad to what the loaded model expects rather than the class default of
    # 512, which is wrong for checkpoints with any other context length.
    test_data.fixed_context_length = model_context_length

In [58]:
# Sanity check: shape of the forecast target for the first test window
# (the recorded run shows 48 steps by 1 channel).
test_data[0]["future_values"].shape

torch.Size([48, 1])

In [59]:
# Zero-shot forecast over the test windows. The first array in the trainer's
# predictions is taken as the forecast.
predictions_dict = zeroshot_trainer.predict(test_data)
y_hat = torch.tensor(predictions_dict.predictions[0])

# Ground truth: the last `prediction_length` steps of every test window.
y = test_dl.dataset.X[:, -metadata.prediction_length :]

# The seasonal error is computed on the training loader and passed to the
# metric calculation.
seasonal_error = nnts.metrics.calculate_seasonal_error(trn_dl, metadata.seasonality)
test_metrics = nnts.metrics.calc_metrics(y_hat, y, seasonal_error)
print(test_metrics)

{'mse': 2104972.25, 'abs_error': 5907565.0, 'abs_target_sum': 145558864.0, 'abs_target_mean': 7324.822265625, 'seasonal_error': 336.9046936035156, 'mean_mase': 2.3399574756622314, 'mean_mape': 0.13688777387142181, 'mean_smape': 0.1268552839756012, 'mean_msmape': 0.1267193853855133, 'mean_mae': 297.28082275390625, 'mean_rmse': 358.534423828125, 'median_mase': 1.2051705121994019, 'median_smape': 0.0616854764521122, 'median_msmape': 0.0616719089448452, 'median_mae': 14.963363647460938, 'median_rmse': 21.463424682617188}


In [None]:
from tsfm_public import (
    TimeSeriesPreprocessor,
    TrackingCallback,
    count_parameters,
    get_datasets,
)

# ETTh1 setup: source file and column roles.
TARGET_DATASET = "etth1"
dataset_path = "data/ETTh1.csv"
timestamp_column = "date"
id_columns = []  # ids that uniquely identify a time series (none here)
target_columns = ["HUFL", "HULL", "MUFL", "MULL", "LUFL", "LULL", "OT"]

# Boundaries of the train/valid/test splits (explained in the accompanying
# slides; presumably row indices into the CSV -- TODO confirm).
split_config = dict(
    train=[0, 8640],
    valid=[8640, 11520],
    test=[11520, 14400],
)

data = pd.read_csv(dataset_path, parse_dates=[timestamp_column])

column_specifiers = dict(
    timestamp_column=timestamp_column,
    id_columns=id_columns,
    target_columns=target_columns,
    control_columns=[],
)
context_length = 512
forecast_length = 96

# Preprocessor with standard scaling and no categorical encoding.
tsp = TimeSeriesPreprocessor(
    **column_specifiers,
    context_length=context_length,
    prediction_length=forecast_length,
    scaling=True,
    encode_categorical=False,
    scaler_type="standard",
)

# The frequency token is only requested when the loaded model is configured
# for resolution prefix tuning.
dset_train, dset_valid, dset_test = get_datasets(
    tsp,
    data,
    split_config,
    use_frequency_token=zeroshot_model.config.resolution_prefix_tuning,
)

In [177]:
# Shape of the observation mask for the first ETTh1 test item (the recorded
# run shows 512 steps by 7 channels).
dset_test[0]["past_observed_mask"].shape

torch.Size([512, 7])