# Forecasting

## Univariate

In [1]:
import sys
sys.path.insert(0,'../')

In [57]:
import plotly.express as px

In [2]:
from ts2vec import TS2Vec
import tasks
import datautils

In [3]:
# python -u train.py electricity forecast_univar --loader 
# forecast_csv_univar --repr-dims 320 --max-threads 8 --seed 42 --eval

In [4]:
# Dataset name; used below as the CSV file stem and to select dataset-specific branches.
dataset = 'electricity'

In [5]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import numpy as np

In [75]:

# def load_forecast_csv(name, univar=False, time_features=True):

# Inlined loader steps (see the commented-out load_forecast_csv signature above),
# kept at cell level so the intermediate values can be inspected.
name = dataset
univar = True
time_features = True

# The 'date' column becomes a parsed DatetimeIndex; every other column is a series.
raw_data = pd.read_csv(f'../datasets/{name}.csv', index_col='date', parse_dates=True)
if time_features:
    # Calendar covariates derived from the index; their column count is needed
    # later to separate covariates from target columns.
    dt_embed = datautils._get_time_features(raw_data.index)
    n_covariate_cols = dt_embed.shape[-1]
else:
    dt_embed = None
    n_covariate_cols = 0

if univar:
    # Keep a single target column, still as a one-column DataFrame.
    if name in ('ETTh1', 'ETTh2', 'ETTm1', 'ETTm2'):
        data = raw_data[['OT']]
    elif name == 'electricity':
        data = raw_data[['MT_001']]
    else:
        data = raw_data.iloc[:, -1:]
else:
    # Multivariate: keep every column. Without this branch `data` was never
    # bound and the following `data.to_numpy()` raised NameError.
    data = raw_data


In [None]:

# Convert the selected columns to a plain array: rows are timestamps, columns are variables.
data = data.to_numpy()
print(f"{data.shape=}")

# Chronological train/valid/test boundaries, expressed as row indices.
if name in ('ETTh1', 'ETTh2'):
    # presumably 12/4/4 blocks of 30 days of hourly rows — TODO confirm
    train_slice = slice(None, 12*30*24)
    valid_slice = slice(12*30*24, 16*30*24)
    test_slice = slice(16*30*24, 20*30*24)
elif name in ('ETTm1', 'ETTm2'):
    # Same boundaries with 4x as many rows (presumably 15-minute sampling — TODO confirm).
    train_slice = slice(None, 12*30*24*4)
    valid_slice = slice(12*30*24*4, 16*30*24*4)
    test_slice = slice(16*30*24*4, 20*30*24*4)
else:
    # Generic 60/20/20 split by row count.
    train_slice = slice(None, int(0.6 * len(data)))
    valid_slice = slice(int(0.6 * len(data)), int(0.8 * len(data)))
    test_slice = slice(int(0.8 * len(data)), None)

# Fit the scaler on the training rows only, then apply it to the whole series.
scaler = StandardScaler().fit(data[train_slice])
print(f"{data[train_slice].shape=}")
print(f"{data.shape=}")
data = scaler.transform(data)
print(f"{data.shape=}")
# Equality test instead of `name in ('electricity')`: a parenthesised string is
# not a tuple, so the old condition was a substring match on 'electricity'.
if name == 'electricity':
    data = np.expand_dims(data.T, -1)  # Each variable is an instance rather than a feature
else:
    data = np.expand_dims(data, 0)

if n_covariate_cols > 0:
    # Scale the time covariates with training-row statistics, tile them across
    # instances, and place them in front of the target column(s) on the last axis.
    dt_scaler = StandardScaler().fit(dt_embed[train_slice])
    dt_embed = np.expand_dims(dt_scaler.transform(dt_embed), 0)
    data = np.concatenate([np.repeat(dt_embed, data.shape[0], axis=0), data], axis=-1)

# Forecast horizons evaluated per dataset.
if name in ('ETTh1', 'ETTh2', 'electricity'):
    pred_lens = [24, 48, 168, 336, 720]
else:
    pred_lens = [24, 48, 96, 288, 672]

# return data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols


data.shape=(26304, 1)
data[train_slice].shape=(15782, 1)
data.shape=(26304, 1)
data.shape=(26304, 1)


In [7]:
# (
#     data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols
# ) = load_forecast_csv(
#     dataset, univar=True
# )


In [8]:
data.shape

(1, 26304, 8)

In [9]:
# Probe: `data` is now (instances, timestamps, columns), so len(data) counts
# instances (1 here) rather than timestamps and this evaluates to 0.6.
0.6 * len(data)

0.6

In [10]:
# Probe: the 60% boundary taken over the time axis (len(data[0])) instead.
data[:, slice(None, int(0.6 * len(data[0])))].shape

(1, 15782, 8)

In [11]:
# Training rows along the time axis; all columns (covariates and target) are kept here.
train_data = data[:, train_slice]

In [12]:
train_data.shape

(1, 15782, 8)

In [13]:
from utils import init_dl_program, name_with_datetime, pkl_save, data_dropout

In [14]:
# Arguments handed to init_dl_program in the next cell.
seed = 42
max_threads = 8
gpu = 0  # passed as the first positional argument of init_dl_program

In [15]:
# The returned value is passed to TS2Vec as `device` when the model is built.
device = init_dl_program(gpu, seed=seed, max_threads=max_threads)

  return self.getter()


In [16]:
# Model hyperparameters; collected into `config` and unpacked into TS2Vec below.
batch_size = 8
lr = 0.001
output_dims = 320  # matches --repr-dims 320 in the reference command above
max_train_length = 3000

In [17]:
# Keyword arguments unpacked into the TS2Vec constructor.
config = {
    'batch_size': batch_size,
    'lr': lr,
    'output_dims': output_dims,
    'max_train_length': max_train_length,
}

In [18]:
# Build the encoder. input_dims is the size of train_data's last axis, i.e. the
# time-covariate columns plus the target column(s).
model = TS2Vec(
    input_dims=train_data.shape[-1],
    device=device,
    **config
)

In [19]:
# Directory of a saved run; model.pkl is loaded from here.
run_dir = "../training/electricity__forecast_univar_20251101_232445"

In [20]:
# Load the saved model file into the TS2Vec instance built above.
model.load(f'{run_dir}/model.pkl')

In [21]:
# Test rows with the leading n_covariate_cols time-feature columns dropped,
# leaving only the target column(s).
test_data = data[:, test_slice, n_covariate_cols:]

In [22]:
test_data.shape

(1, 5261, 1)

## Eval

In [None]:
import time


In [62]:
pred_lens

[24, 48, 168, 336, 720]

In [85]:
# Forecasting evaluation: encode the full series once with a causal sliding
# window, then fit a ridge regressor per horizon on the frozen representations
# and score it in both normalised and original units.
padding = 200
t = time.time()
all_repr = model.encode(
    data,
    causal=True,
    sliding_length=1,
    sliding_padding=padding,
    batch_size=256
)
ts2vec_infer_time = time.time() - t

# Representations and targets share the same time slices; targets drop the
# leading covariate columns.
train_repr = all_repr[:, train_slice]
valid_repr = all_repr[:, valid_slice]
test_repr = all_repr[:, test_slice]

train_data = data[:, train_slice, n_covariate_cols:]
valid_data = data[:, valid_slice, n_covariate_cols:]
test_data = data[:, test_slice, n_covariate_cols:]


def _inverse_scale(values, n_cols):
    """Undo `scaler` on `values`, treating each run of `n_cols` entries as one row.

    The array is flattened to (-1, n_cols) so the scaler always receives 2-D
    input with as many columns as it was fitted on, then restored to the
    original shape.
    """
    flat = values.reshape(-1, n_cols)
    return scaler.inverse_transform(flat).reshape(values.shape)


ours_result = {}
lr_train_time = {}
lr_infer_time = {}
out_log = {}
for pred_len in pred_lens[:1]:  # only the first horizon is evaluated in this cell
    train_features, train_labels = tasks.forecasting.generate_pred_samples(train_repr, train_data, pred_len, drop=padding)
    valid_features, valid_labels = tasks.forecasting.generate_pred_samples(valid_repr, valid_data, pred_len)
    test_features, test_labels = tasks.forecasting.generate_pred_samples(test_repr, test_data, pred_len)

    t = time.time()
    # Named `ridge` rather than `lr` so the learning-rate global `lr` is not overwritten.
    ridge = tasks._eval_protocols.fit_ridge(train_features, train_labels, valid_features, valid_labels)
    lr_train_time[pred_len] = time.time() - t

    t = time.time()
    test_pred = ridge.predict(test_features)
    lr_infer_time[pred_len] = time.time() - t

    # (instances, windows, horizon, target columns)
    ori_shape = test_data.shape[0], -1, pred_len, test_data.shape[2]

    if test_data.shape[0] > 1:
        print("swap axis")
        # Predictions and labels are still 2-D here, so restore the 4-D layout
        # before moving the instance axis last. Assumes the scaler was fitted
        # with one column per instance (variables-as-instances layout) — TODO confirm.
        pred_by_var = test_pred.reshape(ori_shape).swapaxes(0, 3)
        labels_by_var = test_labels.reshape(ori_shape).swapaxes(0, 3)
        test_pred_inv = _inverse_scale(pred_by_var, pred_by_var.shape[-1]).swapaxes(0, 3)
        test_labels_inv = _inverse_scale(labels_by_var, labels_by_var.shape[-1]).swapaxes(0, 3)
    else:
        # Result keeps the 2-D (windows, horizon * columns) shape of the inputs.
        test_pred_inv = _inverse_scale(test_pred, test_data.shape[2])
        test_labels_inv = _inverse_scale(test_labels, test_data.shape[2])

    test_pred = test_pred.reshape(ori_shape)
    test_labels = test_labels.reshape(ori_shape)

    out_log[pred_len] = {
        'norm': test_pred,
        'raw': test_pred_inv,
        'norm_gt': test_labels,
        'raw_gt': test_labels_inv
    }
    ours_result[pred_len] = {
        'norm': tasks.forecasting.cal_metrics(test_pred, test_labels),
        'raw': tasks.forecasting.cal_metrics(test_pred_inv, test_labels_inv)
    }

eval_res = {
    'ours': ours_result,
    'ts2vec_infer_time': ts2vec_infer_time,
    'lr_train_time': lr_train_time,
    'lr_infer_time': lr_infer_time
}

In [86]:
eval_res.keys()

dict_keys(['ours', 'ts2vec_infer_time', 'lr_train_time', 'lr_infer_time'])

In [87]:
eval_res["ours"]

{24: {'norm': {'MSE': np.float64(0.2609168600671482),
   'MAE': np.float64(0.29016099102652765)},
  'raw': {'MSE': np.float64(152.9921800501309),
   'MAE': np.float64(7.026233422791587)}}}

In [93]:
test_labels.shape

(1, 5237, 24, 1)

In [None]:
# Normalised forecast vs. ground truth for window 0 of instance 0, one row per horizon step.
df_0 = pd.DataFrame(dict(
    index=np.arange(test_pred.shape[2]),
    pred=test_pred[0, 0, :, 0],
    label=test_labels[0, 0, :, 0],
))

In [97]:
test_pred_inv.shape, test_labels_inv.shape

((5237, 24), (5237, 24))

In [98]:
# Same first window after inverse scaling, one row per horizon step.
df_0_inv = pd.DataFrame(dict(
    index=np.arange(test_pred_inv.shape[1]),
    pred=test_pred_inv[0, :],
    label=test_labels_inv[0, :],
))

In [99]:
# Melt to long format (one row per index/type pair) so the "pred" and "label"
# series are drawn as separately coloured lines.
px.line(
    df_0_inv.melt(
        id_vars="index",
        value_vars=["pred", "label"],
        var_name="type",
        value_name="value"
    ),
    x="index",
    y="value",
    color="type"
)

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [71]:
test_pred.shape

(1, 4541, 720, 1)

In [72]:
test_labels.shape

(1, 4541, 720, 1)

## Check Embedding

In [23]:
test_data.shape, train_data.shape

((1, 5261, 1), (1, 15782, 8))

In [27]:
from torch.utils.data import TensorDataset, DataLoader
import torch

In [39]:
# Wrap the full `data` array (cast to float32) as a single-tensor dataset.
t_dataset = TensorDataset(torch.from_numpy(data).to(torch.float))
# Batches are taken along axis 0 (instances), using the global batch_size.
loader = DataLoader(t_dataset, batch_size=batch_size)

In [40]:
list(loader)[0][0].shape

torch.Size([1, 26304, 8])

In [48]:
# NOTE: this encodes the whole `data` array, not only the test split, and
# rebinds the name `test_repr` that the Eval cell also assigns.
test_repr = model.encode(data)

In [81]:
test_repr.shape, raw_data.shape, data.shape

((1, 26304, 320), (26304, 321), (26304, 1))

In [63]:
test_repr.squeeze()[:100].shape

(100, 320)

In [83]:
# Heatmap of the first 1000 timestamps, transposed so rows are representation
# dimensions and columns are timestamps (matching the axis labels).
px.imshow(
    test_repr.squeeze()[:1000].T,
    labels={"x": "Timestamp", "y": "Dimensions"},
    title="TS2Vec Representation Heatmap",
    aspect="auto",
    color_continuous_scale='gray'
)

In [79]:
# Unscaled MT_001 values for the first 1000 rows of raw_data; reset_index turns
# the 'date' index into a column so it can be used as the x axis.
px.line(
    raw_data[["MT_001"]][:1000].reset_index(),
    x="date",
    y="MT_001"
)

In [45]:

# Instance-level representation: one vector per instance.
# NOTE: the input is the full `data` array, not only the test split, despite the variable name.
test_repr = model.encode(data, encoding_window='full_series')  # n_instances x output_dims


In [47]:
test_repr.shape

(1, 320)

In [None]:

# Causal sliding inference with a padding of 50 timestamps.
# NOTE: the input is the full `data` array, not only the test split, despite the variable name.
test_repr = model.encode(
    data,
    causal=True,
    sliding_length=1,
    sliding_padding=50
)  # n_instances x n_timestamps x output_dims