In [1]:
import copy
from timeit import default_timer as timer

import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
    root_mean_squared_error,
)

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, TensorDataset, DataLoader


# local scripts
from utils import *
from encoder.tcn_encoder import CausalCNNEncoder
from encoder.losses.info_nce import InfoNCE

### Dataset

In [2]:
class TSTrainDataset(Dataset):
    '''Map-style dataset that serves one float32 sample per index.

    Args:
        data: array-like whose first axis enumerates the samples. NumPy
            arrays, nested sequences and tensors are all accepted; the
            values are stored as a single float32 tensor.
    '''

    def __init__(self, data):
        # `as_tensor` converts ndarrays exactly like `from_numpy(...).float()`
        # did, but additionally accepts lists and existing tensors.
        self.data = torch.as_tensor(data, dtype=torch.float32)

    def __len__(self):
        '''Number of samples (size of the first axis).'''
        return len(self.data)

    def __getitem__(self, idx):
        '''Return the sample(s) selected by `idx` as a float32 tensor.'''
        return self.data[idx]


class TSTestDataset:
    '''Lag-feature dataset whose inputs are replaced by encoder embeddings.

    Args:
        encoder: object exposing `encode(array, pooling=...)`; it is called
            once per lag-feature sample with `pooling='avg'`.
        data: raw series forwarded to `create_lag_features`.
        hist_timesteps: forwarded to `create_lag_features`.
        forecast_steps: forwarded to `create_lag_features`.
        skip: forwarded to `create_lag_features`.
    '''

    def __init__(self, encoder, data, hist_timesteps, forecast_steps, skip=12):
        self.X, self.y = create_lag_features(
            data, hist_timesteps, forecast_steps, skip,
        )
        n_samples = self.X.shape[0]
        # Use the encoder that was passed in. The previous code reached for a
        # notebook-global `model`, silently ignoring this argument.
        embeddings = [
            encoder.encode(self.X[i], pooling='avg') for i in range(n_samples)
        ]
        self.X = torch.tensor(np.array(embeddings)).float()
        self.y = torch.tensor(self.y).float()

    def __len__(self):
        '''Number of lag-feature samples.'''
        return self.X.shape[0]

    def __getitem__(self, idx):
        '''Return `(embedding, target)`; size-1 axes of the target are dropped.'''
        return self.X[idx], self.y[idx].squeeze()

### Learner

In [3]:
class TSEncoder(nn.Module):
    '''Contrastive learner wrapped around a time-series encoder network.

    A private deep copy of `encoder` is trained on pairs of overlapping crops
    (`fit`) and can afterwards turn `[B, C, T]` arrays into one pooled
    embedding per sample (`encode`).

    Args:
        encoder: network to copy and train; called as
            `net(x, mask_prob=...)` during training and `net(x, mask)` during
            encoding.
        loss_fn: callable taking the two flattened views and returning a
            scalar loss tensor.
        optimizer: optimizer class/factory, invoked as
            `optimizer(params, lr=lr)`.
        train_dataset: dataset whose batches are tensors fed to `get_crops`.
        lr: learning rate handed to `optimizer`.
        batch_size: training batch size; also the default for `encode`.
        device: device the network and the batches are moved to.
    '''

    def __init__(
        self,
        encoder,
        loss_fn,
        optimizer,
        train_dataset,
        lr=0.001,
        batch_size=8,
        device='cpu',
    ):
        super().__init__()
        self.device = device
        self.batch_size = batch_size

        # Train a copy so the caller's encoder instance stays untouched.
        self.net = copy.deepcopy(encoder).to(device)
        self.loss_fn = loss_fn
        self.optimizer = optimizer(self.net.parameters(), lr=lr)

        self.train_dataset = train_dataset
        # drop_last keeps every training batch at exactly `batch_size` samples.
        self.train_dataloader = DataLoader(
            train_dataset,
            batch_size=batch_size,
            shuffle=True,
            drop_last=True,
        )

        # Running counters; they persist across successive `fit` calls.
        self.n_epochs = 0
        self.n_iters = 0

    def fit(
        self,
        n_epochs=None,
        n_iters=None,
        mask_prob=0.5,
        verbose=False,
    ):
        '''Train the encoder on two overlapping crops of every batch.

        Training batches are expected in `[B, C, T]` layout.

        Args:
            n_epochs: stop once `self.n_epochs` reaches this value.
            n_iters: stop once `self.n_iters` reaches this value. When both
                limits are None, `n_iters` defaults to 600.
            mask_prob: forwarded to the network as `mask_prob` for both views.
            verbose: print the mean loss after each completed epoch.

        Returns:
            List with the mean loss of every epoch completed in this call.

        Raises:
            ValueError: if an epoch yields no batch at all (the dataset holds
                fewer than `batch_size` samples).
        '''
        if n_iters is None and n_epochs is None:
            n_iters = 600

        loss_log = []
        while True:
            if n_epochs is not None and self.n_epochs >= n_epochs:
                break

            total_loss = 0
            n_epoch_iters = 0
            interrupted = False

            for x in self.train_dataloader:
                if n_iters is not None and self.n_iters >= n_iters:
                    interrupted = True
                    break

                # The network lives on `self.device`; the batch must follow it.
                x = x.to(self.device)

                self.optimizer.zero_grad()

                # Two crops of the same batch that share `overlap_size` steps.
                x1, x2, overlap_size = get_crops(x, overlap_size=0.7)

                # Keep only the shared region: the tail of the first view and
                # the head of the second one.
                z1 = self.net(x1, mask_prob=mask_prob)[:, :, -overlap_size:]
                z2 = self.net(x2, mask_prob=mask_prob)[:, :, :overlap_size]

                # Flatten each sample using the actual batch dimension.
                loss = self.loss_fn(
                    z1.reshape(z1.size(0), -1),
                    z2.reshape(z2.size(0), -1),
                )
                loss.backward()

                # Apply the parameter update.
                self.optimizer.step()

                total_loss += loss.item()
                n_epoch_iters += 1
                self.n_iters += 1

            if interrupted:
                break

            if n_epoch_iters == 0:
                raise ValueError(
                    'The training dataloader produced no batches; the dataset '
                    'must contain at least `batch_size` samples.'
                )

            total_loss /= n_epoch_iters
            loss_log.append(total_loss)

            if verbose:
                print(f'Epoch {self.n_epochs}: loss = {total_loss}')

            self.n_epochs += 1

        return loss_log

    def encode(
        self,
        data,
        batch_size=None,
        mask=None,
        pooling='max',
    ):
        '''Embed a NumPy array of shape `[B, C, T]`.

        Args:
            data: 3-D NumPy array.
            batch_size: inference batch size; defaults to `self.batch_size`.
            mask: forwarded unchanged to the network.
            pooling: `'avg'` or `'max'` pooling over the time axis. Any other
                value skips pooling, so the time axis is only dropped when it
                has length 1.

        Returns:
            NumPy array with one row per input sample.
        '''
        assert self.net is not None, 'Please train or load a model'
        assert data.ndim == 3

        if batch_size is None:
            batch_size = self.batch_size

        dataset = TensorDataset(torch.from_numpy(data).float())
        dataloader = DataLoader(dataset, batch_size=batch_size)

        org_training = self.net.training
        self.net.eval()
        try:
            with torch.no_grad():
                # TensorDataset yields 1-tuples, hence `batch[0]`.
                output = [
                    self._eval_with_pooling(batch[0], mask, pooling)
                    for batch in dataloader
                ]
                output = torch.cat(output, dim=0).squeeze(2)
        finally:
            # Restore the previous train/eval mode even if encoding failed.
            self.net.train(org_training)
        return output.numpy()

    def _eval_with_pooling(self, x, mask=None, pooling='avg'):
        '''Run the network on `x` (`[B, C, T]`) and pool over the last axis.

        Returns the result on the CPU. A `pooling` value other than `'avg'`
        or `'max'` returns the network output unpooled.
        '''
        out = self.net(x.to(self.device, non_blocking=True), mask)
        seq_len = out.size(2)
        if pooling == 'avg':
            out = F.avg_pool1d(out, kernel_size=seq_len)
        elif pooling == 'max':
            out = F.max_pool1d(out, kernel_size=seq_len)
        return out.cpu()

### Training

In [4]:
# original shape: [timestep, location, feature]
# reshape to:     [location, feature, timestep]
# [B, C, T]
dataset = 'pems04'
pems = load_pems(f'data/{dataset}.npz')
X_train, X_val, X_test = get_train_test_splits(pems, train_size=0.6, test_size=0.2)

# Length handed to `split_time_series` (12 + 6 timesteps).
l = 12 + 6
X_train = split_time_series(X_train, l)

# Keep at most 2000 randomly chosen training samples. A dedicated, seeded
# generator makes the subset identical on every Restart & Run All instead of
# depending on the state of the global NumPy RNG.
subsample_rng = np.random.default_rng(0)
selected_samples = subsample_rng.permutation(X_train.shape[0])[:2000]
X_train = X_train[selected_samples]

In [5]:
# Seed both RNGs before any weights are initialised or batches are shuffled so
# that the loss log below can be regenerated from a fresh kernel.
# NOTE(review): the cropping/masking helpers may draw from either NumPy or
# torch - both are seeded for that reason; confirm against `utils`/`encoder`.
np.random.seed(0)
torch.manual_seed(0)

repr_dim = 128

# Architecture of the causal CNN encoder.
encoder_params = {
    'hidden_dim': 32,
    'output_dim': repr_dim,
    'depth': 2,
    'kernel_size': 3,
    'dropout': 0.0,
    'mask_mode': 'b',
}
# Optimisation settings consumed by `TSEncoder`.
model_params = {
    'loss_fn': InfoNCE(temperature=0.1),
    'optimizer': torch.optim.Adam,
    'train_dataset': TSTrainDataset(X_train),
    'lr': 0.0001,
    'batch_size': 8,
    'device': 'cpu',
}

# The channel axis of the training samples fixes the encoder's input width.
encoder = CausalCNNEncoder(input_dim=X_train.shape[1], **encoder_params)
model = TSEncoder(encoder=encoder, **model_params)

log = model.fit(
    n_epochs=10,
    verbose=True,
)

Epoch 0: loss = 2.323422065258026
Epoch 1: loss = 2.0914916157722474
Epoch 2: loss = 2.0729483942985536
Epoch 3: loss = 2.0320279717445375
Epoch 4: loss = 1.623516972064972
Epoch 5: loss = 1.254003221988678
Epoch 6: loss = 1.1857203299999237
Epoch 7: loss = 1.0963209590911864
Epoch 8: loss = 1.0469408822059632
Epoch 9: loss = 0.9758206036090851


### Testing

In [6]:
# Re-create the raw splits: `X_train` was overwritten with sub-sampled
# training windows earlier in the notebook.
X_train, X_val, X_test = get_train_test_splits(pems, train_size=0.6, test_size=0.2)

# Arguments forwarded to `create_lag_features` through `TSTestDataset`.
hist_timesteps = 12
forecast_steps = 12
skip = forecast_steps

# Time the construction of both datasets; each one runs the encoder once per
# lag-feature sample, which dominates the wall-clock time printed below.
start = timer()

train_ds = TSTestDataset(model, X_train, hist_timesteps, forecast_steps, skip)
test_ds = TSTestDataset(model, X_test, hist_timesteps, forecast_steps, skip)

end = timer()
print(end - start)

145.69440021900118


In [10]:
# Linear probe on the learned embeddings: one Ridge model is fitted per index
# of the second axis (`node`), and the per-node errors are averaged.

# Slice the datasets once; the full tensors do not change inside the loop.
train_features, train_targets = train_ds[:]
test_features, test_targets = test_ds[:]

rmse = []
mae = []

for node in range(X_test.shape[0]):

    X_train_single = train_features[:, node]
    y_train_single = train_targets[:, node]
    X_test_single = test_features[:, node]
    y_test_single = test_targets[:, node]

    lr = Ridge(alpha=1.0)

    lr.fit(X_train_single, y_train_single)
    y_pred = lr.predict(X_test_single)

    # scikit-learn metrics take (y_true, y_pred). RMSE and MAE are symmetric,
    # so the numbers are unchanged, but the order matters for metrics such as
    # MAPE.
    rmse.append(root_mean_squared_error(y_test_single, y_pred))
    mae.append(mean_absolute_error(y_test_single, y_pred))

print(np.array(rmse).mean())
print(np.array(mae).mean())

35.264885019467116
25.463049050281416


In [8]:
# Baseline for comparison: the same per-node Ridge probe, fitted directly on
# the raw lag features instead of the encoder embeddings.
X, y = create_lag_features(X_train, hist_timesteps, forecast_steps, skip)
X_t, y_t = create_lag_features(X_test, hist_timesteps, forecast_steps, skip)
# Drop size-1 axes so each node slice is a 2-D design matrix.
# NOTE(review): presumably this removes a singleton feature axis - confirm.
X = X.squeeze()
X_t = X_t.squeeze()

rmse = []
mae = []

for node in range(X_test.shape[0]):

    X_train_single = X[:, node]
    y_train_single = y[:, node]
    X_test_single = X_t[:, node]
    y_test_single = y_t[:, node]

    lr = Ridge(alpha=1.0)
    lr.fit(X_train_single, y_train_single)
    y_pred = lr.predict(X_test_single)

    # scikit-learn metrics take (y_true, y_pred). RMSE and MAE are symmetric,
    # so the numbers are unchanged, but the order matters for metrics such as
    # MAPE.
    rmse.append(root_mean_squared_error(y_test_single, y_pred))
    mae.append(mean_absolute_error(y_test_single, y_pred))

print(np.array(rmse).mean())
print(np.array(mae).mean())

38.21645359800008
27.820573119643193
