In [1]:

import torch
import argparse
from pathlib import Path
import numpy as np
from src.model_utils import build_model
import src.datasets as datasets
import src.evaluation as evaluation
from src.torch_utils import torch2numpy
from reevaluate import get_test_dataset
from tqdm import tqdm


  "Using `json`-module for json-handling. "


In [11]:
def get_quantile_forecast(pred, quantile):
    """Collapse the sample axis of a sampled forecast into a quantile.

    Args:
        pred: Array-like of forecast samples whose first axis indexes the
            samples (the original note describes the layout as
            [num_samples, num_series, prediction_length]).
        quantile: Quantile level(s) in [0, 1], forwarded to ``np.quantile``.

    Returns:
        The requested quantile computed across axis 0 for every remaining
        entry of ``pred``.
    """
    sample_axis = 0
    return np.quantile(pred, q=quantile, axis=sample_axis)


In [2]:
# Restore the model and the test data described by a single checkpoint file.
ckpt_file = "./Checkpoints/bouncing_ball.pt"
# NOTE: torch.load unpickles the file, so only open checkpoints from a trusted source.
ckpt = torch.load(ckpt_file, map_location="cpu")

# The checkpoint stores the config under "config" and the weights under "model".
config = ckpt["config"]
model = build_model(config=config)
model.load_state_dict(ckpt["model"])

test_dataset = get_test_dataset(config)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=80)
# Extra keyword arguments passed to the model call in a later cell.
extra_args = {"dur_temperature": 1.0}
device = 'cpu'

In [12]:
def autoregressive_cov(ckpt_file, batch_size=80, num_samples=100, lower_q=0.1, upper_q=0.9):
    """Score rolling one-step-ahead prediction intervals on the test set.

    The checkpoint is loaded, the model is rebuilt from its stored config, and
    for every test batch the context window is slid forward one step at a
    time. Each window yields a sampled one-step forecast from which the
    lower / median / upper quantiles are taken across the sample axis.

    Args:
        ckpt_file: Path to a checkpoint holding "config" and "model" entries.
        batch_size: Batch size of the test ``DataLoader``.
        num_samples: Number of forecast samples requested per window.
        lower_q: Quantile level used as the lower interval bound.
        upper_q: Quantile level used as the upper interval bound.

    Returns:
        ``(coverage, width, (ground_truth, all_mean, all_lb, all_ub))`` where
        ``coverage`` is the fraction of targets inside ``[all_lb, all_ub]``
        (bounds inclusive), ``width`` is the mean of ``all_ub - all_lb``, and
        the four arrays are concatenated over batches along axis 0.
        ``all_mean`` holds the 0.5 quantile (i.e. the median); the name is
        kept for backward compatibility.
    """
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    ckpt = torch.load(ckpt_file, map_location="cpu")

    config = ckpt["config"]
    model = build_model(config=config)
    model.load_state_dict(ckpt["model"])

    test_dataset = get_test_dataset(config)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size)

    context_length = config['context_length']
    prediction_length = config['prediction_length']

    ground_truth = []
    all_lb = []
    all_ub = []
    all_mean = []
    for test_batch, _ in test_loader:
        # Targets are the trailing prediction_length steps of each series.
        # assumes every series has context_length + prediction_length steps, so
        # that window t below forecasts target t — TODO confirm
        ground_truth.append(np.asarray(test_batch[:, -prediction_length:]))

        step_lb = []
        step_ub = []
        step_mean = []
        for t in tqdm(range(prediction_length)):
            window = test_batch[:, t:t + context_length]
            pred = model.predict(window, num_samples=num_samples, pred_one_step=True)
            # Keep only the last index of the third axis.
            # assumes rec_n_forecast is [num_samples, batch, time, obs_dim], so
            # this is the one-step forecast — TODO confirm
            pred_y = pred['rec_n_forecast'][:, :, -1]

            # One quantile call for all three levels; the leading axis of the
            # result indexes the requested levels.
            lb, median, ub = get_quantile_forecast(pred_y, [lower_q, 0.5, upper_q])
            step_lb.append(lb)
            step_ub.append(ub)
            step_mean.append(median)

        # Stack the steps on a new axis 1 so each batch is laid out like its
        # targets (batch first). Concatenating the raw per-step lists on axis 0
        # and swapping afterwards is only correct for a single batch and fails
        # when the final batch is smaller.
        all_lb.append(np.stack(step_lb, axis=1))
        all_ub.append(np.stack(step_ub, axis=1))
        all_mean.append(np.stack(step_mean, axis=1))

    # Join the batches along the batch axis.
    ground_truth = np.concatenate(ground_truth, 0)
    all_lb = np.concatenate(all_lb, 0)
    all_ub = np.concatenate(all_ub, 0)
    all_mean = np.concatenate(all_mean, 0)

    # Coverage: share of targets that fall inside the interval.
    coverage = np.mean(np.logical_and(ground_truth >= all_lb, ground_truth <= all_ub))
    # Width: average interval size.
    width = np.mean(all_ub - all_lb)

    return coverage, width, (ground_truth, all_mean, all_lb, all_ub)

In [None]:
ckpt_bb = "./Checkpoints/bouncing_ball.pt"
ckpt_bbnoisy = "./Checkpoints/bb_noisy.pt"  # defined here but not evaluated in this cell

# Rolling one-step-ahead interval evaluation for the bouncing-ball checkpoint.
coverage, width, (ground_truth, all_mean, all_lb, all_ub) = autoregressive_cov(ckpt_bb)

# Save ground truth, all_mean, all_lb, all_ub.
# np.savez does not create missing directories, so make sure the target exists.
Path("./results").mkdir(parents=True, exist_ok=True)
np.savez("./results/autoregressive_cov.npz", ground_truth=ground_truth, mean=all_mean, lb=all_lb, ub=all_ub)


In [8]:
# Show the `prediction_length` entry of the loaded config.
config['prediction_length']

50

In [98]:

# Collect per-batch segmentations and reconstructions from a forward pass.
true_segs = []
pred_segs = []
true_tss = []
recons_tss = []

for test_batch, test_label in test_loader:
    test_batch = test_batch.to(device)
    # Forward pass through the model with a single sample and
    # deterministic inference enabled.
    test_result = model(
        test_batch,
        switch_temperature=1.0,
        num_samples=1,
        deterministic_inference=True,
        **extra_args,
    )

    # Predicted label = argmax over the last axis of the first "log_gamma" entry.
    # presumably index 0 selects the single sample — TODO confirm
    pred_seg = torch2numpy(torch.argmax(test_result["log_gamma"][0], dim=-1))
    # Labels are cut to the first context_length steps.
    true_seg = torch2numpy(test_label[:, : config["context_length"]])
    true_ts = torch2numpy(test_result["inputs"])
    recons_ts = torch2numpy(test_result["reconstructions"])
    true_tss.append(true_ts)
    recons_tss.append(recons_ts)
    true_segs.append(true_seg)
    pred_segs.append(pred_seg)
    # NOTE(review): the loop stops after the first batch, so every list holds
    # exactly one batch — confirm this is intended and not leftover debugging.
    break

    

In [99]:
# Display the predicted segment labels of the last batch processed above.
pred_seg

array([[0, 0, 0, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 1, 1],
       [1, 1, 1, ..., 1, 1, 1],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [54]:
# NOTE(review): this import sits mid-notebook; consider moving it to the import cell.
from collections import Counter

# Count how often each distinct value occurs in the flattened `z_emp_probs`.
# NOTE(review): `pred` is only assigned in cells further down, so this cell
# depends on out-of-order execution.
Counter(pred['z_emp_probs'].view(-1).numpy())

Counter({1.0: 4000, 0.0: 4000})

In [33]:
# List the entries returned by the model's forward pass.
test_result.keys()

dict_keys(['elbov2', 'iwlbov2', 'inputs', 'reconstructions', 'x_samples', 'log_gamma', 'crossent_regularizer'])

In [25]:

# Merge the per-batch lists along axis 0 and score the segmentation.
# NOTE(review): each name is overwritten with its concatenated array, so
# re-running this cell without re-running the collecting loop operates on the
# already-concatenated arrays rather than on the per-batch lists.
true_tss = np.concatenate(true_tss, 0)
recons_tss = np.concatenate(recons_tss, 0)
true_segs = np.concatenate(true_segs, 0)
pred_segs = np.concatenate(pred_segs, 0)
# K is the `num_categories` entry of the config.
seg_metrics = evaluation.evaluate_segmentation(
    true_segs, pred_segs, K=config["num_categories"]
)
#print(config["dataset"], seg_metrics)


In [26]:
# Display the segmentation metrics computed in the previous cell.
seg_metrics

{'nmi_score': 0.765731146550337,
 'ari_score': 0.8514450465440498,
 'accuracy': 0.961375,
 'f1_score': 0.9614082389518032}

In [30]:
# Same metrics restricted to the first series (index 0) only.
evaluation.evaluate_segmentation(
    true_segs[0], pred_segs[0], K=config["num_categories"]
)

{'nmi_score': 1.0, 'ari_score': 1.0, 'accuracy': 1.0, 'f1_score': 1.0}

## Forecasting

In [104]:
# Forecast every test batch and collect a per-batch MSE plus the predicted
# and true segment labels beyond the context window.
mses = []
seg_errors = []  # NOTE(review): never filled in this cell
true_segs = []
pred_segs = []
for test_batch, test_label in test_loader:
    test_batch = test_batch.to(device)

    pred = model.predict(test_batch)
    # Targets: everything after the first context_length steps (axis 1).
    true = test_batch[:, config['context_length']:]
    # Forecast part of the prediction, sliced along axis 2.
    # assumes rec_n_forecast is [num_samples, batch, time, obs_dim] — TODO confirm
    pred_y = pred['rec_n_forecast'][:,:,config['context_length']:]

    pred_z = pred['z_emp_probs']
    # `[..., :1, :]` keeps only index 0 of the second-to-last axis of both tensors.
    # NOTE(review): if that axis is time, only the first forecast step enters
    # the MSE — confirm this is intended.
    mse = ((true[...,:1,:] - pred_y[...,:1,:]) ** 2).mean()
    #print(mse)
    # One value per batch; batches of different size weigh equally when averaged later.
    mses.append(mse)
    # Predicted label = argmax over the last axis of pred_z; true labels are
    # the entries of test_label after the context window.
    pred_segs.append(torch2numpy(torch.argmax(pred_z, dim=-1)))
    true_segs.append(torch2numpy(test_label[:, config['context_length']:]))


  indices = indices // dim


In [127]:


# Collect targets and the 0.1 / 0.9 quantile bounds of the sampled forecasts
# for every test batch; coverage and width are computed in later cells.
ground_truth = []
all_lb = []
all_ub = []
# calculate coverage and width of prediction intervals
for test_batch, test_label in tqdm(test_loader):
    # 100 forecast samples per series.
    pred = model.predict(test_batch, num_samples=100)
    # Targets: everything after the first context_length steps (axis 1).
    true = test_batch[:, config['context_length']:]
    ground_truth.append(true)
    # Forecast part of the prediction, sliced along axis 2.
    # assumes rec_n_forecast is [num_samples, batch, time, obs_dim] — TODO confirm
    pred_y = pred['rec_n_forecast'][:,:,config['context_length']:]

    # Quantiles are taken across axis 0 of pred_y.
    lb = get_quantile_forecast(pred_y, 0.1)
    ub = get_quantile_forecast(pred_y, 0.9) 
    all_lb.append(lb)
    all_ub.append(ub)

  indices = indices // dim


In [129]:
# Join the per-batch pieces of each list into one array along axis 0.
ground_truth, all_lb, all_ub = (
    np.concatenate(parts, 0) for parts in (ground_truth, all_lb, all_ub)
)

In [130]:
# Shape of the concatenated targets.
ground_truth.shape

(1000, 50, 1)

In [131]:
# Interval quality pooled over all series and forecast steps at once.
# Coverage: share of targets lying inside [lower, upper], bounds inclusive.
coverage = np.mean((ground_truth >= all_lb) & (ground_truth <= all_ub))
# Width: average gap between the upper and the lower bound.
width = np.mean(all_ub - all_lb)

coverage, width

(0.36394, 2.3331621233702324)

In [9]:
# List every entry available in the loaded config.
config.keys()

dict_keys(['experiment', 'model', 'dataset', 'seg_labels', 'd_max', 'batch_size', 'context_length', 'prediction_length', 'freq', 'x_dim', 'obs_dim', 'log_dir', 'log_steps', 'model_dir', 'save_steps', 'num_categories', 'num_steps', 'num_samples', 'objective', 'flat_learning_rate', 'warmup_start_lr', 'learning_rate', 'grad_clip_norm', 'weight_decay', 'warmup_steps_lr', 'initial_switch', 'discrete_transition', 'continuous_transition', 'emission', 'inference', 'initial_state', 'control', 'transform_target', 'transform_only_scale', 'use_jacobian', 'forecast', 'switch_t_annealing', 'switch_t_init', 'switch_t_min', 'switch_t_annealing_rate', 'switch_t_annealing_steps', 'switch_t_annealing_kickin_steps', 'dur_t_annealing', 'dur_t_init', 'dur_t_min', 'dur_t_annealing_rate', 'dur_t_annealing_steps', 'dur_t_annealing_kickin_steps', 'cont_ent_anneal', 'xent_annealing', 'xent_init', 'xent_rate', 'xent_steps', 'xent_kickin_steps'])

100%|██████████| 50/50 [01:55<00:00,  2.31s/it]


(0.77975, 0.5269619214291451)

In [None]:

# Collect predicted and true segment labels beyond the context window for
# every test batch, using 100 forecast samples per series.
pred_segs = []
true_segs = []
for test_batch, test_label in tqdm(test_loader):
    test_batch = test_batch.to(device)
    pred = model.predict(test_batch, num_samples=100)
    # NOTE(review): `true` is assigned but not used in this cell.
    true = test_batch[:, config['context_length']:]
    # Predicted label = argmax over the last axis of z_emp_probs.
    pred_segs.append(torch2numpy(torch.argmax(pred['z_emp_probs'], dim=-1)))
    true_segs.append(torch2numpy(test_label[:, config['context_length']:]))


In [106]:
# Score the forecast segmentation over all batches (lists joined along axis 0).
seg_error = evaluation.evaluate_segmentation(np.concatenate(true_segs, 0), np.concatenate(pred_segs, 0), K=config["num_categories"])

seg_error

{'nmi_score': 0.10380844559277072,
 'ari_score': 0.140397887557092,
 'accuracy': 0.68736,
 'f1_score': 0.6873676161666078}

In [84]:
# Average of the per-batch MSE values collected in `mses`.
np.mean(mses)

0.08520873

In [97]:
# Display the collected predicted segment labels.
# NOTE(review): the saved output below is a tensor, whereas the cells above
# build `pred_segs` as a list of numpy arrays — the output looks stale.
pred_segs

tensor([[0, 0, 0,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 1, 1, 1],
        [0, 0, 0,  ..., 1, 1, 1],
        [0, 0, 0,  ..., 1, 1, 1]])

## Calibration and conformal prediction