## [Forecasting I: univariate, heavy tailed](http://pyro.ai/examples/forecasting_i.html#Forecasting-I:-univariate,-heavy-tailed)

In [None]:
import torch

import pyro
import pyro.distributions as dist
import pyro.poutine as poutine

from pyro.contrib.examples.bart import load_bart_od
from pyro.contrib.forecast import ForecastingModel, Forecaster, backtest, eval_crps

from pyro.infer.reparam import LocScaleReparam, StableReparam
from pyro.ops.tensor_utils import periodic_cumsum, periodic_repeat, periodic_features
from pyro.ops.stats import quantile

import matplotlib.pyplot as plt

%matplotlib inline
# Fail fast unless the installed Pyro version string begins with '1.8.3'.
assert pyro.__version__.startswith('1.8.3')
# Seed Pyro's random number generation with a fixed value for this run.
pyro.set_rng_seed(20200221)

In [None]:
# Load the BART origin-destination dataset and summarize what it contains:
# its keys, the shape of the counts array, and the station names on one line.
dataset = load_bart_od()
counts, stations = dataset["counts"], dataset["stations"]
print(dataset.keys())
print(counts.shape)
print(" ".join(stations))

#### Pyro’s forecasting framework consists of:

- a `ForecastingModel` base class, whose `.model()` method can be implemented for custom forecasting models,
- a `Forecaster` class that trains and forecasts using `ForecastingModel`s, and
- a `backtest()` helper to evaluate models on a number of metrics.

##### Consider a simple univariate dataset, say weekly BART train ridership aggregated over all stations in the network.

In [None]:
# Unpack the three dimensions of the counts array.
# NOTE(review): presumably (time steps, origin stations, destination stations)
# — confirm against load_bart_od.
T, O, D = dataset['counts'].shape

In [None]:
# Length of the series after rounding T down to a whole number of
# 24 * 7 = 168-step blocks (integer-divide, then multiply back).
T // (24 * 7) * 24 * 7

In [None]:
# Keep only whole 24 * 7-step blocks of the counts, collapse each block
# (together with all remaining axes) into a single total, then take the log.
steps_per_week = 24 * 7
num_weeks = T // steps_per_week
weekly_totals = dataset['counts'][:num_weeks * steps_per_week].reshape(num_weeks, -1).sum(-1)
data = weekly_totals.log()

In [None]:
# Append a trailing dimension of size 1 to the 1-D series built above.
# NOTE: this rebinds `data`, so re-running the cell adds yet another trailing
# dimension; re-run the cell that builds `data` first.
data = data.unsqueeze(-1)

In [None]:
data.shape

In [None]:
# Plot the whole log-ridership series on a wide, short figure.
plt.figure(figsize=(9,3))
plt.plot(data)

In [None]:
class Model_1(ForecastingModel):
    """Linear regression on the covariates with Gaussian observation noise."""

    def model(self, zero_data, covariates):
        """Define the generative model for one series.

        Being generative, the model should not look at the data itself. It is
        still convenient to know the shape of what must be generated, so a
        ``zeros_like(data)`` tensor is passed in place of the actual data.

        :param zero_data: zeros with the shape of the data, (duration, obs_dim).
        :param covariates: features whose last dimension is the feature dim.
        """
        obs_dim = zero_data.size(-1)
        num_features = covariates.size(-1)

        # Priors: a wide bias per observed dimension, a tight weight per feature.
        bias_prior = dist.Normal(0, 10).expand([obs_dim]).to_event(1)
        bias = pyro.sample('bias', bias_prior)
        weight_prior = dist.Normal(0, 0.1).expand([num_features]).to_event(1)
        weight = pyro.sample('weight', weight_prior)

        # Linear predictor; keepdim retains the trailing obs dimension.
        prediction = bias + (weight * covariates).sum(-1, keepdim=True)
        # The prediction should have the same shape as zero_data (duration, obs_dim),
        # but may have additional sample dimensions on the left.
        assert prediction.shape[-2:] == zero_data.shape

        # Zero-mean Gaussian noise with a learned scale, added to the prediction.
        noise_scale_prior = dist.LogNormal(-5, 5).expand([1]).to_event(1)
        noise_scale = pyro.sample('noise_scale', noise_scale_prior)
        noise_dist = dist.Normal(0, noise_scale)

        self.predict(noise_dist, prediction)

In [None]:
# Split points along the time axis: [T0, T1) is used for training and
# [T1, T2) — the final 52 steps — is held out for evaluation.
T2 = data.shape[-2]
T0, T1 = 0, T2 - 52

In [None]:
T2

In [None]:
# Time index 0..T2-1 as floats, scaled down by 365, used as a trend covariate.
# NOTE(review): the steps are weekly but the divisor is 365; it only rescales
# the trend feature — confirm this is the intended scale.
time = torch.arange(float(T2)) / 365

In [None]:
time.shape

In [None]:
# Trend-only covariates: `time` as a single feature column (a fresh tensor
# with a trailing feature dimension of size 1).
covariates = time.unsqueeze(-1).clone()

In [None]:
covariates.shape

In [None]:
data[:T1].size(-1)

In [None]:
%%time
# Reset the seed and clear Pyro's parameter store before fitting.
pyro.set_rng_seed(111)
pyro.clear_param_store()

# Build a Forecaster for Model_1 from the first T1 steps of data and covariates.
# NOTE(review): presumably construction itself runs training (hence %%time) — confirm.
forecaster = Forecaster(Model_1(), data[:T1], covariates[:T1], learning_rate=0.1)

#### Draw posterior samples from the forecaster, passing in the full covariates but only partial data.

In [None]:
# Request 1000 samples, giving the first T1 steps of data and all T2 steps of covariates.
# NOTE(review): presumably the result covers only the T2 - T1 unobserved steps — confirm.
samples = forecaster(data[:T1], covariates, num_samples=1000)

In [None]:
# 10%, 50% and 90% quantiles of the samples, with the trailing size-1 dimension dropped.
# NOTE(review): presumably taken across the sample dimension — confirm quantile's default dim.
p10, p50, p90 = quantile(samples, (0.1, 0.5, 0.9)).squeeze(-1)

In [None]:
p10

Evaluate the forecast with the [Continuous Ranked Probability Score](https://www.stat.washington.edu/raftery/Research/PDF/Gneiting2007jasa.pdf) (CRPS).

In [None]:
# Score the forecast samples against the held-out observations data[T1:].
crps = eval_crps(samples, data[T1:])

In [None]:
print(samples.shape, p10.shape)

In [None]:
# Plot the whole series with the forecast overlaid on the held-out window:
# a shaded p10-p90 band, the median forecast, and the observed truth.
forecast_weeks = torch.arange(T1, T2)
plt.figure(figsize=(9, 3))
plt.fill_between(forecast_weeks, p10, p90, color="red", alpha=0.3)
plt.plot(forecast_weeks, p50, 'r-', label='forecast')
plt.plot(data, 'k-', label='truth')
# Raw string: '\l' is not a valid escape sequence, so the non-raw form
# triggers an invalid-escape warning. The label text itself is unchanged.
plt.ylabel(r'$\log y$ rides')
plt.xlabel('Weeks after 2011-01-01')
plt.title('Total weekly ridership (CRPS={:0.3g})'.format(crps))
plt.legend(loc='best')

In [None]:
# Plot only the held-out window: a shaded p10-p90 band, the median forecast,
# and the observed truth over the same steps.
forecast_weeks = torch.arange(T1, T2)
plt.figure(figsize=(9, 3))
plt.fill_between(forecast_weeks, p10, p90, color="red", alpha=0.3)
plt.plot(forecast_weeks, p50, 'r-', label='forecast')
plt.plot(forecast_weeks, data[T1:], 'k-', label='truth')
# Raw string: '\l' is not a valid escape sequence, so the non-raw form
# triggers an invalid-escape warning. The label text itself is unchanged.
plt.ylabel(r'$\log y$ rides')
plt.xlabel('Weeks after 2011-01-01')
plt.title('Total weekly ridership (CRPS={:0.3g})'.format(crps))
plt.legend(loc='best')

### Adding Periodic Features

In [None]:
periodic_features(T2, 365.25/7).shape

In [None]:
time.unsqueeze(-1).shape

In [None]:
# Covariates: the linear time trend plus periodic features with a yearly period.
# One year spans 365.25 / 7 weekly steps; this is the same period used in the
# periodic_features shape check earlier in this section (previously 365 / 7 here).
covariates = torch.cat([time.unsqueeze(-1), periodic_features(T2, 365.25 / 7)], dim=-1)

In [None]:
covariates.shape

In [None]:
covariates.shape[-1]

In [None]:
%%time
# Reset the seed and clear Pyro's parameter store so earlier fits do not carry over.
pyro.set_rng_seed(111)
pyro.clear_param_store()

# Build a Forecaster for Model_1 again, using whatever `covariates` is currently
# bound to (the trend-plus-periodic features when cells run in order).
forecaster = Forecaster(Model_1(), data[:T1], covariates[:T1], learning_rate=0.1)

In [None]:
# Forecast with the current covariates and score against the held-out data.
samples = forecaster(data[:T1], covariates, num_samples=1000)
quantiles = quantile(samples, (0.1, 0.5, 0.9)).squeeze(-1)
p10, p50, p90 = quantiles
crps = eval_crps(samples, data[T1:])

# Full history with the forecast band drawn over the held-out window.
forecast_weeks = torch.arange(T1, T2)
plt.figure(figsize=(9, 3))
plt.fill_between(forecast_weeks, p10, p90, color="red", alpha=0.3)
plt.plot(forecast_weeks, p50, 'r-', label='forecast')
plt.plot(data, 'k-', label='truth')
plt.title("Total weekly ridership (CRPS = {:0.3g})".format(crps))
plt.ylabel("log(# rides)")
plt.xlabel("Week after 2011-01-01")
plt.xlim(0, None)
# Trailing semicolon suppresses the notebook's echo of the Legend object.
plt.legend(loc="best");

In [None]:
# Same forecast, zoomed in so the x-axis starts at the held-out window (T1).
forecast_weeks = torch.arange(T1, T2)
plt.figure(figsize=(9, 3))
plt.fill_between(forecast_weeks, p10, p90, color="red", alpha=0.3)
plt.plot(forecast_weeks, p50, 'r-', label='forecast')
plt.plot(forecast_weeks, data[T1:], 'k-', label='truth')
plt.title("Total weekly ridership (CRPS = {:0.3g})".format(crps))
plt.ylabel("log(# rides)")
plt.xlabel("Week after 2011-01-01")
plt.xlim(T1, None)
# Trailing semicolon suppresses the notebook's echo of the Legend object.
plt.legend(loc="best");

### [Time-local random variables](http://pyro.ai/examples/forecasting_i.html#Time-local-random-variables:-self.time_plate)

In [40]:
# Scratch cell: draw once from a 10-dimensional Normal(0, 10) prior, outside
# any model, to see what values such a 'bias' site produces.
pyro.sample('bias', dist.Normal(0, 10).expand([10]).to_event(1))

tensor([  7.1916,  -0.4106,   0.8071,   4.2164,  -6.6785, -10.1081, -10.6733,
         -3.7762, -11.8220,   9.2675])

In [41]:
# Scratch cell: draw once from the LogNormal(-20, 5) prior, outside any model,
# to see the magnitude of values a 'drift_scale' site produces.
drift_scale_prior = dist.LogNormal(-20, 5).expand([1]).to_event(1)
pyro.sample('drift_scale', drift_scale_prior)

tensor([7.5031e-07])

In [None]:
class Model_2(ForecastingModel):
    def model(self, zero_data, covariates):
        data_dim = zero_data.size(-1)
        feature_dim = covariates.size(-1)
        bias = pyro.sample('bias', dist.Normal(0, 10).expand([data_dim]).to_event(1))
        weight = pyro.sample('weight', dist.Normal(0, 0.1).expand([feature_dim]).to_event(1))
        # time-global scale parameter
        drift_scale = pyro.sample('drift_scale', 
                                        dist.LogNormal(-20, 5).expand([1]).to_event(1))
        
        # time-local iid noise inside time plate
        with self._time_plate:
        # We'll use a reparameterizer to improve variational fit. The model would still be
        # correct if you removed this context manager, but the fit appears to be worse. 
            with poutine.reparam(config={'drift': LocScaleReparam()}):
                drift = pyro.sample('drift', dist.Normal(zero_data, drift_scale).to_event(1))

        # After we sample the iid "drift" noise we can combine it in any time-dependent way.
        # It is important to keep everything inside the plate independent and apply dependent
        # transforms outside the plate.   

        motion = drift.cumsum(-2) # A Brownian motion

        
