In [1]:
%matplotlib inline

In [2]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from tqdm import tqdm_notebook

In [3]:
import torch
from torch import nn
from torch.nn import functional as F
from pyro import distributions as dist

In [4]:
import torch.utils.data.dataloader as dataloader
import torchvision
from torchvision import transforms
from torchvision.datasets import MNIST

In [5]:
import pyro
import deepppl
import os

In [6]:
sns.set_style('whitegrid')


### Basic example showing the interface of *DeepPPL*
This example takes a neural network, `MLP`, and adds uncertainty to its `parameters`.

The `DeepPPL` model must be built with the network instance passed as the `mlp` keyword argument.


In [7]:
# Read the DeepPPL program file first, then echo it once the file is closed.
with open('../tests/good/mlp.stan', 'r') as source:
    stan_text = source.read()
print(stan_text)


networks {
    MLP mlp;
}

data {
    int batch_size;
    int <lower=0, upper=1> imgs[28,28,batch_size];
    int <lower=0, upper=10>  labels[batch_size];
}

parameters {
    real mlp.l1.weight[*];
    real mlp.l1.bias[*];
    real mlp.l2.weight[*];
    real mlp.l2.bias[*];
}

model {
    real logits[batch_size];
    mlp.l1.weight ~  normal(zeros(mlp.l1.weight$shape), ones(mlp.l1.weight$shape));
    mlp.l1.bias ~ normal(zeros(mlp.l1.bias$shape), ones(mlp.l1.bias$shape));
    mlp.l2.weight ~ normal(zeros(mlp.l2.weight$shape), ones(mlp.l2.weight$shape));
    mlp.l2.bias ~  normal(zeros(mlp.l2.bias$shape), ones(mlp.l2.bias$shape));

    logits = mlp(imgs);
    labels ~ categorical_logits(logits);
}

guide parameters {
    real l1wloc[mlp.l1.weight$shape];
    real l1wscale[mlp.l1.weight$shape];
    real l1bloc[mlp.l1.bias$shape];
    real l1bscale[mlp.l1.bias$shape];
    real l2wloc[mlp.l2.weight$shape];
    real l2wscale[mlp.l2.weight$shape];
    real l2bloc[mlp.l2.bias$shape];
    real 

In [8]:
# Sizes shared by the data loaders and the network.
batch_size = 128   # samples per mini-batch
nx = 28 * 28       # input features: one flattened 28x28 image
nh = 1024          # width of the hidden layer
ny = 10            # number of network outputs

In [9]:
# Model

class MLP(nn.Module):
    """Two-layer perceptron: linear -> ReLU -> linear -> log-softmax.

    The sub-modules keep the names ``l1`` and ``l2`` because the DeepPPL
    program addresses their parameters as ``mlp.l1.weight``, ``mlp.l1.bias``,
    ``mlp.l2.weight`` and ``mlp.l2.bias``.

    Args:
        n_in: number of input features per sample. Defaults to the
            notebook-level ``nx``.
        n_hidden: width of the hidden layer. Defaults to ``nh``.
        n_out: number of outputs. Defaults to ``ny``.
    """

    def __init__(self, n_in=None, n_hidden=None, n_out=None):
        super(MLP, self).__init__()
        # The notebook constants are only looked up when a size is omitted,
        # so a plain `MLP()` builds exactly the same network as before.
        n_in = nx if n_in is None else n_in
        n_hidden = nh if n_hidden is None else n_hidden
        n_out = ny if n_out is None else n_out
        self.l1 = torch.nn.Linear(n_in, n_hidden)
        self.l2 = torch.nn.Linear(n_hidden, n_out)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Flatten ``x`` to ``(-1, n_in)`` and return log-softmax scores.

        Args:
            x: tensor whose total element count is a multiple of ``n_in``.

        Returns:
            Tensor of shape ``(batch, n_out)`` holding log-probabilities
            (log-softmax over the last dimension).
        """
        # Use the layer's own input width instead of the global `nx`: the
        # flatten then always agrees with `l1`, even if `nx` is rebound after
        # this module was constructed.
        flat = x.view((-1, self.l1.in_features))
        h = self.relu(self.l1(flat))
        yhat = self.l2(h)
        return F.log_softmax(yhat, dim=-1)

# Network instance that is later handed to deepppl.PyroModel as the `mlp` keyword.
mlp = MLP()


In [10]:
def loadData(batch_size):
    """Create shuffled MNIST data loaders for the train and test splits.

    The dataset lives under ``$DATA_DIR/data`` (``./data`` when ``DATA_DIR``
    is not set) and is requested with ``download=True``.

    Args:
        batch_size: number of samples per mini-batch, used for both loaders.

    Returns:
        A ``(train_loader, test_loader)`` tuple.
    """
    data_root = os.environ.get("DATA_DIR", '.') + "/data"
    loader_options = dict(shuffle=True, batch_size=batch_size,
                          num_workers=3, pin_memory=False)

    # Build both datasets first, then wrap each in a loader (train, then test).
    # ToTensor converts each image to a float tensor scaled to [0, 1].
    splits = [
        MNIST(data_root, train=is_train, download=True,
              transform=transforms.Compose([transforms.ToTensor()]))
        for is_train in (True, False)
    ]
    train_loader, test_loader = [
        dataloader.DataLoader(split, **loader_options) for split in splits
    ]
    return train_loader, test_loader

In [11]:
def categorical_logits(logits):
    """Return a categorical distribution parameterised by ``logits``.

    This function is passed to ``deepppl.PyroModel`` under the same name that
    the DeepPPL program uses in ``labels ~ categorical_logits(logits)``.
    """
    label_distribution = dist.Categorical(logits=logits)
    return label_distribution

def predict(data, posterior):
    """Predict class indices by averaging the outputs of several models.

    Args:
        data: batch of inputs accepted by every model in ``posterior``.
        posterior: iterable of callables; each maps ``data`` to a tensor of
            per-class scores, all with the same shape.

    Returns:
        Tensor of class indices: the argmax over the last dimension of the
        element-wise mean of the model outputs.
    """
    # Pure inference: disable autograd so no graph is recorded for any of the
    # sampled models. The result is an index tensor, so nothing downstream
    # could backpropagate through it anyway.
    with torch.no_grad():
        predictions = [sampled_model(data) for sampled_model in posterior]
        prediction = torch.stack(predictions).mean(dim=0)
        return prediction.argmax(dim=-1)


##### Build `DeepPPL` model

In [12]:
# Data loaders for both MNIST splits.
train_loader, test_loader = loadData(batch_size)

# Build the DeepPPL model from the program file. The network and the custom
# distribution are supplied as keyword arguments named as in the program.
model = deepppl.PyroModel(
    model_file='../tests/good/mlp.stan',
    mlp=mlp,
    categorical_logits=categorical_logits,
)

# Inference object used for training below.
# NOTE(review): 'lr' is presumably the optimiser learning rate -- confirm in deepppl.
svi = model.svi(params={'lr': 0.01})

#### Train the model using `svi`

In [13]:
# Two passes over the training set.
for epoch in range(2):
    for j, (imgs, lbls) in enumerate(train_loader):
        # One SVI step on this mini-batch. The program sizes `imgs` and
        # `labels` by its `batch_size` input, so pass the real size of this
        # batch: the last batch of an epoch is smaller than the global
        # `batch_size` whenever the dataset size is not a multiple of it.
        loss = svi.step(imgs.shape[0], imgs, lbls)
        # Report every 100th batch (`j` is zero-based, hence 99, 199, ...).
        if (j+1) % 100 == 0:
            print('Epoch:{} Iteration:{} Loss:{}'.format(epoch, j, loss))




Epoch:0 Iteration:99 Loss:401147.25935173035
Epoch:0 Iteration:199 Loss:228531.69188594818
Epoch:0 Iteration:299 Loss:149949.97155189514
Epoch:0 Iteration:399 Loss:101423.53450012207
Epoch:1 Iteration:99 Loss:60797.30235862732
Epoch:1 Iteration:199 Loss:48374.308643341064
Epoch:1 Iteration:299 Loss:34786.00557613373
Epoch:1 Iteration:399 Loss:29633.180724143982


##### Compute a posterior distribution
In this case, the distribution is a distribution over possible MLPs.
Each MLP gives its own prediction, and the uncertainty shows up in the spread of the predicted values.

In [14]:
# `predict` iterates over this object and calls every item on the input batch.
# NOTE(review): 30 is presumably the number of sampled MLPs -- confirm in deepppl.
posterior = svi.posterior(30)

#### Prediction
For each batch in the test set, we expect the accuracy to be higher than 80%.

In [15]:
# Check every test batch against the 80% accuracy threshold.
# `images` and `labels` stay bound to the last batch once the loop finishes;
# the following cell reuses `images`.
with torch.no_grad():  # evaluation only, no autograd graph needed
    for images, labels in test_loader:
        accuracy = (predict(images, posterior) == labels).type(torch.float).mean()
        # The message is only formatted when the check fails.
        assert accuracy > 0.8, 'batch accuracy {:.3f} is not above 0.8'.format(accuracy.item())

A single batch can also be predicted directly:

In [16]:
# `images` is the batch left bound by the last iteration of the evaluation loop.
predict(images, posterior)

tensor([0, 4, 1, 5, 2, 5, 9, 7, 4, 0, 8, 4, 4, 1, 4, 2],
       grad_fn=<NotImplemented>)