In [1]:
from __future__ import print_function, division, absolute_import

import gc

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('darkgrid')

import numpy as np


In [2]:
from sklearn.datasets import make_moons
x_train, y_train = make_moons(n_samples = 5000, noise = 0.10)
x_test, y_test = make_moons(n_samples = 2000, noise = 0.10)
y_test = y_test.astype(np.float32)
y_train = y_train.astype(np.float32)

In [3]:
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
from torch.autograd import Variable

np.random.seed(0)
torch.manual_seed(0)
#torch.cuda.manual_seed(0)

<torch._C.Generator at 0x7f79e2d54c90>

In [16]:
def log_gaussian_prob(x, mu, sigma, log_sigma = False):
    if not log_sigma:
        element_wise_log_prob = -0.5*torch.Tensor([np.log(2*np.pi)]).to(mu.device) - torch.log(sigma) - 0.5*(x-mu)**2 / sigma**2
    else:
        element_wise_log_prob = -0.5*torch.Tensor([np.log(2*np.pi)]).to(mu.device) - F.softplus(sigma) - 0.5*(x-mu)**2 / F.softplus(sigma)**2
    return element_wise_log_prob.sum()

class GaussianLinear(nn.Module):
    def __init__(self, in_dim, out_dim, stddev_prior = 1.0, bias=True):
        super(GaussianLinear, self).__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.stddev_prior = stddev_prior
        self.w_mu = nn.Parameter(torch.Tensor(in_dim, out_dim).normal_(0, stddev_prior))
        self.w_rho = nn.Parameter(torch.Tensor(in_dim, out_dim).normal_(0, stddev_prior))
        self.b_mu = nn.Parameter(torch.Tensor(out_dim).normal_(0, stddev_prior)) if bias else None
        self.b_rho = nn.Parameter(torch.Tensor(out_dim).normal_(0, stddev_prior)) if bias else None
        self.bias = bias
        self.q_w = 0.
        self.p_w = 0.

    def forward(self, x, test=False):
        if test:
            w = self.w_mu
            b = self.b_mu if self.bias else None
        else:
            device = self.w_mu.device
            w_stddev = F.softplus(self.w_rho)
            b_stddev = F.softplus(self.b_rho) if self.bias else None
            w = self.w_mu + w_stddev * torch.Tensor(self.in_dim, self.out_dim).to(device).normal_(0,self.stddev_prior)
            b = self.b_mu + b_stddev * torch.Tensor(self.out_dim).to(device).normal_(0,self.stddev_prior) if self.bias else None
            self.q_w = log_gaussian_prob(w, self.w_mu, self.w_rho, log_sigma=True)
            self.p_w = log_gaussian_prob(w, torch.zeros_like(self.w_mu, device=device), self.stddev_prior*torch.ones_like(w_stddev, device=device))
            if self.bias:
                self.q_w += log_gaussian_prob(b, self.b_mu, self.b_rho, log_sigma=True)
                self.p_w += log_gaussian_prob(b, torch.zeros_like(self.b_mu, device=device), self.stddev_prior*torch.ones_like(b_stddev, device=device))
        output = x@w+b
        return output

    def get_pw(self):
        return self.p_w

    def get_qw(self):
        return self.q_w

class ModelBNN(nn.Module):
    def __init__(self, L, layers, stddev_prior = 1.0):
        super(ModelBNN, self).__init__()
        self.stddev_prior = stddev_prior
        
        self.layers = [L]+layers
        self.linear = nn.ModuleList([])
        for ii in range(1, len(self.layers)):
            self.linear.append(GaussianLinear(self.layers[ii-1], self.layers[ii], stddev_prior, bias = True))

        self.relu = nn.ReLU()

    def forward(self, x, test=False):
        y = x
        for ii in range(1, len(self.layers)):
            y = self.linear[ii-1](y, test)
            if ii != len(self.layers)-1:
                y = self.relu(y)
        return y

    def forward_samples(self, x, y, nb_samples=3):
        total_qw, total_pw, total_log_likelihood = 0., 0., 0.
        for _ in range(nb_samples):
            output = self.forward(x)
            total_qw += self.get_qw()
            total_pw += self.get_pw()
            total_log_likelihood += log_gaussian_prob(y, output, torch.ones_like(y))
        return total_qw / nb_samples, total_pw / nb_samples, total_log_likelihood / nb_samples

    def get_pw(self):
        return sum([self.linear[ii-1].p_w for ii in range(1, len(self.layers))])

    def get_qw(self):
        return sum([self.linear[ii-1].q_w for ii in range(1, len(self.layers))])


In [17]:
Nepochs = 500
layers = [100, 50, 5, 1]
L = 2
batch_size = 20

In [24]:
def mse_fn(model, x, y):
    return torch.pow(model(x)-y, 2).sum(1).mean(0)

def fit_model(layers, Nepochs, x_train, y_train):
    N = x_train.shape[0]
    L = x_train.shape[1]
    
    model = ModelBNN(L = L, layers = layers, stddev_prior = 1.0)
    #model = model.cuda()
    optimizer = optim.Adam(model.parameters())
    
    for i in range(Nepochs):
        old_batch = 0
        loss_ = 0
        nll_ = 0
        kl_ = 0
        mse_ = 0
        Nbatches = int(np.ceil(N/batch_size))
        pall = np.random.permutation(N)
        for batch in range(Nbatches):
            #p = np.random.choice(N, batch_size)
            p = pall[batch*batch_size : (batch+1)*batch_size]
            _x = x_train[p, :].astype(np.float32)
            _y = y_train[p, np.newaxis].astype(np.float32)
            
            x = torch.as_tensor(_x) #.cuda()
            y = torch.as_tensor(_y) #.cuda()
            
            qw, pw, llh = model.forward_samples(x, y)
            kl_loss = (qw-pw)/len(y)
            nll_loss = -llh/len(y)
            loss_total = nll_loss + kl_loss
            
            optimizer.zero_grad()
            loss_total.backward()
            optimizer.step()

            mse = mse_fn(model, x, y)
            
            loss_ += loss_total.item()
            nll_ += nll_loss.item()
            kl_ += kl_loss.item()
            mse_ += mse.item()
            print("Epoch {:>3d}, {:^12s}: loss = {:>5.4f}, nll = {:>5.4f}, kl = {:>5.4f}, mse = {:>5.4f}".format(i,
                                        "{:>3d}/{:>3d}".format(batch+1, Nbatches),
                                        (loss_)/float(batch+1), nll_/float(batch+1), kl_/float(batch+1),
                                        mse_/float(batch+1)
                                        ),
                  
                  end = '\r')


        loss_ /= float(Nbatches)
        nll_ /= float(Nbatches)
        kl_ /= float(Nbatches)
        mse_ /= float(Nbatches)
        print("Epoch {:>3d}, {:^12s}: loss = {:>5.4f}, nll = {:>5.4f}, kl = {:>5.4f}, mse = {:>5.4f}".format(i,
                                        "",
                                        (loss_), nll_, kl_,
                                        mse_
                                        ),
                  
                  end = '\n')            
    return model

In [None]:
model = fit_model(layers, Nepochs, x_train, y_train)

Epoch   0,             : loss = 12869.9625, nll = 12964.9841, kl = -95.0216, mse = 21567.7964
Epoch   1,             : loss = 4093.6338, nll = 4188.7968, kl = -95.1630, mse = 12190.7034
Epoch   2,             : loss = 1843.2898, nll = 1938.7241, kl = -95.4343, mse = 4349.7344
Epoch   3,             : loss = 1167.7464, nll = 1263.0289, kl = -95.2825, mse = 1940.59025
Epoch   4,             : loss = 637.2631, nll = 732.7223, kl = -95.4592, mse = 1208.3812
Epoch   5,             : loss = 367.1667, nll = 462.4606, kl = -95.2939, mse = 1129.486701
Epoch   6,             : loss = 158.7009, nll = 254.5199, kl = -95.8190, mse = 321.3978
Epoch   7,             : loss = 157.0631, nll = 252.7120, kl = -95.6489, mse = 400.34025
Epoch   8,             : loss = 80.6436, nll = 176.6459, kl = -96.0023, mse = 871.097747
Epoch   9,             : loss = 61.2992, nll = 157.2392, kl = -95.9400, mse = 286.97871
Epoch  10,             : loss = -4.8319, nll = 91.2997, kl = -96.1315, mse = 317.468972
Epoch  11

In [9]:
def getMeanStd(model, x, Npred = 50):
    # per-example predictions
    p = [None]*Npred
    x_ = Variable(torch.FloatTensor(x)) #.cuda()
    for k in range(Npred):
        p[k] = model(x_)
        p[k] = p[k].detach().numpy()[np.newaxis,...,0]
    p = np.concatenate(p, axis = 0)
    return np.mean(p, axis = 0), np.std(p, axis = 0)

In [10]:
def plot_contour(model, x, y, getFunction):
    # make countour
    mins = [np.min(x[:,0]), np.min(x[:,1])]
    maxs = [np.max(x[:,0]), np.max(x[:,1])]
    step = [(maxs[0] - mins[0])/50.0, (maxs[1] - mins[1])/50.0]
    bx, by = np.mgrid[mins[0]:(maxs[0]+0.5*step[0]):step[0], mins[1]:(maxs[1]+0.5*step[0]):step[1]]
    inputs = np.vstack([bx.flatten(), by.flatten()]).T
    inputs = inputs.astype(np.float32)

    pred_m, pred_s = getFunction(model, inputs, Npred = 50)
    pred_m_2d = pred_m.reshape( (-1, bx.shape[1]) )
    pred_s_2d = pred_s.reshape( (-1, bx.shape[1]) )

    # if one wants to smoothen the results
    #for data in [pred_m_2d, pred_s_2d]:
    #    data = gaussian_filter(data, 0.1)

    fig, ax = plt.subplots(nrows = 2, ncols = 1, sharex = True, figsize = (10, 8))
    cmap = sns.diverging_palette(250, 12, s=85, l=25, as_cmap=True)
    contour_s = ax[0].contourf(bx, by, pred_s_2d, cmap = cmap)
    cbar_s = plt.colorbar(contour_s, ax = ax[0])
    cbar_s.ax.set_ylabel('Unc.')
    contour_m = ax[1].contourf(bx, by, pred_m_2d, cmap = cmap)
    cbar_m = plt.colorbar(contour_m, ax = ax[1])
    cbar_m.ax.set_ylabel('Mean')
    for a in [ax[0], ax[1]]:
        a.scatter(x[y == 1,0], x[y == 1,1], color = 'r', marker = 's', s = 5, label = 'y = 1')
        a.scatter(x[y == 0,0], x[y == 0,1], color = 'b', marker = 's', s = 5, label = 'y = 0')
        a.set(xlabel = 'A', ylabel = 'B', title = '')
        a.set_xlim([mins[0], maxs[0]])
        a.set_ylim([mins[1], maxs[1]])
        a.legend(frameon = True)
    ax[0].set_xlabel('')
    fig.subplots_adjust(hspace = 0)
    fig.tight_layout()
    plt.show()

In [11]:
plot_contour(model, x_test, y_test, getFunction = getMeanStd)

NameError: name 'model' is not defined