# Recovering a regression model with VI

That is, use variational inference (VI) to approximate the posterior distribution over the regression weight.

In [1]:
import random
import torch as t
import torch.nn as nn

import numpy as np

import sys; sys.path.append("..")
import utils as u

# Generate example data

In [2]:
# Settings for the synthetic example.
# Prior over the weight; PRIOR_VAR is what the analytical posterior helpers
# receive as the prior variance.
PRIOR_MEAN = 3
PRIOR_VAR = 3.0
# Noise scale handed to the data generator (and reused by VI as the
# likelihood sigma).
GROUND_L_SIGMA = 1.5

# Dataset description: N points, weight w, and an input interval
# (presumably the range the x values are drawn from -- see
# u.generate_linear_data).
N, w = 200, 10
interval = (-100, 100)
X, Y = u.generate_linear_data(N, w, GROUND_L_SIGMA, interval)

In [3]:
class VI(nn.Module):
    """Gaussian variational posterior over a single regression weight.

    The variational distribution is q(z) = N(q_mean, softplus(q_sigma)^2).
    Only ``q_mean`` and ``q_sigma`` are trainable parameters; the prior and
    the likelihood noise scale are fixed tensors built from the module-level
    constants PRIOR_MEAN, PRIOR_VAR and GROUND_L_SIGMA.
    """

    def __init__(self, n=100):
        """Create the model.

        Args:
            n: number of latent samples drawn per ELBO evaluation.
        """
        super().__init__()

        self.batch_size = n  # latent samples per step
        self.soft_plus = nn.Softplus()

        # Trainable variational parameters (registered with the module, so
        # they are the only things returned by ``parameters()``).
        self.q_mean = self.param_1D()
        self.q_sigma = self.param_1D()

        # Fixed quantities: plain (1, 1) tensors, deliberately NOT parameters,
        # so the optimiser never touches them. t.full gives a defined value,
        # unlike t.FloatTensor(1), which returns uninitialised memory.
        self.prior_mean = t.full((1, 1), float(PRIOR_MEAN))
        # PRIOR_VAR is a variance, but this attribute is used as the scale
        # argument of u.log_norm, which the rest of this class feeds with
        # standard deviations (softplus(q_sigma), likelihood_sigma).
        # NOTE(review): assumes u.log_norm's third argument is a standard
        # deviation -- confirm against utils.
        self.prior_sigma = t.full((1, 1), float(PRIOR_VAR) ** 0.5)
        self.likelihood_sigma = t.full((1, 1), float(GROUND_L_SIGMA))

    def param_1D(self):
        """Return a new trainable (1, 1) parameter initialised from N(0, 1)."""
        return nn.Parameter(t.randn(1, 1), requires_grad=True)

    def generate_noise(self):
        """Return standard-normal noise as a (batch_size, 1) NumPy array."""
        return np.random.normal(size=(self.batch_size, 1))

    def get_mean(self):
        """Return the current variational mean as a (1, 1) NumPy array.

        The array is a copy, so it does not change when the parameter is
        updated later and writing to it cannot corrupt the parameter.
        """
        return self.q_mean.detach().numpy().copy()

    def get_var(self):
        """Return the current variational variance as a (1, 1) NumPy array."""
        q_std = self.soft_plus(self.q_sigma).detach()
        return (q_std ** 2).numpy()

    def reparametrise(self, eps):
        """Map standard-normal noise to samples from q.

        Computes ``eps * softplus(q_sigma) + q_mean`` so that the samples are
        differentiable with respect to ``q_mean`` and ``q_sigma``.

        Args:
            eps: noise array/tensor, e.g. the output of ``generate_noise``.

        Returns:
            Float tensor with the same shape as ``eps``.
        """
        # The noise is a constant of the computation; it must not be a
        # Parameter (that would make it a leaf that accumulates gradients).
        noise = t.as_tensor(eps, dtype=t.float32)
        q_std = self.soft_plus(self.q_sigma)
        return noise.mul(q_std).add(self.q_mean)

    def log_prob(self, y, x):
        """Log-likelihood of ``y`` around ``x`` with the fixed noise scale."""
        return u.log_norm(y, x, self.likelihood_sigma)

    def elbo(self, x, y):
        """Monte Carlo estimate of the evidence lower bound.

        Draws ``batch_size`` latent samples z ~ q and returns
        ``E_q[sum log p(y | x, z)] - (E_q[log q(z)] - E_q[log p(z)])``,
        each expectation being a sample mean.

        Args:
            x: inputs; assumed to be a column of shape (N, 1) so that it
                broadcasts against the (1, batch_size) row of samples --
                TODO confirm against u.generate_linear_data.
            y: targets, assumed to have the same shape as ``x``.

        Returns:
            Scalar tensor (to be maximised).
        """
        eps = self.generate_noise()
        z = self.reparametrise(eps)  # (batch_size, 1)

        # Monte Carlo KL(q || prior) = E_q[log q(z)] - E_q[log p(z)].
        q_log = u.log_norm(z, self.q_mean, self.soft_plus(self.q_sigma))
        q_likelihood = t.mean(q_log)
        prior_log = u.log_norm(z, self.prior_mean, self.prior_sigma)
        prior_estimate = t.mean(prior_log)
        kl_div_mc = q_likelihood - prior_estimate

        # One column of predictions per latent sample; sum the log-likelihood
        # over data points (dim 0), then average over the samples.
        xzt = x * z.transpose(0, 1)
        sum_log_prob = t.sum(self.log_prob(y, xzt), 0)
        likelihood = t.mean(sum_log_prob)

        return likelihood - kl_div_mc

# Optimise it

In [4]:
def run(X, Y, ep=5000, eta=0.1) :
    """Fit a fresh VI model to the data and return it.

    Args:
        X: input data, converted with ``t.Tensor``.
        Y: target data, converted with ``t.Tensor``.
        ep: number of optimisation steps.
        eta: Adam learning rate.

    Returns:
        The VI instance after optimisation.
    """
    model = VI()
    adam = t.optim.Adam(model.parameters(), lr=eta)

    # The data is wrapped as non-trainable parameters (no gradients wanted).
    inputs, targets = (
        nn.Parameter(t.Tensor(data), requires_grad=False) for data in (X, Y)
    )

    optimise(model, inputs, targets, adam, ep)
    return model


def optimise(q, x, y, optimiser, ep, verbose=False) :
    """Maximise ``q.elbo(x, y)`` by running ``ep`` optimiser steps.

    Args:
        q: model exposing ``elbo(x, y)`` (and ``get_mean``/``get_var`` when
            ``verbose`` is set).
        x: inputs passed through to ``q.elbo``.
        y: targets passed through to ``q.elbo``.
        optimiser: optimiser already bound to the parameters of ``q``.
        ep: number of steps to take.
        verbose: if true, print the current mean and variance every 500 steps.
    """
    for step in range(ep):
        optimiser.zero_grad()
        loss = -q.elbo(x, y)
        # A new graph is built by every elbo() call, so there is nothing to
        # retain; retain_graph=True would only keep each step's buffers alive.
        loss.backward()
        optimiser.step()

        if verbose and step % 500 == 0:
            print(q.get_mean(), q.get_var())


def amortise_runs(m) :
    """Repeat the fit ``m`` times and collect the resulting posterior means.

    Each repetition trains a fresh model on the module-level ``X``/``Y`` for
    the module-level ``EPOCHS`` steps (``EPOCHS`` must be defined before this
    function is called).

    Args:
        m: number of independent runs. Previously this argument was ignored
            and exactly 10 runs were always performed.

    Returns:
        List of ``m`` arrays, one ``get_mean()`` result per run.
    """
    return [run(X, Y, ep=EPOCHS).get_mean() for _ in range(m)]




# Evaluation

In [5]:
def get_q_error(q, X, Y) :
    """Compare the fitted posterior with the analytical one.

    Args:
        q: fitted model exposing ``get_mean()`` and ``get_var()``.
        X: inputs handed to the analytical posterior helpers.
        Y: targets handed to the analytical posterior mean helper.

    Returns:
        Tuple ``(mean_error, var_error)``: the ``[0][0]`` entries of
        (fitted - analytical) for the mean and for the variance.
    """
    reference = (
        u.analytical_posterior_mean(PRIOR_MEAN, PRIOR_VAR, X, Y),
        u.analytical_posterior_var(PRIOR_VAR, X),
    )
    fitted = (q.get_mean(), q.get_var())

    mean_gap, var_gap = (est - ref for est, ref in zip(fitted, reference))
    return mean_gap[0][0], var_gap[0][0]
            

# Number of optimisation steps; also read by amortise_runs.
EPOCHS = 5000
# Fit one variational posterior on the generated data.
q = run(X, Y, ep=EPOCHS)
# Bare expression on purpose: as the last statement of a notebook cell its
# value (mean error, variance error) is displayed as the cell output.
get_q_error(q, X, Y)

(-8.857345525115079e-05, 6.593544540482453e-06)