In [1]:
# import necessary packages

import torch
import torch.nn as nn
import torch.optim as optim

import os
import numpy as np
import random
import pandas as pd

%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt


# set random seed
#
# A single seed is pushed into every random number generator seeded in this
# cell (Python's `random`, NumPy's global generator and PyTorch) so that
# re-running the notebook from a fresh kernel starts from the same RNG state.

seed = 42

random.seed(seed)
# NOTE: PYTHONHASHSEED is read once at interpreter start-up, so assigning it
# here does not change hashing in the already-running kernel; it is only
# inherited by processes launched afterwards.
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
torch.manual_seed(seed)
# Seed every CUDA device rather than only the current one; the call is
# silently ignored when CUDA is not available.
torch.cuda.manual_seed_all(seed)

# set device
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# make sure cuda works
print(device)

cuda


In [2]:
# Consider a larger problem
from scipy.stats import invgamma,multivariate_normal

problem_size = 500   # number of observations (rows of X)
problem_dim = 20     # number of regression coefficients (columns of X)
noise_amp = 0.01     # standard deviation of the additive Gaussian noise

X = np.random.normal(size=(problem_size, problem_dim))

# Instead of drawing beta from a uniform distribution, e.g.
#   beta_true = np.random.uniform(0, 10, size=problem_dim).reshape(problem_dim, 1)
# draw it from a known Gaussian so that the true generating distribution of
# beta can be passed on as the prior.
# Both hyperparameters are sized from problem_dim (not a literal 20) so the
# dimensions stay consistent with X when problem_dim is changed.
beta_0_true = np.ones(problem_dim)*5.0     # mean of the distribution beta is drawn from
sigma_0_true = np.eye(problem_dim)*2.0     # covariance of that distribution

beta_true = multivariate_normal.rvs(mean = beta_0_true, cov = sigma_0_true)
print("True betas are: ", beta_true)

noise = np.random.normal(loc = 0.0, scale = noise_amp, size = problem_size)

y = X @ beta_true + noise
print(y.shape, X.shape, beta_true.shape, (X @ beta_true).shape, noise.shape)

True betas are:  [4.04046355 4.56795852 4.1551756  5.1561547  6.69306612 3.90958172
 6.41537392 3.89454875 3.8012741  6.15766761 6.30381493 6.20407548
 3.13918146 4.34104581 6.16388163 5.05874903 3.48156902 5.64815945
 3.98910988 7.53784138]
(500,) (500, 20) (20,) (500,) (500,)


In [3]:
# Ordinary least squares estimate of beta.
# The normal equations (X'X) b = X'y are solved directly with
# np.linalg.solve instead of forming the explicit inverse of X'X, which is
# cheaper and numerically more accurate.
XtX = X.T @ X
beta_ols = np.linalg.solve(XtX, X.T @ y)
print(beta_ols)

num_samples = 5000

[4.04096391 4.56875965 4.15529016 5.15640066 6.69319844 3.9093258
 6.4154151  3.89449418 3.80158741 6.15826358 6.30330249 6.20441974
 3.13907183 4.34037614 6.16337035 5.05857585 3.48101542 5.64752632
 3.98943725 7.53760881]


In [4]:
# Compute the sum of squared residual
def SSR(y, X, beta):
    """Return the sum of squared residuals of the linear fit ``X . beta``.

    Parameters
    ----------
    y : array_like
        Observed responses.
    X : array_like
        Design matrix.
    beta : array_like
        Coefficient vector; the fitted values are ``np.dot(X, beta)``.

    Returns
    -------
    scalar
        ``sum((y - np.dot(X, beta)) ** 2)``.
    """
    fitted = np.dot(X, beta)
    errors = y - fitted
    return np.sum(errors ** 2)

In [5]:
def gibbs_trueprior(y, X, num_iters, init, gamma_prior, prior_mean=None, prior_cov=None):
    """Gibbs sampler for Bayesian linear regression with a Gaussian prior on beta.

    Model sampled from::

        y | beta, sigma^2  ~  N(X beta, sigma^2 I)
        beta               ~  N(prior_mean, prior_cov)
        sigma^2            ~  InvGamma(a_prior, scale=b_prior)

    Each sweep draws ``beta | sigma^2, y`` from its Gaussian full conditional
    and then ``sigma^2 | beta, y`` from its inverse-gamma full conditional.

    Parameters
    ----------
    y : ndarray
        Responses, one per row of ``X``.
    X : ndarray of shape (n, p)
        Design matrix.
    num_iters : int
        Number of Gibbs sweeps; also the number of rows of the returned trace.
    init : tuple ``(beta, sigma_squared)``
        Starting state. Only ``sigma_squared`` influences the chain, because
        beta is redrawn before it is ever read; the beta entry is accepted for
        interface compatibility.
    gamma_prior : tuple ``(a_prior, b_prior)``
        Shape and scale of the inverse-gamma prior on ``sigma^2``.
    prior_mean : ndarray of shape (p,), optional
        Mean of the Gaussian prior on beta. Defaults to the notebook-level
        ``beta_0_true``.
    prior_cov : ndarray of shape (p, p), optional
        Covariance of the Gaussian prior on beta. Defaults to the
        notebook-level ``sigma_0_true``.

    Returns
    -------
    trace : ndarray of shape (num_iters, p + 1)
        Row ``i`` holds the beta draw of sweep ``i`` followed by the
        ``sigma^2`` draw of the same sweep.
    """
    n, p = X.shape

    # Unpack initialization (the initial beta is never read, see docstring)
    _, sigma_squared = init
    # Unpack prior hyperparameters
    a_prior, b_prior = gamma_prior

    # Fall back to the notebook globals so existing five-argument calls keep
    # their behavior; pass the prior explicitly to avoid the hidden dependency.
    if prior_mean is None:
        prior_mean = beta_0_true
    if prior_cov is None:
        prior_cov = sigma_0_true

    # Allocate space for trace
    trace = np.empty((num_iters, p + 1))

    # Quantities that do not change between sweeps are computed once.
    XtX = X.T @ X
    Xty = X.T @ y
    prior_precision = np.linalg.inv(prior_cov)
    prior_term = prior_precision @ prior_mean
    a_post = 0.5*n + a_prior

    for i in range(num_iters):
        # beta | sigma^2, y ~ N(M_beta, V_beta):
        #   V_beta = (X'X / sigma^2 + prior_precision)^-1
        #   M_beta = V_beta (prior_precision prior_mean + X'y / sigma^2)
        V_beta = np.linalg.inv(XtX/sigma_squared + prior_precision)
        M_beta = V_beta @ (prior_term + Xty/sigma_squared)

        beta = multivariate_normal.rvs(M_beta, V_beta)

        # sigma^2 | beta, y ~ InvGamma(n/2 + a_prior, scale = b_prior + SSR/2).
        # No `size` argument: a scalar is drawn, so sigma_squared does not
        # turn into a length-1 array after the first sweep.
        SSR_i = SSR(y, X, beta)
        sigma_squared = invgamma.rvs(a = a_post, scale = b_prior+0.5*SSR_i)

        trace[i, :-1] = beta
        trace[i, -1] = sigma_squared

    return trace

In [6]:
# Starting state of the chain: every coefficient at 5, noise variance at 2.
init = (np.full(X.shape[1], 5.0), 2)
# (a, b) hyperparameters handed to the sampler as `gamma_prior`.
prior = (3, 0.5)

# NOTE(review): 5000 equals `num_samples` defined in an earlier cell;
# confirm whether that constant was meant to be used here.
result = gibbs_trueprior(y, X, num_iters=5000, init=init, gamma_prior=prior)

print(result)

[[4.10276289e+00 4.61478405e+00 4.10783215e+00 ... 4.00370330e+00
  7.57127147e+00 9.37568564e-02]
 [4.04668611e+00 4.59628733e+00 4.16570614e+00 ... 4.01645234e+00
  7.52863446e+00 6.00594937e-03]
 [4.04276759e+00 4.56486573e+00 4.15219024e+00 ... 3.99060221e+00
  7.53859489e+00 2.46700876e-03]
 ...
 [4.04004230e+00 4.56918060e+00 4.15347495e+00 ... 3.98979502e+00
  7.53679583e+00 2.03060649e-03]
 [4.03696694e+00 4.57193826e+00 4.15627816e+00 ... 3.99097184e+00
  7.53807015e+00 2.16477214e-03]
 [4.04124386e+00 4.56695492e+00 4.15315900e+00 ... 3.98833666e+00
  7.53755242e+00 2.11359708e-03]]
