In [19]:
import pandas as pd
import torch
import sys
import numpy as np

In [20]:
# obtaining data
df = pd.read_csv('data/Advertising.csv').drop('Unnamed: 0', axis=1)

# Design matrix X: one row per observation, columns TV, Radio, Newspaper
# followed by a constant column of ones. The ones column is sized from the
# data frame itself rather than a hard-coded row count, so the cell keeps
# working if the CSV gains or loses rows.
X = torch.cat(
    [torch.tensor(df[['TV', 'Radio', 'Newspaper']].to_numpy(), dtype=torch.float32),
     torch.ones(len(df), 1)],
    dim=1,
)

# Targets y as a column vector of shape (n_rows, 1).
y = torch.tensor(df['Sales'].to_numpy(), dtype=torch.float32).unsqueeze(1)

In [21]:
# dimensions of the design matrix
X.size()

torch.Size([200, 4])

In [22]:
# dimensions of the target column vector
y.size()

torch.Size([200, 1])

In [23]:
# Prior parameters, each stored as a one-element tensor.
# (a0, b0) feed the noise-model update; (c0, d0) feed the update of the
# hyperparameters of the covariance prior.
a0, b0, c0, d0 = (torch.Tensor([value]) for value in (1e-2, 1e-4, 1e-2, 1e-4))

In [24]:
# Pre-compute every quantity that stays fixed during the iterations below.

# N = number of rows of X; D = number of columns (1 when X has a single axis).
N = X.shape[0]
D = X.shape[1] if X.dim() > 1 else 1

# Cross-products X^T X and X^T y, reused in every iteration.
X_corr = torch.matmul(X.transpose(0, 1), X)
Xy_corr = torch.matmul(X.transpose(0, 1), y)

# an and cn do not change across iterations, so their log-gamma terms are
# evaluated once here.
an = a0 + N / 2
cn = c0 + D / 2
gammaln_an = torch.lgamma(torch.Tensor([an]))
gammaln_cn = torch.lgamma(torch.Tensor([cn]))

In [25]:
# iterate to find hyperparameters
#
# Alternates the updates of (V, w), bn and dn until the variational bound L
# stops changing, the bound decreases, or max_iter iterations have run.
# Reads X, y, N, D, X_corr, Xy_corr, a0, b0, c0, d0, an, cn, gammaln_an and
# gammaln_cn from the cells above; leaves invV, V, logdetV, w, sse, bn, E_t,
# dn, E_a and L in the namespace for later inspection.
L_last = -sys.float_info.max
max_iter = 500
E_a = c0 / d0

for i in range(max_iter):
    # covariance and weight of linear model
    invV = E_a * torch.eye(D) + X_corr
    V = torch.inverse(invV)
    logdetV = - torch.logdet(invV)
    w = V.matmul(Xy_corr)

    # parameters of noise model (an remains constant)
    sse = torch.sum((X.matmul(w) - y) ** 2)
    bn = b0 + 0.5 * (sse + E_a * w.transpose(0, 1).matmul(w))
    E_t = an / bn

    # hyperparameters of covariance prior (cn remains constant)
    dn = d0 + 0.5 * (E_t * w.transpose(0, 1).matmul(w) + torch.trace(V))
    E_a = cn / dn

    # variational bound, ignoring constant terms for now
    L = -0.5 * (E_t * sse + torch.sum(torch.sum(X * (X.matmul(V))))) + 0.5 * logdetV \
        - b0 * E_t + gammaln_an - an * torch.log(bn) + an \
        + gammaln_cn - cn * torch.log(dn)

    print("V_N:", V)
    print("w_N:", w)
    print("a_N:", an)
    print("b_N:", bn)
    print("c_N:", cn)
    print("d_N:", dn)
    print(L)

    # variational bound must grow!
    if L_last > L:
        print('Last bound:', L_last)
        print('Current bound:', L)
        print('Variational bound should not reduce')
        break

    # stop if the relative change in the variational bound is < 1e-8
    # (i.e. 0.000001%)
    # NOTE(review): the tensors above are built with torch.Tensor (float32),
    # whose resolution is coarser than this tolerance, so in practice this
    # fires only when two consecutive bounds are bit-identical. Consider
    # float64 inputs or a looser tolerance.
    if abs(L_last - L) < abs(0.00000001 * L):
        break
    L_last = L
else:
    # The else branch of a for loop runs only when the loop was not left via
    # break, i.e. all max_iter iterations ran without meeting a stop criterion.
    print('Bayesian linear regression reached maximum number of iterations')

# augment variational bound with constant terms
# Done once, after the loop: the constants do not depend on the iterated
# quantities, and applying them here means the final L includes them no
# matter how the loop terminated.
# torch.as_tensor accepts the priors whether they are tensors or plain floats.
L = L - 0.5 * (N * float(np.log(2 * np.pi)) - D) \
    - torch.lgamma(torch.as_tensor(a0)) + a0 * torch.log(torch.as_tensor(b0)) \
    - torch.lgamma(torch.as_tensor(c0)) + c0 * torch.log(torch.as_tensor(d0))

V_N: tensor([[ 4.8700e-07, -9.8560e-07, -5.5491e-07, -2.1151e-05],
        [-9.8560e-07,  2.2559e-05, -8.0847e-06, -8.8585e-05],
        [-5.5491e-07, -8.0847e-06,  1.1136e-05, -4.7040e-05],
        [-2.1151e-05, -8.8585e-05, -4.7040e-05,  7.7388e-03]])
w_N: tensor([[0.0520],
        [0.2141],
        [0.0129],
        [0.6663]])
a_N: 100.01
b_N: tensor([[378.4602]])
c_N: 2.01
d_N: tensor([[0.0691]])
tensor([[-245.7125]])
V_N: tensor([[ 5.5736e-07, -6.9220e-07, -3.9856e-07, -4.6858e-05],
        [-6.9220e-07,  2.3837e-05, -7.4486e-06, -1.9657e-04],
        [-3.9856e-07, -7.4486e-06,  1.1497e-05, -1.0422e-04],
        [-4.6858e-05, -1.9657e-04, -1.0422e-04,  1.7152e-02]])
w_N: tensor([[0.0498],
        [0.2052],
        [0.0079],
        [1.4734]])
a_N: 100.01
b_N: tensor([[342.0013]])
c_N: 2.01
d_N: tensor([[0.3326]])
tensor([[-235.5653]])
V_N: tensor([[ 6.4109e-07, -3.4118e-07, -2.1238e-07, -7.7499e-05],
        [-3.4118e-07,  2.5326e-05, -6.6732e-06, -3.2529e-04],
        [-2.1238e-0

In [26]:
# Dump the values left in the namespace by the iteration cell, together with
# the constants computed during pre-processing, one labelled line per value.
for label, value in (
    ("InvV:", invV),
    ("V:", V),
    ("logdetV:", logdetV),
    ("w", w),
    ("sse:", sse),
    ("bn:", bn),
    ("E_t:", E_t),
    ("dn:", dn),
    ("E_a:", E_a),
    ("an:", an),
    ("gammaln_an:", gammaln_an),
    ("cn:", cn),
    ("gammaln_cn:", gammaln_cn),
):
    print(label, value)

InvV: tensor([[5.7911e+06, 6.9806e+05, 9.1963e+05, 2.9409e+04],
        [6.9806e+05, 1.5211e+05, 1.6495e+05, 4.6528e+03],
        [9.1963e+05, 1.6495e+05, 2.8110e+05, 6.1108e+03],
        [2.9409e+04, 4.6528e+03, 6.1108e+03, 2.0143e+02]])
V: tensor([[ 6.7295e-07, -2.0749e-07, -1.4152e-07, -8.9162e-05],
        [-2.0749e-07,  2.5890e-05, -6.3769e-06, -3.7428e-04],
        [-1.4152e-07, -6.3769e-06,  1.2073e-05, -1.9831e-04],
        [-8.9162e-05, -3.7428e-04, -1.9831e-04,  3.2643e-02]])
logdetV: tensor(-41.5503)
w tensor([[ 4.6140e-02],
        [ 1.9010e-01],
        [-2.0082e-04],
        [ 2.8016e+00]])
sse: tensor(557.3759)
bn: tensor([[284.3359]])
E_t: tensor([[0.3517]])
dn: tensor([[1.4035]])
E_a: tensor([[1.4321]])
an: 100.01
gammaln_an: tensor([359.1802])
cn: 2.01
gammaln_cn: tensor([0.0043])
