In [32]:
import pandas as pd
import torch
import sys

In [33]:
# obtaining data
df = pd.read_csv('data/Advertising.csv').drop('Unnamed: 0', axis=1)

X = torch.Tensor([df.TV.to_numpy(),
                  df.Radio.to_numpy()]).transpose(0, 1)
y = torch.Tensor([df.Sales.to_numpy()]).transpose(0, 1)

In [34]:
X.shape

torch.Size([200, 2])

In [35]:
y.shape

torch.Size([200, 1])

In [36]:
# prior parameters
a0 = 1e-2
b0 = 1e-4
c0 = 1e-2
d0 = 1e-4

In [37]:
# pre-process data
N = X.shape[0]
D = 1
if len(X.shape) > 1:
    D = X.shape[1]

X_corr = X.transpose(0, 1).matmul(X)
Xy_corr = X.transpose(0, 1).matmul(y)

an = a0 + N / 2
gammaln_an = torch.lgamma(torch.Tensor([an]))
cn = c0 + D / 2
gammaln_cn = torch.lgamma(torch.Tensor([cn]))

In [39]:
# iterate to find hyperparameters
L_last = -sys.float_info.max
max_iter = 500
E_a = c0 / d0

for i in range(max_iter):
    # covariance and weight of linear model
    invV = E_a * torch.eye(D) + X_corr
    V = torch.inverse(invV)
    logdetV = - torch.logdet(invV)
    w = V * Xy_corr

    # parameters of noise model (an remains constant)
    sse = torch.sum((X.matmul(w) - y) ** 2)
    bn = b0 + 0.5 * (sse + E_a * w.transpose(0, 1).matmul(w))
    E_t = an / bn

    # hyperparameters of covariance prior (cn remains constant)
    dn = d0 + 0.5 * (E_t * w.transpose(0, 1).matmul(w) + torch.trace(V))
    E_a = cn / dn

    print("InvV:", invV)
    print("V:", V)
    print("logdetV:", logdetV)
    print("w", w)
    print("sse:", sse)
    print("bn:", bn)
    print("E_t:", E_t)
    print("dn:", dn)
    print("E_a:", E_a)
    print("an:", an)
    print("gammaln_an:", gammaln_an)
    print("cn:", cn)
    print("gammaln_cn:", gammaln_cn)

    # variational bound, ignoring constant terms for now
    L = -0.5 * (E_t * sse + torch.sum(torch.sum(X * (X.matmul(V))))) + 0.5 * logdetV \
        - b0 * E_t + gammaln_an - an * torch.log(bn) + an \
        + gammaln_cn - cn * torch.log(dn)
    
    print(L_last)
    print(L)
    # variational bound must grow!
    if L_last > L:
        print('Last bound:', L_last)
        print('Current bound:', L)
        print('Variational bound should not reduce')
        break

    # stop if change in variation bound is < 0.001%
    if abs(L_last - L) < abs(0.00001 * L):
        break
    L_last = L

    if iter == max_iter:
        print('Bayesian linear regression reached maximum number of iterations')

    # augment variational bound with constant terms
    L = L - 0.5 * (N * torch.log(torch.Tensor([2 * torch.pi])) - D) - torch.lgamma(torch.Tensor(a0)) + a0 * torch.log(torch.Tensor(b0)) \
        - torch.lgamma(torch.Tensor(c0)) + c0 * torch.log(torch.Tensor(d0))

InvV: tensor([[5791218.0000,  698061.9375],
        [ 698061.9375,  152207.8594]])
V: tensor([[ 3.8614e-07, -1.7709e-06],
        [-1.7709e-06,  1.4692e-05]])
logdetV: tensor(-26.7001)
w tensor([[ 0.1862, -0.8538],
        [-0.1313,  1.0891]])
sse: tensor(3864068.2500)
bn: tensor([[1932036.7500, 1932019.0000],
        [1932019.0000, 1932129.8750]])
E_t: tensor([[5.1764e-05, 5.1765e-05],
        [5.1765e-05, 5.1762e-05]])
dn: tensor([[1.0888e-04, 9.9725e-05],
        [9.9725e-05, 1.5710e-04]])
E_a: tensor([[ 9276.0996, 10127.8447],
        [10127.8447,  6429.0190]])
an: 100.01
gammaln_an: tensor([359.1802])
cn: 1.01
gammaln_cn: tensor([-0.0057])
-1.7976931348623157e+308
tensor([[-1093.5114, -1093.4229],
        [-1093.4229, -1093.8818]])


RuntimeError: Boolean value of Tensor with more than one value is ambiguous