In [41]:
import pandas as pd

# Obtains the file and cleans it
FILE_PATH = 'data/Advertising.csv' 
df = pd.read_csv(FILE_PATH).drop('Unnamed: 0', axis=1)

# Obtains our x and y vectors
x = df.TV.to_numpy()
y = df.Sales.to_numpy()

# Displays the dataframe
df

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9
...,...,...,...,...
195,38.2,3.7,13.8,7.6
196,94.2,4.9,8.1,9.7
197,177.0,9.3,6.4,12.8
198,283.6,42.0,66.2,25.5


In [42]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

class KL_Divergence(nn.Module):
    def __init__(self, rand_size=100):
        super().__init__()

        # The parameters needed to do divergence
        self.mu_m = nn.Parameter(torch.tensor(1.0), requires_grad=True)
        self.sig_m = nn.Parameter(torch.tensor(1.0), requires_grad=True)
        self.mu_b = nn.Parameter(torch.tensor(1.0), requires_grad=True)
        self.sig_b = nn.Parameter(torch.tensor(1.0), requires_grad=True)
        self.noise = nn.Parameter(torch.tensor(1.0), requires_grad=True)
        
        # The size parameter used for random generation
        self.rand_size = rand_size
        
    def forward(self, x: torch.Tensor, y: torch.Tensor):
        # Randomly generated slope and intercept
        m = torch.normal(self.mu_m.item(), self.sig_m.item(), (self.rand_size,))
        b = torch.normal(self.mu_b.item(), self.sig_b.item(), (self.rand_size,))
        
        # Gets a list of values to sum
        res = [self.log_q(m[j], b[j]) - self.log_p(m[j], b[j], x, y) for j in range(len(m))]
        
        # Returns the sum of the values
        return torch.sum(torch.Tensor(res))
    
        
    def log_q(self, m_j, b_j):
        # Calculates the log_q for each m_j and b_j
        log_m = -((m_j - self.mu_m) ** 2 / 2 * self.sig_m ** 2 + 0.5 * torch.log(self.sig_m ** 2))
        log_b = -((b_j - self.mu_b) ** 2 / 2 * self.sig_b ** 2 + 0.5 * torch.log(self.sig_b ** 2))
        return log_m + log_b
    
    def log_p(self, m_j, b_j, x, y):
        # Calculates the log_p for each m_j and b_j
        vals = [(y[i] - x[i] * m_j - b_j) ** 2 / 2 * self.noise ** 2 + 0.5 * torch.log(self.noise ** 2) for i in range(len(x))]
        return torch.sum(torch.Tensor(vals)) + (m_j ** 2 / 2 + b_j ** 2 / 2)

In [43]:
# Trains the specific model
def kl_train(x, y, rand_size=100, model=None, epochs=5, frac=0.1):
    if model == None:
        model = KL_Divergence(rand_size=100)
    optimizer = optim.Adam(model.parameters())
    
    for epoch in tqdm(range(epochs), desc="Training..."):
        # Zeros gradiant for training
        optimizer.zero_grad()
        
        # Random selection of data points per iteration
        indices = torch.randint(low=0, high=110, size=(int(len(x) * 0.1), 1))
        
        # Calculates likelihood
        loglik = model(x[indices], y[indices])
        #loglik = model(x, y)
        
        # Mean of loglik
        e = -torch.mean(loglik)
        
        # Updates parameters
        e.backward()
        optimizer.step()
        
    return model

In [44]:
model_kl = kl_train(x, y)

Training...:   0%|          | 0/5 [00:00<?, ?it/s]


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [49]:
def func(x):
    print(x)
    return 1

vals = torch.Tensor([[1, 2, 3, 4, 5], [2, 3, 4, 5, 6]])
vals.apply_(func)

1.0
2.0
3.0
4.0
5.0
2.0
3.0
4.0
5.0
6.0


tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]])