# Notebook for building and testing the model     

In this notebook, I will build and run an initial test over the first 2,048 rows of the data to ensure proper functionality before deploying on midway2 to train on GPUs. 

In [None]:
import torch
from torch import nn, optim
from torch.autograd import Variable
from torch.nn import functional as F
from torch.utils.data import Dataset, TensorDataset, DataLoader
import torchvision
from torchvision import datasets, transforms
from torchvision.utils import save_image

from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
import sys

In [None]:
# cuda setup
device = torch.device("cuda")
kwargs = {'num_workers': 2, 'pin_memory': True} 

In [831]:
# hyper params
batch_size = 128
latent_size = 10
epochs = 10

In [None]:
# reading in the data
dat = np.genfromtxt('traindata/trip-2021-07-28.csv', delimiter=',')[1:,]

In [None]:
# need to subset this in a conditional fashion (i.e., each gamma value, here class c, has 1000 data points)
gamma = np.unique(dat[:,1])
idx = [np.where(dat[:,1] == gamma[i]) for i in np.arange(len(gamma))]

In [None]:
# create an 80/20 split in each data set
temp = [train_test_split(dat[idx[i][0],0], dat[idx[i][0],2], test_size=0.2, random_state=42) for i in np.arange(len(gamma))]

# for each gamma value...
Xltrain = []
Xltest = []
altrain = []
altest = []
for t in np.arange(len(temp)):
    Xltrain.append(temp[t][0])
    Xltest.append(temp[t][1])
    altrain.append(temp[t][2])
    altest.append(temp[t][3])

In [873]:
# creating a simple matrix of training data (...x2, ...x1) 
tr_dat = TensorDataset(torch.tensor(np.vstack((np.hstack((Xltrain[0])), np.repeat(gamma[0],len(Xltrain[0])))).T), torch.tensor(np.hstack((altrain[0]))))

In [875]:
tr_dl = DataLoader(tr_dat, batch_size=64, shuffle=True, num_workers=2)

In [876]:
te_dat = TensorDataset(torch.tensor(np.vstack((np.hstack((Xltest[0])), np.repeat(gamma[0],len(Xltest[0])))).T), torch.tensor(np.hstack((altest[0]))))
te_dl = DataLoader(te_dat, num_workers=2)

In [867]:
class BaselineNet(nn.Module):
    def __init__(self, hidden1):
        super().__init__()
        self.fc1 = nn.Sequential(
            # nn.Linear(2, hidden1),
            # nn.Tanh(),
            # nn.Linear(hidden1, 50), 
            # nn.Tanh(),
            # nn.Linear(50, 1),
            nn.Linear(2, 1),
            nn.ReLU()
        )
        self.double()

    def forward(self, xg):
        a = self.fc1(xg)
        return a

In [877]:
bnet = BaselineNet(500)
bnet.eval()
mse_loss = nn.MSELoss()
optimizer = optim.Adagrad(bnet.parameters(), lr=2e-3)

In [878]:
len(tr_dl.dataset)

800

In [866]:
for i, (xg, a) in enumerate(tr_dl):
    print(xg)
    print(a)

    print(bnet(xg))

    if i==1:
        break

tensor([[ 1.6800e-02, -2.1544e+01],
        [ 2.8000e-03, -4.6416e+01],
        [ 1.0000e-03, -4.6416e+01],
        [ 9.9340e-01, -6.8129e+01],
        [ 6.0000e-04, -1.0000e+02],
        [ 2.8000e-03, -2.1544e+01],
        [ 8.5600e-02, -3.1623e+01],
        [ 1.0640e-01, -6.8129e+01],
        [ 6.0000e-04, -3.1623e+01],
        [ 1.6000e-03, -4.6416e+01],
        [ 4.0000e-04, -3.1623e+01],
        [ 1.8500e-01, -2.1544e+01],
        [ 9.6000e-03, -6.8129e+01],
        [ 1.4000e-03, -1.0000e+02],
        [ 1.6760e-01, -2.1544e+01],
        [ 8.8000e-03, -2.1544e+01],
        [ 5.3480e-01, -1.0000e+02],
        [ 6.7940e-01, -4.6416e+01],
        [ 7.4800e-01, -2.1544e+01],
        [ 8.0000e-03, -2.1544e+01],
        [ 4.0260e-01, -2.1544e+01],
        [ 4.0000e-04, -1.0000e+02],
        [ 2.8400e-02, -1.0000e+02],
        [ 8.8000e-03, -3.1623e+01],
        [ 2.2000e-03, -3.1623e+01],
        [ 2.6000e-03, -3.1623e+01],
        [ 1.0000e-03, -1.0000e+02],
        [ 2.0000e-03, -1.000

In [861]:
def train(dl, model, loss_fn, optim):
    size = len(dl.dataset)
    for batch, (xg, a) in enumerate(dl):
        # Compute prediction error
        pred = model(xg)
        loss = loss_fn(pred, a)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # if batch % 10 == 0:
        #     loss, current = loss.item(), batch * len(xg)
        #     print(f"loss: {loss:>.2f}  [{current:>2d}/{size:>2d}]")

        return loss.item()

In [862]:
def test(dl, model, loss_fn):
    size = len(dl.dataset)
    num_batches = len(dl)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for xg, a in dl:
            pred = model(xg)
            test_loss += loss_fn(pred, a).item()
    test_loss /= num_batches
    return test_loss
    #print(f"Test Error: Avg loss: {test_loss:>.2f} \n")

In [872]:
epochs = 500
for t in range(epochs):
    trloss = train(tr_dl, bnet, mse_loss, optimizer)
    teloss = test(te_dl, bnet, mse_loss)
    if t%100 == 0:
        print(f"Epoch {t+1} - ")
        print(f"\tTrain loss: {trloss:.2f}")
        print(f"\tTest loss: {teloss:.2f}")
print("Done!")

Epoch 1 - 
	Train loss: 2302621.37
	Test loss: 2490284.89
Epoch 101 - 
	Train loss: 2298347.18
	Test loss: 2486747.29
ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

Traceback (most recent call last):
  File "/usr/local/anaconda3/envs/cvae/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3437, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-872-95088cac1c2a>", line 4, in <module>
    teloss = test(te_dl, bnet, mse_loss)
  File "<ipython-input-862-9119e7659a5f>", line 7, in test
    for xg, a in dl:
  File "/usr/local/anaconda3/envs/cvae/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 435, in __next__
    data = self._next_data()
  File "/usr/local/anaconda3/envs/cvae/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1068, in _next_data
    idx, data = self._get_data()
  File "/usr/local/anaconda3/envs/cvae/lib/python3.9/site-packages/to

TypeError: object of type 'NoneType' has no len()

In [806]:
preds = bnet(torch.tensor(np.vstack((np.hstack((Xltrain[0:5])), np.repeat(gamma[0:5],len(Xltrain[0])))).T))
preds

tensor([[5.1502],
        [5.1502],
        [5.1502],
        ...,
        [5.1500],
        [5.1500],
        [5.1500]], dtype=torch.float64, grad_fn=<AddmmBackward>)

In [None]:
r2_score(y_true=altrain[5], y_pred=pred.detach().numpy())
#bnet(torch.unsqueeze(torch.tensor(Xltrain[5][0:1]),1), torch.unsqueeze(torch.tensor(np.repeat(gamma[5],1)),1))
#torch.unsqueeze(torch.cat((torch.tensor(Xltrain[5]),torch.tensor(Xltrain[0])),0),1)
torch.unsqueeze(torch.tensor(np.repeat(gamma[0:9],len(Xltrain[0]))),1).shape

In [None]:
#torch.unsqueeze(torch.tensor(Xltrain[0]),1)
#bnet.forward(torch.tensor(Xltrain[0]), torch.tensor(np.repeat(gamma[0], len(Xltrain[0]))))
torch.tensor(Xltrain[0]).view(-1, torch.tensor(Xltrain[0]).size(0)).shape
torch.unsqueeze(torch.tensor(Xltrain[0]),1)
#print(torch.unsqueeze(torch.linspace(-1,1,10),-1).shape)
#bnet.forward(torch.tensor(Xltrain[0][1]),torch.tensor(gamma[0]))
#torch.unsqueeze(torch.tensor(Xltrain[0]),0).shape

## VAE implementation

Following the procedure in https://github.com/AntixK/PyTorch-VAE/blob/master/models/vanilla_vae.py, seems pretty similar to the architecture of the BaselineNet...

In [None]:
from torch.nn import functional as F

In [None]:
class BaseVAE(nn.Module):
    
    def __init__(self) -> None:
        super(BaseVAE, self).__init__()

    def encode(self, input: torch.Tensor) -> List[torch.Tensor]:
        raise NotImplementedError

    def decode(self, input: torch.Tensor) -> Any:
        raise NotImplementedError

    def sample(self, batch_size:int, current_device: int, **kwargs) -> torch.Tensor:
        raise RuntimeWarning()

    def generate(self, x: torch.Tensor, **kwargs) -> torch.Tensor:
        raise NotImplementedError

    @abstractmethod
    def forward(self, *inputs: torch.Tensor) -> torch.Tensor:
        pass

    @abstractmethod
    def loss_function(self, *inputs: Any, **kwargs) -> torch.Tensor:
        pass

In [761]:
# from https://debuggercafe.com/getting-started-with-variational-autoencoder-using-pytorch/
# define a simple linear VAE
class LinearVAE(nn.Module):
    def __init__(self, features):
        super(LinearVAE, self).__init__()
 
        # encoder
        self.enc1 = nn.Linear(2, 48)
        self.enc2 = nn.Linear(48, features*2)
 
        # decoder 
        self.dec1 = nn.Linear(features, 24)
        self.dec2 = nn.Linear(24, 1)

        self.double()

    def reparameterize(self, mu, log_var):
        """
        :param mu: mean from the encoder's latent space
        :param log_var: log variance from the encoder's latent space
        """
        std = torch.exp(0.5*log_var) # standard deviation
        eps = torch.randn_like(std) # `randn_like` as we need the same size
        sample = mu + (eps * std) # sampling as if coming from the input space
        return sample
 
    def forward(self, x):
        # encoding
        x = F.tanh(self.enc1(x))
        x = self.enc2(x).view(-1, 2, features)

        # get `mu` and `log_var`
        mu = x[:, 0, :] # the first feature values as mean
        log_var = x[:, 1, :] # the other feature values as variance

        # get the latent vector through reparameterization
        z = self.reparameterize(mu, log_var)
 
        # decoding
        x = F.tanh(self.dec1(z))
        reconstruction = torch.sigmoid(self.dec2(x))
        return reconstruction, mu, log_var

In [762]:
batch_size = 512
model = LinearVAE(features=16) # dimensionality of the latent variable
optimizer = optim.SGD(model.parameters(), lr=1e-3)

In [None]:
def final_loss(mse_loss, mu, logvar):
    """
    This function will add the reconstruction loss (MSELoss) and the 
    KL-Divergence.
    KL-Divergence = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)

    :param mse_loss: recontruction loss
    :param mu: the mean from the latent vector
    :param logvar: log variance from the latent vector
    """
    MSE = mse_loss 
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

    return MSE + KLD

In [None]:
def train_vae(dl, model, loss_fn, optim):
    size = len(dl.dataset)
    for batch, (xg, a) in enumerate(dl):
        # Compute prediction error
        pred, mu, logvar = model(xg)
        step_loss = loss_fn(pred, a)

        loss = final_loss(step_loss, mu, logvar)
        
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # if batch % 10 == 0:
        #     loss, current = loss.item(), batch * len(xg)
        #     print(f"loss: {loss:>.2f}  [{current:>2d}/{size:>2d}]")

        return loss.item()

In [None]:
def test_vae(dl, model, loss_fn):
    size = len(dl.dataset)
    num_batches = len(dl)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for xg, a in dl:
            pred, _, _ = model(xg)
            test_loss += loss_fn(pred, a).item()
    test_loss /= num_batches
    return test_loss

In [None]:
epochs = 500
for t in range(epochs):
    trloss = train(tr_dl, bnet, mse_loss, optimizer)
    teloss = test(te_dl, bnet, mse_loss)
    if t%100 == 0:
        print(f"Epoch {t+1} - ")
        print(f"\tTrain loss: {trloss:.2f}")
        print(f"\tTest loss: {teloss:.2f}")
print("Done!")

In [763]:
model.train()
for e in range(800):
    optimizer.zero_grad()
    
    pred, mu, logvar = model(torch.tensor(np.vstack((Xltrain[0],np.repeat(gamma[0],len(Xltrain[0])))).T))
    step_loss = mse_loss(pred, torch.unsqueeze(torch.tensor(np.hstack((altrain[0]))),1))

    loss = final_loss(step_loss, mu, logvar)
    loss.backward()

    # update with current step regression parameters 
    optimizer.step()

    if e % 100 == 0:
        print ('epoch [{}], Loss: {:.2f}'.format(e, loss.item()))

epoch [0], Loss: 295960.03
epoch [100], Loss: nan
epoch [200], Loss: nan
epoch [300], Loss: nan
epoch [400], Loss: nan
epoch [500], Loss: nan
epoch [600], Loss: nan
epoch [700], Loss: nan


In [747]:
torch.tensor(np.vstack((Xltrain[0],np.repeat(gamma[0],len(Xltrain[0])))).T).shape

torch.Size([800, 2])

tensor([[nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [n