# Notebook for building and testing the model     

In this notebook, I will build and run an initial test over the first 2,048 rows of the data to ensure proper functionality before deploying on midway2 to train on GPUs. 

In [179]:
import torch
from torch import nn, optim
from torch.autograd import Variable
from torch.nn import functional as F
from torch.utils.data import Dataset, TensorDataset, DataLoader
import torchvision
from torchvision import datasets, transforms
from torchvision.utils import save_image

from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
import sys

In [9]:
# cuda setup
device = torch.device("cuda")
kwargs = {'num_workers': 2, 'pin_memory': True} 

In [90]:
# hyper params
batch_size = 64
latent_size = 10
epochs = 10

In [4]:
# reading in the data
dat = np.genfromtxt('traindata/trip-2021-07-28.csv', delimiter=',')[1:,]

In [54]:
# need to subset this in a conditional fashion (i.e., each gamma value, here class c, has 1000 data points)
gamma = np.unique(dat[:,1])
idx = [np.where(dat[:,1] == gamma[i]) for i in np.arange(len(gamma))]

In [79]:
# create an 80/20 split in each data set
temp = [train_test_split(dat[idx[i][0],0], dat[idx[i][0],2], test_size=0.2, random_state=42) for i in np.arange(len(gamma))]

# for each gamma value...
Xltrain = []
Xltest = []
altrain = []
altest = []
for t in np.arange(len(temp)):
    Xltrain.append(temp[t][0])
    Xltest.append(temp[t][1])
    altrain.append(temp[t][2])
    altest.append(temp[t][3])

In [91]:
torch.utils.data.DataLoader(TensorDataset(torch.from_numpy(Xltrain[0]), torch.from_numpy(altrain[0])), batch_size=batch_size, shuffle=True, **kwargs)

<torch.utils.data.dataloader.DataLoader at 0x7fa92450b8e0>

In [487]:
class BaselineNet(nn.Module):
    def __init__(self, hidden1):
        super().__init__()
        self.fc1 = nn.Sequential(
            nn.Linear(1, hidden1),
            nn.Linear(hidden1, 5),
            nn.Tanh(),
        )
        self.fc2 = nn.Sequential(
            nn.Linear(1, 2),
            nn.Tanh(),
        )

        self.predict = nn.Sequential(
            nn.Linear(5 + 2, 1), 
            nn.ReLU(),
        )
        self.double()

    def forward(self, x, g):
        #x = self.fc1(x.view(-1, x.size(0)))
        x = self.fc1(x)
        g = self.fc2(g)
        # print(x)
        # print(g)
        a = self.predict(torch.cat((x,g), 1))
        return a

In [505]:
bnet = BaselineNet(20)
bnet.eval()
mse_loss = nn.MSELoss()
optimizer = optim.SGD(bnet.parameters(), lr=2e-3)

In [506]:
for e in range(1000):
    #pred = bnet(torch.from_numpy(Xltrain[0]).float(), torch.from_numpy(np.repeat(gamma[0], len(Xltrain[0]))).float())
    pred = bnet(torch.unsqueeze(torch.tensor(Xltrain[0]),1), torch.unsqueeze(torch.tensor(np.repeat(gamma[0], len(Xltrain[0]))),1))
    step_loss = mse_loss(pred, torch.unsqueeze(torch.tensor(altrain[0]),1))

    optimizer.zero_grad()
    step_loss.backward()
    # update with current step regression parameters 
    optimizer.step()

    if e % 100 == 0:
        print ('epoch [{}], Loss: {:.2f}'.format(e, step_loss.item()))

epoch [0], Loss: 292808.07
epoch [100], Loss: 74485.30
epoch [200], Loss: 74146.65
epoch [300], Loss: 74140.43
epoch [400], Loss: 74170.19
epoch [500], Loss: 74146.29
epoch [600], Loss: 74146.26
epoch [700], Loss: 74146.26
epoch [800], Loss: 74146.26
epoch [900], Loss: 74146.26


In [507]:
r2_score(y_true=altrain[0], y_pred=pred.detach().numpy())
pred[0:10]

tensor([[468.1862],
        [468.1862],
        [468.1862],
        [468.1862],
        [468.1862],
        [468.1862],
        [468.1862],
        [468.1862],
        [468.1862],
        [468.1862]], dtype=torch.float64, grad_fn=<SliceBackward>)

In [481]:
#torch.unsqueeze(torch.tensor(Xltrain[0]),1)
#bnet.forward(torch.tensor(Xltrain[0]), torch.tensor(np.repeat(gamma[0], len(Xltrain[0]))))
torch.tensor(Xltrain[0]).view(-1, torch.tensor(Xltrain[0]).size(0)).shape
torch.unsqueeze(torch.tensor(Xltrain[0]),1)
#print(torch.unsqueeze(torch.linspace(-1,1,10),-1).shape)
#bnet.forward(torch.tensor(Xltrain[0][1]),torch.tensor(gamma[0]))
#torch.unsqueeze(torch.tensor(Xltrain[0]),0).shape

800

In [491]:
bnet.fc1[0].weight

Parameter containing:
tensor([[ 2.4035],
        [-1.1682],
        [ 1.2436],
        [-3.9618],
        [-4.1732],
        [-0.2449],
        [ 0.1441],
        [-0.3814],
        [-0.0617],
        [-1.2677],
        [ 2.8297],
        [-1.2981],
        [-4.5600],
        [-3.7503],
        [-2.9979],
        [-0.9083],
        [ 0.4969],
        [ 0.0278],
        [-0.7822],
        [-4.4803]], dtype=torch.float64, requires_grad=True)