# Notebook for building and testing the model     

In this notebook, I build the model and run an initial test on the first 2,048 rows of the data to verify that it works before deploying to midway2 to train on GPUs.

In [179]:
import torch
from torch import nn, optim
from torch.autograd import Variable
from torch.nn import functional as F
from torch.utils.data import Dataset, TensorDataset, DataLoader
import torchvision
from torchvision import datasets, transforms
from torchvision.utils import save_image

from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
import sys

In [9]:
# device setup
# Use the GPU when one is visible to torch and fall back to the CPU otherwise,
# so the notebook still runs on a machine without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# DataLoader keyword arguments; pinned host memory is only requested when the
# batches will actually be copied to a CUDA device.
kwargs = {'num_workers': 2, 'pin_memory': device.type == "cuda"}

In [90]:
# hyper params
#   batch_size  -- mini-batch size handed to the DataLoader
#   latent_size -- not referenced in the cells visible here
#   epochs      -- not referenced in the cells visible here
batch_size, latent_size, epochs = 64, 10, 10

In [4]:
# reading in the data
# the first parsed row is dropped (presumably the CSV header line -- confirm against the file)
dat = np.genfromtxt(fname='traindata/trip-2021-07-28.csv', delimiter=',')[1:]

In [54]:
# need to subset this in a conditional fashion (i.e., each gamma value, here class c, has 1000 data points)
# gamma: sorted distinct values found in column 1 of dat
gamma = np.unique(dat[:,1])
# idx[k]: np.where result (a tuple holding one index array) selecting the rows whose column 1 equals gamma[k]
idx = [np.where(dat[:,1] == g) for g in gamma]

In [79]:
# create an 80/20 split in each data set
# column 0 of dat is split as the input and column 2 as the target, one split per gamma group
temp = [
    train_test_split(dat[rows[0], 0], dat[rows[0], 2], test_size=0.2, random_state=42)
    for rows in idx
]

# for each gamma value, train_test_split returned
# [input train, input test, target train, target test]; collect each position into its own list
Xltrain = [split[0] for split in temp]
Xltest = [split[1] for split in temp]
altrain = [split[2] for split in temp]
altest = [split[3] for split in temp]

In [91]:
# smoke check: a shuffled DataLoader can be built over the first gamma group's training split
# (the loader is only displayed, not kept)
DataLoader(
    TensorDataset(torch.from_numpy(Xltrain[0]), torch.from_numpy(altrain[0])),
    batch_size=batch_size,
    shuffle=True,
    **kwargs,
)

<torch.utils.data.dataloader.DataLoader at 0x7fa92450b8e0>

In [590]:
class BaselineNet(nn.Module):
    """Two-branch feed-forward regressor producing one output per row.

    * ``fc1`` passes ``x`` through four Linear+Tanh stages
      (1 -> hidden1 -> hidden1 -> 50 -> 10).
    * ``fc2`` passes ``g`` through a single Linear+Tanh stage (1 -> 2).
    * ``predict`` is a Linear layer mapping the 12 concatenated features to 1 value.

    All parameters are cast to float64 at the end of construction.
    """

    def __init__(self, hidden1):
        """Build the layers; ``hidden1`` is the width of the first two hidden layers of ``fc1``."""
        super().__init__()

        # widths of the x branch, consumed pairwise as (in_features, out_features)
        x_widths = [1, hidden1, hidden1, 50, 10]
        x_layers = []
        for n_in, n_out in zip(x_widths[:-1], x_widths[1:]):
            x_layers += [nn.Linear(n_in, n_out), nn.Tanh()]
        self.fc1 = nn.Sequential(*x_layers)

        self.fc2 = nn.Sequential(nn.Linear(1, 2), nn.Tanh())

        # read-out over the concatenation of both branches (10 + 2 features)
        self.predict = nn.Sequential(nn.Linear(x_widths[-1] + 2, 1))

        self.double()

    def forward(self, x, g):
        """Return ``predict`` applied to ``fc1(x)`` and ``fc2(g)`` concatenated along dim 1.

        Both inputs need a trailing dimension of size 1 (the first Linear of each
        branch has ``in_features=1``) and must agree on every other dimension so
        they can be concatenated.
        """
        x_feat = self.fc1(x)
        g_feat = self.fc2(g)
        return self.predict(torch.cat([x_feat, g_feat], dim=1))

In [650]:
# model, loss and optimizer for the training loop in the next cell
bnet = BaselineNet(500)
# The module is about to be optimised, so put it in training mode rather than
# eval mode. BaselineNet only contains Linear/Tanh layers, which behave the same
# in both modes, so this does not change the numbers -- it keeps the setup
# correct if dropout or batch-norm layers are added later.
bnet.train()
mse_loss = nn.MSELoss()
optimizer = optim.SGD(bnet.parameters(), lr=2e-4)

In [651]:
# Full-batch training on the first n_groups gamma groups.
# The inputs and targets do not change between iterations, so the tensors are
# built once here instead of being re-created on every pass through the loop.
n_groups = 2
x_train = torch.unsqueeze(torch.tensor(np.hstack(Xltrain[0:n_groups])), 1)
# Repeat each gamma by the size of its own group so the conditioning column
# stays aligned with x_train even when the groups differ in length.
g_train = torch.unsqueeze(
    torch.tensor(np.repeat(gamma[0:n_groups], [len(x) for x in Xltrain[0:n_groups]])), 1
)
a_train = torch.unsqueeze(torch.tensor(np.hstack(altrain[0:n_groups])), 1)

# NOTE(review): in the recorded run the loss plateaus and every prediction is the
# same value; inputs and targets are fed in unscaled here -- consider
# standardising them before training.
for e in range(2000):
    pred = bnet(x_train, g_train)
    step_loss = mse_loss(pred, a_train)

    # clear gradients left over from the previous step before back-propagating
    optimizer.zero_grad()
    step_loss.backward()
    # update with current step regression parameters
    optimizer.step()

    if e % 100 == 0:
        print ('epoch [{}], Loss: {:.2f}'.format(e, step_loss.item()))

epoch [0], Loss: 25861327.75
epoch [100], Loss: 18855595.18
epoch [200], Loss: 16333009.39
epoch [300], Loss: 15443805.50
epoch [400], Loss: 15130363.79
epoch [500], Loss: 15019876.52
epoch [600], Loss: 14980930.06
epoch [700], Loss: 14967201.54
epoch [800], Loss: 14962362.26
epoch [900], Loss: 14960656.42
epoch [1000], Loss: 14960055.10
epoch [1100], Loss: 14959843.13
epoch [1200], Loss: 14959768.41
epoch [1300], Loss: 14959742.06
epoch [1400], Loss: 14959732.77
epoch [1500], Loss: 14959729.50
epoch [1600], Loss: 14959728.34
epoch [1700], Loss: 14959727.92
epoch [1800], Loss: 14959727.78
epoch [1900], Loss: 14959727.72


In [652]:
# display the predictions left over from the last iteration of the training loop
pred

tensor([[3301.1692],
        [3301.1692],
        [3301.1692],
        ...,
        [3301.1692],
        [3301.1692],
        [3301.1692]], dtype=torch.float64, grad_fn=<AddmmBackward>)

In [649]:
# scratch: earlier experiments kept for reference
#r2_score(y_true=altrain[5], y_pred=pred.detach().numpy())
#bnet(torch.unsqueeze(torch.tensor(Xltrain[5][0:1]),1), torch.unsqueeze(torch.tensor(np.repeat(gamma[5],1)),1))
#torch.unsqueeze(torch.cat((torch.tensor(Xltrain[5]),torch.tensor(Xltrain[0])),0),1)
# shape of the gamma column when each of the first nine gamma values is repeated len(Xltrain[0]) times
torch.tensor(np.repeat(gamma[0:9], len(Xltrain[0]))).unsqueeze(1).shape

torch.Size([7200, 1])

In [481]:
# scratch: reshaping experiments on the first gamma group's training inputs
# NOTE(review): the recorded output (800) does not correspond to the tensor this cell displays -- presumably stale
#torch.unsqueeze(torch.tensor(Xltrain[0]),1)
#bnet.forward(torch.tensor(Xltrain[0]), torch.tensor(np.repeat(gamma[0], len(Xltrain[0]))))
x0 = torch.tensor(Xltrain[0])
# row-vector view; the value is discarded because it is not the last expression of the cell
x0.view(-1, x0.size(0)).shape
# column-vector form, displayed as the cell output
x0.unsqueeze(1)
#print(torch.unsqueeze(torch.linspace(-1,1,10),-1).shape)
#bnet.forward(torch.tensor(Xltrain[0][1]),torch.tensor(gamma[0]))
#torch.unsqueeze(torch.tensor(Xltrain[0]),0).shape

800

In [514]:
# inspect the weights of the final linear read-out layer
# NOTE(review): the recorded output lists 7 weights, while predict[0] as defined in
# BaselineNet is nn.Linear(10 + 2, 1) -- the output presumably predates the current
# class definition; re-run to refresh
bnet.predict[0].weight

Parameter containing:
tensor([[ 71.3519, -73.1773, -42.9027,  71.6761,  72.8176, -74.0732, -73.8141]],
       dtype=torch.float64, requires_grad=True)