In [None]:
# This section is the Neural Network implementation for the project

#From the project .pdf: 
#2.2 Neural networks
#•Apply a multilayer perceptron to this classification task.
#–What is a good choice of # layers?
#–What is a good choice of # hidden nodes in each layer?
#–What is a good choice of hidden activation functions?
#–What are good choices of learning rate and/or learning-rate schedule?
#–Do batch-norm and/or dropout help?
#•Apply a convolutional deep network to this classification task.
#–Similar questions as the multilayer perceptron, plus. . .
#–What is a good choice of # channels in each layer?
#–What is a good choice of kernel size?




In [1]:
# this block is for all imports for this implementation

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import torch
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
from torchvision.models import vgg16
from torch import tensor, cat
from sklearn.metrics import roc_auc_score, r2_score
from sklearn.linear_model import Lasso, LinearRegression
from sklearn.model_selection import cross_val_score, GridSearchCV, KFold
from sklearn import datasets, linear_model, preprocessing


In [2]:
# Read the raw training features, test features and training labels from CSV
Xtr_loadpath, Xts_loadpath, ytr_loadpath = 'Xtr.csv', 'Xts.csv', 'ytr.csv'

# Every file is comma-delimited; load them in the same order as the paths above
Xtr, Xts, ytr = (
    np.loadtxt(csv_path, delimiter=",")
    for csv_path in (Xtr_loadpath, Xts_loadpath, ytr_loadpath)
)

In [3]:
# standardize the features
# Note: It appears preprocessing.scale() does not work, so RobustScaler is used instead.

standard = preprocessing.RobustScaler()

# Fit the scaler on the training features ONLY, then reuse those statistics for the
# test features. Re-fitting on Xts would put the two sets on different scales, so the
# model would be evaluated on inputs distributed differently from what it was trained on.
Xtr_standardized = standard.fit_transform(Xtr)
Xts_standardized = standard.transform(Xts)
ytr_standardized = ytr  # labels are passed through unchanged

# save the standardized data (these files are consumed later by the validation script)
Xtr_savepath = 'Xtr_sklearn.csv'
Xts_savepath = 'Xts_sklearn.csv'
ytr_savepath = 'ytr_sklearn.csv'
yts_hat_savepath = 'yts_hat_neural.csv'

np.savetxt(Xtr_savepath, Xtr_standardized, delimiter=",")
np.savetxt(Xts_savepath, Xts_standardized, delimiter=",")
np.savetxt(ytr_savepath, ytr_standardized, delimiter=",")

In [4]:
# DATA LOADERS

from sklearn.model_selection import train_test_split
import torch.utils.data

# Hold out 20% of the standardized training data as a validation split.
# NOTE: this rebinds Xtr / Xts / ytr (and creates yts): from here on Xts and yts are
# the held-out split of the training data, not the array loaded from Xts.csv.
# A fixed random_state keeps the split (and the reported accuracies) reproducible.
Xtr, Xts, ytr, yts = train_test_split(
    Xtr_standardized,
    ytr_standardized,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

# Convert the arrays to float32 PyTorch tensors
Xtr_torch = torch.tensor(Xtr, dtype=torch.float32)
ytr_torch = torch.tensor(ytr, dtype=torch.float32)
Xts_torch = torch.tensor(Xts, dtype=torch.float32)
yts_torch = torch.tensor(yts, dtype=torch.float32)

batch_size = 10

# Create a training Dataset
train_ds = torch.utils.data.TensorDataset(Xtr_torch, ytr_torch)
# Creates a training DataLoader from this Dataset (reshuffled every epoch)
train_loader = torch.utils.data.DataLoader(train_ds, batch_size=batch_size, shuffle=True)

# Create a held-out Dataset
test_ds = torch.utils.data.TensorDataset(Xts_torch, yts_torch)
# Creates a held-out DataLoader from this Dataset (no shuffling needed for evaluation)
test_loader = torch.utils.data.DataLoader(test_ds, batch_size=batch_size)

In [29]:
# Neural Network Creation: hyper-parameters passed to the Net constructor
import torch.nn as nn

nin = Xtr_standardized.shape[1] # dimension of input data (number of feature columns)

nh = 4 # accepted by Net's constructor but not used by it; layer widths are hard-coded in the class
nout = 1 # single output unit: the network ends in a sigmoid and is trained with BCELoss

class Net(nn.Module):
    """Multilayer perceptron with a sigmoid output.

    Layer widths go nin -> 16 -> 32 -> 64 -> 128 -> 256 -> 512 -> 256 -> 128
    -> 64 -> 32 -> 16 -> nout. Every hidden layer is followed by a LeakyReLU;
    the output layer is followed by a sigmoid, so outputs lie in [0, 1].

    Only layers that take part in ``forward`` are created. Previously seven
    additional ``Linear`` layers (Hidden17, Hidden4, Hidden5, Hidden11,
    Hidden12, Hidden6, Hidden7) were allocated but never called; they received
    no gradients yet inflated the optimizer state, the ``state_dict`` and the
    traced model. A ``state_dict`` saved from that version has extra keys and
    must be loaded with ``strict=False``.

    Args:
        nin: number of input features.
        nh: unused; kept so existing ``Net(nin=..., nh=..., nout=...)`` calls
            keep working.
        nout: number of output units.
    """

    def __init__(self,nin,nh,nout):
        super(Net,self).__init__()
        self.sigmoid = nn.Sigmoid()
        self.relu = nn.LeakyReLU()
        # Defined for experimentation; not applied anywhere in forward().
        self.dropout = nn.Dropout(0.20)

        # Expanding half of the network
        self.input = nn.Linear(nin,16)
        self.Hidden13 = nn.Linear(16,32)
        self.Hidden14 = nn.Linear(32,64)
        self.Hidden1 = nn.Linear(64,128)
        self.Hidden2 = nn.Linear(128, 256)
        self.Hidden3 = nn.Linear(256, 512)
        # Contracting half of the network
        self.Hidden8 = nn.Linear(512, 256)
        self.Hidden9 = nn.Linear(256, 128)
        self.Hidden10 = nn.Linear(128, 64)
        self.Hidden15 = nn.Linear(64,32)
        self.Hidden16 = nn.Linear(32,16)
        self.output = nn.Linear(16,nout)

    def forward(self,x):
        """Map a ``(batch, nin)`` tensor to ``(batch, nout)`` sigmoid outputs."""
        hidden_layers = (
            self.input,
            self.Hidden13,
            self.Hidden14,
            self.Hidden1,
            self.Hidden2,
            self.Hidden3,
            self.Hidden8,
            self.Hidden9,
            self.Hidden10,
            self.Hidden15,
            self.Hidden16,
        )
        # Same sequence as before: linear layer followed by LeakyReLU, applied in order.
        for layer in hidden_layers:
            x = self.relu(layer(x))

        return self.sigmoid(self.output(x))

# Instantiate the MLP; `model` is the object used by the training, evaluation and export cells below
model = Net(nin=nin, nh=nh, nout=nout)

In [65]:
# Convolution Deep Network: hyper-parameters passed to the ConvNet constructor

import torch.nn as nn

nin = Xtr_standardized.shape[1] # dimension of input data (number of feature columns)

nh = 4 # accepted by ConvNet's constructor but not used by it; layer widths are hard-coded in the class
nout = 1 # single output unit: the network ends in a sigmoid

class ConvNet(nn.Module):
    """1-D convolutional front end followed by a fully connected classifier.

    Four ``Conv1d`` layers (nin -> 16 -> 8 -> 4 -> nin channels, kernel size 3,
    padding 1) are followed by linear layers of width 64 -> 128 -> 256 -> 512
    -> 256 -> 128 -> 64 -> nout with ReLU, dropout and batch-norm, and a final
    sigmoid.

    Fixes relative to the previous version:
      * ``super(Net, self).__init__()`` raised ``TypeError`` because ConvNet
        is not a subclass of Net; the class could not be instantiated.
      * Layers that were created but never used in ``forward`` were removed.

    NOTE(review): the input is transposed so that features act as channels and
    the convolution slides along the *batch* axis. Each sample's output then
    depends on its neighbours in the batch. This is kept as-is; confirm it is
    intended.
    NOTE(review): ``bn0`` and ``bn1`` are each applied at two different depths
    (after Hidden1/Hidden9 and after Hidden2/Hidden8), so those layers share
    batch-norm parameters and running statistics. Kept as-is; confirm.

    Args:
        nin: number of input features.
        nh: unused; kept for call compatibility.
        nout: number of output units.
    """

    def __init__(self,nin,nh,nout):
        super(ConvNet,self).__init__()
        self.sigmoid = nn.Sigmoid()
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.15)

        # Convolutional stack; padding=1 with kernel size 3 keeps the length unchanged
        self.Conv1 = nn.Conv1d(nin, 16, 3, padding=1)
        self.Conv2 = nn.Conv1d(16, 8, 3, padding=1)
        self.Conv3 = nn.Conv1d(8, 4, 3, padding=1)
        self.Conv4 = nn.Conv1d(4, nin, 3, padding=1)

        # Batch-norm layers, one per feature width used in forward()
        self.bn0 = nn.BatchNorm1d(128)
        self.bn1 = nn.BatchNorm1d(256)
        self.bn2 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(64)

        # Fully connected classifier
        self.input = nn.Linear(nin,64)
        self.Hidden1 = nn.Linear(64,128)
        self.Hidden2 = nn.Linear(128, 256)
        self.Hidden3 = nn.Linear(256, 512)
        self.Hidden8 = nn.Linear(512, 256)
        self.Hidden9 = nn.Linear(256, 128)
        self.Hidden10 = nn.Linear(128, 64)
        self.output = nn.Linear(64,nout)

    def forward(self,x):
        """Map a ``(batch, nin)`` tensor to ``(batch, nout)`` sigmoid outputs."""
        # (batch, nin) -> (1, nin, batch): an explicit singleton batch dimension
        # gives Conv1d the (N, C, L) layout; the result equals the unbatched call.
        x = x.permute(1,0).unsqueeze(0)
        x = self.relu(self.Conv1(x))
        x = self.relu(self.Conv2(x))
        x = self.relu(self.Conv3(x))
        x = self.relu(self.Conv4(x))
        # (1, nin, batch) -> (batch, nin) for the linear layers
        x = x.squeeze(0).permute(1, 0)

        x = self.relu(self.input(x))

        # Expanding layers: dropout is applied before batch-norm here
        x = self.relu(self.Hidden1(x))
        x = self.dropout(x)
        x = self.bn0(x)

        x = self.relu(self.Hidden2(x))
        x = self.dropout(x)
        x = self.bn1(x)

        x = self.relu(self.Hidden3(x))
        x = self.dropout(x)
        x = self.bn2(x)

        # Contracting layers: batch-norm is applied before dropout here
        x = self.relu(self.Hidden8(x))
        x = self.bn1(x)
        x = self.dropout(x)

        x = self.relu(self.Hidden9(x))
        x = self.bn0(x)
        x = self.dropout(x)

        x = self.relu(self.Hidden10(x))
        x = self.bn5(x)
        x = self.dropout(x)

        x = self.sigmoid(self.output(x))
        return x

# Uncomment the next line to train the convolutional network instead of the MLP
#model = ConvNet(nin=nin, nh=nh, nout=nout)

In [31]:
# Train the model, recording accuracy on the training loader and the held-out loader after every epoch
import torch.optim as optim
lr = 1e-3
criterion = nn.BCELoss()  # binary cross-entropy on the model's sigmoid output
opt = optim.Adam(model.parameters(), lr=lr)
epochs = 10
lrate = lr

basic_tr_accuracy = []
basic_ts_accuracy = []

for epoch in range(epochs):
    # ---- training pass ----
    model.train()  # training mode
    correct, total = 0, 0
    for train_iter, (x_batch, y_batch) in enumerate(train_loader):
        y_batch = y_batch.to(torch.float32)
        out = model(x_batch)

        # Flatten the (batch, 1) output so it lines up with the 1-D label vector
        loss = criterion(out.reshape(-1), y_batch)

        # Standard update: clear old gradients, back-propagate, take a step
        opt.zero_grad()
        loss.backward()
        opt.step()

        # Round the output to 0/1 and count how many match the labels
        guess = out.round().reshape(-1)
        correct += (guess == y_batch).sum().item()
        total += y_batch.size(0)

    acc = 100*(correct/total)  # accuracy over the whole epoch, in percent
    basic_tr_accuracy.append(acc)

    # ---- evaluation pass on the held-out loader ----
    model.eval()  # evaluation mode
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            guess = outputs.round().reshape(-1)
            correct += (guess == labels).sum().item()
            total += labels.size(0)

    basic_ts_accuracy.append(100*(correct/total))

    # Report this epoch's accuracies
    print('Epoch: {0:2d}   Train Accuracy: {1:.3f}%   Test Accuracy: {2:.3f}%'.format(epoch+1, basic_tr_accuracy[epoch], basic_ts_accuracy[epoch]))

print('Done!')

# Persist the trained weights
PATH = 'saved_basic_model.pt'
torch.save(model.state_dict(), PATH)

Epoch:  1   Train Accuracy: 85.900%   Test Accuracy: 86.100%
Epoch:  2   Train Accuracy: 86.562%   Test Accuracy: 85.850%
Epoch:  3   Train Accuracy: 86.475%   Test Accuracy: 86.400%
Epoch:  4   Train Accuracy: 86.188%   Test Accuracy: 86.500%
Epoch:  5   Train Accuracy: 86.487%   Test Accuracy: 86.200%
Epoch:  6   Train Accuracy: 86.675%   Test Accuracy: 86.100%
Epoch:  7   Train Accuracy: 86.625%   Test Accuracy: 85.900%
Epoch:  8   Train Accuracy: 86.900%   Test Accuracy: 86.600%
Epoch:  9   Train Accuracy: 87.087%   Test Accuracy: 86.200%
Epoch: 10   Train Accuracy: 87.237%   Test Accuracy: 85.250%
Done!


In [32]:
# compute the AUC on the held-out validation split
# (Xts / yts are the 20% split produced by train_test_split, not the rows the model was fit on,
#  so this is a validation AUC rather than a training AUC)
model.eval()  # make sure dropout / batch-norm layers, if any, run in inference mode
with torch.no_grad():
    predict = model(torch.Tensor(Xts)).detach().numpy().ravel()
auc = roc_auc_score(yts,predict)
print('validation auc: ',auc)

training auc:  0.9088695214912846


In [15]:
# save the model: you must use the .pth format for pytorch models!
model_savepath = 'model.pth'

# To save a PyTorch model, we first pass an input through the model,
# and then save the "trace".
# For this purpose, we can use any input.
# We will create a random input with the proper dimension.

# Tracing records the operations executed for the example input, so put the model in
# evaluation mode first; otherwise training-mode dropout / batch-norm would be baked
# into the trace (and batch-norm in training mode rejects a batch of one).
model.eval()

x = torch.randn(nin) # random input
x = x[None,:] # add singleton batch index
with torch.no_grad():
    traced_cell = torch.jit.trace(model, (x,))  # example inputs passed as a one-element tuple

# Now we save the trace
torch.jit.save(traced_cell, model_savepath)

In [8]:
# generate kaggle submission file using the validation script
!python {"validation.py " + model_savepath + " --Xts_path " + Xts_savepath + " --Xtr_path " + Xtr_savepath + " --yts_hat_path " + yts_hat_savepath } 

training auc =  0.9170509493075445
test label confidences saved in yts_hat_neural.csv
