In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pandas as pd
import numpy as np
from random import randint
import time
import utils

In [2]:
# Features are read from pickled pandas objects, labels from NumPy .npy files.
# The paths are relative, so all four files must sit in the working directory.
train_data = pd.read_pickle("Student Training Data")
test_data = pd.read_pickle("Student Testing Data")

train_label = np.load("Partial Logistic Acceptance Student Training Label.npy")
test_label = np.load("Partial Logistic Acceptance Student Test Label.npy")

In [3]:
# Cast features and labels to 64-bit floats before they are turned into tensors.
train_data, test_data = (frame.astype(np.double) for frame in (train_data, test_data))
train_label, test_label = (labels.astype(np.double) for labels in (train_label, test_label))

In [4]:
# Build the tensors with torch.tensor and an explicit dtype instead of the
# legacy typed constructors (torch.FloatTensor / torch.LongTensor).
# Features become float32 network inputs; labels become int64 class indices,
# which is the target dtype nn.CrossEntropyLoss expects.
train_data = torch.tensor(train_data.values, dtype=torch.float32)
train_label = torch.tensor(train_label, dtype=torch.long)
test_data = torch.tensor(test_data.values, dtype=torch.float32)
test_label = torch.tensor(test_label, dtype=torch.long)


In [5]:
# Sanity check: (number of training samples, number of features).
train_data.shape

torch.Size([40000, 9])

In [6]:
# Sanity check: (number of test samples, number of features).
test_data.shape

torch.Size([10000, 9])

In [7]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
# This replaces switching a commented-out line by hand.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


### Download the TRAINING SET (data+labels)

### Create a three-layer neural net WITH BIAS using pytorch (two hidden layers with ReLU activations, followed by a linear output layer). As usual, the weights are initialized to be small random numbers. Here the weights are not overwritten by hand; they are learned by SGD in the training loop below. Remember that mod.weight is a parameter object, not a tensor. To access the tensor inside the parameter you need to do mod.weight.data.

In [8]:
class three_layer_net(nn.Module):
    """Fully connected network: two ReLU hidden layers and a linear output layer.

    Args:
        input_size: number of input features per sample.
        hidden_size1: width of the first hidden layer.
        hidden_size2: width of the second hidden layer.
        output_size: number of scores produced per sample.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super().__init__()

        # All three affine maps keep their bias terms.
        self.layer1 = nn.Linear(input_size, hidden_size1, bias=True)
        self.layer2 = nn.Linear(hidden_size1, hidden_size2, bias=True)
        self.layer3 = nn.Linear(hidden_size2, output_size, bias=True)

    def forward(self, x):
        """Return the raw scores for a batch of inputs ``x``."""
        hidden = x
        for layer in (self.layer1, self.layer2):
            hidden = F.relu(layer(hidden))

        # No activation after the last layer: the scores are returned as-is.
        return self.layer3(hidden)

In [9]:
# 9 input features, two hidden layers of 500 units each, 2 output scores.
net = three_layer_net(input_size=9, hidden_size1=500, hidden_size2=500, output_size=2)
print(net)

three_layer_net(
  (layer1): Linear(in_features=9, out_features=500, bias=True)
  (layer2): Linear(in_features=500, out_features=500, bias=True)
  (layer3): Linear(in_features=500, out_features=2, bias=True)
)


In [10]:
# One scalar mean over every entry of the training tensor (all samples and all
# features pooled together), not one mean per feature.
mean = torch.mean(train_data)
print(mean)

tensor(8371.0430)


In [11]:
# One scalar standard deviation over every entry of the training tensor (all
# samples and all features pooled together), not one value per feature.
std = torch.std(train_data)
print(std)

tensor(46429.7305)


In [12]:
# Move the model and the two normalization constants onto the selected device.
net = net.to(device)
mean, std = mean.to(device), std.to(device)

In [13]:
# Minibatch size shared by the training loop and eval_on_test_set.
bs = 200

# Cross-entropy on the network scores, minimized with plain SGD.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01)

In [14]:
def eval_on_test_set():
    """Print the error rate of ``net`` over the whole test set.

    Reads the module-level ``net``, ``test_data``, ``test_label``, ``mean``,
    ``std``, ``bs`` and ``device``. The test set is processed in minibatches of
    at most ``bs`` samples with gradient tracking disabled. Nothing is returned.

    The number of samples and features is taken from ``test_data`` itself, and
    each batch error is weighted by its batch size, so the result stays correct
    when the test-set size is not a multiple of ``bs``.
    """
    num_samples = test_data.size(0)
    weighted_error = 0.0

    with torch.no_grad():

        for first in range(0, num_samples, bs):

            # extract the minibatch and send it to the device
            minibatch_data = test_data[first:first + bs].to(device)
            minibatch_label = test_label[first:first + bs].to(device)
            batch_size = minibatch_data.size(0)

            # flatten to (batch_size, num_features)
            inputs = minibatch_data.view(batch_size, -1)

            # subtract the mean and divide by the std, as in the training loop
            inputs = (inputs - mean) / std

            # feed it to the network
            scores = net(inputs)

            # assumes utils.get_error returns the batch's mean error rate as a
            # one-element tensor — TODO confirm against utils
            error = utils.get_error(scores, minibatch_label)

            # weight by batch size so a smaller final batch counts proportionally
            weighted_error += error.item() * batch_size

    # compute error rate on the full test set
    total_error = weighted_error / num_samples

    print( 'error rate on test set =', total_error*100 ,'percent')

In [15]:
# Train with minibatch SGD; after every epoch report the training loss/error
# and the test error.
num_epochs = 9
num_train = train_data.size(0)

start = time.time()

for epoch in range(num_epochs):

    running_loss = 0.0
    running_error = 0.0

    # visit the training samples in a fresh random order every epoch
    shuffled_indices = torch.randperm(num_train)

    for count in range(0, num_train, bs):

        # Set the gradients to zeros
        optimizer.zero_grad()

        # create a minibatch and send it to the device
        indices = shuffled_indices[count:count + bs]
        minibatch_data = train_data[indices].to(device)
        minibatch_label = train_label[indices].to(device)
        batch_size = minibatch_data.size(0)

        # flatten to (batch_size, num_features); the final batch may hold
        # fewer than bs samples
        inputs = minibatch_data.view(batch_size, -1)

        # subtract the mean and divide by the std
        inputs = (inputs - mean) / std

        # The inputs are deliberately NOT marked with requires_grad_(): only
        # the network parameters are optimized, and autograd tracks those
        # already, so input gradients would be computed for nothing.

        # forward the minibatch through the net
        scores = net(inputs)

        # Compute the average of the losses of the data points in the minibatch
        loss = criterion(scores, minibatch_label)

        # backward pass to compute the gradient of the loss w.r.t. every parameter
        loss.backward()

        # do one step of stochastic gradient descent
        optimizer.step()

        # START COMPUTING STATS
        # Weight by batch size so a smaller final batch counts proportionally.
        with torch.no_grad():
            running_loss += loss.item() * batch_size

            # assumes utils.get_error returns the batch's mean error rate as a
            # one-element tensor — TODO confirm against utils
            error = utils.get_error(scores, minibatch_label)
            running_error += error.item() * batch_size

    # compute stats for the full training set
    total_loss = running_loss / num_train
    total_error = running_error / num_train
    elapsed = time.time() - start

    print('epoch=',epoch, '\t time=', elapsed, '\t loss=', total_loss , '\t error=', total_error*100 ,'percent')
    eval_on_test_set()
    print(' ')

epoch= 0 	 time= 1.265125036239624 	 loss= 0.2111856495216489 	 error= 5.425000041723251 percent
error rate on test set = 3.680000424385071 percent
 
epoch= 1 	 time= 2.536781072616577 	 loss= 0.10507767787203193 	 error= 3.4650001227855687 percent
error rate on test set = 3.539999961853027 percent
 
epoch= 2 	 time= 3.6978728771209717 	 loss= 0.0896768094971776 	 error= 3.4124997258186336 percent
error rate on test set = 3.459999680519104 percent
 
epoch= 3 	 time= 4.886729001998901 	 loss= 0.08432191217318177 	 error= 3.442499876022339 percent
error rate on test set = 3.4699997901916504 percent
 
epoch= 4 	 time= 6.085116863250732 	 loss= 0.0819368288293481 	 error= 3.417499721050263 percent
error rate on test set = 3.459999680519104 percent
 
epoch= 5 	 time= 7.34667706489563 	 loss= 0.08075507368892432 	 error= 3.4174997508525844 percent
error rate on test set = 3.4699997901916504 percent
 
epoch= 6 	 time= 8.512911081314087 	 loss= 0.08009386810474098 	 error= 3.429999887943268 pe