# 401 Assignment 2

In [1]:
# imports
import random
import math
import numpy as np
import torch
import torchvision
import torchvision.transforms
import torch.optim
import torch.utils.data.sampler
import torch.nn.functional
from torch.autograd import Variable
import time

# settings

random.seed(2)

## 1. Gradient-based Learning with Tensors

### Defining R<sup>m</sup> to R<sup>n</sup> Function
 R<sup>m</sup> = 5 R<sup>n</sup> = 4 <br>
 all values m such that 1 <= m <= 10 <br>
 
 N1 = MA * (MB + MC) <br>
 N2 = MB * (MC + MD) <br>
 N3 = MC * (MD + ME) <br>
 N4 = MD * (ME + MA) <br>
 
 

In [41]:
def m_to_n_list_noisy(m):
    l = []
    for a in m:
        l.append(add_noise(m_to_n(a)))
    return l
    
def m_to_n(m):
    n = [
        m[0]*(m[1]+m[2]), 
        m[1]*(m[2]+m[3]), 
        m[2]*(m[3]+m[4]), 
        m[3]*(m[4]+m[0])
    ]
    return n

def add_noise(l):
    for a in range(len(l)):
        if random.random() > 0.5:
            l[a] = l[a] ** ( 1 + random.random()/10)
        else:
            l[a] = l[a] ** ( 1 - random.random()/10)
    return l
            
def generate_m_set(size_of_set, size_of_m):
    r = []
    for a in range(size_of_set):
        r.append([])
        for b in range(size_of_m):
            r[a].append(random.randint(1, 10))
    return r

def get_m_n(size, m):
    m = generate_m_set(size, m)
    n = m_to_n_list_noisy(m)
    return m, n

# m = generate_m_set(100, 5)
# print(m)
# n = m_to_n_list_noisy(m)
# print(n)

### Numpy Attempt

In [42]:
size = 100
train_size = math.floor(size/5*3)
x, y = get_m_n(size, 5)
x_train, y_train = x[:train_size], y[:train_size]
x_test, y_test = x[train_size:], y[train_size:]

np.random.seed(42)
a = np.random.randn(1)
b = np.random.randn(1)
learning_rate = 1e-1
epochs = 100

for i in range(epochs):
    yhat = a + b * x_train
    
    error = (y_train - yhat)
    
    loss = (error ** 2).mean()
    
    a_grad = -2 * error.mean()
    b_grad = -2 * (x_train * error).mean()
    
    a = a - learning_rate * a_grad
    b = b - learning_rate * b_grad
    
print(a, b)
    
    
# Sanity Check: do we get the same results as our gradient descent?
from sklearn.linear_model import LinearRegression
linr = LinearRegression()
linr.fit(x_train, y_train)
print(linr.intercept_, linr.coef_[0])

ValueError: operands could not be broadcast together with shapes (60,4) (60,5) 

### Torch Attempt

In [None]:
import torch
import torch.optim as optim
import torch.nn as nn
from torchviz import make_dot
device = 'cpu'
size = 100
train_size = math.floor(size/5*3)

x, y = get_m_n(size, 5)
x_train, y_train = x[:train_size], y[:train_size]
x_test, y_test = x[train_size:], y[train_size:]

from sklearn.linear_model import LinearRegression
linr = LinearRegression()
linr.fit(x_train, y_train)
print(linr.intercept_, linr.coef_[0])

x_train_tensor = torch.from_numpy(np.asarray(x_train)).float().to(device)
y_train_tensor = torch.from_numpy(np.asarray(y_train)).float().to(device)

a = torch.randn(1, requires_grad=True, dtype=torch.float, device=device)
b = torch.randn(1, requires_grad=True, dtype=torch.float, device=device)

epochs = 1000
for e in range(epochs):
    yhat = (a + b) * x_train_tensor
    error = y_train_tensor - yhat
    loss = (error ** 2).mean()
    
    loss.backward()
    
    with torch.no_grad():
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        
    a.grad.zero_()
    b.grad.zero_()
    
print(a,b)



## 2. Transfer Learning

### CIFAR

In [6]:
seed = 55
np.random.seed(seed)
torch.manual_seed(seed)
#The compose function allows for multiple transforms
#transforms.ToTensor() converts our PILImage to a tensor of shape (C x H x W) in the range [0,1]
#transforms.Normalize(mean,std) normalizes a tensor to a (mean, std) for (R, G, B)
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(), torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
train_set = torchvision.datasets.CIFAR10(root='./cifardata', train=True, download=True, transform=transform)
test_set = torchvision.datasets.CIFAR10(root='./cifardata', train=False, download=True, transform=transform)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

#Training
n_training_samples = 20000
train_sampler = torch.utils.data.sampler.SubsetRandomSampler(np.arange(n_training_samples, dtype=np.int64))

#Validation
n_val_samples = 5000
val_sampler = torch.utils.data.sampler.SubsetRandomSampler(np.arange(n_training_samples, n_training_samples + n_val_samples, dtype=np.int64))

#Test
n_test_samples = 5000
test_sampler = torch.utils.data.sampler.SubsetRandomSampler(np.arange(n_test_samples, dtype=np.int64))


def get_train_loader(batch):
    return torch.utils.data.DataLoader(train_set, batch_size=batch,sampler=train_sampler, num_workers=2)

test_loader = torch.utils.data.DataLoader(test_set, batch_size=4, sampler=test_sampler, num_workers=2)
val_loader = torch.utils.data.DataLoader(train_set, batch_size=128, sampler=val_sampler, num_workers=2)


#https://blog.algorithmia.com/convolutional-neural-nets-in-pytorch
class MyCNN(torch.nn.Module):
    def __init__(self):
        super(MyCNN, self).__init__()
        
        self.conv1 = torch.nn.Conv2d(3,18, kernel_size=3, stride=1, padding=1)
        self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        
        self.fc1 = torch.nn.Linear(18*16*16, 64)
        
        self.fc2 = torch.nn.Linear(64, 10)
    
    def forward(self, x_val):
        
        x_val = torch.nn.functional.relu(self.conv1(x_val))
        
        x_val = self.pool(x_val)
        
        x_val = x_val.view(-1, 18 * 16 * 16)
        
        x_val = torch.nn.functional.relu(self.fc1(x_val))

        x_val = self.fc2(x_val)
        
        return x_val
    
def output_size(in_size, kernel_size, stride, padding):
    output = int((in_size - kernel_size + 2*padding) / stride) + 1
    return output

def create_loss_optimiser(neural_net, learning_rate):
    loss = torch.nn.CrossEntropyLoss()
    optimiser = torch.optim.Adam(neural_net.parameters(), lr=learning_rate)
    
    return loss, optimiser

def trainNet(net, batch_size, n_epochs, learning_rate):
    
    #Print all of the hyperparameters of the training iteration:
    print("===== HYPERPARAMETERS =====")
    print("batch_size=", batch_size)
    print("epochs=", n_epochs)
    print("learning_rate=", learning_rate)
    print("=" * 30)
    
    #Get training data
    train_loader = get_train_loader(batch_size)
    n_batches = len(train_loader)
    
    #Create our loss and optimizer functions
    loss, optimizer = create_loss_optimiser(net, learning_rate)
    
    #Time for printing
    training_start_time = time.time()
    
    #Loop for n_epochs
    for epoch in range(n_epochs):
        
        running_loss = 0.0
        print_every = n_batches // 10
        start_time = time.time()
        total_train_loss = 0
        
        for i, data in enumerate(train_loader, 0):
            
            #Get inputs
            inputs, labels = data
            
            #Wrap them in a Variable object
            inputs, labels = torch.autograd.Variable(inputs), torch.autograd.Variable(labels)
            
            #Set the parameter gradients to zero
            optimizer.zero_grad()
            
            #Forward pass, backward pass, optimize
            outputs = net(inputs)
            loss_size = loss(outputs, labels)
            loss_size.backward()
            optimizer.step()
            
            #Print statistics
            running_loss += loss_size.item()#.data[0]
            total_train_loss += loss_size.item()#.data[0]

            #Print every 10th batch of an epoch
            if (i + 1) % (print_every + 1) == 0:
                print("Epoch {}, {:d}% \t train_loss: {:.2f} took: {:.2f}s".format(
                        epoch+1, int(100 * (i+1) / n_batches), running_loss / print_every, time.time() - start_time))
                #Reset running loss and time
                running_loss = 0.0
                start_time = time.time()
            
        #At the end of the epoch, do a pass on the validation set
        total_val_loss = 0
        for inputs, labels in val_loader:
            
            #Wrap tensors in Variables
            inputs, labels = Variable(inputs), Variable(labels)
            
            #Forward pass
            val_outputs = net(inputs)
            val_loss_size = loss(val_outputs, labels)
            total_val_loss += val_loss_size.item()#.data[0]
            
        print("Validation loss = {:.2f}".format(total_val_loss / len(val_loader)))
        
    print("Training finished, took {:.2f}s".format(time.time() - training_start_time))


CNN = MyCNN()
trainNet(CNN, batch_size=32, n_epochs=5, learning_rate=0.001)

Files already downloaded and verified
Files already downloaded and verified
===== HYPERPARAMETERS =====
batch_size= 32
epochs= 5
learning_rate= 0.001
Epoch 1, 10% 	 train_loss: 2.02 took: 3.70s
Epoch 1, 20% 	 train_loss: 1.79 took: 1.20s
Epoch 1, 30% 	 train_loss: 1.68 took: 1.21s
Epoch 1, 40% 	 train_loss: 1.64 took: 1.23s
Epoch 1, 50% 	 train_loss: 1.57 took: 1.21s
Epoch 1, 60% 	 train_loss: 1.51 took: 1.19s
Epoch 1, 70% 	 train_loss: 1.48 took: 1.21s
Epoch 1, 80% 	 train_loss: 1.46 took: 1.26s
Epoch 1, 90% 	 train_loss: 1.44 took: 1.21s
Validation loss = 1.34
Epoch 2, 10% 	 train_loss: 1.29 took: 3.43s
Epoch 2, 20% 	 train_loss: 1.32 took: 1.21s
Epoch 2, 30% 	 train_loss: 1.32 took: 1.27s
Epoch 2, 40% 	 train_loss: 1.31 took: 1.19s
Epoch 2, 50% 	 train_loss: 1.30 took: 1.23s
Epoch 2, 60% 	 train_loss: 1.27 took: 1.20s
Epoch 2, 70% 	 train_loss: 1.26 took: 1.26s
Epoch 2, 80% 	 train_loss: 1.27 took: 1.20s
Epoch 2, 90% 	 train_loss: 1.27 took: 1.19s
Validation loss = 1.27
Epoch 3, 10%