# 401 Assignment 2

In [47]:
# imports
import random
import math
import numpy as np
import torch
import torchvision
import torchvision.transforms
import torch.optim
import torch.utils.data.sampler
import torch.nn.functional
from torch.autograd import Variable
import time

# settings

random.seed(55)

## 1. Gradient-based Learning with Tensors

### Defining R<sup>m</sup> to R<sup>n</sup> Function
 R<sup>m</sup> = 5 R<sup>n</sup> = 4 <br>
 

In [41]:
def create_output_tensor_noisy(input, weights):
    tensor_a = input#torch.from_numpy(input)
    tensor_b = torch.from_numpy(weights)
    output = tensor_a.t()@tensor_b.t()
    noise = np.random.normal(loc=0, scale=0.01, size=np.shape(input.size))
    output = output + torch.from_numpy(noise)
    return output

def mse(in1, in2):
    diff = in1 - in2
    return torch.sum(diff*diff) / diff.numel()

def model(x,w,b):
    return x.t() @ w.t() +b

def train(input, target, w, b, i, learning_rate=1e-2):
    prediction = model(input, w,b)
    loss = mse(prediction, target)
    if i % 100 == 0:
        print("epoch " + str(i) + " loss = " + str(loss))
    loss.backward(retain_graph=True)
    with torch.no_grad():
        w -= w.grad * learning_rate
        b -= b.grad * learning_rate
        w.grad.zero_()
        b.grad.zero_()

def test(input, target, w, b):
    prediction = model(input, w, b)
    print('Expected:')
    print(target.data.numpy())
    print('Prediction:')
    print(prediction.data.numpy())



eqtn = np.array([[3.7, 2.3, 1, 0, 5],
      [4.7, 8.1, 2.5,  0, 25],
      [2.7, 1, 9, 3.9, 0],
      [5.2, 6, 2, 0, 0]], dtype=np.float32)
tensor_in = torch.randn(5,100, requires_grad=True)
tensor_target = create_output_tensor_noisy(tensor_in, eqtn)     
weights = torch.randn(4,5, requires_grad=True)
biases = torch.randn(4, requires_grad=True)

for i in range(1,2001):
    train(input= tensor_in, target= tensor_target, w = weights, b = biases, i = i)

tensor_test_in = torch.randn(5,10, requires_grad=True)
tensor_test_target = create_output_tensor_noisy(tensor_test_in, eqtn)  
test(tensor_test_in, tensor_test_target, weights, biases)

print('**********')
print('known linear weights:')
print(eqtn)
print('****')
print('found linear weights:')
print(weights)
print('**********')


epoch 100 loss = tensor(88.3999, grad_fn=<DivBackward0>)
epoch 200 loss = tensor(32.7373, grad_fn=<DivBackward0>)
epoch 300 loss = tensor(12.3499, grad_fn=<DivBackward0>)
epoch 400 loss = tensor(4.7476, grad_fn=<DivBackward0>)
epoch 500 loss = tensor(1.8630, grad_fn=<DivBackward0>)
epoch 600 loss = tensor(0.7483, grad_fn=<DivBackward0>)
epoch 700 loss = tensor(0.3086, grad_fn=<DivBackward0>)
epoch 800 loss = tensor(0.1311, grad_fn=<DivBackward0>)
epoch 900 loss = tensor(0.0576, grad_fn=<DivBackward0>)
epoch 1000 loss = tensor(0.0261, grad_fn=<DivBackward0>)
epoch 1100 loss = tensor(0.0122, grad_fn=<DivBackward0>)
epoch 1200 loss = tensor(0.0059, grad_fn=<DivBackward0>)
epoch 1300 loss = tensor(0.0029, grad_fn=<DivBackward0>)
epoch 1400 loss = tensor(0.0015, grad_fn=<DivBackward0>)
epoch 1500 loss = tensor(0.0008, grad_fn=<DivBackward0>)
epoch 1600 loss = tensor(0.0004, grad_fn=<DivBackward0>)
epoch 1700 loss = tensor(0.0002, grad_fn=<DivBackward0>)
epoch 1800 loss = tensor(0.0001, grad

## 2. Transfer Learning


### MNIST

In [54]:
seed = 55
np.random.seed(seed)
torch.manual_seed(seed)

class MNISTNet(torch.nn.Module):
    def __init__(self):
        super(MNISTNet, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 20, 5, 1)
        self.pool = torch.nn.MaxPool2d(20, 50, 5, 1)
        self.fc1 = torch.nn.Linear(4*4*50, 500)
        self.fc2 = torch.nn.Linear(500, 10)
    
    def forward(self, x_val):
        x_val = torch.nn.functional.relu(self.conv1(x_val))
        x_val = torch.nn.functional.max_pool2d(x_val, 2, 2)
        x_val = self.pool(x_val)
        x_val = x_val.view(-1,4 * 4 * 50)
        x_val = torch.nn.functional.relu(self.fc1(x_val))
        x_val = self.fc2(x_val)
        return x_val

def train_net(mnist_model, device, train_loader, optimizer, epoch):
    mnist_model.train()
    for i, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = mnist_model(data)
        loss = torch.nn.functional.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if i % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, i * len(data), len(train_loader.dataset),
                100. * i / len(train_loader), loss.item()))

def test_net(mnist_model, device, test_loader):
    mnist_model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = mnist_model(data)
            test_loss += torch.nn.functional.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

train_batch_size = 64
test_batch_size = 100
epochs = 10
device = torch.device("cpu")
learning_rate = 1e-3
momentum = 0.5


train_loader = torch.utils.data.DataLoader(
            torchvision.datasets.MNIST('../data', train=True, download=True,
               transform=torchvision.transforms.transforms.Compose([
                   torchvision.transforms.transforms.ToTensor(),
                   torchvision.transforms.transforms.Normalize((0.1307,), (0.3081,))
               ])),
            batch_size=train_batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
            torchvision.datasets.MNIST('../data', train=False, transform=torchvision.transforms.transforms.Compose([
                torchvision.transforms.transforms.ToTensor(),
                torchvision.transforms.transforms.Normalize((0.1307,), (0.3081,))
            ])),
            batch_size=batch_size, shuffle=False)


mnist_model = MNISTNet().to(device)
optimizer = torch.optim.SGD(mnist_model.parameters(), lr=learning_rate, momentum=momentum)

for epoch in range(1, epochs + 1):
    train_net(mnist_model, device, train_loader, optimizer, epoch)
    test_net(mnist_model, device, test_loader)

RuntimeError: shape '[-1, 800]' is invalid for input of size 1280

### CIFAR

In [None]:
seed = 55
np.random.seed(seed)
torch.manual_seed(seed)
#The compose function allows for multiple transforms
#transforms.ToTensor() converts our PILImage to a tensor of shape (C x H x W) in the range [0,1]
#transforms.Normalize(mean,std) normalizes a tensor to a (mean, std) for (R, G, B)
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(), torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
train_set = torchvision.datasets.CIFAR10(root='./cifardata', train=True, download=True, transform=transform)
test_set = torchvision.datasets.CIFAR10(root='./cifardata', train=False, download=True, transform=transform)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

#Training
n_training_samples = 20000
train_sampler = torch.utils.data.sampler.SubsetRandomSampler(np.arange(n_training_samples, dtype=np.int64))

#Validation
n_val_samples = 5000
val_sampler = torch.utils.data.sampler.SubsetRandomSampler(np.arange(n_training_samples, n_training_samples + n_val_samples, dtype=np.int64))

#Test
n_test_samples = 5000
test_sampler = torch.utils.data.sampler.SubsetRandomSampler(np.arange(n_test_samples, dtype=np.int64))


def get_train_loader(batch):
    return torch.utils.data.DataLoader(train_set, batch_size=batch,sampler=train_sampler, num_workers=2)

test_loader = torch.utils.data.DataLoader(test_set, batch_size=4, sampler=test_sampler, num_workers=2)
val_loader = torch.utils.data.DataLoader(train_set, batch_size=128, sampler=val_sampler, num_workers=2)


#https://blog.algorithmia.com/convolutional-neural-nets-in-pytorch
class MyCNN(torch.nn.Module):
    def __init__(self):
        super(MyCNN, self).__init__()
        self.conv1 = torch.nn.Conv2d(3,18, kernel_size=3, stride=1, padding=1)
        self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = torch.nn.Linear(18*16*16, 64)
        self.fc2 = torch.nn.Linear(64, 10)
    
    def forward(self, x_val): 
        x_val = torch.nn.functional.relu(self.conv1(x_val))
        x_val = self.pool(x_val)
        x_val = x_val.view(-1, 18 * 16 * 16)
        x_val = torch.nn.functional.relu(self.fc1(x_val))
        x_val = self.fc2(x_val)
        return x_val
    
def output_size(in_size, kernel_size, stride, padding):
    output = int((in_size - kernel_size + 2*padding) / stride) + 1
    return output

def create_loss_optimiser(neural_net, learning_rate):
    loss = torch.nn.CrossEntropyLoss()
    optimiser = torch.optim.Adam(neural_net.parameters(), lr=learning_rate)
    
    return loss, optimiser

def trainNet(net, batch_size, n_epochs, learning_rate):
    
    #Print all of the hyperparameters of the training iteration:
    print("===== HYPERPARAMETERS =====")
    print("batch_size=", batch_size)
    print("epochs=", n_epochs)
    print("learning_rate=", learning_rate)
    print("=" * 30)
    
    #Get training data
    train_loader = get_train_loader(batch_size)
    n_batches = len(train_loader)
    
    #Create our loss and optimizer functions
    loss, optimizer = create_loss_optimiser(net, learning_rate)
    
    #Time for printing
    training_start_time = time.time()
    
    #Loop for n_epochs
    for epoch in range(n_epochs):
        
        running_loss = 0.0
        print_every = n_batches // 10
        start_time = time.time()
        total_train_loss = 0
        
        for i, data in enumerate(train_loader, 0):
            
            #Get inputs
            inputs, labels = data
            
            #Wrap them in a Variable object
            inputs, labels = torch.autograd.Variable(inputs), torch.autograd.Variable(labels)
            
            #Set the parameter gradients to zero
            optimizer.zero_grad()
            
            #Forward pass, backward pass, optimize
            outputs = net(inputs)
            loss_size = loss(outputs, labels)
            loss_size.backward()
            optimizer.step()
            
            #Print statistics
            running_loss += loss_size.item()#.data[0]
            total_train_loss += loss_size.item()#.data[0]

            #Print every 10th batch of an epoch
            if (i + 1) % (print_every + 1) == 0:
                print("Epoch {}, {:d}% \t train_loss: {:.2f} took: {:.2f}s".format(
                        epoch+1, int(100 * (i+1) / n_batches), running_loss / print_every, time.time() - start_time))
                #Reset running loss and time
                running_loss = 0.0
                start_time = time.time()
            
        #At the end of the epoch, do a pass on the validation set
        total_val_loss = 0
        for inputs, labels in val_loader:
            
            #Wrap tensors in Variables
            inputs, labels = Variable(inputs), Variable(labels)
            
            #Forward pass
            val_outputs = net(inputs)
            val_loss_size = loss(val_outputs, labels)
            total_val_loss += val_loss_size.item()#.data[0]
            
        print("Validation loss = {:.2f}".format(total_val_loss / len(val_loader)))
        
    print("Training finished, took {:.2f}s".format(time.time() - training_start_time))


CNN = MyCNN()
trainNet(CNN, batch_size=32, n_epochs=5, learning_rate=0.001)