# 401 Assignment 2

In [14]:
# imports
import random
import numpy as np
import torch
import torchvision
import torchvision.transforms
import torch.optim
import torch.utils.data.sampler
import torch.nn.functional


# settings

random.seed(55)

## 1. Gradient-based Learning with Tensors

### Defining R<sup>5</sup> to R<sup>4</sup> Function
The definition of the function with noise added is done in create_output_tensor_noisy. This takes a tensor input of
with many different 5 value tuples and returns the output for all the tuples passed in.
The R<sup>5</sup> to R<sup>4</sup> is defined by multiplying the input with a predefined weight matrix to apply
a linear change.
 

In [15]:
def create_output_tensor_noisy(input, weights):
    tensor_a = input#torch.from_numpy(input)
    tensor_b = torch.from_numpy(weights)
    output = tensor_a.t()@tensor_b.t()
    noise = np.random.normal(loc=0, scale=0.01, size=np.shape(input.size))
    output = output + torch.from_numpy(noise)
    return output

### Training
We then train the network using tenor operations, our prediction / model is defined as transpose of input (x) matrix multiplied by
the transpose of our current weight matrix then the addition of our biases.
We calculate our loss using Mean Square Error (mse).


In [16]:
def mse(in1, in2):
    diff = in1 - in2
    return torch.sum(diff*diff) / diff.numel()

def model(x,w,b):
    return x.t() @ w.t() +b

def train(input, target, w, b, i, learning_rate=1e-2):
    prediction = model(input, w,b)
    loss = mse(prediction, target)
    if i % 100 == 0:
        print("epoch " + str(i) + " loss = " + str(loss.item()))
    loss.backward(retain_graph=True)
    with torch.no_grad():
        w -= w.grad * learning_rate
        b -= b.grad * learning_rate
        w.grad.zero_()
        b.grad.zero_()

def test(input, target, w, b):
    prediction = model(input, w, b)
    print('Expected:')
    print(target.data.numpy())
    print('Prediction:')
    print(prediction.data.numpy())
    return prediction



eqtn = np.array([[3.7, 2.3, 1, 0, 5],
      [4.7, 8.1, 2.5,  0, 25],
      [2.7, 1, 9, 3.9, 0],
      [5.2, 6, 2, 0, 0]], dtype=np.float32)
tensor_in = torch.randn(5,100, requires_grad=True)
tensor_target = create_output_tensor_noisy(tensor_in, eqtn)     
weights = torch.randn(4,5, requires_grad=True)
biases = torch.randn(4, requires_grad=True)
q1_epochs = 2000
for i in range(1,q1_epochs+1):
    train(input= tensor_in, target= tensor_target, w = weights, b = biases, i = i)


tensor_test_in = torch.randn(5,5, requires_grad=True)
tensor_test_target = create_output_tensor_noisy(tensor_test_in, eqtn)  
q1_test_pred = test(tensor_test_in, tensor_test_target, weights, biases)
print('**********')
print('Final MSE:')
print(mse(q1_test_pred, tensor_test_target).item())

print('**********')
print('Known Linear Weights:')
print(eqtn)
print('****')
print('Found Linear Weights:')
print(weights)
print('**********')


epoch 100 loss = 87.98234558105469
epoch 200 loss = 36.75639724731445
epoch 300 loss = 16.071008682250977
epoch 400 loss = 7.328232288360596
epoch 500 loss = 3.4663479328155518
epoch 600 loss = 1.6904804706573486
epoch 700 loss = 0.8449733257293701
epoch 800 loss = 0.43064481019973755
epoch 900 loss = 0.2228463590145111
epoch 1000 loss = 0.1166977658867836
epoch 1100 loss = 0.06168539449572563
epoch 1200 loss = 0.0328509584069252
epoch 1300 loss = 0.01759958826005459
epoch 1400 loss = 0.009475289843976498
epoch 1500 loss = 0.00512198219075799
epoch 1600 loss = 0.0027779724914580584
epoch 1700 loss = 0.0015110061503946781
epoch 1800 loss = 0.000823843467514962
epoch 1900 loss = 0.00045010432950221
epoch 2000 loss = 0.00024632096756249666
Expected:
[[ 5.861115   10.268809   -1.5568438  10.771001  ]
 [ 1.0745783   8.165716   -8.592041   -5.300141  ]
 [ 3.180463   25.95541    -9.765273   -5.9001365 ]
 [ 9.077727   20.789263   10.798818    8.221809  ]
 [ 3.4222834  -0.77491516 24.398956   1

## 2. Transfer Learning


### MNIST

Transfer learning starts with the creation of a CNN for MNIST dataset, where we are able to train it to 99% correctness
in one epoch of the 60,000 point dataset. This is a large increase from the 6% accuracy it has before training.

Our CNN is defined in the class MNISTCNN which extends pytorchs Module class, provinding the underlying logic for many
CNN opertaions, allowing us to better define parameters such as inputs, layers and the foward function.

In [17]:
seed = 55
np.random.seed(seed)
torch.manual_seed(seed)

class MNISTCNN(torch.nn.Module):
    def __init__(self):
        super(MNISTCNN,self).__init__()
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(1,32, kernel_size=5, stride=1, padding=2),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(32,64, kernel_size=5, stride=1, padding=2),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.drop_out = torch.nn.Dropout()
        self.fc1 = torch.nn.Linear(7 * 7 * 64, 1000)
        self.fc2 = torch.nn.Linear(1000, 10)
        
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = x.reshape(x.size(0), -1)
        x = self.drop_out(x)
        x = self.fc1(x)
        x = self.fc2(x)
        return x

 Next we train and test the CNN

In [18]:
def train_net(mnist_model, device, train_loader, optimizer, loss_func, epoch):
    mnist_model.train()
    for i, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = mnist_model(data)
        loss = loss_func(output, target)
        loss.backward()
        optimizer.step()
        if i % 200 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, i * len(data), len(train_loader.dataset),
                100. * i / len(train_loader), loss.item()))
    return loss.item()

def test_net(mnist_model, device, test_loader, loss_func):
    mnist_model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = mnist_model(data)
            _, pred = torch.max(output.data,1)
            test_loss += loss_func(output, target).item() # sum up batch loss
            correct += pred.eq(target).sum().item()
    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return correct / len(test_loader.dataset)

batch_size = 64
train_batch_size = 64
test_batch_size = 100
mnist_epochs = 1
device = torch.device("cpu")
learning_rate = 1e-3

print("Training model with:")
print("batch size = " + str(train_batch_size))
print("learning rate = " + str(learning_rate))
print("over " + str(mnist_epochs) + " epochs")



train_loader = torch.utils.data.DataLoader(
            torchvision.datasets.MNIST('./data', train=True, download=True,
               transform=torchvision.transforms.transforms.Compose([
                   torchvision.transforms.transforms.ToTensor(),
                   torchvision.transforms.transforms.Normalize((0.1307,), (0.3081,))
               ])),
            batch_size=train_batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
            torchvision.datasets.MNIST('./data', train=False, transform=torchvision.transforms.transforms.Compose([
                torchvision.transforms.transforms.ToTensor(),
                torchvision.transforms.transforms.Normalize((0.1307,), (0.3081,))
            ])),
            batch_size=batch_size, shuffle=False)


mnist_model = MNISTCNN().to(device)
optimizer = torch.optim.Adam(mnist_model.parameters(), lr=learning_rate)
loss_func = torch.nn.CrossEntropyLoss()
print("Before Training")
test_net(mnist_model,device,test_loader,loss_func)
print("Training model with:")
print("batch size = " + str(train_batch_size))
print("learning rate = " + str(learning_rate))
print("over " + str(mnist_epochs) + " epochs")
for epoch in range(1, mnist_epochs + 1):
    train_net(mnist_model, device, train_loader, optimizer, loss_func, epoch)
    test_net(mnist_model, device, test_loader, loss_func)

Training model with:
batch size = 64
learning rate = 0.001
over 1 epochs
Before Training

Test set: Average loss: 0.0364, Accuracy: 623/10000 (6%)

Training model with:
batch size = 64
learning rate = 0.001
over 1 epochs

Test set: Average loss: 0.0006, Accuracy: 9878/10000 (99%)



### CIFAR10
To complete transfer learning we have to create a compatible CNN for our second data set, CIFAR10. This requires some
form of transformation of the data as the data sets include images of different sizes and channels.
This transformation can be seen in the transform variable below, where we use the Compose function to add several 
transforms including resizing the image and making it greyscale  so it has the same input parameters as the MNIST
dataset.

In [19]:
seed = 55
np.random.seed(seed)
torch.manual_seed(seed)
device = torch.device("cpu")
cifar_epoch = 10
transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize(size=28),
    torchvision.transforms.Grayscale(num_output_channels=1),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.1307,), (0.3081,))])#((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
train_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

#Training
n_training_samples = 40000
train_sampler = torch.utils.data.sampler.SubsetRandomSampler(np.arange(n_training_samples, dtype=np.int64))

#Validation
n_val_samples = 5000
val_sampler = torch.utils.data.sampler.SubsetRandomSampler(np.arange(n_training_samples, n_training_samples + n_val_samples, dtype=np.int64))

#Test
n_test_samples = 5000
test_sampler = torch.utils.data.sampler.SubsetRandomSampler(np.arange(n_test_samples, dtype=np.int64))


def get_train_loader(batch):
    return torch.utils.data.DataLoader(train_set, batch_size=batch,sampler=train_sampler, num_workers=2)

cifar_test_loader = torch.utils.data.DataLoader(test_set, batch_size=4, sampler=test_sampler, num_workers=2)
cifar_val_loader = torch.utils.data.DataLoader(train_set, batch_size=128, sampler=val_sampler, num_workers=2)
cifar_train_loader = get_train_loader(batch_size)


Files already downloaded and verified
Files already downloaded and verified


Once we have loaded the data sets in the correct form we can move onto training the model using our existing CNN
class. We do this with both a new model and one that has had its first layer transferred from our earlier MNIST model
so that we can compare results.

In [20]:
cifar_model = MNISTCNN().to(device)
cifar_optimizer = torch.optim.Adam(cifar_model.parameters(), lr=learning_rate)
cifar_loss_func = torch.nn.CrossEntropyLoss()


cifar_model_transfer = MNISTCNN().to(device)
cifar_model_transfer.layer1 = mnist_model.layer1
cifar_transfer_optimizer = torch.optim.Adam(cifar_model_transfer.parameters(), lr=learning_rate)
cifar_transfer_loss_func = torch.nn.CrossEntropyLoss()

cifar_acc = []
for epoch in range(1, cifar_epoch + 1):
    #test_net(cifar_model,device,test_loader,loss_func)
    train_net(cifar_model, device, cifar_train_loader, cifar_transfer_optimizer, loss_func, epoch)
    cifar_acc.append(test_net(cifar_model, device, cifar_test_loader, loss_func))

cifar_transfer_acc = []
for epoch in range(1, cifar_epoch + 1):
    train_net(cifar_model_transfer, device, cifar_train_loader, cifar_transfer_optimizer, loss_func, epoch)
    cifar_transfer_acc.append(test_net(cifar_model_transfer, device, cifar_test_loader, loss_func))
    


Test set: Average loss: 0.2880, Accuracy: 503/10000 (5%)


Test set: Average loss: 0.1589, Accuracy: 2817/10000 (28%)


Test set: Average loss: 0.2880, Accuracy: 503/10000 (5%)


Test set: Average loss: 0.1482, Accuracy: 3000/10000 (30%)


Test set: Average loss: 0.2880, Accuracy: 503/10000 (5%)


Test set: Average loss: 0.1412, Accuracy: 3063/10000 (31%)


Test set: Average loss: 0.2880, Accuracy: 503/10000 (5%)


Test set: Average loss: 0.1421, Accuracy: 3043/10000 (30%)


Test set: Average loss: 0.2880, Accuracy: 503/10000 (5%)


Test set: Average loss: 0.1309, Accuracy: 3233/10000 (32%)


Test set: Average loss: 0.2880, Accuracy: 503/10000 (5%)


Test set: Average loss: 0.1264, Accuracy: 3272/10000 (33%)


Test set: Average loss: 0.2880, Accuracy: 503/10000 (5%)


Test set: Average loss: 0.1323, Accuracy: 3182/10000 (32%)


Test set: Average loss: 0.2880, Accuracy: 503/10000 (5%)


Test set: Average loss: 0.1222, Accuracy: 3341/10000 (33%)


Test set: Average loss: 0.2880, Accurac

The graph below shows the difference in accuracy over the two CNNs

In [None]:
# GRAPH HERE    
