In [None]:
import numpy as np
import torch
import torch.nn as nn
from torch.nn import ReLU
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

from utils import *

# Exercise 3
## Loss Functions

### Implement the Mean Squared Error loss and the Cross Entropy loss in the cells below.

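Recall the formula for calculating the mean squared error from lecture, where $y_i$ is the true value, $\hat{y}_i$ is the predicted value, and $n$ is the number of elements:

$$MSE = \frac{\sum\limits_{i = 1}^{n} (y_i - \hat{y}_i)^2}{n}$$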


In [None]:
class MSE(nn.Module):
  """Mean squared error loss: the mean of the squared element-wise differences."""

  # The computation lives in `forward` rather than an overridden `__call__`, so that
  # nn.Module.__call__ (and any registered hooks) keep working; instances are still
  # invoked as `layer(y_true, y_pred)`.
  def forward(self, y_true, y_pred):
    """Return the mean squared error between `y_true` and `y_pred`.

    Args:
      y_true: tensor of target values.
      y_pred: tensor of predicted values; must be broadcastable against `y_true`.

    Returns:
      A scalar (0-dim) tensor holding mean((y_true - y_pred) ** 2).
    """
    # ================================= #
    #          Your code here           #
    # ================================= #
    out = torch.mean((y_true - y_pred) ** 2)
    # ================================= #

    return out

In [None]:
# Fix the RNG seed so the random tensors below are identical on every run
torch.manual_seed(0)

# Draw the targets first and the predictions second (the order matters for the seeded RNG)
y_true = torch.randn(10)
y_pred = torch.randn(10)
print("True data", y_true)
print("Predicted data", y_pred)

# Build the loss module and evaluate it on the sample pair
layer = MSE()
out = layer(y_true, y_pred)
print("Result", out)

In [None]:
# Pass the MSE class itself (not an instance) to the checker.
# NOTE(review): `check_loss` is not defined in this notebook; presumably it comes from `from utils import *` — confirm.
check_loss(MSE)

# Exercise 4

Hint: After `backward()` has been called, the gradients are stored in `self.matrix.grad` and `self.bias.grad`.

In [None]:
class MatrixLayer(nn.Module):
  """Affine layer computing `x @ matrix + bias`, with a manual gradient-descent update."""

  def __str__(self):
    """Return a short description of the form `MatrixLayer(<inputs>, <outputs>)`."""
    return f"MatrixLayer({self.input_size}, {self.output_size})"

  def __init__(self, input_size, output_size):
    """Create the layer with normally distributed random weights and bias.

    Args:
      input_size: size of the last dimension of the input.
      output_size: size of the last dimension of the output.
    """
    super().__init__()
    self.input_size = input_size
    self.output_size = output_size
    # Weight has shape (input_size, output_size) so inputs are multiplied from the left.
    self.matrix = nn.Parameter(torch.randn(input_size, output_size))
    self.bias = nn.Parameter(torch.randn(output_size))

  def forward(self, x):
    """Return `x @ matrix + bias`; the last dimension of `x` must equal `input_size`."""
    return torch.matmul(x, self.matrix) + self.bias

  def update(self, learning_rate):
    """Apply one gradient-descent step to the parameters and clear their gradients.

    Each parameter `p` with a populated gradient becomes `p - learning_rate * p.grad`.
    Parameters whose gradient is `None` (no backward pass since the last update)
    are left unchanged instead of raising.

    Args:
      learning_rate: step size that scales the gradient.
    """
    # no_grad: the in-place parameter edits must not be recorded by autograd.
    with torch.no_grad():
      # ================================= #
      #          Your code here           #
      # ================================= #
      for param in (self.matrix, self.bias):
        if param.grad is not None:
          param -= learning_rate * param.grad
      # ================================= #
      # Drop the gradients so the next backward pass starts from scratch
      # rather than accumulating into stale values.
      self.matrix.grad = None
      self.bias.grad = None

In [None]:
# Pass the MatrixLayer class itself (not an instance) to the checker.
# NOTE(review): `check_grad` is not defined in this notebook; presumably it comes from `from utils import *` — confirm.
check_grad(MatrixLayer)

# Exercise 5

## We are going to download the MNIST dataset.

In [None]:
# Training hyperparameters
num_epochs = 10
batch_size = 8
learning_rate = 1e-3

# Convert each image to a tensor, then normalize it as (x - 0.5) / 0.5.
# Normalize expects one mean/std entry per channel, so one-element tuples are passed:
# `(0.5)` without the trailing comma is just the float 0.5, not a sequence.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5,), (0.5,))])

# Download (if not already present under ./data) the MNIST train and test splits
mnist_trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
mnist_testset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Shuffle only the training data; keep the test order fixed
trainloader = torch.utils.data.DataLoader(mnist_trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(mnist_testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)
# Human-readable class names: the digits "0" through "9"
classes = [str(i) for i in range(10)]

## Visualize a batch of sample images from the dataset

In [None]:
# get some random training images (the training loader shuffles)
dataiter = iter(trainloader)
# Use the built-in next(): the iterator's `.next()` method is not available in
# current PyTorch releases and raises AttributeError there.
images, labels = next(dataiter)

# show images
imshow(torchvision.utils.make_grid(images))
# print labels; iterate over the labels actually returned so a batch shorter
# than batch_size cannot cause an IndexError
print(' '.join('%5s' % classes[label] for label in labels.tolist()))

## Create a model that has a matrix layer with 784 inputs and 128 outputs, followed by a ReLU activation, followed by another matrix layer with 128 inputs and 10 outputs.

Hint: Put your layers in the variable `layers`

In [None]:
# Seed before constructing the layers so their random initial weights are reproducible
torch.manual_seed(0)
# ================================= #
#          Your code here           #
# ================================= #
# 784 -> 128 matrix layer, ReLU, then 128 -> 10 matrix layer (one output per digit class)
layers = [
    MatrixLayer(784, 128),
    ReLU(),
    MatrixLayer(128, 10),
]
# ================================= #
# NOTE(review): `Model` is not defined in this notebook; presumably it comes from `from utils import *` — confirm.
model = Model(layers)
criterion = nn.CrossEntropyLoss()
print(model)

## Run training loop
Hint:

1) Pass the data into the model.

2) Compute the loss on the model output and the labels.

3) Backpropagate through the loss.

In [None]:
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        # Flatten every image in the batch into a 784-element row
        inputs = inputs.reshape(-1, 784)

        # Forward pass, then score the outputs against the labels
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backpropagate, then let the model apply its parameter update
        loss.backward()
        model.update()

        # Accumulate the loss and report its average every 2000 mini-batches
        running_loss += loss.item()
        if (i + 1) % 2000 == 0:
            print(f"[Epoch: {epoch + 1}, Iteration: {i + 1}] loss: {running_loss/2000:0.3f}")
            running_loss = 0.0

print('Finished Training')

## Visualize a sample of data

In [None]:
# Show the last training batch, reshaped from flat 784-vectors back to 1x28x28 images
imshow(torchvision.utils.make_grid(inputs.reshape(-1, 1, 28, 28)))
# Predicted class = index of the largest output per row. `detach()` replaces the legacy
# `.data` accessor, and no throwaway `_` is assigned because that name would shadow
# IPython's last-output variable in a notebook.
predicted = outputs.detach().argmax(dim=1)
# Iterate over the predictions themselves so a batch shorter than batch_size cannot cause an IndexError
print(' '.join('%5s' % classes[index] for index in predicted.tolist()))

## Visualize model performance

In [None]:
# Run the model against the test loader.
# NOTE(review): `evaluate` is not defined in this notebook; presumably it comes from `from utils import *` — confirm what it reports.
evaluate(model, testloader)