In [83]:
import numpy as np

from tqdm.notebook import tqdm

import torch
from torch import nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms as T

In [53]:
transform = T.Compose([
                        T.ToTensor(),
                        T.Normalize([0.4914, 0.44822, 0.4465], [0.247, 0.243, 0.261])
                      ])

data_train = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
data_train, data_valid = torch.utils.data.random_split(data_train, (45000, 5000))
data_test = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

Files already downloaded and verified
Files already downloaded and verified


In [56]:
batch_size = 256

loader_train = torch.utils.data.DataLoader(data_train, batch_size=batch_size, shuffle=False, num_workers=2)
loader_valid = torch.utils.data.DataLoader(data_valid, batch_size=batch_size, shuffle=False, num_workers=2)
loader_test = torch.utils.data.DataLoader(data_test, batch_size=batch_size, shuffle=False, num_workers=2)

In [55]:
def softmax(x):
    x = x - np.max(x, axis=0, keepdims=True)
    ex = np.exp(x)
    return ex / np.sum(ex, axis=0, keepdims=True)

def negative_log_likelihood(yhat, y, eps=1e-9):
    return -np.sum(y * np.log(yhat + eps)) / y.shape[1]

class ReLU:
    x = None
    
    def forward(self, x):
        self.x = x
        y = np.maximum(x, 0)
        return y

    def backward(self, dldy):
        # Note that the jacobian is a diagonal matrix, since ReLU is applied elementwise
        # we can thus simply express the jacobian as the diagonal vector

        dydx = self.x > 0  # (one where x > 0, else 0)
        dldx = dldy * dydx  # chain rule
        return dldx


class MultinomialCrossEntropyLoss:
    y, yhat = None, None

    def forward(self, logits, y):
        self.y = y
        self.yhat = softmax(logits)
        return negative_log_likelihood(self.yhat, y)

    def backward(self):
        dldlogits = (self.yhat - self.y) / self.y.shape[1]
        return dldlogits

In [78]:
class MLP():
    def __init__(self, hidden_neurons = 100, input_shape = 32*32*3, output_shape = 10):

        # Weights and biases
        b = np.sqrt(1 / input_shape)
        self.W1 = np.random.uniform(-b, b, size=(hidden_neurons, input_shape)).astype(np.float32)
        self.b1 = np.random.uniform(-b, b, size=(hidden_neurons, 1)).astype(np.float32)

        b = np.sqrt(1 / hidden_neurons)
        self.W2 = np.random.uniform(-b, b, size=(output_shape, hidden_neurons)).astype(np.float32)
        self.b2 = np.random.uniform(-b, b, size=(output_shape, 1)).astype(np.float32)
        
        # Placeholders for their gradients
        self.dldW1, self.dldb1, self.dldW2, self.dldb2 = None, None, None, None

        # Activations and hiddens/outputs to be computed in forward() and to be used in backward()
        self.x, self.h = None, None

        # Activation function for hidden layer
        self.relu = ReLU()

    def forward(self, x):
        
        self.x = self.W1 @ x.T + self.b1
        self.h = self.relu.forward(self.x)
        logits = self.W2 @ self.h + self.b2
        return logits
    
    def backward(self, x):
        raise NotImplementedError

In [87]:
# initialization
mlp = MLP(100)
loss_fun = MultinomialCrossEntropyLoss()

train_losses = []
train_accuracies = []
valid_losses = []
valid_accuracies = []

# mini batch gradient descent
lr = 5e-2
batch_size = 50
epoches = 50

for epoch in tqdm(range(epoches)):

  epoch_losses = []
  correct = 0

  for x, y in loader_train:
    
    # Do the numpy stuff here, dirty and quick
    x, y = x.numpy().reshape((-1, 32*32*3)), y.numpy()

    logits = mlp.forward(x)
    print(y.shape, logits.shape)
    loss_fun.forward(logits, y)

#     ##### YOUR CODE GOES HERE #####
    
#     # forward pass: calculate logits and loss
    
#     ##### YOUR CODE GOES HERE #####

#     # backward pass: back propogate the loss gradient to your weights and update them with the learning rate

#     ##### YOUR CODE GOES HERE #####
    
#     ###############################

#     # stats
#     epoch_losses.append(loss)
#     correct += np.sum(np.argmax(logits, 0) == np.argmax(y, 0))

#   train_losses.append(np.mean(epoch_losses))
#   train_accuracies.append(correct / x_train.shape[1])
  
#   # we're using the test set as a validation set here
#   logits = mlp.forward(x_test)
#   valid_losses.append(loss_fun.forward(logits, y_test))
#   valid_accuracies.append(np.mean(np.argmax(logits, 0) == np.argmax(y_test, 0)))
  
#   # update progress bar
#   pbar.set_description(f'acc: {train_accuracies[-1]:.2f}/{valid_accuracies[-1]:.2f}')

  0%|          | 0/50 [00:00<?, ?it/s]

(256,) (10, 256)


IndexError: tuple index out of range

In [None]:
plt.figure(figsize=(10, 4))

plt.subplot(1, 2, 1)
plt.plot(train_losses, label='train')
plt.plot(valid_losses, label='valid')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend()
plt.grid()

plt.subplot(1, 2, 2)
plt.plot(train_accuracies, label='train')
plt.plot(valid_accuracies, label='valid')
plt.ylim(0, 1)
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend()
plt.grid()

plt.tight_layout()
plt.show()