<a href="https://colab.research.google.com/github/benbaz-2/comp551/blob/main/assignment3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Assignment 3

## Installations

In [38]:
!pip install medmnist



In [39]:
import numpy as np

## Load dataset

In [40]:
from medmnist import OrganAMNIST
train_dataset = OrganAMNIST(split='train', download=True)
test_dataset = OrganAMNIST(split='test', download=True)

Using downloaded and verified file: /root/.medmnist/organamnist.npz
Using downloaded and verified file: /root/.medmnist/organamnist.npz


In [41]:
# Put the data in numpy arrays
x_train, y_train = train_dataset.imgs, train_dataset.labels
x_test, y_test = test_dataset.imgs, test_dataset.labels

In [42]:
# vectorize x
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)


In [43]:
# center and normalize data
mean_train = np.mean(x_train, axis=1, keepdims=True)
std_train = np.std(x_train, axis=1, keepdims=True)
x_train = (x_train - mean_train) / std_train

mean_test = np.mean(x_test, axis=1, keepdims=True)
std_test = np.std(x_test, axis=1, keepdims=True)
x_test = (x_test - mean_test) / std_test

In [44]:
y = np.zeros((y_train.shape[0], 11))
for i in range(y.shape[0]):
  j = y_train[i]
  y[i, j] = 1
y_train = y

y = np.zeros((y_test.shape[0], 11))
for i in range(y.shape[0]):
  j = y_test[i]
  y[i, j] = 1
y_test = y


## Implement a MLP

In [45]:
# Activation functions

ReLU = lambda x: np.maximum(0, x)
Softmax = lambda x: np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)
cross_entropy = lambda y_true, y_pred: -np.sum(y_true * np.log(y_pred)) / y_true.shape[0]

In [46]:
# This class represents an arbitrary layer
class NeuralNetLayer:
  def __init__(self):
    self.gradient = None
    self.parameters = None

  def forward(self, x):
    raise NotImplementedError()

  def backward(self, x):
    raise NotImplementedError()

In [62]:
# This class represents an arbitrary neural network
class DeepMLP:
  def __init__(self, layers=[]):
    self.layers = layers

  def forward(self, x):
    for layer in self.layers:
      x = layer.forward(x)
    return x

  def backward(self, x):
    for layer in self.layers[::-1]:
      x = layer.backward(x)

  def fit(self, x, y, loss=cross_entropy, epochs=100, lr=0.01):
    for epoch in range(epochs):
      y_pred = self.forward(x)
      loss_value = loss(y, y_pred)
      dl = y_pred - y
      self.backward(dl)

  def predict(self, x):
    return np.argmax(self.forward(x), axis=1)


In [61]:
# Implement a linear layer

class LinearLayer(NeuralNetLayer):

  def __init__(self, input_size, output_size): # D, M
    super().__init__()
    self.input_size = input_size
    self.output_size = output_size
    self.input = None
    self.output = None

    # Use Xavier initialization
    self.parameters = {
        'weights': np.random.randn(input_size, output_size) * np.sqrt(1 / (input_size)),
        'bias': np.zeros(output_size)
    }

    self.gradient = {
        'weights': np.zeros((input_size, output_size)),
        'bias': np.zeros(output_size)
    }

  def forward(self, x):
    pdb.set_trace()
    self.input = x  # N x D
    return np.dot(x, self.parameters['weights']) + self.parameters['bias']  # N x M

  def backward(self, dz):  # N x M
    self.gradient['weights'] = np.dot(self.input.T , dz)  # D X M
    self.gradient['bias'] = np.sum(dz, axis=0) # M
    return np.dot(dz, self.parameters['weights'].T)   # Return N x D gradient for next layer in backpropagation


In [49]:
class ReLULayer(NeuralNetLayer):
  def __init__(self):
    super().__init__()
    self.input = None
    self.output = None

  def forward(self, x):
    self.input = x  # N x D
    return ReLU(x)

  def backward(self, dz):
    return dz * (self.input > 0) # N x D Element wise multiplication

In [65]:
class SoftmaxLayer(NeuralNetLayer):
  def __init__(self):
    super().__init__()
    self.input = None
    self.output = None

  def forward(self, x):
    self.input = x  # N x D
    self.output = Softmax(x)
    return self.output

  def backward(self, dz):
        N, D = self.input.shape
        ds = np.zeros((N, D))
        for i in range(N):
            for j in range(D):
              for k in range(D):
                if k == j:
                  ds[i, j] = self.output[i, j] * (1 - self.output[i, k])
                else:
                  ds[i, j] = -self.output[i, j] * self.output[i, k]
        return np.dot(ds, dz)

## Model Training

In [63]:
model = DeepMLP()
model.layers.append(LinearLayer(784, 128))
model.layers.append(ReLULayer())
model.layers.append(LinearLayer(128, 11))
model.layers.append(SoftmaxLayer())

In [64]:
import pdb
model.fit(x_train, y_train)

> [0;32m<ipython-input-61-2dbf9c737096>[0m(25)[0;36mforward[0;34m()[0m
[0;32m     23 [0;31m  [0;32mdef[0m [0mforward[0m[0;34m([0m[0mself[0m[0;34m,[0m [0mx[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     24 [0;31m    [0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 25 [0;31m    [0mself[0m[0;34m.[0m[0minput[0m [0;34m=[0m [0mx[0m  [0;31m# N x D[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     26 [0;31m    [0;32mreturn[0m [0mnp[0m[0;34m.[0m[0mdot[0m[0;34m([0m[0mx[0m[0;34m,[0m [0mself[0m[0;34m.[0m[0mparameters[0m[0;34m[[0m[0;34m'weights'[0m[0;34m][0m[0;34m)[0m [0;34m+[0m [0mself[0m[0;34m.[0m[0mparameters[0m[0;34m[[0m[0;34m'bias'[0m[0;34m][0m  [0;31m# N x M[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     27 [0;31m[0;34m[0m[0m
[0m
ipdb> x.shape
(34561, 784)
ipdb> self.parameters['weights'].shape
(784, 128)
ipdb> self.parameters['bias'].shap

  cross_entropy = lambda y_true, y_pred: -np.sum(y_true * np.log(y_pred)) / y_true.shape[0]


> [0;32m<ipython-input-61-2dbf9c737096>[0m(25)[0;36mforward[0;34m()[0m
[0;32m     23 [0;31m  [0;32mdef[0m [0mforward[0m[0;34m([0m[0mself[0m[0;34m,[0m [0mx[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     24 [0;31m    [0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 25 [0;31m    [0mself[0m[0;34m.[0m[0minput[0m [0;34m=[0m [0mx[0m  [0;31m# N x D[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     26 [0;31m    [0;32mreturn[0m [0mnp[0m[0;34m.[0m[0mdot[0m[0;34m([0m[0mx[0m[0;34m,[0m [0mself[0m[0;34m.[0m[0mparameters[0m[0;34m[[0m[0;34m'weights'[0m[0;34m][0m[0;34m)[0m [0;34m+[0m [0mself[0m[0;34m.[0m[0mparameters[0m[0;34m[[0m[0;34m'bias'[0m[0;34m][0m  [0;31m# N x M[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     27 [0;31m[0;34m[0m[0m
[0m
ipdb> q


In [55]:
x_train.shape

(34561, 784)