## Install Packages

In [1]:
!pip install keras scipy numpy wandb 

Collecting keras
  Downloading keras-2.11.0-py2.py3-none-any.whl (1.7 MB)
     ---------------------------------------- 1.7/1.7 MB 10.7 MB/s eta 0:00:00
Collecting scipy
  Using cached scipy-1.10.0-cp310-cp310-win_amd64.whl (42.5 MB)
Collecting numpy
  Using cached numpy-1.24.2-cp310-cp310-win_amd64.whl (14.8 MB)
Collecting jax
  Downloading jax-0.4.3.tar.gz (1.2 MB)
     ---------------------------------------- 1.2/1.2 MB 15.2 MB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting wandb
  Downloading wandb-0.13.10-py3-none-any.whl (2.0 MB)
     ---------------------------------------- 2.0/2.0 MB 15.6 MB/s eta 0:00:00
Collecting opt_einsum
  Downloading opt_einsum-3.3.0-py3-none-any.whl (65 kB)
     ---------------------------------------- 65.5/65.5 kB ? eta 0:00:00
Collecting docker-pycreds>=0.4.0
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting appdirs>=1.4.3
  Downloading appdirs-1.

## Load Fashion MNIST

In [1]:
# Fetch Fashion-MNIST through Keras. load_data() yields two (images, labels) pairs:
# the training split first, then the test split.
from keras.datasets import fashion_mnist
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

In [93]:
# Raw training images: one 28x28 array per sample.
x_train.shape

(60000, 28, 28)

In [94]:
# Flatten every 28x28 image into a 784-vector; the result is only inspected, not stored.
x_train.reshape(x_train.shape[0], -1).shape


(60000, 784)

In [2]:
# Training split: images and labels share the same leading (sample) dimension.
x_train.shape, y_train.shape

((60000, 28, 28), (60000,))

In [3]:
# Test split: same per-sample layout as the training split.
x_test.shape, y_test.shape

((10000, 28, 28), (10000,))

## Utilities

In [3]:
import numpy as np

In [2]:
from abc import ABC, abstractmethod

class Module(ABC):
    '''
        Abstract base for network components.
        Subclasses must implement `forward`; `parameters` exposes the
        instance's attribute dictionary.
    '''

    @property
    def parameters(self):
        '''
            Return the instance attribute dictionary (the live `__dict__`, not a copy).
        '''
        return vars(self)

    @abstractmethod
    def forward(self, *args, **kwargs):
        '''
            Run the component on its inputs. Concrete subclasses supply the behaviour.
        '''

class WsAndBs:
    '''
        Container for one trainable tensor (weights or biases) and its gradient.

        value: np.ndarray of shape (out_size, in_size) -> current parameter values
        grad:  np.ndarray of the same shape -> accumulated gradient, starts at zero
    '''
    def __init__(self, out_size, in_size=1, type="Zero") -> None:
        '''
            out_size: int -> size of the first axis of the parameter
            in_size: int -> size of the second axis (defaults to 1, i.e. a column vector)
            type: str -> initialization strategy: "Zero", "Xavier" or "Random"
        '''
        self.value = self.initialize_weights((out_size, in_size), type)
        self.grad = np.zeros_like(self.value)
    
    def initialize_weights(self, shape: tuple, type: str="Zero") -> np.ndarray:
        '''
            Initialize weights of shape: (shape) with type: type strategy

            "Zero"   -> all zeros
            "Xavier" -> Glorot uniform: U(-limit, limit), limit = sqrt(6 / (fan_in + fan_out)),
                        where fan_in + fan_out is taken as the sum of the first two axes
            "Random" -> uniform samples in [0, 1)

            Raises KeyError for any other strategy name.
        '''
        if type == "Zero":
            return np.zeros(shape)
        elif type == "Xavier":
            # The limit only depends on fan_in + fan_out, so the axis order is irrelevant.
            fan_total = sum(shape[:2])
            limit = np.sqrt(6.0 / fan_total)
            return np.random.uniform(-limit, limit, size=shape)
        elif type == "Random":
            # np.random.rand takes the dimensions as separate arguments, not as a tuple.
            return np.random.rand(*shape)
        else:
            raise KeyError("Incorrect option for weight initialization strategy")
        
    def zero_grad(self):
        '''
            Reset the gradient to zeros (a fresh array with the same shape and dtype).
        '''
        self.grad = np.zeros_like(self.grad)

## Linear/Dense/Fully-Connected Layer

In [21]:
class Linear:
    '''
        Fully connected layer computing W^T x + b on column-major batches.
    '''
    def __init__(self, in_size: int, out_size: int, type="Zero"):
        '''
            in_size: int -> number of input features
            out_size: int -> number of output units
            type: str -> initialization strategy forwarded to WsAndBs
                         ("Zero", "Xavier" or "Random")
        '''
        # The weight matrix is stored as (in_size, out_size) and transposed on use.
        self.Weights = WsAndBs(in_size, out_size, type)
        # One bias per output unit, kept as a column so it broadcasts over samples.
        self.bias = WsAndBs(out_size, 1, type)

    def __call__(self, x: np.ndarray) -> np.ndarray:
        '''
            x -> input array of shape (in_size, n_samples), one sample per column
            returns -> array of shape (out_size, n_samples)
        '''
        weights_t = self.Weights.value.T
        projected = weights_t @ x
        return projected + self.bias.value

In [16]:
# Build a tiny zero-initialised layer (2 inputs, 4 outputs) and list its attributes:
# the weight holder and the bias holder.
ll = Linear(2, 4, type="Zero")
vars(ll)

{'Weights': <__main__.WsAndBs at 0x25b4b5a98a0>,
 'bias': <__main__.WsAndBs at 0x25b4b5a9b10>}

## Activation Functions

In [17]:
class ReLU:
    '''
        Rectified linear unit: element-wise max(x, 0).
    '''
    def __init__(self) -> None:
        '''
            Stateless activation; nothing to configure.
        '''

    def __call__(self, x) -> np.ndarray:
        '''
            Clamp negative entries of x to zero, keeping the shape and dtype of x.
        '''
        floor = np.zeros_like(x)
        return np.maximum(x, floor)

    def diff(self, x) -> np.ndarray:
        '''
            Element-wise derivative: 1 where x > 0, otherwise 0 (integer array).
        '''
        positive_mask = x > 0
        return positive_mask.astype(int)

class Sigmoid:
    '''
        Scaled logistic activation: scaler * 1 / (1 + exp(-x)).
    '''
    def __init__(self, scaler: int=1) -> None:
        '''
            scaler: multiplicative factor applied to the sigmoid output (default 1)
        '''
        self.scaler = scaler

    @staticmethod
    def _sigmoid(x: np.ndarray) -> np.ndarray:
        # Plain (unscaled) logistic function, shared by the forward pass and the derivative.
        return 1 / (1 + np.exp(-x))
    
    def __call__(self, x: np.ndarray) -> np.ndarray:
        '''
            Apply the scaled sigmoid element-wise.
        '''
        return self.scaler * self._sigmoid(x)
    
    def diff(self, x) -> np.ndarray:
        '''
            Element-wise derivative of the scaled sigmoid with respect to x:
            scaler * s * (1 - s), where s is the *unscaled* sigmoid of x.
            Using the scaled output inside (1 - .) would be wrong whenever scaler != 1.
        '''
        sig = self._sigmoid(x)
        return self.scaler * sig * (1 - sig)

class Tanh:
    '''
        Hyperbolic tangent activation.
    '''
    def __init__(self) -> None:
        '''
            Stateless activation; nothing to configure.
        '''

    def __call__(self, x: np.ndarray) -> np.ndarray:
        '''
            Apply tanh element-wise.
        '''
        return np.tanh(x)

    def diff(self, x) -> np.ndarray:
        '''
            Element-wise derivative of tanh: 1 - tanh(x)^2.
        '''
        activated = self(x)
        return 1 - activated ** 2

class Softmax:
    '''
        Softmax over axis 0 (each column of a 2-D input is normalised independently).
    '''
    def __init__(self) -> None:
        pass
    
    def __call__(self, x: np.ndarray) -> np.ndarray:
        '''
            Return exp(x) / sum(exp(x), axis=0), computed in a numerically stable way.

            The maximum along axis 0 is subtracted before exponentiating. This leaves
            the result unchanged mathematically but prevents exp() from overflowing
            (and producing NaN) for large logits.
        '''
        x = np.asarray(x)
        if x.size == 0:
            # Nothing to normalise; np.max would raise on an empty reduction axis.
            return np.exp(x)
        shifted = x - np.max(x, axis=0, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=0, keepdims=True)
    
    def diff(self, x):
        '''
            Jacobian of softmax for a single input vector: diag(z) - z z^T with z = softmax(x).

            NOTE: np.outer and flatten() collapse the input to 1-D, so this is only a
            meaningful Jacobian when x holds ONE sample (shape (n,) or (n, 1)).
        '''
        z = self(x)
        return - np.outer(z, z) + np.diag(z.flatten())

In [53]:
class Model(Module):
    '''
        Four-layer fully connected classifier: 784 -> 1024 -> 512 -> 256 -> 10.
        Hidden layers use ReLU; the output layer uses softmax over axis 0.

        All layers use Linear's default "Zero" initialization, so every logit is 0
        and the softmax output is uniform (0.1 per class) for any input.
    '''
    def __init__(self) -> None:
        self.layers = [
            Linear(28*28, 1024),
            Linear(1024, 512),
            Linear(512, 256),
            Linear(256, 10)
        ]
        self.relu = ReLU()
        self.softmax = Softmax()
        # Activations applied during the most recent forward pass, in order.
        self.call_stack = []
    
    def forward(self, x):
        '''
            x -> input with features on axis 0; trailing axes are flattened so the
                 layers receive a 2-D array of shape (x.shape[0], -1)
            returns -> class probabilities of shape (10, n_samples)
        '''
        # Start each pass with an empty record; otherwise repeated forward() calls
        # keep appending and the stack mixes entries from different passes.
        self.call_stack.clear()

        x = x.reshape(x.shape[0], -1)
        *hidden_layers, output_layer = self.layers
        for layer in hidden_layers:
            x = self.relu(layer(x))
            self.call_stack.append(self.relu)

        return self.softmax(output_layer(x))

In [54]:
# Smoke test: 6 random samples with 784 features each. The batch is transposed
# because the layers multiply W^T @ x and therefore expect features on axis 0.
# With zero-initialised layers every class probability comes out as 0.1.
x = np.random.rand(6, 28*28)
print(x)
model = Model()
model.forward(x.T)

[[0.7569003  0.49460604 0.36379068 ... 0.2674479  0.35733391 0.39038585]
 [0.26653186 0.07122563 0.48168974 ... 0.75514762 0.28510256 0.14774531]
 [0.22099282 0.35252487 0.22843665 ... 0.60700402 0.57776822 0.73806321]
 [0.23161329 0.20282435 0.88226183 ... 0.04563657 0.62501459 0.7204265 ]
 [0.90522623 0.45532616 0.34859206 ... 0.94351581 0.98118539 0.02678171]
 [0.46206164 0.05355742 0.69603832 ... 0.70005124 0.41965368 0.1458147 ]]


array([[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1]])

In [55]:
# Inspect the activations recorded by forward() (one entry is appended per hidden layer).
# NOTE(review): the recorded empty-list output looks stale relative to the current forward(); re-run to confirm.
model.call_stack

[]

In [1]:
class MSELoss:
    '''
        Mean squared error: mean((y_pred - y_hat)^2) over all elements.
        Here y_hat is the value the prediction is compared against.
    '''
    def __init__(self):
        pass

    def __call__(self, y_pred, y_hat):
        '''
            Return the scalar mean of the squared element-wise differences.
        '''
        residual = np.asarray(y_pred) - np.asarray(y_hat)
        return np.mean(residual ** 2)
    
    def diff(self, y_pred, y_hat):
        '''
            Gradient of the loss with respect to y_pred.

            d/dy_pred mean((y_pred - y_hat)^2) = 2 * (y_pred - y_hat) / N,
            where N is the total number of elements. The result has the same
            shape as y_pred, one partial derivative per prediction.
        '''
        residual = np.asarray(y_pred) - np.asarray(y_hat)
        return 2 * residual / residual.size

In [4]:
# Quick check of the loss on two small integer vectors; the squared differences
# are [0, 0, 1, 1, 1, 1], so the mean squared error is 4/6.
# NOTE(review): the recorded TypeError appears to come from an earlier MSELoss
# definition; re-run this cell after the class cell to refresh the output.
arr = np.array([1, 2, 3, 2, 1, 2])
arr1 = np.array([1, 2, 2, 1, 2, 3])
loss = MSELoss()
loss(arr, arr1)

TypeError: MSELoss.__init__() takes 0 positional arguments but 1 was given

In [110]:
# Mean over all elements, written out by hand: total divided by element count.
# NOTE(review): the recorded value does not match the integer `arr` defined earlier; likely leftover kernel state.
arr.sum()/arr.size

0.5075078313266619

In [114]:
# Mean along axis 0 only.
# NOTE(review): the recorded 3-element output implies `arr` was 2-D when this ran; confirm which array is intended.
arr.mean(axis=0)

array([0.58023105, 0.54061111, 0.40168134])