<a href="https://colab.research.google.com/github/atulshah16/DeepLearningCMP258/blob/master/Graded%20Assignment%202/Shah_Atul_014530243_Auto_Diff_Gradient_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Autodiff / automatic-gradient implementation of an MNIST classifier


In [0]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import keras

# Defining the layers and their forward and backward passes

In [0]:
class Layer:
    """Base class for every building block of the hand-written network.

    Subclasses override ``Forward`` (compute the output and cache whatever
    the backward pass needs) and ``Backward`` (turn the gradient w.r.t. the
    output into the gradient w.r.t. the input).  Calling an instance is
    shorthand for calling ``Forward``.
    """

    def Forward(self, *args):
        """Compute the layer output; must be overridden by subclasses.

        Raises:
            NotImplementedError: always, in the base class.
        """
        # ``NotImplemented`` is a comparison sentinel, not an exception;
        # raising it produces a confusing TypeError instead.
        raise NotImplementedError

    def Backward(self, grad):
        """Back-propagate ``grad``; must be overridden by subclasses.

        Raises:
            NotImplementedError: always, in the base class.
        """
        raise NotImplementedError

    def __call__(self, *args):
        """Forward every positional argument to ``Forward``."""
        return self.Forward(*args)

class Sigmoid:
    """Element-wise logistic sigmoid activation, ``1 / (1 + exp(-x))``."""

    def Forward(self, x):
        """Apply the sigmoid to ``x`` and cache input and output.

        Args:
            x: NumPy array (or scalar) of pre-activations.

        Returns:
            The sigmoid of ``x``, same shape as ``x``.
        """
        self.x = x
        # The derivative is expressed through the output, so keep it around.
        self.out = 1 / (1 + np.exp(-x))
        return self.out

    def Backward(self, grad):
        """Return the gradient w.r.t. the input of the last ``Forward`` call.

        Args:
            grad: gradient of the loss w.r.t. this layer's output.

        Returns:
            ``grad * s * (1 - s)`` where ``s`` is the cached sigmoid output.
        """
        # d(sigmoid)/dx = s * (1 - s) with s the *output*, not the raw input.
        grad_input = self.out * (1 - self.out) * grad
        return grad_input

    def __call__(self, *args):
        """Make the activation callable like the other layers."""
        return self.Forward(*args)

class Relu(Layer):
    """Rectified linear unit: keeps positive entries, zeroes the rest."""

    def Forward(self, x):
        """Return the element-wise maximum of zero and ``x``; caches ``x``."""
        self.x = x
        floor = np.zeros_like(x)
        return np.maximum(floor, x)

    def Backward(self, grad):
        """Let ``grad`` through only where the cached input was positive."""
        passed = self.x > 0
        return passed * grad

class SoftmaxCrossentropyWithLogits(Layer):
    """Softmax followed by mean cross-entropy, computed from raw logits."""

    def Forward(self, x, y):
        """Return the mean cross-entropy of logits ``x`` against labels ``y``.

        Args:
            x: 2-D array of logits, indexed as ``x[sample, class]``.
            y: integer class indices, one per row of ``x``.

        Returns:
            Scalar loss: the negative log-probability of the labelled class,
            averaged over the rows of ``x``.
        """
        self.x = x
        self.y = y
        # Subtracting the row maximum leaves the softmax unchanged but keeps
        # exp() from overflowing on large logits.
        shifted = x - np.max(x, axis=-1, keepdims=True)
        exps = np.exp(shifted)
        sum_exps = np.sum(exps, axis=-1, keepdims=True)
        self.softmax = exps / sum_exps
        # Log-softmax computed directly, so a probability that underflows to
        # zero does not turn into log(0).
        log_probs = shifted - np.log(sum_exps)
        picked = log_probs[np.arange(x.shape[0]), y]
        loss = -np.sum(picked) / x.shape[0]
        return loss

    def Backward(self, grad=True):
        """Return the gradient of the mean loss w.r.t. the logits.

        Args:
            grad: accepted for interface compatibility and ignored; this is
                the last node of the graph.

        Returns:
            Array shaped like the logits: ``(softmax - one_hot(y)) / batch``.
        """
        batch = self.x.shape[0]
        # Work on a copy: editing the cached softmax in place would corrupt
        # it for any later Backward call.
        grad_input = self.softmax.copy()
        grad_input[np.arange(batch), self.y] -= 1
        return grad_input / batch

class MSE(Layer):
    """Squared-error loss scaled by ``1 / (2 * batch)``."""

    def Forward(self, x, y):
        """Return the element-wise squared error of ``x`` against ``y``.

        The result is not reduced: it has the broadcast shape of ``x - y``,
        each entry divided by twice the first dimension of ``x``.
        """
        self.x, self.y = x, y
        residual = x - y
        scale = self.x.shape[0] * 2
        return (residual ** 2) / scale

    def Backward(self, grad=None):
        """Return ``(x - y) / batch`` for the cached inputs; ``grad`` is unused."""
        residual = self.x - self.y
        batch = self.x.shape[0]
        return residual / batch

class Linear(Layer):
    """Affine layer ``x @ A + b`` that updates itself during ``Backward``."""

    def __init__(self, input, output, lr=0.0001):
        """Draw weights and bias uniformly from [-1, 1) and store the step size.

        Args:
            input: number of input features (rows of ``A``).
            output: number of output features (columns of ``A``, length of ``b``).
            lr: step size applied to both parameters in ``Backward``.
        """
        weights = np.random.random((input, output))
        self.A = 2 * weights - 1
        bias = np.random.random((output))
        self.b = 2 * bias - 1
        self.lr = lr

    def Forward(self, x):
        """Cache ``x`` and return ``x . A + b``."""
        self.x = x
        projected = np.dot(x, self.A)
        return projected + self.b

    def Backward(self, grad):
        """Apply one gradient-descent step and return the input gradient.

        Args:
            grad: gradient of the loss w.r.t. this layer's output.

        Returns:
            Gradient of the loss w.r.t. the cached input, computed with the
            weights as they were before the update.
        """
        batch = self.x.shape[0]
        # Must be taken before A is modified below.
        grad_input = np.dot(grad, self.A.T)
        weight_grad = np.dot(self.x.T, grad)
        # mean * batch: the per-column total of grad over the batch.
        bias_grad = grad.mean(axis=0) * batch
        # In-place updates, so any outside reference to A / b sees the step.
        self.A -= weight_grad * self.lr
        self.b -= bias_grad * self.lr
        return grad_input


## Training the autodiff model defined above on MNIST

In [0]:
from torchvision.datasets import MNIST
from torchvision import transforms
from torch.utils.data import DataLoader

class Model(Layer):
    """Three ``Linear`` layers (784 -> 100 -> 200 -> 10) with ``Relu`` between them."""

    def __init__(self, lr=0.00001):
        """Build the layer stack; every ``Linear`` layer shares the step size ``lr``."""
        self.lr = lr
        step = self.lr
        self.layers = [
            Linear(784, 100, lr=step),
            Relu(),
            Linear(100, 200, lr=step),
            Relu(),
            Linear(200, 10, lr=step),
        ]

    def Forward(self, x):
        """Feed ``x`` through the layers in order and return the final output."""
        out = x
        for layer in self.layers:
            out = layer(out)
        return out

    def Backward(self, grad):
        """Push ``grad`` through the layers last-to-first.

        Each layer's ``Backward`` is called in turn; the value returned by the
        first layer is returned to the caller.
        """
        for layer in reversed(self.layers):
            grad = layer.Backward(grad)
        return grad


# Settings for the training run, named once so the loader and the reshape
# below cannot drift apart.
BATCH_SIZE = 2
EPOCHS = 7
LOG_EVERY = 100  # number of batches averaged into each printed loss

simple = transforms.Compose([transforms.ToTensor()])
ds = MNIST('./mnist', download=True, transform=simple)
ld = DataLoader(ds, batch_size=BATCH_SIZE, pin_memory=True, drop_last=True)

mm = Model()
loss = SoftmaxCrossentropyWithLogits()
_loss_avg = 0  # sum of the losses since the last report
_seen = 0      # number of batches accumulated in _loss_avg
for e in range(EPOCHS):
    for i, (img, label) in enumerate(ld):
        # Flatten every image to one row per sample; take the batch size
        # from the tensor itself instead of repeating the constant.
        x = img.view(img.shape[0], -1).numpy()

        res = mm(x)
        _loss = loss(res, label.numpy())
        _loss_avg += _loss.mean()
        _seen += 1
        grad = loss.Backward(1)
        mm.Backward(grad)  # each Linear layer updates itself here

        # Report only once a full window has been accumulated, so the very
        # first print is not a single loss divided by the window size.
        if _seen == LOG_EVERY:
            print(_loss_avg / _seen)
            _loss_avg = 0
            _seen = 0
            print('---------')
            
