<a href="https://colab.research.google.com/github/Vigneshthanga/258-Deep-Learning/blob/master/Assignment-2/AutoGradMNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Importing for module auto reload

In [0]:
%matplotlib inline

#To reload all modules before executing a new line
%reload_ext autoreload
%autoreload 2

## Importing libraries

In [0]:
%tensorflow_version 2.x
import tensorflow
print(tensorflow.__version__)
from keras.datasets import mnist
from matplotlib import pyplot as plt
import numpy as np

2.2.0-rc4


## Loading MNIST data

In [0]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [0]:
x_train.shape

(60000, 28, 28)

## Creating class CTensor

In [0]:
class CTensor(object):
    def __init__(self,data, autograd=False, creators=None, creation_op=None, id=None): 
        if not (str(type(data)) != '<class numpy.ndarray>'):
            self.data = np.array(data)
        else:
          self.data = data   
        self.autograd = autograd
        self.grad = None
        if(id is None):
            self.id = np.random.randint(0,10000)
        else:
            self.id = id
        
        self.creators = creators
        self.creation_op = creation_op
        self.children = dict()
        
        if(creators):
            for c in creators:
                if(self.id not in c.children):
                    c.children[self.id] = 1
                else:
                    c.children[self.id] += 1

    ## This function takes care of accounting all the children nodes before propagating to the next
    def all_children_accounted_for(self):
        for id,cnt in self.children.items():
            if(cnt != 0):
                return False
        return True 

    ## Relu function to calculate for a scalar input
    def singleReluDerive(self, x):
      if (x>0):
        return 1
      return 0

    # Defining Backward propagation function
    def backward(self,grad=None, grad_origin=None):
        if(self.autograd):
            if(grad is None):
                grad = CTensor(np.ones_like(self.data))

            if(grad_origin is not None):
                if(self.children[grad_origin.id] == 0):
                    raise Exception("cannot backprop more than once")
                else:
                    self.children[grad_origin.id] -= 1

            if(self.grad is None):
                self.grad = grad
            else:
                self.grad += grad

            assert(grad.autograd == False)
            
            if(self.creators is not None and 
               (self.all_children_accounted_for() or 
                grad_origin is None)):

                if(self.creation_op == "add"):
                    self.creators[0].backward(self.grad, self)
                    self.creators[1].backward(self.grad, self)
                    
                if(self.creation_op == "sub"):
                    self.creators[0].backward(CTensor(self.grad.data), self)
                    self.creators[1].backward(CTensor(self.grad.__neg__().data), self)

                if(self.creation_op == "mul"):
                    new = self.grad * self.creators[1]
                    self.creators[0].backward(new , self)
                    new = self.grad * self.creators[0]
                    self.creators[1].backward(new, self)                    
                    
                if(self.creation_op == "mm"):
                    c0 = self.creators[0]
                    c1 = self.creators[1]
                    new = self.grad.mm(c1.transpose())
                    c0.backward(new)
                    new = self.grad.transpose().mm(c0).transpose()
                    c1.backward(new)
                
                if(self.creation_op == "index_select"):
                    new_grad = np.zeros_like(self.creators[0].data)
                    indices_ = self.index_select_indices.data.flatten()
                    grad_ = grad.data.reshape(len(indices_), -1)
                    for i in range(len(indices_)):
                        new_grad[indices_[i]] += grad_[i]
                    self.creators[0].backward(CTensor(new_grad))
                    
                if(self.creation_op == "transpose"):
                    self.creators[0].backward(self.grad.transpose())

                if("sum" in self.creation_op):
                    dim = int(self.creation_op.split("_")[1])
                    self.creators[0].backward(self.grad.expand(dim, self.creators[0].data.shape[dim]))

                if("expand" in self.creation_op):
                    dim = int(self.creation_op.split("_")[1])
                    self.creators[0].backward(self.grad.sum(dim))
                    
                if(self.creation_op == "neg"):
                    self.creators[0].backward(self.grad.__neg__())

                if(self.creation_op == "sigmoid"):
                    ones = CTensor(np.ones_like(self.grad.data))
                    self.creators[0].backward(self.grad * (self * (ones - self)))

                if(self.creation_op == "relu"):
                    vfunc = np.vectorize(singleReluDeriv)
                    self.creators[0].backward(vfunc(self.grad))

                if(self.creation_op == "cross_entropy"):
                    dx = self.softmax_output - self.target_dist
                    self.creators[0].backward(CTensor(dx))

    #Defining Magic methods to support operations like +, ~, -, * and sum for vectors                    
    def __add__(self, other):
        if(self.autograd and other.autograd):
            return CTensor(self.data + other.data, autograd=True, creators=[self,other], creation_op="add")
        return CTensor(self.data + other.data)

    def __neg__(self):
        if(self.autograd):
            return CTensor(self.data * -1, autograd=True, creators=[self], creation_op="neg")
        return CTensor(self.data * -1)
    
    def __sub__(self, other):
        if(self.autograd and other.autograd):
            return CTensor(self.data - other.data, autograd=True, creators=[self,other], creation_op="sub")
        return CTensor(self.data - other.data)
    
    def __mul__(self, other):
        if(self.autograd and other.autograd):
            return CTensor(self.data * other.data, autograd=True, creators=[self,other], creation_op="mul")
        return CTensor(self.data * other.data)    

    def sum(self, dim):
        if(self.autograd):
            return CTensor(self.data.sum(dim), autograd=True, creators=[self], creation_op="sum_"+str(dim))
        return CTensor(self.data.sum(dim))

    #Defining functions below of higher order operations which will be used in forward propagation  
    def expand(self, dim, copies):
        trans_cmd = list(range(0,len(self.data.shape)))
        trans_cmd.insert(dim,len(self.data.shape))
        new_data = self.data.repeat(copies).reshape(list(self.data.shape) + [copies]).transpose(trans_cmd)
        
        if(self.autograd):
            return CTensor(new_data, autograd=True, creators=[self], creation_op="expand_"+str(dim))
        return CTensor(new_data)
    
    def transpose(self):
        if(self.autograd):
            return CTensor(self.data.transpose(), autograd=True, creators=[self], creation_op="transpose")
        return CTensor(self.data.transpose())
    
    #for matrix multiplication
    def mm(self, other):
        if(self.autograd):
            return CTensor(self.data.dot(other.data), autograd=True, creators=[self,other], creation_op="mm")
        return CTensor(self.data.dot(other.data))

  #Sigmoid activation
    def sigmoid(self):
        if(self.autograd):
            return CTensor(1 / (1 + np.exp(-self.data)), autograd=True, creators=[self], creation_op="sigmoid")
        return CTensor(1 / (1 + np.exp(-self.data)))

  #Relu and softmax activation
    def relu(self):
      def singleRelu(self, x):
        if (x>0):
          return x
        return 0

        if(self.autograd):
            vfunc = np.vectorize(singleRelu)
            return CTensor(vfunc(self, self.data), autograd=True, creators=[self], creation_op="relu")
        return CTensor(vfunc(self, self.data))

    def softmax(self):
       return CTensor((np.exp((self.data))) / np.sum(np.exp((self.data))))


  #There are several loss functions. Here we are using cross_entropy
    def cross_entropy(self, target_indices):

        temp = np.exp(self.data)
        softmax_output = temp / np.sum(temp,
                                       axis=len(self.data.shape)-1,
                                       keepdims=True)
        
        t = target_indices.data.flatten()
        p = softmax_output.reshape(len(t),-1)
        target_dist = np.eye(p.shape[1])[t]
        loss = -(np.log(p) * (target_dist)).sum(1).mean()

        target_dist = target_dist.reshape(target_dist.shape[1], target_dist.shape[0])
        if(self.autograd):
            out = CTensor(loss,
                         autograd=True,
                         creators=[self],
                         creation_op="cross_entropy")
            out.softmax_output = softmax_output
            out.target_dist = target_dist
            return out

        return CTensor(loss)

    # this function is used to randomly pick the corresponding weights for given input data
    def index_select(self, indices):
        if(self.autograd):
            new = CTensor(self.data[indices.data],
                         autograd=True,
                         creators=[self],
                         creation_op="index_select")
            new.index_select_indices = indices
            return new
        return CTensor(self.data[indices.data])

    #Tanh activation
    def tanh(self):
        if(self.autograd):
            return CTensor(np.tanh(self.data),
                          autograd=True,
                          creators=[self],
                          creation_op="tanh")
        return CTensor(np.tanh(self.data))   
    
    def __repr__(self):
        return str(self.data.__repr__())
    
    def __str__(self):
        return str(self.data.__str__())  

## Defining a super class Layer and usig that to define subclasess like Linear Layer and Sequential layer.

In [0]:
class Layer(object):  
    def __init__(self):
        self.parameters = list()
        
    def get_parameters(self):
        return self.parameters


class Linear(Layer):
    def __init__(self, n_inputs, n_outputs):
        super().__init__()
        W = np.random.randn(n_inputs, n_outputs) * np.sqrt(1.0/(n_inputs))
        self.weight = CTensor(W, autograd=True)
        self.bias = CTensor(np.zeros(n_outputs), autograd=True)
        
        self.parameters.append(self.weight)
        self.parameters.append(self.bias)

    def forward(self, input):
        return input.mm(self.weight)+self.bias.expand(0,len(input.data))

In [0]:
class Sequential(Layer):
    
    def __init__(self, layers=list()):
        super().__init__()      
        self.layers = layers
    
    def add(self, layer):
        self.layers.append(layer)
        
    def forward(self, input):
        for layer in self.layers:
            input = layer.forward(input)
        return input
    
    def get_parameters(self):
        params = list()
        for l in self.layers:
            params += l.get_parameters()
        return params

## Defining Loss Classes

In [0]:
class MSELoss(Layer):   
    def __init__(self):
        super().__init__()
    
    def forward(self, pred, target):
        return ((pred - target)*(pred - target)).sum(0)

In [0]:
class CrossEntropyLoss():
    def __init__(self):
        super().__init__()
    
    def forward(self, input, target):
        return input.cross_entropy(target)

## Defining Class for SGD Optimizer

In [0]:
class SGD(object):
    def __init__(self, parameters, alpha=0.1):
        self.parameters = parameters
        self.alpha = alpha  
    def zero(self):
        for p in self.parameters:
            p.grad.data *= 0    
    def step(self, zero=True):  
        for p in self.parameters:
            if (p and p.grad):
              p.data -= p.grad.data * self.alpha
              if(zero):
                  p.grad.data *= 0

## Defining Classes for Non Linearity Functions

In [0]:
class Sigmoid(Layer):
    def __init__(self):
        super().__init__()
    
    def forward(self, input):
        return input.sigmoid()

In [0]:
class Relu(Layer):
    def __init__(self):
        super().__init__()
    
    def forward(self, input):
        return input.relu()

In [0]:
class Tanh(Layer):
    def __init__(self):
        super().__init__()
    
    def forward(self, input):
        return input.tanh()

In [0]:
class Softmax(Layer):
    def __init__(self):
      super().__init__()
    def forward(self, x):
      return input.softmax()

In [0]:
class Tanh(Layer):
    def __init__(self):
        super().__init__()
    
    def forward(self, input):
        return input.tanh()

## Defining Class for Embedding Layer which is not used for MNIST data. This layer will be useful to generate a higher order input or for the non-number data like text. This layer is used in NLP for vectorizing the raw text.

In [0]:
class Embedding(Layer): 
    def __init__(self, vocab_size, dim):
        super().__init__()
        
        self.vocab_size = vocab_size
        self.dim = dim
        
        # this random initialiation style is just a convention from word2vec
        self.weight = CTensor((np.random.rand(vocab_size, dim) - 0.5) / dim, autograd=True)
        
        self.parameters.append(self.weight)
    
    def forward(self, input):
        return self.weight.index_select(input)

In [0]:
images, labels = (x_train, y_train)
test_images, test_labels = (x_test, y_test)

## One hot encoding of labels

In [0]:
one_hot_labels = np.zeros((len(labels), 10))
for i,j in enumerate(labels):
  one_hot_labels[i][j] = 1
labels = one_hot_labels

In [0]:
labels.shape

(60000, 10)

In [0]:
one_hot_labels = np.zeros((len(test_labels), 10))
for i,j in enumerate(test_labels):
  one_hot_labels[i][j] = 1
test_labels = one_hot_labels

In [0]:
nlabels = 10
hidden_size1 = 100
hidden_size2 = 64
npixels = 784
alpha = 0.005
iterations = 100

In [0]:
x_train.shape

(60000, 28, 28)

In [0]:
labels.shape

(60000, 10)

In [0]:
labels = labels.astype(np.int8)

In [0]:
labels[0]

array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0], dtype=int8)

In [0]:
x_train = x_train.reshape(60000, 784)

In [0]:
import numpy
np.random.seed(0)

batch_size = 64

model = Sequential([Linear(784,300), Tanh(), Linear(300,100), Tanh(), Linear(100,10), Sigmoid()])
criterion = CrossEntropyLoss()

optim = SGD(parameters=model.get_parameters(), alpha=0.01)

trainloss=0.0
total_loss = 0.0
for i in range(10):
  for j in range(x_train.shape[0]):
    tobj = CTensor(x_train[j,:], autograd=True)
    lobj = CTensor(labels[j,:], autograd=True)
    pred = model.forward(tobj)
    trainloss= criterion.forward(pred, lobj)
    trainloss.backward(CTensor(np.ones_like(trainloss.data)))
    optim.step()
    total_loss = total_loss + trainloss.data
  print('Epoch No: {} loss: {}'.format(i, trainloss.data))

Epoch No: 0 loss: 2.302585184854487
Epoch No: 1 loss: 2.3025851281365872
Epoch No: 2 loss: 2.302585113362383
Epoch No: 3 loss: 2.3025851069040373
Epoch No: 4 loss: 2.302585103366119
Epoch No: 5 loss: 2.302585101163374
Epoch No: 6 loss: 2.3025850996735975
Epoch No: 7 loss: 2.3025850986059053
Epoch No: 8 loss: 2.3025850978072
Epoch No: 9 loss: 2.302585097189673
