### CMPE 258 HW6 Part a
#### Jimmy Liang

Using examples from: https://github.com/iamtrask/Grokking-Deep-Learning/blob/master/Chapter13%20-%20Intro%20to%20Automatic%20Differentiation%20-%20Let's%20Build%20A%20Deep%20Learning%20Framework.ipynb


In [1]:
import numpy as np


In [4]:
class Tensor(object):
    """NumPy-backed tensor with a minimal reverse-mode autograd graph.

    A tensor produced by an operation on autograd-enabled operands records
    those operands in ``creators`` and a string tag in ``creation_op``.  Every
    creator counts, in its ``children`` dict (child id -> pending count), how
    many gradient contributions it still expects from each tensor built from
    it.  ``backward`` accumulates incoming gradients in ``grad`` and only
    forwards them to the creators once every child has reported.

    Consequence of that bookkeeping: every tensor derived from a non-leaf
    tensor has to take part in the backward pass, otherwise the non-leaf keeps
    waiting and never forwards its gradient.
    """

    def __init__(self, data, autograd=False, creators=None, creation_op=None, id=None):
        """Wrap ``data`` and register this tensor as a child of its creators.

        Args:
            data: Anything ``np.array`` accepts; it is copied into ``self.data``.
            autograd: Whether gradients are tracked for this tensor.
            creators: Operand tensors this tensor was computed from, or None
                for a leaf.
            creation_op: Tag naming the operation that produced this tensor
                (e.g. ``"add"``, ``"mm"``, ``"sum_0"``).
            id: Explicit identifier; when None a random integer in
                ``[0, 100000)`` is drawn from NumPy's global RNG.
        """
        self.data = np.array(data)
        self.autograd = autograd
        self.grad = None
        if id is None:
            self.id = np.random.randint(0, 100000)
        else:
            self.id = id

        self.creators = creators
        self.creation_op = creation_op
        self.children = {}

        if creators is not None:
            for creator in creators:
                # A creator used twice by the same op (e.g. x * x) expects
                # two contributions from this child.
                creator.children[self.id] = creator.children.get(self.id, 0) + 1

    def all_children_grads_accounted_for(self):
        """Return True when no child still owes this tensor a gradient."""
        return all(cnt == 0 for cnt in self.children.values())

    def backward(self, grad=None, grad_origin=None):
        """Accumulate ``grad`` and propagate it to the creators when ready.

        Args:
            grad: Incoming gradient as a non-autograd Tensor.  Defaults to a
                tensor of ones shaped like ``self.data``.
            grad_origin: The child tensor delivering ``grad``; None when this
                is the root call made by user code.

        Raises:
            Exception: if ``grad_origin`` already delivered all the gradients
                it was registered for.
            AssertionError: if ``grad`` itself tracks gradients.
        """
        if not self.autograd:
            return

        if grad is None:
            grad = Tensor(np.ones_like(self.data))

        # Validate before touching any bookkeeping so a bad call leaves the
        # graph state unchanged.
        assert not grad.autograd

        if grad_origin is not None:
            if self.children[grad_origin.id] == 0:
                raise Exception("cannot backprop more than once")
            self.children[grad_origin.id] -= 1

        if self.grad is None:
            self.grad = grad
        else:
            # Tensor defines no in-place add, so this rebinds to a new Tensor.
            self.grad = self.grad + grad

        ready = grad_origin is None or self.all_children_grads_accounted_for()
        if self.creators is None or not ready:
            return

        op = self.creation_op
        if op is None:
            return

        # Every branch passes ``self`` as grad_origin so the creators can
        # tick off this child and wait for their remaining children; without
        # it a creator shared by several ops would propagate once per child.
        # Local gradients are computed on raw arrays so that the backward
        # pass never registers new nodes in the graph.
        if op == "add":
            # Separate Tensor objects (np.array copies) so that zeroing one
            # creator's grad in place cannot clobber the other's.
            self.creators[0].backward(Tensor(self.grad.data), self)
            self.creators[1].backward(Tensor(self.grad.data), self)

        elif op == "sub":
            self.creators[0].backward(Tensor(self.grad.data), self)
            self.creators[1].backward(Tensor(self.grad.data * -1), self)

        elif op == "mul":
            c0, c1 = self.creators
            c0.backward(Tensor(self.grad.data * c1.data), self)
            c1.backward(Tensor(self.grad.data * c0.data), self)

        elif op == "mm":
            c0, c1 = self.creators
            grad_c0 = self.grad.data.dot(c1.data.transpose())
            c0.backward(Tensor(grad_c0), self)
            grad_c1 = self.grad.data.transpose().dot(c0.data).transpose()
            c1.backward(Tensor(grad_c1), self)

        elif op == "transpose":
            self.creators[0].backward(self.grad.transpose(), self)

        elif op.startswith("sum_"):
            dim = int(op.split("_")[1])
            copies = self.creators[0].data.shape[dim]
            self.creators[0].backward(self.grad.expand(dim, copies), self)

        elif op.startswith("expand_"):
            dim = int(op.split("_")[1])
            self.creators[0].backward(self.grad.sum(dim), self)

        elif op == "neg":
            self.creators[0].backward(Tensor(self.grad.data * -1), self)

        elif op == "sigmoid":
            # d/dx sigmoid(x) = s * (1 - s), with s the stored output.
            local = self.data * (1 - self.data)
            self.creators[0].backward(Tensor(self.grad.data * local), self)

        elif op == "tanh":
            # d/dx tanh(x) = 1 - t**2, with t the stored output.
            local = 1 - self.data * self.data
            self.creators[0].backward(Tensor(self.grad.data * local), self)

        elif op == "index_select":
            new_grad = np.zeros_like(self.creators[0].data)
            indices_ = self.index_select_indices.data.flatten()
            # Use the accumulated gradient, not just the last contribution.
            grad_ = self.grad.data.reshape(len(indices_), -1)
            # Explicit loop: repeated indices must accumulate.
            for row, row_grad in zip(indices_, grad_):
                new_grad[row] += row_grad
            self.creators[0].backward(Tensor(new_grad), self)

    def __add__(self, other):
        """Element-wise sum; tracked only when both operands use autograd."""
        if self.autograd and other.autograd:
            return Tensor(self.data + other.data,
                          autograd=True,
                          creators=[self, other],
                          creation_op="add")
        return Tensor(self.data + other.data)

    def __neg__(self):
        """Element-wise negation."""
        if self.autograd:
            return Tensor(self.data * -1,
                          autograd=True,
                          creators=[self],
                          creation_op="neg")
        return Tensor(self.data * -1)

    def __sub__(self, other):
        """Element-wise difference; tracked only when both operands use autograd."""
        if self.autograd and other.autograd:
            return Tensor(self.data - other.data,
                          autograd=True,
                          creators=[self, other],
                          creation_op="sub")
        return Tensor(self.data - other.data)

    def __mul__(self, other):
        """Element-wise product; tracked only when both operands use autograd."""
        if self.autograd and other.autograd:
            return Tensor(self.data * other.data,
                          autograd=True,
                          creators=[self, other],
                          creation_op="mul")
        return Tensor(self.data * other.data)

    def sum(self, dim):
        """Sum over axis ``dim``; the axis is encoded in the op tag."""
        if self.autograd:
            return Tensor(self.data.sum(dim),
                          autograd=True,
                          creators=[self],
                          creation_op="sum_" + str(dim))
        return Tensor(self.data.sum(dim))

    def expand(self, dim, copies):
        """Insert a new axis at position ``dim`` holding ``copies`` repeats."""
        # Repeat along a new trailing axis, then move that axis to ``dim``.
        trans_cmd = list(range(0, len(self.data.shape)))
        trans_cmd.insert(dim, len(self.data.shape))
        new_data = self.data.repeat(copies).reshape(
            list(self.data.shape) + [copies]).transpose(trans_cmd)

        if self.autograd:
            return Tensor(new_data,
                          autograd=True,
                          creators=[self],
                          creation_op="expand_" + str(dim))
        return Tensor(new_data)

    def transpose(self):
        """Return the transposed tensor (``ndarray.transpose`` with no args)."""
        if self.autograd:
            return Tensor(self.data.transpose(),
                          autograd=True,
                          creators=[self],
                          creation_op="transpose")

        return Tensor(self.data.transpose())

    def mm(self, x):
        """Matrix product ``self . x``; tracked when ``self`` uses autograd."""
        if self.autograd:
            return Tensor(self.data.dot(x.data),
                          autograd=True,
                          creators=[self, x],
                          creation_op="mm")
        return Tensor(self.data.dot(x.data))

    def sigmoid(self):
        """Element-wise logistic function ``1 / (1 + exp(-x))``."""
        if self.autograd:
            return Tensor(1 / (1 + np.exp(-self.data)),
                          autograd=True,
                          creators=[self],
                          creation_op="sigmoid")
        return Tensor(1 / (1 + np.exp(-self.data)))

    def tanh(self):
        """Element-wise hyperbolic tangent."""
        if self.autograd:
            return Tensor(np.tanh(self.data),
                          autograd=True,
                          creators=[self],
                          creation_op="tanh")
        return Tensor(np.tanh(self.data))

    def index_select(self, indices):
        """Gather entries of ``self.data`` at ``indices.data``.

        The indices tensor is kept on the result so ``backward`` can scatter
        the gradient back to the selected positions.
        """
        if self.autograd:
            new = Tensor(self.data[indices.data],
                         autograd=True,
                         creators=[self],
                         creation_op="index_select")
            new.index_select_indices = indices
            return new
        return Tensor(self.data[indices.data])

    def __repr__(self):
        return str(self.data.__repr__())

    def __str__(self):
        return str(self.data.__str__())
    

In [12]:
## Stochastic Gradient Descent
class SGD(object):
    """Plain stochastic gradient descent over a list of parameters.

    Each parameter is expected to expose a NumPy array as ``data`` and either
    ``grad is None`` or a gradient object with its own ``data`` array.
    """

    def __init__(self, parameters, alpha=0.1):
        """Store the parameter list (by reference) and the learning rate."""
        self.parameters = parameters
        self.alpha = alpha

    def zero(self):
        """Reset every existing gradient to zero in place.

        Parameters that have not received a gradient yet are skipped.
        """
        for p in self.parameters:
            if p.grad is not None:
                # Assignment rather than ``*= 0`` so inf/NaN gradients are
                # actually cleared (inf * 0 is NaN).
                p.grad.data[...] = 0

    def step(self, zero=True):
        """Apply ``data -= alpha * grad`` to every parameter that has a gradient.

        Args:
            zero: When true, clear each gradient right after it is applied.
        """
        for p in self.parameters:
            if p.grad is None:
                # Nothing was backpropagated to this parameter; leave it alone.
                continue

            p.data -= p.grad.data * self.alpha

            if zero:
                p.grad.data[...] = 0

In [8]:
## Layers
class Layer(object):
    """Base class for network components that may own trainable parameters."""

    def __init__(self):
        # Subclasses append their trainable tensors to this list.
        self.parameters = []

    def get_parameters(self):
        """Return the layer's own parameter list (the same list object)."""
        owned = self.parameters
        return owned


class Linear(Layer):
    """Fully connected layer computing ``input . weight + bias``."""

    def __init__(self, n_inputs, n_outputs):
        """Create the weight matrix and a zero bias, both tracked by autograd.

        Weights are standard-normal samples scaled by ``sqrt(2 / n_inputs)``.
        """
        super().__init__()
        samples = np.random.randn(n_inputs, n_outputs)
        scale = np.sqrt(2.0 / n_inputs)
        self.weight = Tensor(samples * scale, autograd=True)
        self.bias = Tensor(np.zeros(n_outputs), autograd=True)

        for param in (self.weight, self.bias):
            self.parameters.append(param)

    def forward(self, input):
        """Apply the affine map to ``input``.

        The bias is expanded along axis 0 to one copy per row of ``input``
        before it is added.
        """
        projected = input.mm(self.weight)
        n_rows = len(input.data)
        tiled_bias = self.bias.expand(0, n_rows)
        return projected + tiled_bias

In [9]:
class Sequential(Layer):
    """Container that feeds its input through a list of layers in order."""

    def __init__(self, layers=None):
        """Store ``layers`` (by reference) or start with a fresh empty list.

        Args:
            layers: Optional list of layer objects.  A new list is created
                per instance when omitted, so containers built without an
                argument never share (and mutate) one default list.
        """
        super().__init__()

        self.layers = [] if layers is None else layers

    def add(self, layer):
        """Append ``layer`` to the end of the pipeline."""
        self.layers.append(layer)

    def forward(self, input):
        """Return the output of the last layer after chaining all layers."""
        for layer in self.layers:
            input = layer.forward(input)
        return input

    def get_parameters(self):
        """Return a new flat list with the parameters of every contained layer."""
        params = []
        for layer in self.layers:
            params += layer.get_parameters()
        return params

In [10]:
## Loss Function
class MSELoss(Layer):
    """Squared-error loss summed over axis 0 (a sum, not a mean)."""

    def forward(self, pred, target):
        """Return ``((pred - target) ** 2).sum(0)`` built from tensor ops."""
        # The residual is built twice so the product has two distinct
        # operand tensors.
        first_residual = pred - target
        second_residual = pred - target
        squared = first_residual * second_residual
        return squared.sum(0)

In [11]:
## Activations

class Tanh(Layer):
    """Parameter-free layer applying the input's ``tanh`` method."""

    def forward(self, input):
        """Return ``input.tanh()``."""
        activated = input.tanh()
        return activated
    
class Sigmoid(Layer):
    """Parameter-free layer applying the input's ``sigmoid`` method."""

    def forward(self, input):
        """Return ``input.sigmoid()``."""
        activated = input.sigmoid()
        return activated

In [14]:
## Unit Testing

# Three leaf tensors with gradient tracking enabled.
a = Tensor([1,2,3,4,5], autograd=True)
b = Tensor([2,2,2,2,2], autograd=True)
c = Tensor([5,4,3,2,1], autograd=True)

# b feeds both d and e, so it must receive two gradient contributions.
d = a + b
e = b + c
f = d + e

# Seed the backward pass with a gradient of ones at the output.
f.backward(Tensor(np.array([1,1,1,1,1])))

# Element-wise check: b's accumulated gradient should be 2 everywhere.
print(b.grad.data == np.array([2,2,2,2,2]))

[ True  True  True  True  True]


In [6]:
from keras.datasets import mnist
import matplotlib.pyplot as plt
from keras.utils import np_utils
from scipy.special import softmax

In [27]:
## MNIST dataset
# Load the MNIST train/test splits.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Flatten each sample to 28*28 values and divide by 255.
x_train = x_train.reshape(x_train.shape[0], 28*28) / 255.
x_test = x_test.reshape(x_test.shape[0], 28*28) / 255.
# Presumably one-hot encodes the integer labels (the printed shapes below
# show 10 columns) -- confirm against the Keras version in use.
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)

In [28]:
# Keep only the first 1000 training and first 100 test samples.
x_train = x_train[:1000]
y_train = y_train[:1000]
x_test = x_test[:100]
y_test = y_test[:100]
# Feature count per sample and number of label columns.
input_dim = x_train.shape[1]
num_classes = y_test.shape[1]

In [29]:
# Report the array shapes after flattening and subsetting.
print('x_train.shape', x_train.shape)
print('x_test.shape', x_test.shape)
print('y_train.shape', y_train.shape)
print('y_test.shape', y_test.shape)

x_train.shape (1000, 784)
x_test.shape (100, 784)
y_train.shape (1000, 10)
y_test.shape (100, 10)


In [30]:
def train_on_batch(x, y):
  """Run one forward/backward/update step on a single batch.

  Uses the module-level ``model``, ``criterion`` and ``optim``.  Both arrays
  are wrapped as autograd tensors before the forward pass.  Returns the
  ``(model, loss)`` pair.
  """
  inputs = Tensor(x, autograd=True)
  targets = Tensor(y, autograd=True)
  prediction = model.forward(inputs)
  batch_loss = criterion.forward(prediction, targets)
  batch_loss.backward()
  optim.step()
  return model, batch_loss

In [31]:
# Wrap the full training set once; batches below are sliced from the raw arrays.
data = Tensor(x_train, autograd=True)
target = Tensor(y_train, autograd=True)

# 784 -> 200 -> 80 -> 10 network with tanh hidden activations and a sigmoid output.
model = Sequential([Linear(784,200), Tanh() ,Linear(200,80), Tanh(), Linear(80,10), Sigmoid()])
criterion = MSELoss()

optim = SGD(parameters=model.get_parameters(), alpha=0.001)
batch_size = 256
for epoch in range(500):
    # NOTE(review): the integer truncation only visits full batches; with the
    # 1000 samples kept above and batch_size 256, rows 768-999 are never
    # trained on, although they are included in the accuracy below.
    for i in range(int(len(data.data)/batch_size)):
      batch_start, batch_end = ((i * batch_size),((i+1)*batch_size))
      model, loss = train_on_batch(data.data[batch_start:batch_end], target.data[batch_start:batch_end])

    # Every 5th epoch, report accuracy over the whole training set.
    if epoch % 5 == 0:
        pred = model.forward(data)
        # Fraction of rows whose arg-max prediction matches the arg-max target.
        train_correct = (np.argmax(target.data,axis=1) == np.argmax(pred.data,axis=1)).mean()
        print("Train Accuracy:",train_correct)

Train Accuracy: 0.349
Train Accuracy: 0.764
Train Accuracy: 0.84
Train Accuracy: 0.874
Train Accuracy: 0.894
Train Accuracy: 0.903
Train Accuracy: 0.917
Train Accuracy: 0.924
Train Accuracy: 0.929
Train Accuracy: 0.934
Train Accuracy: 0.94
Train Accuracy: 0.941
Train Accuracy: 0.943
Train Accuracy: 0.946
Train Accuracy: 0.948
Train Accuracy: 0.949
Train Accuracy: 0.949
Train Accuracy: 0.952
Train Accuracy: 0.953
Train Accuracy: 0.953
Train Accuracy: 0.953
Train Accuracy: 0.953
Train Accuracy: 0.953
Train Accuracy: 0.954
Train Accuracy: 0.954
Train Accuracy: 0.955
Train Accuracy: 0.955
Train Accuracy: 0.954
Train Accuracy: 0.954
Train Accuracy: 0.954
Train Accuracy: 0.954
Train Accuracy: 0.954
Train Accuracy: 0.954
Train Accuracy: 0.955
Train Accuracy: 0.955
Train Accuracy: 0.955
Train Accuracy: 0.955
Train Accuracy: 0.955
Train Accuracy: 0.955
Train Accuracy: 0.956
Train Accuracy: 0.956
Train Accuracy: 0.956
Train Accuracy: 0.956
Train Accuracy: 0.956
Train Accuracy: 0.956
Train Accura