## Many thanks to Andrew W. Task and his Great book Grokking Deep Learning 

In [6]:
import numpy as np

In [93]:
class Tensor(object):
    
    def __init__(self, data,
                 autograd=False,
                 creators=None,
                 creation_op=None,
                 id=None):
        
        self.data = np.array(data)
        self.creators = creators
        self.creation_op = creation_op
        self.grad = None
        self.autograd = autograd
        self.children = {}
        if id is None:
            id = np.random.randint(0, 100000)
        self.id = id
        
        if creators is not None:
            for father in creators:
                if self.id not in father.children:
                    father.children[self.id] = 1
                else:
                    father.children[self.id] += 1
                    
    def all_children_grads_accounter_for(self):
        #in the backward function every time
        #we backprop through a children we decrease
        #the count, so we can confirme if the Tensor
        #has recived the correct number of grad
        
        for children_id, count in self.children.items():
            if count != 0:
                return False
        return True
        
        
    def backward(self, grad=None, grad_origin=None):
        if self.autograd:
            
            if grad is None:
                grad = Tensor(np.ones_like(self.data))
                
            if grad_origin is not None:
                if self.children[grad_origin.id] == 0:
                    raise Exception("cannot backprop more than once")
                else:
                    self.children[grad_origin.id] -= 1
                    
            if self.grad is None:
                if type(grad) == np.ndarray:
                    self.grad = grad.copy()
                else:
                    self.grad = grad
            else:
                self.grad += grad

            if self.creators is not None and (self.all_children_grads_accounter_for() or grad_origin is None):
                
                if self.creation_op == "add":
                    self.creators[0].backward(grad, self)
                    self.creators[1].backward(grad, self)
                    
                if self.creation_op == "neg":
                    self.creators[0].backward(self.grad.__neg__())
                
                if self.creation_op == "sub":
                    new = Tensor(self.grad.data)
                    self.creators[0].backward(new, self)
                    new = Tensor(self.grad.__neg__().data)
                    self.creators[1].backward(new, self)
                    
                if self.creation_op == "mul":
                    new = self.grad.data * self.creators[1].data
                    self.creators[0].backward(new, self)
                    new = self.grad.data * self.creators[0].data
                    self.creators[1].backward(new, self)
                    
                if self.creation_op == "mm":
                    activation = self.creators[0]
                    weights = self.creators[1]
                    new = self.grad.mm(weights.transpose())
                    activation.backward(new)
                    new = self.grad.transpose().mm(activation).transpose()
                    weights.backward(new)
                    
                if self.creation_op == "transpose":
                    self.creators[0].backward(self.grad.transpose())
                    
                if "sum" in self.creation_op:
                    dim = int(self.creation_op.split("_")[1])
                    ds = self.creators[0].data.shape[dim]
                    self.creators[0].backward(self.grad.expand_dimension(dim,ds))
                    
                if "expand" in self.creation_op:
                    dim = int(self.creation_op.split("_")[1])
                    self.creators[0].backward(self.grad.sum(dim))

    def sum(self, dimension):
        if self.autograd:
            return Tensor(self.data.sum(dimension),
                         autograd=True,
                         creators=[self],
                         creation_op="sum_" +str(dimension))
        return Tensor(self.data.sum(dimension))
    
    def expand_dimension(self, dimension, copies):
        
        trans_cmd = list(range(0, len(self.data.shape)))
        trans_cmd.insert(dimension, len(self.data.shape))
        new_shape = list(self.data.shape) + [copies]
        new_data = self.data.repeat(copies).reshape(new_shape)
        new_data = new_data.transpose(trans_cmd)
        
        if self.autograd:
            return Tensor(new_data,
                         autograd=True,
                         creators=[self],
                         creation_op="expand_"+str(dimension))
        return Tensor(new_data)
    
    def transpose(self):
        if self.autograd:
            return Tensor(self.data.transpose(),
                         autograd=True,
                         creators=[self],
                         creation_op="transpose")
        return Tensor(self.data.transpose())
    
    def mm(self, x):
        if self.autograd:
            return Tensor(self.data.dot(x.data),
                         autograd=True,
                         creators=[self, x],
                         creation_op="mm")
        return Tensor(self.data.dot(x.data))
        
    def __add__(self, other):
        if self.autograd and other.autograd:
            return Tensor(self.data + other.data,
                         autograd=True,
                         creators=[self, other],
                         creation_op="add")
        return Tensor(self.data + other.data)
    
    def __neg__(self):
        if self.autograd:
            return Tensor(self.data * -1,
                         autograd=True,
                         creators=[self],
                         creation_op="neg")
        return Tensor(self.data * -1)
    
    def __sub__(self, other):
        if self.autograd and other.autograd:
            return Tensor(self.data - other.data,
                         autograd=True,
                         creators=[self, other],
                         creation_op="sub")
        return Tensor(self.data - other.data)
    
    def __mul__(self, other):
        if self.autograd and other.autograd:
            return Tensor(self.data * other.data,
                         autograd=True,
                         creators=[self, other],
                         creation_op="mul")
        return Tensor(self.data * other.data)
    
    
    def __repr__(self):
        return str(self.data.__repr__())
    
    def __str__(self):
        return str(self.data.__str__())

In [89]:
class SGD(object):
    
    def __init__(self, parameters, alpha=0.1):
        self.parameters = parameters
        self.alpha = alpha
        
    def zero(self):
        for parameter in self.parameters:
            parameter.grad.data *= 0
    
    def step(self, zero=True):
        for parameter in self.parameters:
            parameter.data -= parameter.grad.data*self.alpha
            
            if zero:
                parameter.grad.data *= 0

In [90]:
class Layer(object):
    
    def __init__(self):
        self.parameters = list()
        
    def get_parameter(self):
        return self.parameters
    
class Linear(Layer):
    
    def __init__(self, n_inputs, n_output):
        super().__init__()
        w = np.random.randn(n_inputs, n_output)*np.sqrt(2.0/(n_inputs))
        self.weight = Tensor(w, autograd=True)
        self.bias = Tensor(np.zeros(n_output), autograd=True)
        
        self.parameters.append(self.weight)
        self.parameters.append(self.bias)
    
    def forward(self, input):
        return input.mm(self.weight) + self.bias.expand_dimension(0, len(input.data))
    
class Sequential(Layer):
    
    def __init__(self, layers = list()):
        super().__init__()
        
        self.layers = layers
        
    def add(self, layer):
        
        self.layers.append(layer)
    
    def forward(self, input):
        for layer in self.layers:
            input = layer.forward(input)
        return input
    
    def get_parameter(self):
        parameters = list()
        for layer in self.layers:
            parameters +=layer.get_parameter()
        return parameters
            
class MSELoss(Layer):
    
    def __init__(self):
        super().__init__()
        
    def forward(self, predictions, target):
        return ((predictions - target)*(predictions - target)).sum(0)
        
        

In [91]:
np.random.seed(0)

In [95]:
data = Tensor(np.array([[0,0], [0,1], [1,0], [1,1]]), autograd=True)
target = Tensor(np.array([[0], [1], [0], [1]]), autograd=True)

model = Sequential([Linear(2,3), Linear(3,1)])

optmizer = SGD(parameters=model.get_parameter(), alpha=0.05)
criterion = MSELoss()

for i in range(10):
    
    pred = model.forward(data)
    
    loss = criterion.forward(pred, target)
    
    loss.backward()
    
    optmizer.step()
    
    print(loss)

[7.75285243]
[3.62819194]
[1.83000021]
[0.96674948]
[0.55857984]
[0.34433998]
[0.22350151]
[0.15043032]
[0.10386531]
[0.07297629]


In [52]:
(5 + 2*6 +3*6 + 4*9 + 5*18 + 6*25 + 7*31 + 8*120 + 9*25 + 10*5)

1763