In [1]:
import numpy as np 
import pandas as pd 
from __future__ import print_function


In [2]:
class Layer:
    """Base affine layer computing ``inputs @ weights + bias``.

    Parameters
    ----------
    units : int
        Number of output features (width of ``bias`` and of ``weights``).
    input_units : int or None
        Number of input features. When ``None`` the weight matrix is created
        on the first ``forward`` call, using the last dimension of the inputs.
        (The previous code tried to read ``input.shape`` in the constructor,
        but ``input`` there is the Python builtin, so construction always
        raised ``AttributeError``.)
    """

    def __init__(self, units=10, input_units=None):
        super().__init__()
        self.units = units
        self.bias = np.zeros((units,))
        if input_units is None:
            # Input width unknown yet: defer weight creation to forward().
            self.weights = None
        else:
            self.weights = np.random.randn(input_units, units)

    def forward(self, inputs):
        """Return ``inputs @ weights + bias``, creating ``weights`` if needed."""
        if self.weights is None:
            self.weights = np.random.randn(np.shape(inputs)[-1], self.units)
        output = np.matmul(inputs, self.weights) + self.bias
        return output


class Dense(Layer):
    """Fully connected layer that applies its own gradient-descent step.

    Parameters
    ----------
    input_units : int
        Number of input features (rows of ``weights``).
    output_units : int
        Number of output features (columns of ``weights``, length of ``bias``).
    learning_rate : float
        Step size used by ``backward`` when updating ``weights`` and ``bias``.
    """

    def __init__(self, input_units, output_units, learning_rate=0.1):
        # The base class builds weights ~ N(0, 1) of shape
        # (input_units, output_units) and a zero bias of length output_units.
        super().__init__(units=output_units, input_units=input_units)
        self.learning_rate = learning_rate

    def forward(self, inputs):
        """Cache ``inputs`` for the backward pass and return the affine map."""
        self.inputs = inputs
        return np.matmul(self.inputs, self.weights) + self.bias

    def backward(self, grad_output):
        """Back-propagate ``grad_output`` and update the parameters in place.

        ``grad_output`` is the gradient of the loss with respect to this
        layer's output (same shape as the value returned by ``forward``).
        Returns the gradient with respect to the layer's input.
        """
        # Must be computed before the weights are modified below.
        input_grad = np.dot(grad_output, np.transpose(self.weights))

        # d(loss)/d(weights) = inputs^T @ grad_output
        grad_weights = np.dot(np.transpose(self.inputs), grad_output)
        grad_bias = np.sum(grad_output, axis=0)

        self.weights = self.weights - self.learning_rate * grad_weights
        # Was `self.learning`, an attribute that is never defined.
        self.bias = self.bias - self.learning_rate * grad_bias
        return input_grad



ReLU(x) = max(0, x)

In [3]:
class ReLU(Layer):
    """Element-wise rectified linear activation: ``max(0, x)``."""

    def __init__(self):
        # No parameters to create; the Layer constructor is intentionally
        # not invoked.
        pass

    def forward(self, inputs):
        """Cache ``inputs`` for the backward pass and return ``max(0, inputs)``."""
        self.inputs = inputs
        return np.maximum(0, inputs)

    def backward(self, out_grad):
        """Pass ``out_grad`` through where the cached input was positive."""
        relu_grad = self.inputs > 0
        return out_grad * relu_grad

    def anotherback(self, out_grad):
        """Alternative formulation of ``backward`` using ``np.where``.

        The three arguments must be passed separately; the previous code
        wrapped them in a single tuple, which ``np.where`` treats as one
        (malformed) condition argument.
        """
        return np.where(self.inputs > 0, out_grad, 0)
        


In [5]:
# class Softmax_crossentropy:
#     def __init__(self):
#         pass
#     def __call__(self,logits,inputs):
#         self.inputs=inputs
#         self.logits=logits
#         logits_for_ans=logits[np.arange(len(logits)),inputs]
#         xentropy=-logits_for_ans+np.log(np.sum(np.exp(logits),axis=-1))
#         return xentropy
#     def backward(self):
    
#         ones_for_answers = np.zeros_like(self.logits)
#         ones_for_answers[np.arange(len(self.logits)),self.inputs] = 1
        
#         softmax = np.exp(self.logits) / np.exp(self.logits).sum(axis=-1,keepdims=True)
        
#         return (- ones_for_answers + softmax) / self.logits.shape[0]  

In [6]:
class Sigmoid:
    """Element-wise logistic sigmoid activation.

    The instance is callable; ``forward`` is provided as an alias so the
    object exposes the same ``forward``/``backward`` pair as the other layers.
    """

    def __init__(self):
        pass

    def __call__(self, inputs):
        """Return ``1 / (1 + exp(-inputs))`` and cache it for ``backward``.

        The naive form ``exp(x) / (1 + exp(x))`` overflows to ``inf / inf``
        (NaN) for large positive ``x``. Using ``exp(-|x|)``, which is always
        in (0, 1], keeps both branches finite.
        """
        self.inputs = inputs
        x = np.asarray(inputs)
        decay = np.exp(-np.abs(x))
        self.sigmoid = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
        return self.sigmoid

    def forward(self, inputs):
        """Alias for calling the instance."""
        return self(inputs)

    def backward(self, grad):
        """Chain rule: ``sigmoid * (1 - sigmoid) * grad`` using the cached output."""
        return self.sigmoid * (1 - self.sigmoid) * grad
            

In [7]:
class Softmax():
    """Row-wise softmax over a 2-D array (normalises along ``axis=1``).

    The instance is callable; ``forward`` is provided as an alias so the
    object exposes the same ``forward``/``backward`` pair as the other layers.
    """

    def __init__(self):
        pass

    def __call__(self, x):
        """Return the softmax of each row of ``x`` and cache it for ``backward``."""
        x = np.asarray(x)
        # Subtracting the row max leaves the result unchanged mathematically
        # but prevents exp() from overflowing.
        exps = np.exp(x - np.max(x, axis=1, keepdims=True))
        # keepdims=True gives a (rows, 1) column so the division broadcasts
        # per row. The previous `[:None]` was a no-op slice, not `[:, None]`.
        self.soft = exps / exps.sum(axis=1, keepdims=True)
        return self.soft

    def forward(self, x):
        """Alias for calling the instance."""
        return self(x)

    def backward(self, grad):
        """Return the gradient w.r.t. the softmax input.

        Computes ``soft * (grad - sum(grad * soft, axis=1))`` row by row,
        using the output cached by the last forward call.
        """
        weighted = (grad * self.soft).sum(axis=1, keepdims=True)
        return self.soft * (grad - weighted)

In [8]:
class CrossEntropy():
    """Cross-entropy cost evaluated at the entries where the target equals 1."""

    def __init__(self):
        pass

    def __call__(self, x, y):
        """Return one loss value per row.

        ``x`` is clipped from below at 1e-8 before taking the logarithm; the
        clipped values and ``y`` are cached for ``backward``. Only positions
        where ``y == 1`` contribute ``-log(x)``; every other position adds 0.
        """
        self.old_x = x.clip(min=1e-8, max=None)
        self.old_y = y
        is_target = (y == 1)
        neg_log = -np.log(self.old_x)
        per_entry = np.where(is_target, neg_log, 0)
        return per_entry.sum(axis=1)

    def backward(self):
        """Return ``-1 / x`` at target positions and 0 elsewhere (cached values)."""
        is_target = (self.old_y == 1)
        inverse = -1 / self.old_x
        return np.where(is_target, inverse, 0)

In [None]:
class Model():
    """Sequential stack of layers followed by a cost function.

    Parameters
    ----------
    layers : sequence
        Objects applied in order. Each must provide ``backward(grad)`` and
        either a ``forward(x)`` method or be directly callable.
    cost : callable
        Called as ``cost(prediction, y)``; must also provide ``backward()``
        returning the gradient of the cost w.r.t. the prediction.
    """

    def __init__(self, layers, cost):
        self.layers = layers
        self.cost = cost

    def forward(self, x):
        """Feed ``x`` through every layer in order and return the result."""
        for layer in self.layers:
            # Some layers in this notebook (Sigmoid, Softmax) only implement
            # __call__, so fall back to calling the layer when it has no
            # `forward` method instead of raising AttributeError.
            apply = getattr(layer, "forward", layer)
            x = apply(x)
        return x

    def loss(self, x, y):
        """Run a forward pass on ``x`` and evaluate the cost against ``y``."""
        return self.cost(self.forward(x), y)

    def backward(self):
        """Propagate the cost gradient through the layers, last to first."""
        grad = self.cost.backward()
        for layer in reversed(self.layers):
            grad = layer.backward(grad)


In [None]:
def train(model,lr,nb_epoch,data):
    """Run ``nb_epoch`` passes over ``data`` and print the mean loss per epoch.

    Parameters
    ----------
    model : object
        Must provide ``loss(inputs, targets)`` (returning an array of
        per-sample losses), ``backward()`` and a ``layers`` iterable.
    lr : float
        Step size for layers that expose their gradients as ``grad_w`` /
        ``grad_b`` next to ``weights`` / ``biases``. Layers that update
        themselves inside ``backward()`` (as ``Dense`` does with its own
        ``learning_rate``) are left untouched, so ``lr`` has no effect on them.
    nb_epoch : int
        Number of passes over ``data``.
    data : iterable of (inputs, targets)
        Mini-batches; ``inputs.shape[0]`` is used as the batch size.
    """
    for epoch in range(nb_epoch):
        running_loss = 0.
        num_inputs = 0
        for inputs, targets in data:
            num_inputs += inputs.shape[0]
            # Forward pass + accumulate the summed loss of the batch
            running_loss += model.loss(inputs, targets).sum()
            # Back propagation
            model.backward()
            # Parameter update. The previous check was `type(layer) == Linear`,
            # but no `Linear` class exists in this notebook (NameError), so
            # select layers by the attributes the update actually needs.
            for layer in model.layers:
                if hasattr(layer, 'grad_w') and hasattr(layer, 'grad_b'):
                    layer.weights -= lr * layer.grad_w
                    layer.biases -= lr * layer.grad_b
        # Avoid ZeroDivisionError when `data` yields no samples.
        mean_loss = running_loss / num_inputs if num_inputs else float('nan')
        print(f'Epoch {epoch+1}/{nb_epoch}: loss = {mean_loss}')


In [None]:
def load_minibatches(batch_size=64):
    """Load the MNIST training split as a list of NumPy mini-batches.

    Relies on ``torch``, ``transforms`` and ``datasets`` being available in
    the notebook namespace (presumably ``torchvision.transforms`` and
    ``torchvision.datasets`` -- they are not imported in the visible cells;
    confirm before running). The dataset is downloaded into the current
    directory if it is not already there.

    Parameters
    ----------
    batch_size : int
        Number of samples per mini-batch (the last batch may be smaller).

    Returns
    -------
    list of (inputs, targets)
        ``inputs`` is a float64 array of shape (batch, 784) holding channel 0
        of each image flattened row by row; ``targets`` is a float64 one-hot
        array of shape (batch, 10).
    """
    # NOTE(review): 0.1307 / 0.3081 look like the usual MNIST mean / std -- confirm.
    tsfms = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
    trn_set = datasets.MNIST('.', train=True, download=True, transform=tsfms)
    trn_loader = torch.utils.data.DataLoader(trn_set, batch_size=batch_size, shuffle=True, num_workers=0)
    data = []
    for inputs_t, targets_t in trn_loader:
        n_samples = inputs_t.size(0)
        # Vectorised replacement for the per-pixel Python loops: flattening
        # channel 0 in row-major order puts pixel (j, k) at column j*28 + k.
        inputs = inputs_t[:, 0].reshape(n_samples, 28 * 28).numpy().astype(np.float64)
        # One-hot encode the integer labels in a single indexed assignment.
        targets = np.zeros((n_samples, 10))
        targets[np.arange(n_samples), targets_t.numpy()] = 1.
        data.append((inputs, targets))
    return data