In [47]:
import numpy as np
import pickle
import gzip

In [48]:
URL='http://deeplearning.net/data/mnist/'
FILENAME='mnist.pkl.gz'

def load_mnist(filename):
    return pickle.load(gzip.open(filename, 'rb'), encoding='latin-1')

In [49]:
data = load_mnist(URL + FILENAME)

FileNotFoundError: [Errno 2] No such file or directory: 'http://deeplearning.net/data/mnist/mnist.pkl.gz'

In [17]:
class Relu():
    def forward(self, x):
        self.old_x = np.copy(x)
        return np.clip(x, 0, None)
    
    def backward(self, grad):
        return np.where(self.old_x > 0, grad, 0)

In [3]:
class Sigmoid():
    def forward(self, x):
        self.old_y = np.exp(x)/(1. + np.exp(x))
        return self.old_y
    
    def backward(self, grad):
        differentiation = self.old_y * (1 - self.old_y)
        return differentiation * grad

In [6]:
class Softmax():
    def forward(self, x):
        self.old_y = (np.exp(x) / np.exp(x).sum(axis = 1)[:, None])
        return self.old_y

    def backward(self, grad):
        self.old_y * (grad - (grad * self.old_y).sum(axis = 1))[:, None]

In [9]:
class CrossEntropy():
    def forward(self,x,y):
        self.old_x = x.clip(min=1e-8,max=None)
        self.old_y = y
        return (np.where(y==1,-np.log(self.old_x), 0)).sum(axis=1)

    def backward(self):
        return np.where(self.old_y == 0, 1/(1-self.old_x), -1 * (1/self.old_x))

In [10]:
class Linear():
    def __init__(self,n_in,n_out):
        self.weights = np.random.randn(n_in,n_out) * np.sqrt(2/n_in)
        self.biases = np.zeros(n_out)

    def forward(self, x):
        self.old_x = x
        return np.dot(x,self.weights) + self.biases

    def backward(self,grad):
        self.grad_b = grad.mean(axis=0)
        self.grad_w = (np.matmul(self.old_x[:,:,None],grad[:,None,:])).mean(axis=0)
        return np.dot(grad,self.weights.transpose())

In [13]:
class Model():
    def __init__(self, layers, cost):
        self.layers = layers
        self.cost = cost

    def forward(self,x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def loss(self,x,y):
        return self.cost.forward(self.forward(x),y)

    def backward(self):
        grad = self.cost.backward()
        for i in range(len(self.layers)-1,-1,-1):
            grad = self.layers[i].backward(grad)

In [18]:
net = Model([Linear(784,100), Relu(), Linear(100,10), Softmax()], CrossEntropy())

In [19]:
def train(model,lr,nb_epoch,data):
    for epoch in range(nb_epoch):
        running_loss = 0.
        num_inputs = 0
        for mini_batch in data:
            inputs,targets = mini_batch
            num_inputs += inputs.shape[0]
            #Forward pass + compute loss
            running_loss += model.loss(inputs,targets).sum()
            #Back propagation
            model.backward()
            #Update of the parameters
            for layer in model.layers:
                if type(layer) == Linear:
                    layer.weights -= lr * layer.grad_w
                    layer.biases -= lr * layer.grad_b
        print(f'Epoch {epoch+1}/{nb_epoch}: loss = {running_loss/num_inputs}')

In [21]:
data = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])

In [41]:
data[None :, :]

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])