In [18]:
import numpy as np

from layer import Layer

In [19]:
from layers import Dense, reLU, Sigmoid, Reshape,Conv, Tanh, load_data, Layer
from losses import mse, mse_prime


In [20]:
# Load the train/test arrays through the project's `load_data` helper.
# NOTE(review): the meaning of `1500` and `flag=True` lives in `layers`; the shape
# checks below report 1500 training samples, so 1500 is presumably a sample count — confirm.
x_train, y_train, x_test, y_test = load_data(1500, flag=True)

In [21]:
# Inspect the training-input shape; each sample should be (1, 28, 28) to match
# the input_shape given to the Padding layer in the network definition.
x_train.shape

(1500, 1, 28, 28)

In [22]:
# Inspect the label shape; the second dimension should equal the width of the
# final Dense layer in the network definition (2).
y_train.shape

(1500, 2)

In [23]:
class Padding(Layer):
    """Zero-pad the two trailing axes of a channel-first 3-D array.

    Parameters
    ----------
    input_shape : tuple
        Shape of one un-padded sample. ``input_shape[0]`` is stored as
        ``num_channels``.
    num_pads : int
        Number of zero rows/columns added on every side of each channel.
        ``0`` turns the layer into a pass-through.
    """

    def __init__(self, input_shape, num_pads):
        self.num_pads = num_pads
        self.input_shape = input_shape
        self.num_channels = input_shape[0]
        super().__init__()

    def forward(self, x):
        """Return a zero-padded float copy of ``x``.

        ``x`` must be 3-D; axes 1 and 2 each grow by ``2 * num_pads``. The
        result is also kept on ``self.padded_x``.
        """
        pad = self.num_pads
        rows, cols = x.shape[1], x.shape[2]
        self.padded_x = np.zeros((x.shape[0], rows + 2 * pad, cols + 2 * pad))
        # Write every channel of x into the centre of the zero canvas in one go.
        # Iterating over num_channels instead would leave any channel beyond
        # input_shape[0] silently zeroed.
        self.padded_x[:, pad:pad + rows, pad:pad + cols] = x
        return self.padded_x

    def backward(self, grad, lr):
        """Strip the padded border from ``grad`` and return the interior.

        ``lr`` is accepted for interface compatibility with other layers and
        is unused because this layer has no parameters.
        """
        pad = self.num_pads
        # Explicit end bounds instead of `-pad`: with pad == 0 the slice
        # `0:-0` is empty and would discard the whole gradient.
        return grad[:, pad:grad.shape[1] - pad, pad:grad.shape[2] - pad]

In [24]:
def predict(network, input):
    """Feed ``input`` through each layer of ``network`` in order.

    Every element of ``network`` must expose ``forward(value)``. The value
    returned by the last layer is the result; an empty network returns
    ``input`` unchanged.
    """
    activation = input
    for stage in network:
        activation = stage.forward(activation)
    return activation

In [25]:
def predict_test_set(network, input):
    """Run ``predict`` on every sample of ``input`` and stack the results.

    Parameters
    ----------
    network : sequence of layers
        Passed unchanged to ``predict``.
    input : iterable of samples
        One forward pass is made per element.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_samples, n_outputs)``; row ``i`` is the
        flattened network output for sample ``i``.
    """
    # Take the output width from the predictions themselves rather than from
    # network[-2].w, so the function does not depend on where the last
    # weighted layer sits in the stack.
    rows = [np.reshape(predict(network, sample), -1) for sample in input]
    if not rows:
        # No prediction to measure: keep the historical layer-based width so
        # an empty test set still yields a (0, n_outputs) array.
        return np.zeros((0, network[-2].w.shape[1]))
    return np.stack(rows).astype(float)
        

In [26]:
def train(network, loss, loss_prime, x_train, y_train, epochs = 1000, learning_rate = 0.01, verbose = True):
    """Train ``network`` one sample at a time for ``epochs`` passes.

    For each (sample, target) pair the function runs a forward pass with
    ``predict``, adds ``loss(target, prediction)`` to the epoch total, and
    pushes ``loss_prime(target, prediction)`` backwards through the layers in
    reverse order, calling ``layer.backward(gradient, learning_rate)`` on each.
    The mean loss per epoch is printed when ``verbose`` is true. Nothing is
    returned.
    """
    for e in range(epochs):
        error = 0
        for sample, target in zip(x_train, y_train):
            prediction = predict(network, sample)

            # accumulate this sample's loss
            error += loss(target, prediction)

            # propagate the loss gradient from the last layer to the first
            signal = loss_prime(target, prediction)
            for stage in reversed(network):
                signal = stage.backward(signal, learning_rate)

        error /= len(x_train)
        if not verbose:
            continue
        print(f"{e + 1}/{epochs}, error={error}")


In [27]:
class Softmax(Layer):
    """Softmax activation taken over all elements of its input."""

    def forward(self, input):
        """Return ``exp(input) / sum(exp(input))`` and cache it on ``self.output``."""
        # Subtracting the maximum does not change the softmax value but keeps
        # np.exp from overflowing to inf (and the result to nan) on large inputs.
        shift = np.max(input) if np.size(input) else 0
        exps = np.exp(input - shift)
        self.output = exps / np.sum(exps)
        return self.output

    def backward(self, output_gradient, learning_rate):
        """Return the gradient with respect to the layer input.

        Uses the softmax Jacobian ``J[i, j] = s_i * (delta_ij - s_j)``, i.e.
        ``diag(s) - outer(s, s)``, applied to the flattened upstream gradient.
        The result is reshaped to the shape of ``output_gradient``.
        ``learning_rate`` is unused because the layer has no parameters.
        """
        probs = np.reshape(self.output, -1)
        # Building the Jacobian from the flattened output keeps the result
        # correct for 1-D, row-vector and column-vector activations; the
        # broadcast form `(I - s.T) * s` is only correct for (1, n) outputs.
        jacobian = np.diag(probs) - np.outer(probs, probs)
        upstream = np.reshape(output_gradient, -1)
        return np.reshape(np.dot(jacobian, upstream), np.shape(output_gradient))


In [28]:
# Layer stack consumed in order by predict()/train(); each layer's output feeds the next.
network=[
    # Zero-pad each 28x28 channel by 1 on every side -> (1, 30, 30), the input shape given to Conv below.
    Padding((1,28,28),1),
    # Conv comes from the project's `layers` module; arguments are presumably
    # (input_shape, kernel_size, depth) — TODO confirm against its definition.
    Conv((1,30,30), 3, 3),
    reLU(),
    # Reshape arguments look like (incoming shape, flattened size); 3*28*28 matches
    # the input width of the Dense layer below — TODO confirm the signature.
    Reshape((3,1,28,28), 3*28*28),
    # The third Dense argument (0.1) is not explained in this notebook — TODO confirm its meaning.
    Dense(3*28*28, 60,0.1),
    reLU(),
    # Two outputs, matching the second dimension of y_train.
    Dense(60, 2,0.1),
    Sigmoid()
]


In [29]:
# Fit the network with per-sample gradient steps using mean-squared-error loss.
# NOTE(review): the saved log below stops at epoch 29 of 100 — presumably the run
# was interrupted or the output truncated; confirm before trusting later results.
train(network, mse, mse_prime, x_train, y_train, epochs = 100, learning_rate = 0.01, verbose = True)

1/100, error=0.4988960017972826
2/100, error=0.1455365803856948
3/100, error=0.010397802753522374
4/100, error=0.008858056215392273
5/100, error=0.007961468767282506
6/100, error=0.0073359857223030435
7/100, error=0.006863491544110227
8/100, error=0.00649186250489715
9/100, error=0.006187682029084591
10/100, error=0.005933134316048035
11/100, error=0.005714922424882422
12/100, error=0.005526070272722492
13/100, error=0.005359171565936962
14/100, error=0.00521136129243648
15/100, error=0.005079038086412111
16/100, error=0.004958970034512359
17/100, error=0.0048493507915675195
18/100, error=0.004748780572255683
19/100, error=0.0046561528950783805
20/100, error=0.0045714017002102215
21/100, error=0.004492291935129788
22/100, error=0.004418494983520312
23/100, error=0.004349240404031756
24/100, error=0.004284531521062284
25/100, error=0.004224752122125378
26/100, error=0.004167062446576176
27/100, error=0.004112759079906638
28/100, error=0.004061113557372091
29/100, error=0.004012517991774

In [30]:
# Forward every test sample through the trained network; one row of raw outputs per sample.
predictions=predict_test_set(network, x_test)

In [31]:
# Reduce each output row to the index of its largest value (the predicted class).
# NOTE: this rebinds `predictions` to a 1-D array, so re-running this cell without
# re-running the previous one raises because axis 1 no longer exists.
predictions=np.argmax(predictions, axis=1)


In [32]:
# Convert each label row (presumably one-hot — confirm in load_data) to its class index.
# NOTE: this overwrites `y_test` with a 1-D array; running the cell a second time
# raises because axis 1 no longer exists.
y_test=np.argmax(y_test, axis=1)


In [33]:
# Spot-check the first decoded test label.
y_test[0]

1

In [34]:
def accuracy(predictions, y_test):
    """Return the fraction of positions where ``predictions`` equals ``y_test``."""
    hits = predictions == y_test
    return np.mean(hits)

# Share of test samples whose predicted class index matches the label index.
print(accuracy(predictions, y_test))

0.9993333333333333
