In [96]:
import numpy as np
from keras.datasets import mnist
from scipy.signal import correlate2d, convolve2d
from layer import Layer
from keras.utils import to_categorical


In [64]:

def preprocess_data(x, y, limit):
    zero_index = np.where(y == 0)[0][:limit]
    one_index = np.where(y == 1)[0][:limit]
    all_indices = np.hstack((zero_index, one_index))
    all_indices = np.random.permutation(all_indices)
    x, y = x[all_indices], y[all_indices]
    x = x.reshape(len(x), 1, 28, 28)
    x = x.astype("float32") / 255
    
    return x, y

# load MNIST from server, limit to 100 images per class since we're not training on GPU
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, y_train = preprocess_data(x_train, y_train, 1000)
x_test, y_test = preprocess_data(x_test, y_test, 1000)

In [65]:
x_train.shape

(2000, 1, 28, 28)

In [78]:
x_test=x_test.reshape(-1,28*28)
y_test=y_test.reshape(-1,1)

In [80]:
x_train.shape

(2000, 784)

In [66]:
x_train=x_train.reshape(-1,28*28)
y_train=y_train.reshape(-1,1)

In [67]:
y_train[0]

array([0], dtype=uint8)

In [68]:
def binary_crossentropy(y_true, y_pred):
    return -y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred)

In [69]:
def binary_crossentropy_derivative(y_true, y_pred):
   return (y_pred - y_true) / (y_pred * (1 - y_pred))

In [70]:
class Layer:
    def __init__(self):
        self.input = None
        self.output = None

    def forward(self, input):
        # TODO: return output
        pass

    def backward(self, output_gradient, learning_rate):
        # TODO: update parameters and return input gradient
        pass


In [71]:
class Dense(Layer):
    def __init__(self, input_size, output_size, alpha):
        self.weights = np.random.rand(input_size, output_size)-0.5
        self.bias = np.random.rand(1,output_size)-0.5
        self.alpha = alpha
    def forward(self, input):
        self.input = input
        self.output = np.matmul(input, self.weights) + self.bias
        return self.output

    def backward(self, output_gradient, learning_rate):
        weights_gradient = self.input.T.dot(output_gradient) + self.alpha * self.weights
        input_gradient = output_gradient.dot(self.weights.T)
        
      
        self.weights -= learning_rate * weights_gradient
        self.bias -= learning_rate * output_gradient
        return input_gradient


In [72]:

class reLU(Layer):
    def __init__(self):
        pass

    def forward(self, input):
        self.input = input
        self.output = np.maximum(self.input, 0)
        return self.output

    def backward(self, output_gradient, learning_rate):
        return output_gradient * (self.input > 0)

In [73]:

class Sigmoid(Layer):
    def __init__(self):
        pass

    def forward(self, input):
        self.input = input
        self.output = 1 / (1 + np.exp(-self.input))
        return self.output

    def backward(self, output_gradient, learning_rate):
        return output_gradient * self.output * (1 - self.output)

In [97]:
class Convolutional(Layer):
    def __init__(self, input_shape, kernel_size, depth):
        input_depth, input_height, input_width = input_shape
        self.depth = depth
        self.input_shape = input_shape
        self.input_depth = input_depth
        self.output_shape = (depth, input_height - kernel_size + 1, input_width - kernel_size + 1)
        self.kernels_shape = (depth, input_depth, kernel_size, kernel_size)
        self.kernels = np.random.randn(*self.kernels_shape)
        self.biases = np.random.randn(*self.output_shape)

    def forward(self, input):
        self.input = input
        self.output = np.copy(self.biases)
        for i in range(self.depth):
            for j in range(self.input_depth):
                self.output[i] += correlate2d(self.input[j], self.kernels[i, j], "valid")
        return self.output

    def backward(self, output_gradient, learning_rate):
        kernels_gradient = np.zeros(self.kernels_shape)
        input_gradient = np.zeros(self.input_shape)

        for i in range(self.depth):
            for j in range(self.input_depth):
                kernels_gradient[i, j] = correlate2d(self.input[j], output_gradient[i], "valid")
                input_gradient[j] += convolve2d(output_gradient[i], self.kernels[i, j], "full")

        self.kernels -= learning_rate * kernels_gradient
        self.biases -= learning_rate * output_gradient
        return input_gradient

In [74]:
def train(network, x_train, y_train, learning_rate, epochs, loss_function, loss_derivative):
    for epoch in range(epochs):
        error = 0
        for x, y in zip(x_train, y_train):
            # forward
            output = x.reshape(1,-1)
            for layer in network:
                output = layer.forward(output)
            error += binary_crossentropy(y, output)
           
           
            # backward
            output_gradient = binary_crossentropy_derivative(y, output)
           
            for layer in reversed(network):
                output_gradient = layer.backward(output_gradient, learning_rate)
               
        print(f"Epoch {epoch}, error: {error}")

In [77]:
def predict(network, x_dataset):
    results=np.zeros(x_dataset.shape[0])
    for index,x in enumerate(x_dataset):
        output = x.reshape(1,-1)
        for layer in network:
            output = layer.forward(output)
        results[index]=output
    return results



In [75]:
network=[
    Convolutional((1, 28, 28), 3, 5),
    Dense(28*28, 40,0.01),
    reLU(),
    Dense(40, 1,0.01),
    Sigmoid()
]


In [76]:
train(network, x_train, y_train, 0.01, 100, binary_crossentropy, binary_crossentropy_derivative)

Epoch 0, error: [[-10688.62416122]]
Epoch 1, error: [[-2801.05462769]]
Epoch 2, error: [[-2268.11180785]]
Epoch 3, error: [[-2107.32383404]]
Epoch 4, error: [[-2016.30990799]]
Epoch 5, error: [[-1968.07766726]]
Epoch 6, error: [[-1938.48682008]]
Epoch 7, error: [[-1904.22446223]]
Epoch 8, error: [[-1878.50473727]]
Epoch 9, error: [[-1855.15801487]]
Epoch 10, error: [[-1838.9511582]]
Epoch 11, error: [[-1824.68444313]]
Epoch 12, error: [[-1811.95402489]]
Epoch 13, error: [[-1799.38090077]]
Epoch 14, error: [[-1789.5923762]]
Epoch 15, error: [[-1781.1138455]]
Epoch 16, error: [[-1773.58185686]]
Epoch 17, error: [[-1766.97275018]]
Epoch 18, error: [[-1761.38415264]]
Epoch 19, error: [[-1755.55908487]]
Epoch 20, error: [[-1751.9153298]]
Epoch 21, error: [[-1746.62631923]]
Epoch 22, error: [[-1742.39622398]]
Epoch 23, error: [[-1737.8444319]]
Epoch 24, error: [[-1733.91352699]]
Epoch 25, error: [[-1730.81795444]]
Epoch 26, error: [[-1726.58851016]]
Epoch 27, error: [[-1723.74061576]]
Epoch 

In [81]:
results=predict(network,x_test)

  results[index]=output


In [87]:
results_binary = np.where(results > 0.5, 1, 0)

In [91]:
results_binary=results_binary.reshape(-1,1)

In [95]:
from sklearn.metrics import accuracy_score

accuracy_score(y_test, results_binary)

0.998989898989899