In [145]:
import random
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [146]:
# base class
class Layer:
    def __init__(self):
        self.input = None
        self.output = None
       
    # compute the output Y of a layer for a give input X
    def forward_prop(self, input):
        raise NotImplementedError
    
    # computes dE/dX for a given dE/dY (and update parameter if any)
    def backward_prop(self, output_err, learning_rate):
        raise NotImplementedError

In [147]:
# activation functions
def relu(input_value):
    return np.maximum(input_value, 0)


def softmax_activation(x):
    # activate = np.exp(z) / sum(np.exp(z))
    # return activate
    output = np.exp(x) / np.sum(np.exp(x), axis=0)
    return output

def relu_prime(x):
    return x > 0

def one_hot_fn(y_label):
    one_hot_label = np.zeros((y_label.size, y_label.max() + 1))
    one_hot_label[np.arange(y_label.size), y_label] = 1
    one_hot_label = one_hot_label.T
    return one_hot_label

In [148]:
class ConnectLayer(Layer):
    # input_size = number of input neurons
    # output_size = number of output neurons
    
    def __init__(self, input_size, output_size):
        # self.y = y
        # self.epochs = epochs
        # self.learning_rate = learning_rate
        # self.w = np.random.randn(input_size, output_size) * np.sqrt(1 / (input_size + output_size))
        # self.b = 1
        self.w = np.random.rand(input_size, output_size) - 0.5
        self.b = np.random.rand(1, output_size) - 0.5
        
    # return output for a given input
    def forward_prop(self, input_ds):
        self.input = input_ds
        # self.output = np.dot(self.input, self.w) + self.b
        self.output = np.dot(self.input, self.w) + self.b
        return self.output
    
    # computes dE/dW, dE/dB for a given output_error=dE/dY. Returns input_err=dE/dX
    def backward_prop(self, o_err, learning_rate):
        # print(o_err.shape)
        # print(self.input.shape)
        input_err = np.dot(o_err, self.w.T)
        # print(input_err.shape)
        if len(self.input.shape) == 1:
            self.input = np.expand_dims(self.input, axis=0)
        w_err = np.dot(self.input.T, o_err)
        # w_err = self.input.T.dot(o_err)
        # dB = o_err
        
        # self.output_error = self.y - self.output
        
        # update parameters
        self.w -= learning_rate * w_err + 0.5 * self.w
        self.b -= learning_rate * o_err
        # print(self.b)
        return input_err

In [149]:
class Activation(Layer):
    def __init__(self, activation, activation_prime):
        self.activation = activation
        self.activation_prime = activation_prime
        
    # return the activation input
    def forward_prop(self, input_ds):
        self.input = input_ds
        self.output = self.activation(self.input)
        return self.output
    
    # return input_error=dE/dX for a given output_error=dE/dY
    def backward_prop(self, output_err, learning_rate):
        return self.activation_prime(self.input) * output_err

In [150]:
# loss function and its prime
def mse(y_true, y_pred):
    return np.mean(np.power(y_true - y_pred, 2))

def mse_prime(y_true, y_pred):
    return 2 * (y_pred - y_true) / y_true.size

def sigmoid(x): 
    return 1.0/(1.0 + np.exp(-x))

def deri_sigmoid(x):
    return sigmoid(x) * (1 - sigmoid(x))

In [151]:
class Network:
    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_prime = None

    # add layer to network
    def add(self, layer):
        self.layers.append(layer)

    # set loss to use
    def use(self, loss, loss_prime):
        self.loss = loss
        self.loss_prime = loss_prime

    # predict output for given input
    def predict(self, input_data):
        # sample dimension first
        samples = len(input_data)
        result = []

        # run network over all samples
        for i in range(samples):
            # forward propagation
            output = input_data[i]
            for layer in self.layers:
                output = layer.forward_prop(output)
            result.append(output)

        return result

    # train the network
    def gradient_descent(self, x_train, y_train, epochs, learning_rate):
        # sample dimension first
        samples = len(x_train)

        # training loop
        for i in range(epochs):
            err = 0
            for j in range(samples):
                # forward propagation
                output = x_train[j]
                for layer in self.layers:
                    output = layer.forward_prop(output)

                # compute loss (for display purpose only)
                err += self.loss(y_train[j], output)

                # backward propagation
                error = self.loss_prime(y_train[j], output)
                for layer in reversed(self.layers):
                    error = layer.backward_prop(error, learning_rate)

            # calculate average error on all samples
            err /= samples
            # if i % 10 == 0:
            print('epoch %d/%d   error=%f' % (i+1, epochs, err))

In [152]:
train_ds = np.loadtxt('../assets/training_dataset.txt', dtype=float)
test_ds = np.loadtxt('../assets/test_dataset.txt', dtype=float)

In [153]:
def data_extract(train_data):
    data_list = list()
    label_list = list()
    for val in train_data:
        data_list.append(val[0:784])
        label_list.append(int(val[784]))
    data_list = np.array(data_list)
    label_list = np.array(label_list)
    return data_list, label_list

np.random.shuffle(train_ds)

train_dataset, y_label = data_extract(train_ds)
train_dataset = train_dataset
x_train = train_dataset
# x_train = x_train.reshape(x_train[0], 1, 28*28)
x_train = x_train / 255

In [154]:
# Network
net = Network()
net.add(ConnectLayer(784, 150))                # input_shape=(1, 28*28)    ;   output_shape=(1, 100)
net.add(Activation(relu, relu_prime))
net.add(ConnectLayer(150, 50))                   # input_shape=(1, 100)      ;   output_shape=(1, 50)
net.add(Activation(relu, relu_prime))
net.add(ConnectLayer(50, 10))                   # input_shape=(1, 100)      ;   output_shape=(1, 50)
net.add(Activation(sigmoid, deri_sigmoid))
m, n = x_train.shape


In [155]:
x_train.shape

(4000, 784)

In [156]:
net.use(mse, mse_prime)
net.gradient_descent(x_train, y_label, epochs=500, learning_rate=0.01)

# test on 3 samples
out = net.predict(y_label[0:3])
print("\n")
print("predicted values : ")
print(out, end="\n")
print("true values : ")
print(y_label[0:3])

epoch 1/500   error=20.658304
epoch 2/500   error=20.517911
epoch 3/500   error=20.510354
epoch 4/500   error=20.507308
epoch 5/500   error=20.505652
epoch 6/500   error=20.504608
epoch 7/500   error=20.503891
epoch 8/500   error=20.503367
epoch 9/500   error=20.502967
epoch 10/500   error=20.502652
epoch 11/500   error=20.502398
epoch 12/500   error=20.502188
epoch 13/500   error=20.502012
epoch 14/500   error=20.501862
epoch 15/500   error=20.501733
epoch 16/500   error=20.501621
epoch 17/500   error=20.501522
epoch 18/500   error=20.501435
epoch 19/500   error=20.501357
epoch 20/500   error=20.501287
epoch 21/500   error=20.501224
epoch 22/500   error=20.501167
epoch 23/500   error=20.501115
epoch 24/500   error=20.501067
epoch 25/500   error=20.501024
epoch 26/500   error=20.500983
epoch 27/500   error=20.500946
epoch 28/500   error=20.500912
epoch 29/500   error=20.500880
epoch 30/500   error=20.500850
epoch 31/500   error=20.500822
epoch 32/500   error=20.500796
epoch 33/500   er

KeyboardInterrupt: 