In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from keras.datasets import mnist

In [None]:
class DataScaling:
    def simple_features_scaling(self, pData):
        max = np.max(pData)
        
        return pData / max  
    
    def min_max_scaling(self, pData):
        min = np.min(pData)
        max = np.max(pData)

        return (pData - min) / (max - min)
    

class Normalization:
    def standardization(self, pData):
        std = np.std(pData)
        mean = np.mean(pData)

        return (pData - mean) / std
    
    def mean_normal(self, pData):
        mean = np.mean(pData)
        max = np.max(pData)
        min = np.min(pData)

        return (pData - mean) / (max - min)

    def box_cox_normal(self, pData):
        ...


In [16]:
class ArtificialNeuralNetwork:
    def __init__(self, layers_size):
        # Init the layers size for Neural Network. For example layers_size = [2, 2, 1]
        self.layers_size = layers_size
        # Store all parameters including weight, bias, derivative
        self.parameters = {}
        # Initialize the length of layers size
        self.L = len(self.layers_size)
        # Initialize the length of data row
        self.n = 0
        # Save all cost for loop of each hidden layer
        self.costs = []

    # Initialize the value for weights and biases in network
    def initialize_parameters(self):
        np.random.seed(1)

        for l in range(1, len(self.layers_size)):
            self.parameters["W" + str(l)] = np.random.randn(self.layers_size[l], self.layers_size[l - 1])
            self.parameters["b" + str(l)] = np.random.randn(self.layers_size[l], 1)

    # This activation function is used in hidden layer of ANN
    def sigmoid(self, pZ):  
        return 1 / (1 + np.exp(-pZ))
    
    def sigmoid_derivative(self, pZ):
        s = self.sigmoid(pZ)
        return s * (1 - s)

    # This activation function is used in final layer of ANN
    # pZ is array of z input
    def softmax(self, pZ):
        expZ = np.exp(pZ - np.max(pZ))
        return expZ / np.sum(expZ, axis=0, keepdims=True)
    
    def forward(self, pX):
        store = {}

        A = pX.T
        for l in range(self.L - 1):
            Z = np.dot(self.parameters["W" + str(l + 1)], A) + self.parameters["b" + str(l + 1)]
            A = self.sigmoid(Z)
            store["A" + str(l + 1)] = A
            store["Z" + str(l + 1)] = Z
            store["W" + str(l + 1)] = self.parameters["W" + str(l + 1)]

        Z = np.dot(self.parameters["W" + str(self.L)], A) + self.parameters["b" + str(self.L)]
        A = self.softmax(Z)
        store["A" + str(self.L)] = A
        store["Z" + str(self.L)] = Z
        store["W" + str(self.L)] = self.parameters["W" + str(self.L)]

        return A, store
    
    def backward(self, pX, py, store):
        # This variable stores all derivative of weights and biases
        derivatives = {}

        store["A0"] = pX.T

        A = store["A" + str(self.L)]
        dZ = A - py.T

        dW = dZ.dot(store["A" + str(self.L - 1)].T) / self.n
        db = np.sum(dZ, axis=1, keepdims=True) / self.n
        dAPrev = store["W" + str(self.L)].T.dot(dZ)
    
        derivatives["dW" + str(self.L)] = dW
        derivatives["db" + str(self.L)] = db
    
        for l in range(self.L - 1, 0, -1):
            dZ = dAPrev * self.sigmoid_derivative(store["Z" + str(l)])
            dW = 1. / self.n * dZ.dot(store["A" + str(l - 1)].T)
            db = 1. / self.n * np.sum(dZ, axis=1, keepdims=True)
            
            if l > 1:
                dAPrev = store["W" + str(l)].T.dot(dZ)

            derivatives["dW" + str(l)] = dW
            derivatives["db" + str(l)] = db
    
        return derivatives

    def fit(self, pX, py, learning_rate=0.01, n_iterations=2500, mini_batch=50):
        np.random.seed(1)
    
        self.n = pX.shape[0]
    
        self.layers_size.insert(0, pX.shape[1])
    
        self.initialize_parameters()
        for loop in range(n_iterations):
            for i in range(0, self.n, mini_batch):
                X = pX[i:i + mini_batch]
                y = py[i:i + mini_batch]
                A, store = self.forward(X)
                
                cost = -np.mean(y * np.log(A.T + 1e-8))
                print((y * np.log(A.T + 1e-8))[:5])
                derivatives = self.backward(X, y, store)

                for l in range(1, self.L + 1):
                    self.parameters["W" + str(l)] = self.parameters["W" + str(l)] - learning_rate * derivatives["dW" + str(l)]
                    self.parameters["b" + str(l)] = self.parameters["b" + str(l)] - learning_rate * derivatives["db" + str(l)]

                if loop % 100 == 0:
                    print("Cost: ", cost, "Train Accuracy:", self.predict(X, y))

                if loop % 10 == 0:
                    self.costs.append(cost)

    def predict(self, pX, py):
        A, cache = self.forward(pX)
        y_hat = np.argmax(A, axis=0)
        py = np.argmax(py, axis=1)
        accuracy = (y_hat == py).mean()
        return accuracy * 100

    def plot_cost(self):
        plt.figure()
        plt.plot(np.arange(len(self.costs)), self.costs)
        plt.xlabel("epochs")
        plt.ylabel("cost")
        plt.show()

def pre_process_data(train_x, train_y, test_x, test_y):
    # Normalize
    train_x = train_x / 255.
    test_x = test_x / 255.

    enc = OneHotEncoder(sparse_output=False, categories='auto')
    train_y = enc.fit_transform(train_y.reshape(len(train_y), -1))
    
    test_y = enc.transform(test_y.reshape(len(test_y), -1))
    
    return train_x, train_y, test_x, test_y
    

if __name__ == '__main__':
    (train_x, train_y ), (test_x, test_y) = mnist.load_data()

    train_x = train_x.reshape(-1, 784)
    test_x = test_x.reshape(-1, 784)
    train_x = train_x.astype('float32')
    test_x = test_x.astype('float32')

    train_x, train_y, test_x, test_y = pre_process_data(train_x, train_y, test_x, test_y)
    
    print("train_x's shape: " + str(train_x.shape))
    print("test_x's shape: " + str(test_x.shape))
    
    layers_dims = [50, 512, 10]
    
    ann = ArtificialNeuralNetwork(layers_dims)
    ann.fit(train_x, train_y, learning_rate=0.1, n_iterations=1000, mini_batch=60000)
    print("Train Accuracy:", ann.predict(train_x, train_y))
    print("Test Accuracy:", ann.predict(test_x, test_y))
    ann.plot_cost()

train_x's shape: (60000, 784)
test_x's shape: (10000, 784)
[[-0.00000000e+00 -0.00000000e+00 -0.00000000e+00 -0.00000000e+00
  -0.00000000e+00 -1.84206807e+01 -0.00000000e+00 -0.00000000e+00
  -0.00000000e+00 -0.00000000e+00]
 [-1.84092220e+01 -0.00000000e+00 -0.00000000e+00 -0.00000000e+00
  -0.00000000e+00 -0.00000000e+00 -0.00000000e+00 -0.00000000e+00
  -0.00000000e+00 -0.00000000e+00]
 [-0.00000000e+00 -0.00000000e+00 -0.00000000e+00 -0.00000000e+00
  -2.85872480e-03 -0.00000000e+00 -0.00000000e+00 -0.00000000e+00
  -0.00000000e+00 -0.00000000e+00]
 [-0.00000000e+00 -1.68995744e+01 -0.00000000e+00 -0.00000000e+00
  -0.00000000e+00 -0.00000000e+00 -0.00000000e+00 -0.00000000e+00
  -0.00000000e+00 -0.00000000e+00]
 [-0.00000000e+00 -0.00000000e+00 -0.00000000e+00 -0.00000000e+00
  -0.00000000e+00 -0.00000000e+00 -0.00000000e+00 -0.00000000e+00
  -0.00000000e+00 -1.35040367e+01]]
Cost:  1.2440543956462282 Train Accuracy: 15.691666666666668
[[ -0.          -0.          -0.          -0

KeyboardInterrupt: 

In [None]:
network = ArtificialNeuralNetwork([2,2,1])

a = np.arange(9).reshape(3,3)
np.argmax(a)