In [56]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from matplotlib.colors import ListedColormap
import math

def normalize(X, axis=-1, order=2):
    l2 = np.atleast_1d(np.linalg.norm(X, order, axis))
    l2[l2 == 0] = 1
    return X / np.expand_dims(l2, axis)
    
def to_categorical(x, n_col=None):
    if not n_col:
        n_col = np.amax(x) + 1
    one_hot = np.zeros((x.shape[0], n_col))
    one_hot[np.arange(x.shape[0]), x] = 1
    return one_hot

class CrossEntropy():
    def __init__(self): pass

    def loss(self, y, p):
        # Avoid division by zero
        p = np.clip(p, 1e-15, 1 - 1e-15)
        return - y * np.log(p) - (1 - y) * np.log(1 - p)

    def acc(self, y, p):
        return accuracy_score(np.argmax(y, axis=1), np.argmax(p, axis=1))

    def gradient(self, y, p):
        # Avoid division by zero
        p = np.clip(p, 1e-15, 1 - 1e-15)
        return - (y / p) + (1 - y) / (1 - p)

#######################################################################


class Sigmoid():
    def __call__(self, x):
        return 1 / (1 + np.exp(-x))

    def gradient(self, x):
        return self.__call__(x) * (1 - self.__call__(x))
    
#########################################################################

class Softmax():
    def __call__(self, x):
        e_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
        return e_x / np.sum(e_x, axis=-1, keepdims=True)

    def gradient(self, x):
        p = self.__call__(x)
        return p * (1 - p)

In [64]:
class MultiLayerPerceptron(object):
    def __init__(self, n_hidden=2):
        self.n_hidden = n_hidden
        self.hidden_activation = Sigmoid()
        self.output_activation = Softmax()
        self.loss = CrossEntropy()

    def accuracy_score(self, y_true, y_pred):
        return np.sum(y_true == y_pred, axis=0) / len(y_true)

    def loss(self, y, p):
        p = np.clip(p, 1e-15, 1 - 1e-15)
        return - y * np.log(p) - (1 - y) * np.log(1 - p)

    def _initialize_weights(self, X, y):
        n_samples, n_features = X.shape
        _, n_outputs = y.shape
        
        limit = 1 / math.sqrt(n_features)
        self.weights = np.random.uniform(-limit, limit, (n_features, self.n_hidden))
        self.bias = np.zeros((1, self.n_hidden))

        limit = 1 / math.sqrt(self.n_hidden)
        self.o_weights = np.random.uniform(-limit, limit, (self.n_hidden, n_outputs))
        self.o_bias = np.zeros((1, n_outputs))

    def fit(self, X, y, bias=0.1, epochs=50, lr=0.001):
        self._initialize_weights(X, y)

        for e in range(epochs):
            hidden_input = X.dot(self.weights) + self.bias#Dot Product
            hidden_output = self.hidden_activation(hidden_input)#Activation Function

            output_layer_output = hidden_output.dot(self.o_weights) + self.o_bias
            y_pred = self.output_activation(output_layer_output)

            grad_wrt_out_l_input = self.loss.gradient(y, y_pred) * self.output_activation.gradient(output_layer_output)
            grad_v = hidden_output.T.dot(grad_wrt_out_l_input)
            grad_v0 = np.sum(grad_wrt_out_l_input, axis=0, keepdims=True)

            grad_wrt_hidden_l_input = grad_wrt_out_l_input.dot(self.o_weights.T) * self.hidden_activation.gradient(hidden_input)
            grad_w = X.T.dot(grad_wrt_hidden_l_input)
            grad_w0 = np.sum(grad_wrt_hidden_l_input, axis=0, keepdims=True)

            self.o_weights -= lr * grad_v
            self.o_bias -= lr * grad_v0
            self.weights -= lr * grad_w
            self.bias -= lr * grad_w0


    def predict(self, X):
        hidden_input = X.dot(self.weights) + self.bias
        hidden_output = self.hidden_activation(hidden_input)
        output_layer_input = hidden_input.dot(self.o_weights) + self.o_bias
        y_pred = self.output_activation(output_layer_input)
        return y_pred

data = datasets.load_digits()
X = normalize(data.data)
y = to_categorical(data.target)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model = MultiLayerPerceptron()
model.fit(X_train, y_train)

y_pred = np.argmax(model.predict(X_test), axis=1)
y_test = np.argmax(y_test, axis=1)

accuracy = model.accuracy_score(y_test, y_pred)
print ("Accuracy:", accuracy)


Accuracy: 0.125
