In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
from functools import *
import operator

dataset = pd.read_csv('../mnist/mnist_train.csv')
testset = pd.read_csv('../mnist/mnist_test.csv')

x_test = np.array(testset)[:, 1:] / 255
y_test = np.array(testset)[:, 0]

In [3]:
def sigmoid(x, deriv = False):
    if deriv == True:
        return x*(1-x)
    return 1 / (1 + np.exp(-x))

def tanh(x, deriv = False):
    if deriv == True:
        return 1 - pow(np.tanh(x), 2)
    return np.tanh(x)

In [33]:
class MLPNetwork(object):
    def __init__(self, input_size, hidden_layer_length, output_layer_length=10, activation_fn=sigmoid, lr=0.1, epochs=10):
        self.input_size = input_size
        self.activation_fn = activation_fn
        self.lr = lr
        self.epochs = epochs
        self.output_layer_length = output_layer_length
        self.hidden_layer_length = hidden_layer_length
        
        #Weight inicialization
        self.input_layer_weights = np.random.normal(0, 1, (hidden_layer_length, input_size)) * 0.2
        self.hidden_layer_weights = np.random.normal(0, 1, (output_layer_length, hidden_layer_length)) * 0.2
        
    def guess(self, input_layer):
        hidden_layer = self.activation_fn(np.dot(self.input_layer_weights, input_layer))
        output_layer = self.activation_fn(np.dot(self.hidden_layer_weights, hidden_layer))
        return output_layer, hidden_layer
    
    def train(self, input_layer, answer):
        #Predict
        output_layer, hidden_layer = self.guess(input_layer)
        target = np.zeros(10)
        target[int(answer)] = 1
        
        #Compute hidden layer error
        output_err = target - pow(output_layer, 2)
        hidden_err = np.dot(self.hidden_layer_weights.T, output_err)
        
        hidden_delta = self.lr * output_err * self.activation_fn(output_layer, deriv=True)
        hidden_delta = np.outer(hidden_delta, hidden_layer)
        
        #Apply hidden layer correction to model
        self.hidden_layer_weights += hidden_delta
        
        #Compute input layer error
        input_delta = self.lr * hidden_err * self.activation_fn(hidden_layer, deriv=True)
        input_delta = np.outer(input_delta, input_layer)
        
        #Apply input layer correction to model
        self.input_layer_weights += input_delta
    
    def compute_accuracy(self, x_test, y_test):
        err = 0
        total = y_test.shape[0]
        for test_case, test_answer in zip(x_test, y_test):
            output_layer, hidden_layer = self.guess(test_case)
            prediction = np.argmax(output_layer)
            if prediction != test_answer:
                err += 1
        return err, total
    
    def fit(self, x_train, y_train, x_test, y_test):
        train = np.insert(x_train, 0,  y_train, axis = 1)
        tested_epochs = np.arange(0, self.epochs, int(self.epochs/5) or 1)
        print('Accuracy will be measured in following epochs {}'.format(tested_epochs))
        for epoch in range(self.epochs):
            np.random.shuffle(train)
            x_train = train[:, 1:]
            y_train = train[:, 0]
            for test_case, test_answer in zip(x_train, y_train):
                self.train(test_case, test_answer)
            if(epoch in(tested_epochs)):
                err, total = self.compute_accuracy(x_test, y_test)
                print('Network accuracy at Epoch ' + str(epoch) + ' is : ' + str(100 - float(err)/float(total) * 100)[:4])

In [34]:
#Train data
train_length = int(np.array(dataset).shape[0])
train = np.array(dataset)
x_train = train[:train_length, 1:] / 255
y_train = train[:train_length, 0]

In [43]:
#MLP Class Parameters

input_size = x_train.shape[1]
epochs = 20
lr = 0.0005
activation_fn = sigmoid
hidden_layer_length = 15
output_layer_length = 10

In [44]:
#Train Network

network = MLPNetwork(input_size, hidden_layer_length, activation_fn=activation_fn, lr=lr, epochs=epochs)
network.fit(x_train, y_train, x_test, y_test)

Accuracy will be measured in following epochs [ 0  4  8 12 16]
Network accuracy at Epoch 0 is : 54.6
Network accuracy at Epoch 4 is : 81.8
Network accuracy at Epoch 8 is : 86.9
Network accuracy at Epoch 12 is : 88.8
Network accuracy at Epoch 16 is : 89.8


In [42]:
err,total = network.compute_accuracy(x_test, y_test)
print('Final accuracy: ' + str(100 - err/total * 100)[:4])

Final accuracy: 91.0
