In [185]:
import pandas as pd
import numpy as np
import math

In [2]:
df = pd.read_csv("mnist/mnist_train.csv")

In [175]:
learning_rate = 0.005
epochs = 100
hidden_layer_len = 15
output_layer_len = 10

In [200]:
def activation_sigmoid(x, deriv = False):
    if deriv == True:
        return x*(1-x)
    return 1 / (1 + np.exp(-x))

def activation_tanh(x, deriv = False):
    if deriv == True:
        return 1 - pow(math.tanh(x), 2)
    return math.tanh(x)

def split_train_validation(dataset):
    df_train = pd.DataFrame()
    df_validation = pd.DataFrame()
    for i in range(10):
        selection = dataset['5'] == i
        df_selection = dataset[selection]
        df_train_aux = df_selection.sample(frac = 0.7)
        df_validation_aux = df_selection.drop(df_train_aux.index)
        df_train = pd.concat([df_train, df_train_aux])
        df_validation = pd.concat([df_validation, df_validation_aux])
    df_train = df_train.sample(frac = 1).reset_index(drop = True)
    df_validation = df_validation.sample(frac = 1).reset_index(drop = True)
    return df_train, df_validation

def normalize_data(dataset):
    df_array = dataset.values/255
    return df_array

In [177]:
df_train, df_validation = split_train_validation(df)

X_train = normalize_data(df_train.iloc[:, 1:])
y_train = df_train.iloc[:, :1].values
X_validation = normalize_data(df_validation.iloc[:, 1:])
y_validation = df_validation.iloc[:, :1].values

In [202]:
weights_in = np.random.normal(0, 1, (15, 784)) * 0.2
weights_hidden = np.random.normal(0, 1, (10, 15)) * 0.2

In [203]:
for epoch in range(epochs): 
    for i in range(y_train.size):
        input_layer = X_train[i]
        hidden_layer = activation_sigmoid(np.dot(weights_in, input_layer))
        output_layer = activation_sigmoid(np.dot(weights_hidden, hidden_layer))

        target = np.zeros(10)
        target[y_train[i]] = 1

        output_error = target - pow(output_layer, 2)
        hidden_error = np.dot(weights_hidden.T, output_error)

        hidden_delta = learning_rate * output_error * activation_sigmoid(output_layer, deriv = True)
        hidden_delta = np.outer(hidden_delta, hidden_layer)
        weights_hidden += hidden_delta

        input_delta = learning_rate * hidden_error * activation_sigmoid(hidden_layer, deriv = True)
        input_delta = np.outer(input_delta, input_layer)
        weights_in += input_delta
    if epoch % 10 == 0:
        error = 0
        for j in range(y_validation.size):
            input_layer = X_validation[j]
            hidden_layer = activation_sigmoid(np.dot(weights_in, input_layer))
            output_layer = activation_sigmoid(np.dot(weights_hidden, hidden_layer))

            target = y_validation[j]
            y = np.argmax(output_layer)
            if target != y:
                error += 1

        print("Accuracy de {}% na {}ª epoch".format(100 - (100*error/y_validation.size), epoch+1))

Accuracy de 79.22662370131674% na 1ª epoch
Accuracy de 91.49952775154175% na 11ª epoch
Accuracy de 92.43291293960776% na 21ª epoch
Accuracy de 92.78848824934718% na 31ª epoch
Accuracy de 93.02739041057836% na 41ª epoch
Accuracy de 93.16073115173066% na 51ª epoch
Accuracy de 93.20517806544808% na 61ª epoch
Accuracy de 93.34407467081505% na 71ª epoch
Accuracy de 93.37185399188844% na 81ª epoch
Accuracy de 93.41630090560587% na 91ª epoch


In [57]:
df_test = pd.read_csv("mnist/mnist_test.csv")

In [183]:
X_test = normalize_data(df_test.iloc[:, 1:])
y_test = df_test.iloc[:, :1].values

In [204]:
error = 0
for j in range(y_test.size):
    input_layer = X_test[j]
    hidden_layer = activation_sigmoid(np.dot(weights_in, input_layer))
    output_layer = activation_sigmoid(np.dot(weights_hidden, hidden_layer))
    
    target = y_test[j]
    y = np.argmax(output_layer)
    if target != y:
        error += 1
        
print("Accuracy de {}%".format(100 - (100*error/y_test.size)))

Accuracy de 93.68936893689369%
