## Rede Neural - MNIST
#### Aluno: David Maisonnette Jentjens - 1810235

Este notebook contém uma implementação de um algoritmo de deep learning, capaz de classificar imagens de digitos da biblioteca MNIST, através de um aprendizado baseado na técnica de gradiente descendente estocástico. O modelo é implementado apenas utilizando-se o numpy e algumas outras para funcionalidades menores, que não impactam no aprendizado da modelo em si. 

### 0 - Importações

In [1]:
import time
import numpy as np
    
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

from keras.utils.np_utils import to_categorical

### 1 - Preprocessamento

In [2]:
x, y = fetch_openml('mnist_784', version=1, return_X_y=True)
x = (x/255).astype('float32')
y = to_categorical(y)

x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.15, random_state=42)

### 2 - Funções de ativação

In [3]:
def sigmoid(x, derivative=False):
    if derivative:
        return (np.exp(-x))/((np.exp(-x)+1)**2)

    return 1/(1 + np.exp(-x))
    
def softmax(x, derivative=False):
    exps = np.exp(x - x.max())

    if derivative:
        return exps / np.sum(exps, axis=0) * (1 - exps / np.sum(exps, axis=0))

    return exps / np.sum(exps, axis=0)

In [85]:
class NeuralNetwork:
    def __init__(self, layer_sizes, learn_rate=0.001, epochs=10):
        self.layer_sizes = layer_sizes
        self.learn_rate = learn_rate
        self.epochs = epochs

        self.params = self.initialization()
    
    def initialization(self):
        input_layer = self.layer_sizes[0]
        hidden_layer_1 = self.layer_sizes[1]
        hidden_layer_2 = self.layer_sizes[2]
        output_layer = self.layer_sizes[3]

        params = {
            'W1':np.random.randn(hidden_layer_1, input_layer) * np.sqrt(1. / hidden_layer_1),
            'W2':np.random.randn(hidden_layer_2, hidden_layer_1) * np.sqrt(1. / hidden_layer_2),
            'W3':np.random.randn(output_layer, hidden_layer_2) * np.sqrt(1. / output_layer)
        }

        return params

    def propagate_forward(self, x_train):
        params = self.params

        if(x_train[0] == 'p'):
            return
        
        # input layer activations becomes sample
        params['A0'] = x_train

        # input layer to hidden layer 1
        params['Z1'] = np.dot(params["W1"], params['A0'])
        params['A1'] = sigmoid(params['Z1'])

        # hidden layer 1 to hidden layer 2
        params['Z2'] = np.dot(params["W2"], params['A1'])
        params['A2'] = sigmoid(params['Z2'])

        # hidden layer 2 to output layer
        params['Z3'] = np.dot(params["W3"], params['A2'])
        params['A3'] = softmax(params['Z3'])

        return params['A3']

    def propagate_backward(self, y_train, output):
        params = self.params
        change_w = {}

        # Calculate W3 update
        error = 2 * (output - y_train) / output.shape[0] * softmax(params['Z3'], derivative=True)
        change_w['W3'] = np.outer(error, params['A2'])

        # Calculate W2 update
        error = np.dot(params['W3'].T, error) * sigmoid(params['Z2'], derivative=True)
        change_w['W2'] = np.outer(error, params['A1'])

        # Calculate W1 update
        error = np.dot(params['W2'].T, error) * sigmoid(params['Z1'], derivative=True)
        change_w['W1'] = np.outer(error, params['A0'])

        return change_w

    def update_network_parameters(self, changes_to_w):
        for key, value in changes_to_w.items():
            self.params[key] -= self.learn_rate * value

    def fit(self, x_train, y_train, x_val, y_val):
        start_time = time.time()

        for iteration in range(self.epochs):
            for (i, x), y in zip(x_train.iterrows(), y_train):
                output = self.propagate_forward(x)
                changes_to_w = self.propagate_backward(y, output)
                self.update_network_parameters(changes_to_w)

            accuracy = self.calc_accuracy(x_val, y_val)
            print('Epoch: {0}, Time Spent: {1:.2f}s, Accuracy: {2:.2f}%'.format(
                iteration+1, time.time() - start_time, accuracy * 100
            ))

    def calc_accuracy(self, x_val, y_val):
        predictions = []

        for x, y in zip(x_val, y_val):
            output = self.propagate_forward(x)
            pred = np.argmax(output)
            predictions.append(pred == np.argmax(y))

        return np.mean(predictions)

In [86]:
model = NeuralNetwork(layer_sizes=[784, 128, 64, 10])
model.fit(x_train, y_train, x_val, y_val)

Epoch: 1, Time Spent: 38.71s, Accuracy: 9.82%
Epoch: 2, Time Spent: 73.07s, Accuracy: 9.82%
Epoch: 3, Time Spent: 107.68s, Accuracy: 9.82%
Epoch: 4, Time Spent: 139.99s, Accuracy: 9.82%
Epoch: 5, Time Spent: 173.36s, Accuracy: 9.82%
Epoch: 6, Time Spent: 207.00s, Accuracy: 9.82%
Epoch: 7, Time Spent: 239.53s, Accuracy: 9.82%
Epoch: 8, Time Spent: 273.22s, Accuracy: 9.82%
Epoch: 9, Time Spent: 309.75s, Accuracy: 9.82%
Epoch: 10, Time Spent: 347.52s, Accuracy: 9.82%
