In [201]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split

In [212]:
# Training table: per the preview below it has a `label` column followed by
# pixel0 … pixel783 (one flattened image per row).
TRAIN_CSV = 'dataset/mnist/train.csv'
data = pd.read_csv(TRAIN_CSV)

# Bare last expression so the notebook renders the first rows as a table.
data.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [203]:
def _stable_softmax(x):
  """Softmax along axis 1, shifted by each row's maximum.

  Subtracting the row maximum leaves the result mathematically unchanged but
  keeps np.exp from overflowing to inf (which would turn the ratio into nan).

  NOTE(review): normalisation runs along axis=1, i.e. one sample per row.
  Layer.forward in this notebook produces (n_neurons, 1) columns, for which an
  axis=1 softmax is all ones -- confirm the intended layout before selecting
  'softmax' as a Layer activation.
  """
  shifted = np.exp(x - np.max(x, axis=1, keepdims=True))
  return shifted / np.sum(shifted, axis=1, keepdims=True)


# Activation functions, looked up by name. Each maps a pre-activation array to
# an array of the same shape.
activation_functions = {
  'sigmoid': lambda x: 1 / (1 + np.exp(-x)),
  'tanh': lambda x: np.tanh(x),
  'relu': lambda x: np.maximum(0, x),
  'softmax': _stable_softmax
}

# Derivatives keyed by the same names as the activations. The sigmoid and tanh
# entries are written in terms of the activation *output* (a * (1 - a) and
# 1 - a**2); MLP.fit passes the layer's activated output to them.
# The 'softmax' entry reuses the element-wise sigmoid form.
derivative_activation_functions = dict(
  sigmoid=lambda x: x * (1 - x),
  tanh=lambda x: 1 - x ** 2,
  relu=lambda x: (x > 0).astype(float),  # 1.0 where positive, else 0.0
  softmax=lambda x: x * (1 - x),
)

def one_hot_enconding(y, num_classes):
  """One-hot encode integer class labels.

  Args:
    y: array-like of integer labels in [0, num_classes). Lists, NumPy arrays
      and pandas Series are accepted; a column of shape (n, 1) is flattened.
    num_classes: width of the encoding (number of distinct classes).

  Returns:
    Float array of shape (n, num_classes) with a single 1 per row.

  Raises:
    ValueError: if any label is negative (NumPy would otherwise wrap it around
      and silently mark one of the last classes).
    IndexError: if a label is >= num_classes or not an integer type.
  """
  # Flatten so that a column vector cannot broadcast against the row indices
  # and mark an (n, n) grid of cells.
  labels = np.asarray(y).reshape(-1)
  y_one_hot = np.zeros((labels.shape[0], num_classes))
  if labels.size == 0:
    return y_one_hot
  if labels.min() < 0:
    raise ValueError("labels must be non-negative class indices")
  y_one_hot[np.arange(labels.shape[0]), labels] = 1
  return y_one_hot

In [213]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split
# repeatable. Features are every column except `label`.
X_train, X_test, y_train, y_test = train_test_split(
  data.drop(columns='label'),
  data['label'],
  test_size=0.2,
  random_state=42,
)

In [222]:
def _scale_pixels(features):
  """Return `features` as a float array scaled to [0, 1].

  Raw pixel intensities go up to 255. If the maximum is already <= 1 the array
  is taken to be scaled and is returned as-is, so re-running this cell does not
  divide by 255 a second time.
  """
  pixels = np.asarray(features, dtype=float)
  if pixels.size and pixels.max() > 1:
    pixels = pixels / 255
  return pixels


def _encode_labels(labels, num_classes=10):
  """One-hot encode 1-D labels; an already encoded (2-D) array is returned unchanged."""
  if np.ndim(labels) == 1:
    return one_hot_enconding(labels, num_classes)
  return labels


X_train = _scale_pixels(X_train)
y_train = _encode_labels(y_train)

X_test = _scale_pixels(X_test)
y_test = _encode_labels(y_test)

In [207]:
  def predict(self, y):
    """Run one sample through the hidden and output layers and return the winning index.

    Args:
      self: object exposing `hidden_layer` and `output_layer`, each with a
        `forward` method.
      y: a single input sample (flat array-like); it is fed to the network as
        a column vector.

    Returns:
      np.argmax of the output activations along axis 0.
    """
    # reshape returns a new array object, so the caller's array keeps its shape
    # and the same sample can be passed to predict() more than once.
    column = np.asarray(y).reshape(-1, 1)
    A1 = self.hidden_layer.forward(column)
    A2 = self.output_layer.forward(A1)
    return np.argmax(A2, 0)

  def get_predictions(A2):
    """Return, for each column of A2, the row index holding the largest value."""
    winners = np.argmax(A2, axis=0)
    return winners

  def get_accuracy(predictions, Y):
    """Return the fraction of entries where `predictions` equals `Y`.

    Args:
      predictions: array-like of predicted values.
      Y: array-like of expected values, comparable element-wise with
        `predictions`.

    Returns:
      Number of matching entries divided by the number of entries in `Y`.
    """
    # No debug print here: dumping both full arrays on every call floods the
    # notebook output.
    predictions = np.asarray(predictions)
    Y = np.asarray(Y)
    return np.sum(predictions == Y) / Y.size

  def make_predictions(X, W1, b1, W2, b2):
    """Forward `X` through forward_prop and return get_predictions of its fourth result.

    NOTE(review): forward_prop is not defined in the visible cells; it is
    expected to return exactly four values -- confirm it exists before calling.
    """
    _, _, _, final_activation = forward_prop(W1, b1, W2, b2, X)
    return get_predictions(final_activation)

  def test_prediction(index, W1, b1, W2, b2):
    """Print the prediction and true label for one training image, then display it.

    Reads the notebook globals X_train and y_train as prepared by the
    preprocessing cell: one flattened 28x28 image per row of X_train, and
    one-hot rows in y_train.

    Args:
      index: positional row of the sample inside X_train / y_train.
      W1, b1, W2, b2: parameters forwarded unchanged to make_predictions.
    """
    # Samples are rows in this notebook, so take row `index` and turn it into
    # the (n_pixels, 1) column that make_predictions is given.
    current_image = X_train[index].reshape(-1, 1)
    prediction = make_predictions(current_image, W1, b1, W2, b2)
    label = y_train[index]
    if np.ndim(label) > 0:
      # One-hot row: report the class index so it is comparable to the prediction.
      label = np.argmax(label)
    print("Prediction: ", prediction)
    print("Label: ", label)

    current_image = current_image.reshape((28, 28)) * 255
    plt.gray()
    plt.imshow(current_image, interpolation='nearest')
    plt.show()

In [208]:
class Layer:
  """Fully connected layer: activation(weights @ inputs + biases).

  Attributes are plain NumPy arrays so a training loop can update them in place.
  """

  def __init__(self, n_inputs: int, n_neurons: int, activation, rng=None):
    """Create the layer with uniform random weights and zero biases.

    Args:
      n_inputs: number of input features (columns of the weight matrix).
      n_neurons: number of units in the layer (rows of the weight matrix).
      activation: callable applied element-wise to the pre-activation array.
      rng: optional object with a `uniform(low, high, size)` method, e.g.
        `np.random.default_rng(seed)`, for reproducible weights. When None the
        global `np.random` state is used.
    """
    sampler = np.random if rng is None else rng
    # weights: (n_neurons, n_inputs), drawn from U[-0.5, 0.5)
    self.weights = sampler.uniform(-0.5, 0.5, (n_neurons, n_inputs))
    # biases: (n_neurons, 1) column, broadcast over the columns of the input
    self.biases = np.zeros((n_neurons, 1))
    self.activation = activation
    # Cached by forward(): pre-activation values and activated output.
    self.output = None
    self.activation_output = None

  def forward(self, inputs):
    """Compute and cache the layer output for `inputs`.

    Args:
      inputs: array whose first dimension is n_inputs (e.g. an (n_inputs, 1)
        column).

    Returns:
      The activated output, also stored in `self.activation_output`; the
      pre-activation values are stored in `self.output`.
    """
    self.output = np.dot(self.weights, inputs) + self.biases
    self.activation_output = self.activation(self.output)
    return self.activation_output

In [209]:
class MLP:
  """Multilayer perceptron with a single hidden layer, trained one sample at a time.

  Attributes:
    input_layer: number of input features.
    hidden_layers: number of neurons in the hidden layer.
    hidden_layer: Layer mapping inputs to hidden activations.
    output_layer: Layer mapping hidden activations to outputs.
    learning_rate, epochs, activation: training settings as passed in.
  """

  def __init__(self, input_layer: int, hidden_layers: int, output_layer: int, learning_rate: float = 0.001, epochs: int = 500, activation: str = 'sigmoid'):
    """Build the two layers.

    Args:
      input_layer: number of input features.
      hidden_layers: number of neurons in the hidden layer.
      output_layer: number of output units.
      learning_rate: step size applied to every update.
      epochs: number of full passes over the training data in fit().
      activation: key into activation_functions / derivative_activation_functions,
        used for both layers.

    Raises:
      KeyError: if `activation` is not a key of activation_functions.
    """
    self.input_layer = input_layer
    self.hidden_layers = hidden_layers
    self.learning_rate = learning_rate
    self.epochs = epochs
    self.activation = activation

    self.hidden_layer = Layer(input_layer, hidden_layers, activation_functions[activation])
    # `self.output_layer` holds the Layer object, not the unit count.
    self.output_layer = Layer(hidden_layers, output_layer, activation_functions[activation])

  def fit(self, X, y):
    """Train in place with per-sample gradient steps.

    Args:
      X: array-like of shape (n_samples, n_features), one sample per row.
      y: array-like of shape (n_samples, n_outputs), one target row per sample
        (e.g. one-hot labels).

    Raises:
      ValueError: if X or y is not 2-D, or they hold different numbers of rows.
    """
    # Converting up front also makes DataFrames iterate by row instead of by
    # column name.
    X = np.asarray(X)
    y = np.asarray(y)
    if X.ndim != 2 or y.ndim != 2:
      raise ValueError("X and y must both be 2-D (one row per sample)")
    if X.shape[0] != y.shape[0]:
      raise ValueError("X and y must contain the same number of samples")

    activation_derivative = derivative_activation_functions[self.activation]

    for _ in range(self.epochs):
      for sample, target in zip(X, y):
        # Column vectors; reshape leaves the rows of X and y untouched.
        img = sample.reshape(-1, 1)
        label = target.reshape(-1, 1)

        A1 = self.hidden_layer.forward(img)
        A2 = self.output_layer.forward(A1)

        # Output delta is prediction minus target (no output-activation
        # derivative is applied).
        error_output = A2 - label

        # Back-propagate through the output weights that produced A2, i.e.
        # before they are updated below.
        error_hidden = np.dot(self.output_layer.weights.T, error_output) * activation_derivative(A1)

        self.output_layer.weights += -self.learning_rate * np.dot(error_output, A1.T)
        self.output_layer.biases += -self.learning_rate * error_output

        self.hidden_layer.weights += -self.learning_rate * np.dot(error_hidden, img.T)
        self.hidden_layer.biases += -self.learning_rate * error_hidden

In [210]:
# 784 inputs = one per pixel of a flattened 28x28 image; 10 outputs = one per
# class of the one-hot labels. learning_rate and activation keep their defaults.
model = MLP(
  input_layer=784,
  hidden_layers=10,
  output_layer=10,
  epochs=3,
)

In [229]:
# Train in place: per-sample updates over the scaled training images and their
# one-hot labels, repeated for model.epochs passes.
model.fit(X=X_train, y=y_train)