In [2]:
import numpy as np
import pandas as pd

#Data Import and Pre-processing

In [5]:
mnist_train = pd.read_csv('mnist_train.csv').sample(n=1000, random_state=42).reset_index(drop=True)
mnist_test = pd.read_csv('mnist_test.csv').sample(n=1000, random_state=42).reset_index(drop=True)

mnist_train.describe()
mnist_test.head()

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
0,1,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,5,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,8,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,3,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
x_train = mnist_train.drop('label', axis = 1).values
y_train = mnist_train['label'].values.reshape(-1, 1)
x_test = mnist_test.drop('label', axis = 1).values
y_test = mnist_test['label'].values.reshape(-1, 1)


x_train = x_train/255.0
x_test = x_test/255.0

# One-Hot Encoding -:
num_classes = 10

y_train = np.eye(num_classes)[y_train.flatten()]
y_test = np.eye(num_classes)[y_test.flatten()]

In [7]:
print(x_train.shape)
print(y_train.shape)

print(x_test.shape)
print(y_test.shape)

w = np.array([1,2,3,4])
print(w)
w = np.eye(num_classes)[w.flatten()]

(1000, 784)
(1000, 10)
(1000, 784)
(1000, 10)
[1 2 3 4]


#Architecture Design -:

In [8]:
class FeedForwardNeuralNetwork:
  def __init__(self):
    self.W1 = None
    self.B1 = None
    self.W2 = None
    self.B2 = None
    self.W3 = None
    self.B3 = None
    self.__learning_rate = 0.08
    self.epochs = 3000

  def __sigmoid(self, z):
    return 1.0/ (1.0 + np.exp(-z))

  def __pre_activation(self, W, B, X):
    return np.dot(W, X) + B

  def __softmax(self, z):
    exp_z = np.exp(z - np.max(z, axis=0, keepdims=True))
    return exp_z / np.sum(exp_z, axis=0, keepdims=True)

  def __dsoftmax(self, Y, A3):
    return A3 - Y

  def __dsigmoid(self, z):
    return z * (1 - z)

  def __forward_prop(self, X, W1, B1, W2, B2, W3, B3):
    H1 = self.__pre_activation(W1.T, B1, X.T)
    A1 = self.__sigmoid(H1)
    H2 = self.__pre_activation(W2, B2, A1)
    A2 = self.__sigmoid(H2)
    H3 = self.__pre_activation(W3.T, B3, A2)
    A3 = self.__softmax(H3)

    return H1, A1, H2, A2, H3, A3

  def __back_prop(self, X, Y, W1, B1, H1, A1, W2, B2, H2, A2, W3, B3, H3, A3):
    m = X.shape[1]  # number of samples

    # Output layer
    dH3 = self.__dsoftmax(Y, A3)
    dW3 = np.dot(dH3, A2.T) / m
    dB3 = np.sum(dH3, axis=1, keepdims=True) / m

    # Hidden layer 2
    dA2 = np.dot(W3, dH3)
    dH2 = dA2 * self.__dsigmoid(A2)
    dW2 = np.dot(dH2, A1.T) / m
    dB2 = np.sum(dH2, axis=1, keepdims=True) / m

    # Hidden layer 1
    dA1 = np.dot(W2, dH2)
    dH1 = dA1 * self.__dsigmoid(A1)
    dW1 = np.dot(dH1, X.T) / m
    dB1 = np.sum(dH1, axis=1, keepdims=True) / m

    # Update weights
    W1 -= self.__learning_rate * dW1.T
    B1 -= self.__learning_rate * dB1
    W2 -= self.__learning_rate * dW2
    B2 -= self.__learning_rate * dB2
    W3 -= self.__learning_rate * dW3.T
    B3 -= self.__learning_rate * dB3

    return (W1, B1, W2, B2, W3, B3)

  def __cross_entropy(self, Y, A3):
    m = Y.shape[1]
    epsilon = 1e-8
    loss = -np.sum(Y * np.log(A3 + epsilon)) / m
    return loss

  def fit(self, x_train, y_train):
    # Random Initialization
    W1 = np.random.randn(784, 32) * 1 / np.sqrt(784)
    W2 = np.random.randn(32, 32) * 1 / np.sqrt(32)
    W3 = np.random.randn(32, 10) * 1 / np.sqrt(32)
    B1 = np.random.randn(32, 1)
    B2 = np.random.randn(32, 1)
    B3 = np.random.randn(10, 1)

    for i in range(self.epochs):
      H1, A1, H2, A2, H3, A3 = self.__forward_prop(x_train, W1, B1, W2, B2, W3, B3)

      if i % 100 == 0:
        print(f"Epoch {i}: Loss = {self.__cross_entropy(y_train, A3)}")

      W1, B1, W2, B2, W3, B3 = self.__back_prop(x_train.T, y_train, W1, B1, H1, A1, W2, B2, H2, A2, W3, B3, H3, A3)

    # Save weights and biases
    self.W1 = W1
    self.B1 = B1
    self.W2 = W2
    self.B2 = B2
    self.W3 = W3
    self.B3 = B3

  def predict(self, x_test):
    W1, W2, W3 = self.W1, self.W2, self.W3
    B1, B2, B3 = self.B1, self.B2, self.B3

    H1 = self.__pre_activation(W1.T, B1, x_test.T)
    A1 = self.__sigmoid(H1)

    H2 = self.__pre_activation(W2, B2, A1)
    A2 = self.__sigmoid(H2)

    H3 = self.__pre_activation(W3.T, B3, A2)
    A3 = self.__softmax(H3)

    y_pred = np.argmax(A3, axis=0)
    return y_pred

  def score(self, x_test, y_test):
    y_pred = self.predict(x_test)
    y_true = np.argmax(y_test, axis=1)

    accuracy = np.mean(y_pred == y_true)
    return (f"{accuracy * 100}%")

In [9]:
model = FeedForwardNeuralNetwork()

model.fit(x_train, y_train.T)
print(model.predict(x_test))
model.score(x_test, y_test)

Epoch 0: Loss = 2.647010240377061
Epoch 100: Loss = 2.2914179660608345
Epoch 200: Loss = 2.281193728336647
Epoch 300: Loss = 2.2574591890606
Epoch 400: Loss = 2.21239599290317
Epoch 500: Loss = 2.135472480293785
Epoch 600: Loss = 2.019663181012198
Epoch 700: Loss = 1.8726610704731288
Epoch 800: Loss = 1.7149524447475282
Epoch 900: Loss = 1.5648992490723188
Epoch 1000: Loss = 1.4284898181897483
Epoch 1100: Loss = 1.304483312422604
Epoch 1200: Loss = 1.1919844676345581
Epoch 1300: Loss = 1.0909465336442015
Epoch 1400: Loss = 1.0009612473924203
Epoch 1500: Loss = 0.9213672922437169
Epoch 1600: Loss = 0.8511709503689703
Epoch 1700: Loss = 0.7889717096080247
Epoch 1800: Loss = 0.7333370143401828
Epoch 1900: Loss = 0.6831322123461481
Epoch 2000: Loss = 0.637608097314924
Epoch 2100: Loss = 0.5963084422405275
Epoch 2200: Loss = 0.5589146891724646
Epoch 2300: Loss = 0.5251243568059694
Epoch 2400: Loss = 0.4946012176536516
Epoch 2500: Loss = 0.466980762129396
Epoch 2600: Loss = 0.441896536205642

'79.4%'

In [10]:
print(model.predict(x_test).shape)
print(x_test.shape)

(1000,)
(1000, 784)


In [11]:
print(y_test)

[[0. 1. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]]
