<h1><center>- Assignment 3 -</center></h1>

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

#### 1. Load MNIST dataset

In [2]:
train_data = pd.read_csv('mnist_train.csv')
test_data = pd.read_csv('mnist_test.csv')

In [3]:
s1 = train_data.sample(n = 1000)
s2 = test_data.sample(n = 100)

In [4]:
train_features = s1.drop(['label'], axis=1)
train_labels = s1['label']

test_features = s2.drop(['label'], axis=1)
test_labels = s2['label']

#### 2. Standardize your dataset

In [5]:
train_features = (train_features - np.mean(train_features, axis=0)) / np.std(train_features, axis=0)
train_features = train_features.fillna(0)

#### 3. Divide data into training and test

In [6]:
X_train, X_test, y_train, y_test = train_test_split(train_features, train_labels, test_size=0.2, random_state=42)

#### 4. Apply one hot vector for labels (meaning the value is 1 in the correct class and 0 in the rest, there will be 10 classes so a vector of 10)

creates a zero-filled NumPy array with a shape of (n_samples, n_classes), where n_samples is the number of samples in the y_train array and n_classes is the maximum value in the y_train array plus one.

y_train is an array of labels with values 0 and 1, the y_train_one_hot array will have a shape of (n_samples, 2).

assign a value of 1 to the corresponding element in each row of the y_train_one_hot array based on the value in the corresponding element of the y_train array.

In [7]:
y_train_one_hot = np.zeros((y_train.size, y_train.max()+1))
y_train_one_hot[np.arange(y_train.size), y_train] = 1

y_test_one_hot = np.zeros((y_test.size, y_test.max()+1))
y_test_one_hot[np.arange(y_test.size), y_test] = 1

#### 5. Implement a dynamic Neural Network from scratch

In [29]:
class NeuralNetwork:
    def __init__(self, learning_rate = 0.1, epochs = 1000):
        self.weights = []
        self.bias = []
        self.lr = learning_rate
        self.epochs = epochs
        pass

    # Activation function
    def sigmoid(self, x):
        return np.piecewise(x, [x > 0],[lambda i: 1 / (1 + np.exp(-i)), lambda i: np.exp(i) / (1 + np.exp(i))],)

    def sigmoid_derivative(self, a):
        return np.dot(a.T, (1-a))

    # Loss function
    def MSE(self, y_actual, y_pred):
        return np.mean((y_pred - y_actual) ** 2)

    def MSE_derivative(self, y_actual, y_pred):
        return y_pred - y_actual

    # Dynamic neural network function
    def NN(self, x, y, num_of_layers, size_of_layers):
        samples, features = x.shape
        dim = features

        # Initialize the weights and bias of the layers with random values
        for layer in range(num_of_layers):
            self.weights.append(np.random.randn(size_of_layers[layer], dim))
            self.bias.append(np.random.randn(size_of_layers[layer]))
            dim = np.shape(self.weights[layer])[0]

        for epoch in range(self.epochs):
            A = []
            X = x

            # Forward propagation
            for i in range(num_of_layers):
                z = np.dot(X, np.transpose(self.weights[i])) + self.bias[i]
                a = self.sigmoid(z)
                A.append(a.T)
                X = a

            # Calculate loss
            loss = self.MSE(A[-1], y)

            # Backpropagation
            dz = 1
            for j in range(num_of_layers - 1, -1, -1):

                if j - 1 >= 0: X = A[j - 1]
                else: X = x.T

                if j == num_of_layers - 1:    # Output Layer
                    dz = dz * self.MSE_derivative(A[-1], y)
                else:                         # Hidden Layer
                    dz = np.dot(dz.T, self.weights[j + 1]).T
                dz = np.dot(dz, self.sigmoid_derivative(A[j]))
                dw = np.dot(dz, X.T)

                # update weights
                self.weights[j] -= (dw * self.lr) / size_of_layers[j]

                # update bias
                for k in range(len(self.bias[j])):
                    self.bias[j][k] -= (np.mean(dz[k]) * self.lr)

    # Test function
    def test(self, X_test, num_of_layers):
        a = X_test
        for i in range(num_of_layers):
            z = np.dot(a, np.transpose(self.weights[i])) + self.bias[i]
            a = self.sigmoid(z)
            
        a = np.transpose(a)
        return a
    
    # Accuracy function
    def calculate_accuracy(self, y_pred, y_test_one_hot):
        true_labels = np.argmax(y_pred, axis=1)
        return np.mean(true_labels == y_test_one_hot)

#### 7. Test your code with the following architectures and report your different accuracies for each case from the following:

- 1.  Build NN with only 2 layers => 1 hidden layer and 1 output layer

In [31]:
case_1 = NeuralNetwork()
case_1.NN(X_train, y_train_one_hot.T, 2, [2,10])
y_pred = case_1.test(X_test, 2)
accuracy = case_1.calculate_accuracy(y_pred , y_test_one_hot)
print('Accuracy with 2 layers: {:.2f}%'.format(accuracy * 100))

Accuracy with 2 layers: 90.00%


- 2. Build NN with 3 layers=> 2 hidden layers Where # of neurons in first layer < # of neurons in second layer and 1 output layer

In [24]:
case_2 = NeuralNetwork()
case_2.NN(X_train, y_train_one_hot.T, 3, [20, 40, 10])
y_pred = case_2.test(X_test, 3)
accuracy = case_2.calculate_accuracy(y_pred , y_test_one_hot)
print('Accuracy with 3 layers (neurons in first layer < neurons in second layer): {:.2f}%'.format(accuracy * 100))

Accuracy with 3 layers (neurons in first layer < neurons in second layer): 90.00%


- 3. Build NN with 3 layers=> 2 hidden layers Where # of neurons in first layer > # of neurons in second layer

In [19]:
case_3 = NeuralNetwork()
case_3.NN(X_train, y_train_one_hot.T, 3, [40, 20, 10])
y_pred = case_3.test(X_test, 3)
accuracy = case_3.calculate_accuracy(y_pred , y_test_one_hot)
print('Accuracy with 3 layers (neurons in first layer > neurons in second layer): {:.2f}%'.format(accuracy * 100))

Accuracy with 3 layers (neurons in first layer > neurons in second layer): 90.00%
