In [None]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import warnings
# NOTE(review): this silences *every* warning, including deprecation notices
# from NumPy / scikit-learn. Kept as-is so notebook output stays quiet;
# consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

# Load MNIST dataset.
# as_frame=False keeps data/target as NumPy arrays regardless of the installed
# scikit-learn version (newer releases return pandas objects by default).
mnist = fetch_openml('mnist_784', as_frame=False)
# The builtin `int` replaces the `np.int` alias, which was removed from NumPy.
X, y = mnist.data, mnist.target.astype(int)

# Divide data into training and test *before* fitting the scaler so that no
# statistics of the test split leak into preprocessing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize dataset using the mean / std of the training split only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Keep `X` standardized as well (same train-derived statistics) for any later
# cell that reads the full feature matrix.
X = scaler.transform(X)

# One hot vector for labels: row k of the 10x10 identity matrix encodes digit k
y_train_one_hot = np.eye(10)[y_train]
y_test_one_hot = np.eye(10)[y_test]

# Activation functions
def sigmoid(x):
    """Element-wise logistic function ``1 / (1 + exp(-x))``.

    The value is computed from ``exp(-|x|)``, which is never larger than 1, so
    the exponential cannot overflow for large-magnitude inputs (the direct
    formula overflows in ``exp(-x)`` when ``x`` is very negative).

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        Values in [0, 1] with the same shape as ``x`` (a NumPy scalar for
        scalar input).
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        # Integer / boolean input: promote so the result is floating point.
        x = x.astype(float)
    decay = np.exp(-np.abs(x))
    # For x >= 0: 1 / (1 + e^-x).  For x < 0: e^x / (1 + e^x), the same value
    # written so that only exp of a non-positive number is ever needed.
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # higher-dimensional arrays unchanged.
    return result[()]

def sigmoid_derivative(x):
    """Derivative of the sigmoid, expressed in terms of the sigmoid's *output*.

    Args:
        x: The already-activated value ``s = sigmoid(z)`` (not the
            pre-activation ``z``).

    Returns:
        ``s * (1 - s)``, element-wise.
    """
    complement = 1 - x
    return x * complement

def mse(y_true, y_pred):
    """Mean squared error, averaged over every element of the inputs.

    Args:
        y_true: Target values.
        y_pred: Predicted values, broadcast-compatible with ``y_true``.

    Returns:
        The mean of the element-wise squared differences.
    """
    residual = y_true - y_pred
    return np.mean(residual * residual)

def mse_derivative(y_true, y_pred):
    """Element-wise gradient of the squared error with respect to ``y_pred``.

    The 1/N factor of the mean taken in ``mse`` is not applied here; the
    returned array is simply ``2 * (y_pred - y_true)``.

    Args:
        y_true: Target values.
        y_pred: Predicted values, broadcast-compatible with ``y_true``.

    Returns:
        Array of the same shape as the broadcast inputs.
    """
    error = y_pred - y_true
    return 2 * error


In [None]:

class NeuralNetwork:
    """Fully connected feed-forward network with sigmoid activations.

    Every layer (including the output layer) applies ``sigmoid``; training is
    plain gradient descent on the whole batch using the module-level
    ``mse`` / ``mse_derivative`` / ``sigmoid_derivative`` helpers.

    Attributes:
        layers: Layer sizes, input first, output last.
        weights: One ``(layers[i], layers[i + 1])`` matrix per connection.
        biases: One ``(layers[i + 1],)`` vector per connection.
        a: Activations cached by the most recent ``forward`` call
            (``a[0]`` is the input, ``a[-1]`` the network output).
    """

    def __init__(self, layers, seed=None):
        """Create the parameters for a network with the given layer sizes.

        Args:
            layers: Sequence of layer sizes, e.g. ``[784, 20, 10]``.
            seed: Optional integer. When given, weights are drawn from a
                private ``RandomState(seed)`` so construction is reproducible;
                when ``None`` the global NumPy random state is used.
        """
        self.layers = layers
        self.weights = []
        self.biases = []
        rng = np.random if seed is None else np.random.RandomState(seed)
        for fan_in, fan_out in zip(layers[:-1], layers[1:]):
            # Glorot/Xavier scaling: unit-variance weights make the
            # pre-activations of a wide layer huge, which saturates the
            # sigmoid and makes its gradient vanish from the first step.
            scale = np.sqrt(2.0 / (fan_in + fan_out))
            self.weights.append(rng.randn(fan_in, fan_out) * scale)
            self.biases.append(np.zeros(fan_out))

    def forward(self, X):
        """Run a forward pass and cache every layer's activation in ``self.a``.

        Args:
            X: Input batch with ``layers[0]`` columns.

        Returns:
            The activation of the last layer.
        """
        self.a = [X]
        for i in range(len(self.layers) - 1):
            z = np.dot(self.a[-1], self.weights[i]) + self.biases[i]
            self.a.append(sigmoid(z))
        return self.a[-1]

    def backward(self, X, y, learning_rate):
        """Back-propagate the error and apply one gradient-descent update.

        Reads the activations cached in ``self.a``, so ``forward`` must have
        been called on the same batch immediately before.

        Args:
            X: The batch that was fed to ``forward``; only its row count is
                used, to average the gradients over the batch.
            y: Targets with the same shape as the network output.
            learning_rate: Step size of the update.
        """
        m = X.shape[0]
        output = self.a[-1]
        # Error signal of the output layer.
        deltas = [mse_derivative(y, output) * sigmoid_derivative(output)]
        # Walk the hidden layers from last to first; deltas[-1] is always the
        # error signal of the layer that follows the one being processed.
        for i in reversed(range(len(self.layers) - 2)):
            delta = np.dot(deltas[-1], self.weights[i + 1].T) * sigmoid_derivative(self.a[i + 1])
            deltas.append(delta)
        deltas.reverse()

        # All deltas were computed with the old weights, so updating in place
        # here does not affect the gradients.
        for i in range(len(self.layers) - 1):
            self.weights[i] -= learning_rate * np.dot(self.a[i].T, deltas[i]) / m
            self.biases[i] -= learning_rate * np.sum(deltas[i], axis=0) / m

    def train(self, X, y, learning_rate, epochs, log_every=100):
        """Train with full-batch gradient descent.

        Args:
            X: Training inputs.
            y: Training targets (same shape as the network output).
            learning_rate: Step size passed to ``backward``.
            epochs: Number of passes over ``X``.
            log_every: Print the loss every this many epochs; a falsy value
                disables printing.
        """
        for epoch in range(epochs):
            y_pred = self.forward(X)
            self.backward(X, y, learning_rate)
            if log_every and epoch % log_every == 0:
                # Loss of the predictions made *before* this epoch's update.
                loss = mse(y, y_pred)
                print(f'Epoch {epoch}, Loss: {loss}')

    def evaluate(self, X, y):
        """Return the classification accuracy on ``(X, y)``.

        Runs ``forward`` (which overwrites the cached activations) and takes
        the arg-max of the output as the predicted class.

        Args:
            X: Inputs to classify.
            y: Either integer class labels of shape ``(n,)`` or one-hot rows
                of shape ``(n, n_classes)``.

        Returns:
            Fraction of rows whose predicted class equals the label.
        """
        predicted = np.argmax(self.forward(X), axis=1)
        labels = np.asarray(y)
        if labels.ndim == 2:
            # One-hot targets: comparing them directly with `predicted` would
            # broadcast and silently yield a meaningless number.
            labels = np.argmax(labels, axis=1)
        accuracy = np.mean(predicted == labels)
        return accuracy

def NN(X, y, num_of_layers, size_of_layers, learning_rate=0.1, epochs=1000):
    """Build a ``NeuralNetwork`` for ``X`` and train it on ``(X, y)``.

    Args:
        X: Training inputs; the number of columns becomes the input layer size.
        y: Training targets, shaped like the network output (one-hot rows for
            classification).
        num_of_layers: Not used by the implementation; the architecture is
            fully determined by ``size_of_layers``. Kept so existing calls
            keep working.
        size_of_layers: Sizes of the layers after the input layer, ending
            with the output layer, e.g. ``[20, 10]``.
        learning_rate: Gradient-descent step size.
        epochs: Number of training epochs.

    Returns:
        The trained ``NeuralNetwork``.
    """
    # list(...) lets callers pass a tuple as well as a list.
    layers = [X.shape[1]] + list(size_of_layers)
    nn = NeuralNetwork(layers)
    # Train on the data that was passed in rather than on notebook globals.
    nn.train(X, y, learning_rate=learning_rate, epochs=epochs)
    return nn



In [None]:

# Architecture experiments: train one network per configuration and report
# its accuracy on the held-out test split.
# Experiment 1 - two layers: one hidden layer (20 units) + output layer (10 units)
nn1 = NN(X_train, y_train_one_hot, num_of_layers=2, size_of_layers=[20, 10])
accuracy1 = nn1.evaluate(X_test, y_test)
print(f'Accuracy (2 layers): {accuracy1 * 100:.2f}%')


Epoch 0, Loss: 0.41948829014853317
Epoch 100, Loss: 0.15214380839630795
Epoch 200, Loss: 0.10482040222146681
Epoch 300, Loss: 0.09869030273791253
Epoch 400, Loss: 0.09636935057335337
Epoch 500, Loss: 0.09429680426374203
Epoch 600, Loss: 0.09193734964569213
Epoch 700, Loss: 0.0898638652665786
Epoch 800, Loss: 0.088187882541152
Epoch 900, Loss: 0.08666624683174308
Accuracy (2 layers): 31.81%


In [None]:
# Experiment 2 - three layers: hidden layers of 20 then 30 units
# (first hidden layer smaller than the second) + output layer (10 units)
nn2 = NN(X_train, y_train_one_hot, num_of_layers=3, size_of_layers=[20, 30, 10])
accuracy2 = nn2.evaluate(X_test, y_test)
print(f'Accuracy (3 layers, first layer < second layer): {accuracy2 * 100:.2f}%')


Epoch 0, Loss: 0.4292119216731479
Epoch 100, Loss: 0.1792657592374957
Epoch 200, Loss: 0.17673798098239618
Epoch 300, Loss: 0.175655794249703
Epoch 400, Loss: 0.17442532874202987
Epoch 500, Loss: 0.17296843945314216
Epoch 600, Loss: 0.1714725760553641
Epoch 700, Loss: 0.17006169912491093
Epoch 800, Loss: 0.16877355490114243
Epoch 900, Loss: 0.16758109536439098
Accuracy (3 layers, first layer < second layer): 9.25%


In [None]:

# Experiment 3 - three layers: hidden layers of 30 then 20 units
# (first hidden layer larger than the second) + output layer (10 units)
nn3 = NN(X_train, y_train_one_hot, num_of_layers=3, size_of_layers=[30, 20, 10])
accuracy3 = nn3.evaluate(X_test, y_test)
print(f'Accuracy (3 layers, first layer > second layer): {accuracy3 * 100:.2f}%')

Epoch 0, Loss: 0.5688047162615391
Epoch 100, Loss: 0.11726101072253865
Epoch 200, Loss: 0.08920133606422932
Epoch 300, Loss: 0.0869678558692315
Epoch 400, Loss: 0.08531995164845567
Epoch 500, Loss: 0.0838838447388673
Epoch 600, Loss: 0.08257573298256114
Epoch 700, Loss: 0.0813558589213757
Epoch 800, Loss: 0.08020431904643607
Epoch 900, Loss: 0.07911300690274989
Accuracy (3 layers, first layer > second layer): 35.90%


In [None]:
# Experiment 4 - three layers again, with wider hidden layers (50 then 25 units).
# The printed label is the same as experiment 3's.
nn4 = NN(X_train, y_train_one_hot, num_of_layers=3, size_of_layers=[50, 25, 10])
accuracy4 = nn4.evaluate(X_test, y_test)
print(f'Accuracy (3 layers, first layer > second layer): {accuracy4 * 100:.2f}%')

Epoch 0, Loss: 0.34092956195740337
Epoch 100, Loss: 0.10054119933537643
Epoch 200, Loss: 0.09538159151010168
Epoch 300, Loss: 0.09326571573824863
Epoch 400, Loss: 0.09176495892471953
Epoch 500, Loss: 0.09051088748383951
Epoch 600, Loss: 0.08937297548037956
Epoch 700, Loss: 0.08830041347528159
Epoch 800, Loss: 0.08726501383763079
Epoch 900, Loss: 0.08623820541190633
Accuracy (3 layers, first layer > second layer): 27.43%


In [None]:
# Experiment 5 - two layers with a wider hidden layer (50 units) + output layer.
# NOTE: this rebinds `nn1` / `accuracy1`, replacing the results of experiment 1.
nn1 = NN(X_train, y_train_one_hot, num_of_layers=2, size_of_layers=[50, 10])
accuracy1 = nn1.evaluate(X_test, y_test)
print(f'Accuracy (2 layers): {accuracy1 * 100:.2f}%')

Epoch 0, Loss: 0.36758514164445366
Epoch 100, Loss: 0.12508059058782184
Epoch 200, Loss: 0.10357525560842316
Epoch 300, Loss: 0.10005700520430758
Epoch 400, Loss: 0.09823425149065693
Epoch 500, Loss: 0.09682193474826481
Epoch 600, Loss: 0.09558690572034607
Epoch 700, Loss: 0.09449213851514328
Epoch 800, Loss: 0.09350924668980076
Epoch 900, Loss: 0.092607019752489
Accuracy (2 layers): 24.26%
