In [1]:
def one_hot_encode(y, num_classes=None):
    """Convert a vector of integer class labels into a one-hot matrix.

    Parameters
    ----------
    y : array-like of int
        Class labels. Any shape is accepted; the labels are flattened first.
        Every label must be a non-negative integer.
    num_classes : int, optional
        Number of columns in the result. When omitted it is inferred as
        ``max(y) + 1``, so a label vector that happens to miss a class
        (e.g. ``[0, 2]``) still gets a column for every index up to the
        largest label instead of failing.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(len(y), num_classes)`` where row ``i`` has a
        single 1 in column ``y[i]``. Empty input yields an array with zero
        rows.

    Raises
    ------
    TypeError
        If the labels are not of an integer dtype.
    ValueError
        If a label is negative or is not smaller than ``num_classes``.
    """
    labels = np.asarray(y).ravel()

    # An empty array has no maximum to infer the width from (and defaults to
    # a float dtype), so handle it before any validation.
    if labels.size == 0:
        return np.zeros((0, 0 if num_classes is None else int(num_classes)), dtype=int)

    if not np.issubdtype(labels.dtype, np.integer):
        raise TypeError("one_hot_encode expects integer class labels")

    # Negative values would otherwise index from the end of the row and
    # silently produce a wrong encoding.
    if labels.min() < 0:
        raise ValueError("class labels must be non-negative")

    largest = int(labels.max())
    if num_classes is None:
        num_classes = largest + 1
    else:
        num_classes = int(num_classes)
        if largest >= num_classes:
            raise ValueError(
                f"label {largest} is out of range for num_classes={num_classes}"
            )

    encoded_outputs = np.zeros((labels.size, num_classes), dtype=int)
    # One vectorised assignment: row i, column labels[i].
    encoded_outputs[np.arange(labels.size), labels] = 1

    return encoded_outputs

In [2]:
import numpy as np
import tensorflow

# Fetch MNIST as two (images, labels) pairs: a training split and a test split.
(x_train, y_train), (x_test, y_test) = tensorflow.keras.datasets.mnist.load_data()

# Flatten every image into a 784-element row, then convert to float32 and
# divide by 255 so the network inputs are scaled down.
x_train = x_train.reshape((60000, 784)).astype('float32') / 255.0
x_test = x_test.reshape((10000, 784)).astype('float32') / 255.0

# Hold on to the integer test labels before y_test is rebound to its one-hot
# encoding; accuracy is later scored against these.
y_test_original = y_test

y_train, y_test = one_hot_encode(y_train), one_hot_encode(y_test)

2025-10-01 21:11:28.452666: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-10-01 21:11:28.452897: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-10-01 21:11:28.490646: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-10-01 21:11:29.288836: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To tur

In [3]:
class DFN:
    """Fully connected feed-forward network with sigmoid activations.

    Layers are 1-indexed: ``weights[k]`` has shape ``(sizes[k-1], sizes[k])``
    and ``biases[k]`` has shape ``(sizes[k],)``. Index 0 of both lists holds a
    placeholder ``0`` so list positions line up with layer numbers.

    The network is trained one sample at a time with gradient descent; the
    error signal used in ``back_prop`` is the gradient of
    ``0.5 * sum((output - target) ** 2)``.
    """

    def __init__(self, layer_sizes = (784, 128, 16, 10)):
        """Create the network with random weights and zero biases.

        Parameters
        ----------
        layer_sizes : sequence of int, optional
            Number of units in each layer, input layer first. The default
            reproduces the original fixed 784-128-16-10 architecture.

        Raises
        ------
        ValueError
            If fewer than two layers are given or any size is not positive.
        """
        layer_sizes = tuple(int(size) for size in layer_sizes)
        if len(layer_sizes) < 2 or min(layer_sizes) < 1:
            raise ValueError("layer_sizes needs at least two positive layer sizes")

        self.layer_sizes = layer_sizes

        # Weights are drawn uniformly from [-1, 1); slot 0 is a placeholder.
        self.weights = [0]
        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            self.weights.append(np.random.uniform(low = -1, high = 1, size = (fan_in, fan_out)))

        self.biases = [0]
        for fan_out in layer_sizes[1:]:
            self.biases.append(np.zeros(fan_out))

    def sigmoid(self, x):
        """Element-wise logistic function ``1 / (1 + exp(-x))``."""
        return 1 / (1 + np.exp(- x))

    def sigmoid_derivative(self, x):
        """Element-wise derivative of the logistic function at ``x``."""
        # Evaluate the sigmoid once instead of twice.
        s = self.sigmoid(x)
        return s * (1 - s)

    def forward_prop(self, input):
        """Run one input vector through every layer.

        Parameters
        ----------
        input : numpy.ndarray
            Input vector whose length matches the first dimension of
            ``weights[1]``.

        Returns
        -------
        (z, a) : tuple of lists
            ``z[k]`` is the pre-activation and ``a[k]`` the activation of
            layer ``k``. ``a[0]`` is the input itself and ``z[0]`` is the
            placeholder ``[0]``; the network output is ``a[-1]``.
        """
        a = [input] # List of activation vectors at each layer.
        z = [[0]]   # List of pre-activation vectors at each layer.

        for layer in range(1, len(self.weights)):
            z_layer = np.dot(a[layer - 1], self.weights[layer]) + self.biases[layer]
            z.append(z_layer)
            a.append(self.sigmoid(z_layer))

        return z, a

    def back_prop(self, z, a, y, alpha):
        """Apply one gradient-descent step for a single sample.

        Parameters
        ----------
        z, a : list
            Pre-activations and activations as returned by ``forward_prop``.
        y : numpy.ndarray
            Target output vector for this sample.
        alpha : float
            Learning rate.

        Notes
        -----
        All error signals are computed first, using the same weights that
        produced ``z`` and ``a``, and only then are the parameters updated.
        Updating a layer before propagating the error through it would mix
        new weights into the gradient of the layers below.
        """
        last = len(self.weights) - 1

        # error_signals[k] is dLoss/dz[k]; slot 0 is unused.
        error_signals = [None] * (last + 1)
        error_signals[last] = (a[last] - y) * self.sigmoid_derivative(z[last])

        for layer in range(last - 1, 0, -1):
            propagated_value = np.dot(self.weights[layer + 1], error_signals[layer + 1])
            error_signals[layer] = propagated_value * self.sigmoid_derivative(z[layer])

        # Every gradient is known now, so the in-place updates are safe.
        for layer in range(1, last + 1):
            weights_gradient = np.outer(a[layer - 1], error_signals[layer])
            self.weights[layer] -= alpha * weights_gradient
            self.biases[layer] -= alpha * error_signals[layer]

        return

    def SGD(self, x_train, y_train, alpha, epochs = 20):
        """Train with per-sample stochastic gradient descent.

        Each epoch visits every sample once in a fresh random order and prints
        the epoch number followed by the mean (over samples) of the summed
        squared output error, measured before each sample's update.

        Parameters
        ----------
        x_train : numpy.ndarray
            Training inputs, one sample per row.
        y_train : numpy.ndarray
            Target vectors, one per row, aligned with ``x_train``.
        alpha : float
            Learning rate.
        epochs : int, optional
            Number of passes over the training set.

        Raises
        ------
        ValueError
            If the training set is empty or inputs and targets differ in
            length.
        """
        n_samples = len(x_train)
        if n_samples == 0:
            raise ValueError("x_train must contain at least one sample")
        if len(y_train) != n_samples:
            raise ValueError("x_train and y_train must have the same length")

        for epoch in range(epochs):

            # Walk the samples through a permuted index instead of copying
            # the whole dataset into shuffled order every epoch.
            shuffled_indices = np.random.permutation(n_samples)

            print(f"Epoch -> {epoch}")

            sum_of_squares = 0

            for i in shuffled_indices:

                z, a = self.forward_prop(x_train[i])
                self.back_prop(z, a, y_train[i], alpha = alpha)

                squared_error = (a[-1] - y_train[i]) ** 2
                sum_of_squares += np.sum(squared_error)

            print(f"Loss = {sum_of_squares / n_samples}")

        return

    def test_model(self, x_test, y_test_original):
        """Print the fraction of samples whose arg-max output matches the label.

        Parameters
        ----------
        x_test : numpy.ndarray
            Inputs, one sample per row.
        y_test_original : sequence of int
            Integer class labels (not one-hot), aligned with ``x_test``.

        Raises
        ------
        ValueError
            If the test set is empty or inputs and labels differ in length.
        """
        n_samples = len(x_test)
        if n_samples == 0:
            raise ValueError("x_test must contain at least one sample")
        if len(y_test_original) != n_samples:
            raise ValueError("x_test and y_test_original must have the same length")

        correct = 0

        for sample, label in zip(x_test, y_test_original):
            z, a = self.forward_prop(sample)
            prediction = np.argmax(a[-1])

            if prediction == label:
                correct += 1

        # Divide by the size of the data actually passed in, not by a
        # notebook-level global.
        print(f"Accuracy = {correct / n_samples}")

        return

In [4]:
# Build a network with freshly randomised weights, then train it with
# per-sample SGD: learning rate 0.01 for 20 passes over the training set.
n = DFN()

n.SGD(x_train, y_train, alpha = 0.01, epochs = 20)

Epoch -> 0
Loss = 0.7062113717816134
Epoch -> 1
Loss = 0.38057814996535294
Epoch -> 2
Loss = 0.25781398496219554
Epoch -> 3
Loss = 0.20461563351232964
Epoch -> 4
Loss = 0.1747783416054232
Epoch -> 5
Loss = 0.15526513328529332
Epoch -> 6
Loss = 0.1411172998430763
Epoch -> 7
Loss = 0.13015481821693467
Epoch -> 8
Loss = 0.12159163204886647
Epoch -> 9
Loss = 0.11428568580882056
Epoch -> 10
Loss = 0.10805700101169449
Epoch -> 11
Loss = 0.10264390964319356
Epoch -> 12
Loss = 0.09780504269891489
Epoch -> 13
Loss = 0.0935361726107613
Epoch -> 14
Loss = 0.08972659283169092
Epoch -> 15
Loss = 0.08620148583774406
Epoch -> 16
Loss = 0.08303400005961213
Epoch -> 17
Loss = 0.08014441134423024
Epoch -> 18
Loss = 0.07757493064600635
Epoch -> 19
Loss = 0.07493612974915205


In [5]:
# Score the trained network on the test inputs against the integer
# (not one-hot) labels.
n.test_model(x_test = x_test, y_test_original = y_test_original)

Accuracy = 0.9484
