In [1]:
# !pip install kaggle
# !pip install numpy
# !pip install matplotlib
# !pip install struct
# !pip install array

In [2]:
# from google.colab import drive
# drive.mount('/content/drive')

Download MNIST Data

In [3]:
!kaggle datasets download -d hojjatk/mnist-dataset

import zipfile
with zipfile.ZipFile("/content/mnist-dataset.zip","r") as zip_ref:
    zip_ref.extractall("mnist-dataset")

Dataset URL: https://www.kaggle.com/datasets/hojjatk/mnist-dataset
License(s): copyright-authors
Downloading mnist-dataset.zip to /content
 95% 21.0M/22.0M [00:01<00:00, 32.0MB/s]
100% 22.0M/22.0M [00:01<00:00, 19.7MB/s]


Load Dataset

In [4]:
import numpy as np
import struct
from array import array
from os.path  import join

#
# MNIST Data Loader Class
#
class MnistDataloader(object):
    def __init__(self, training_images_filepath,training_labels_filepath,
                 test_images_filepath, test_labels_filepath):
        self.training_images_filepath = training_images_filepath
        self.training_labels_filepath = training_labels_filepath
        self.test_images_filepath = test_images_filepath
        self.test_labels_filepath = test_labels_filepath

    def read_images_labels(self, images_filepath, labels_filepath):
        labels = []
        with open(labels_filepath, 'rb') as file:
            magic, size = struct.unpack(">II", file.read(8))
            if magic != 2049:
                raise ValueError('Magic number mismatch, expected 2049, got {}'.format(magic))
            labels = array("B", file.read())

        with open(images_filepath, 'rb') as file:
            magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
            if magic != 2051:
                raise ValueError('Magic number mismatch, expected 2051, got {}'.format(magic))
            image_data = array("B", file.read())
        images = []
        for i in range(size):
            images.append([0] * rows * cols)
        for i in range(size):
            img = np.array(image_data[i * rows * cols:(i + 1) * rows * cols])
            img = img.reshape(28, 28)
            images[i][:] = img

        return np.array(images), np.array(labels).reshape(-1, 1)

    def load_data(self):
        x_train, y_train = self.read_images_labels(self.training_images_filepath, self.training_labels_filepath)
        x_test, y_test = self.read_images_labels(self.test_images_filepath, self.test_labels_filepath)
        return (x_train, y_train),(x_test, y_test)

In [5]:
import numpy as np
import struct
from array import array
import os
import zipfile

if os.path.exists('mnist-dataset') == False:
    with zipfile.ZipFile("archive.zip","r") as zip_ref:
        zip_ref.extractall("mnist-dataset")

#
# MNIST Data Loader Class
#
class MnistDataloader(object):
    def __init__(self, training_images_filepath,training_labels_filepath,
                 test_images_filepath, test_labels_filepath):
        self.training_images_filepath = training_images_filepath
        self.training_labels_filepath = training_labels_filepath
        self.test_images_filepath = test_images_filepath
        self.test_labels_filepath = test_labels_filepath

    def read_images_labels(self, images_filepath, labels_filepath):
        labels = []
        with open(labels_filepath, 'rb') as file:
            magic, size = struct.unpack(">II", file.read(8))
            if magic != 2049:
                raise ValueError('Magic number mismatch, expected 2049, got {}'.format(magic))
            labels = array("B", file.read())

        with open(images_filepath, 'rb') as file:
            magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
            if magic != 2051:
                raise ValueError('Magic number mismatch, expected 2051, got {}'.format(magic))
            image_data = array("B", file.read())
        images = []
        for i in range(size):
            images.append([0] * rows * cols)
        for i in range(size):
            img = np.array(image_data[i * rows * cols:(i + 1) * rows * cols])
            img = img.reshape(28, 28)
            images[i][:] = img

        return np.array(images), np.array(labels).reshape(-1, 1)

    def load_data(self):
        x_train, y_train = self.read_images_labels(self.training_images_filepath, self.training_labels_filepath)
        x_test, y_test = self.read_images_labels(self.test_images_filepath, self.test_labels_filepath)
        return (x_train, y_train),(x_test, y_test)

def read_data():
    print('Data ingestion started ->')
    #
    # Verify Reading Dataset via MnistDataloader class
    # Set file paths based on added MNIST Datasets
    #
    input_path = 'mnist-dataset'
    training_images_filepath = os.path.join(input_path, 'train-images-idx3-ubyte/train-images-idx3-ubyte')
    training_labels_filepath = os.path.join(input_path, 'train-labels-idx1-ubyte/train-labels-idx1-ubyte')
    test_images_filepath = os.path.join(input_path, 't10k-images-idx3-ubyte/t10k-images-idx3-ubyte')
    test_labels_filepath = os.path.join(input_path, 't10k-labels-idx1-ubyte/t10k-labels-idx1-ubyte')

    #
    # Load MINST dataset
    #
    mnist_dataloader = MnistDataloader(training_images_filepath, training_labels_filepath, test_images_filepath, test_labels_filepath)
    (x_train, y_train), (x_test, y_test) = mnist_dataloader.load_data()

    print('Data ingestion complete!->')
    return (x_train, y_train), (x_test, y_test)

Model Development from scratch


In [6]:
from dataclasses import dataclass, field

@dataclass
class NeuralNet:
    x: np.ndarray
    y: np.ndarray

    input_size: int
    hidden_layers: list
    output_size: int

    activations: list[str] = field(default_factory=list)
    weights: list = field(default_factory=list)
    biases: list = field(default_factory=list)
    learning_rate: float = 0.01

    def __post_init__(self):
        # Flatten the input if it's multi-dimensional
        if len(self.x.shape) > 2:
            self.x = self.x.reshape(self.x.shape[0], -1)
            # Update input size to match flattened dimension
            self.input_size = self.x.shape[1]
            print(f"Input data flattened to shape {self.x.shape}")

        # convert y to one-hot encoding if needed
        if len(self.y.shape) == 1 or self.y.shape[1] == 1:
            num_classes = max(self.y.flatten()) + 1
            y_one_hot = np.zeros((len(self.y), num_classes))
            y_one_hot[np.arange(len(self.y)), self.y.flatten().astype(int)] = 1
            self.y = y_one_hot

        # update output size to match y shape if needed
        if self.output_size != self.y.shape[1]:
            self.output_size = self.y.shape[1]
            print(f"Output size adjusted to {self.output_size} to match target shape")

        # initialising list of all layers
        self.layer_sizes = [self.input_size] + self.hidden_layers + [self.output_size]

        # initializing weights and biases with correct shapes
        for i in range(len(self.layer_sizes) - 1):
            self.weights.append(np.random.randn(self.layer_sizes[i], self.layer_sizes[i + 1]) * np.sqrt(2.0 / self.layer_sizes[i]))
            self.biases.append(np.zeros((1, self.layer_sizes[i + 1])))

    def activation_func(self, inputs, activation_func):
        if activation_func == 'sigmoid':
            return 1 / (1 + np.exp(-inputs))
        if activation_func == 'softmax':
            return self.softmax(inputs)
        if activation_func == 'tanh':
            return np.tanh(inputs)
        if activation_func == 'relu':
            return np.maximum(0, inputs)
        if activation_func == 'leaky_relu':
            return np.maximum(0.01 * inputs, inputs)

    def activation_derivative(self, inputs, func):
        if func == 'sigmoid':
            return inputs * (1 - inputs)
        if func == 'softmax':
            return 1
        if func == 'tanh':
            return 1 - inputs ** 2
        if func == 'relu':
            return np.where(inputs > 0, 1, 0)
        if func == 'leaky_relu':
            return np.where(inputs > 0, 1, 0.01)

    def softmax(self, inputs):
        inputs = np.clip(inputs, -500, 500)
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
        return exp_values / np.sum(exp_values, axis=1, keepdims=True)

    def neuron_activation(self, inputs, weights, bias, activation_func):
        return self.activation_func(np.dot(inputs, weights) + bias, activation_func=activation_func)

    def cross_entropy_loss(self, y_true, y_pred):
        epsilon = 1e-15
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        return -np.mean(np.sum(y_true * np.log(y_pred), axis=1))

    def feedforward(self):
        self.layer_outputs = [self.x]
        current_input = self.x

        for i in range(len(self.weights)):
            current_input = self.neuron_activation(
                current_input,
                weights=self.weights[i],
                bias=self.biases[i],
                activation_func=self.activations[i]
            )
            self.layer_outputs.append(current_input)

        return current_input

    def backpropagation(self):
        # Forward pass
        self.y_pred = self.feedforward()

        # Calculate loss
        loss = self.cross_entropy_loss(self.y, self.y_pred)
        batch_size = self.y.shape[0]
        deltas = [(self.y_pred - self.y) / batch_size]

        # Backpropagate through layers
        for i in reversed(range(len(self.weights) - 1)):
            delta = np.dot(deltas[0], self.weights[i + 1].T) * self.activation_derivative(
                self.layer_outputs[i + 1], self.activations[i])
            deltas.insert(0, delta)

        # Update weights and biases
        for i in range(len(self.weights)):
            d_weight = np.dot(self.layer_outputs[i].T, deltas[i])
            d_bias = np.sum(deltas[i], axis=0, keepdims=True)

            # Clip gradients to prevent exploding gradients
            d_weight = np.clip(d_weight, -1.0, 1.0)
            d_bias = np.clip(d_bias, -1.0, 1.0)

            self.weights[i] -= self.learning_rate * d_weight
            self.biases[i] -= self.learning_rate * d_bias

        return loss

    def fit(self, epochs):
        losses = []
        for i in range(1, epochs + 1):
            loss = self.backpropagation()
            losses.append(loss)
            print(f"Epoch {i}/{epochs} - Loss: {loss:.8f}")
        return losses

    def predict(self, X):
        if len(X.shape) > 2:
            X = X.reshape(X.shape[0], -1)

        original_x = self.x
        self.x = X
        predictions = self.feedforward()
        self.x = original_x
        return predictions

In [7]:
# Load the data
(x_train, y_train), (x_test, y_test) = read_data()
print(f"Input shape -> {x_train.shape}")

# Shuffle data while keeping images and labels together
train_indices = np.arange(len(x_train))
test_indices = np.arange(len(x_test))

np.random.shuffle(train_indices)
np.random.shuffle(test_indices)

x_train, y_train = x_train[train_indices], y_train[train_indices]
x_test, y_test = x_test[test_indices], y_test[test_indices]

print('Data shuffled ->')

# Normalize data
x_train = x_train.astype(np.float32) / 255.0
x_test = x_test.astype(np.float32) / 255.0

# Ensure labels are one-hot encoded for the network
num_classes = 10
y_train_one_hot = np.zeros((len(y_train), num_classes))
y_train_one_hot[np.arange(len(y_train)), y_train] = 1

y_test_one_hot = np.zeros((len(y_test), num_classes))
y_test_one_hot[np.arange(len(y_test)), y_test] = 1

Data ingestion started ->
Data ingestion complete!->
Input shape -> (60000, 28, 28)
Data shuffled ->


In [8]:
print("Input:", x_train.shape)
print("Output:", y_train.shape)

Input: (60000, 28, 28)
Output: (60000, 1)


In [13]:
# Define network parameters
hidden_layers = [32, 64, 32]
activations = ['relu', 'relu', 'relu', 'softmax']

# Reduced learning rate for better convergence
nn = NeuralNet(
    x=x_train,
    y=y_train_one_hot,
    input_size=784,
    hidden_layers=hidden_layers,
    activations=activations,
    output_size=10,
    learning_rate=0.01  # Changed to a lower learning rate
)

# Train the network
losses = nn.fit(epochs=100)

# Logging the loss values
for epoch, loss in enumerate(losses, 1):
    print(f"Epoch {epoch} - Loss: {loss:.8f}")

# Training set predictions
train_predictions = nn.predict(x_train)
train_predicted_classes = np.argmax(train_predictions, axis=1)

# Test set predictions
test_predictions = nn.predict(x_test)
test_predicted_classes = np.argmax(test_predictions, axis=1)

Input data flattened to shape (60000, 784)
Epoch 1/100 - Loss: 23.26153206
Epoch 2/100 - Loss: 23.39531677
Epoch 3/100 - Loss: 23.69723369
Epoch 4/100 - Loss: 24.28743093
Epoch 5/100 - Loss: 25.53152323
Epoch 6/100 - Loss: 28.20032981
Epoch 7/100 - Loss: 33.27750712
Epoch 8/100 - Loss: 41.87138661
Epoch 9/100 - Loss: 55.92243615
Epoch 10/100 - Loss: 78.57556418
Epoch 11/100 - Loss: 111.85135250
Epoch 12/100 - Loss: 153.84601015
Epoch 13/100 - Loss: 195.10487634
Epoch 14/100 - Loss: 225.00487555
Epoch 15/100 - Loss: 247.11804948
Epoch 16/100 - Loss: 262.44829516
Epoch 17/100 - Loss: 271.43088402
Epoch 18/100 - Loss: 274.82696031
Epoch 19/100 - Loss: 264.20136445
Epoch 20/100 - Loss: 221.19466354
Epoch 21/100 - Loss: 151.22900279
Epoch 22/100 - Loss: 87.70849890
Epoch 23/100 - Loss: 50.04686503
Epoch 24/100 - Loss: 32.71370129
Epoch 25/100 - Loss: 25.78906021
Epoch 26/100 - Loss: 23.62980572
Epoch 27/100 - Loss: 23.14611186
Epoch 28/100 - Loss: 23.03917824
Epoch 29/100 - Loss: 23.0258772

In [10]:
# training results
print("\nTraining Set Results:")
correct_train_predictions = 0
for i in range(len(train_predicted_classes)):
  if  train_predicted_classes[i] == y_train[i]:
    correct_train_predictions += 1
train_accuracy = (correct_train_predictions / len(y_train) * 100)
print(f"Training Accuracy: {train_accuracy:.2f}%")

# test results
print("\nTest Set Results:")
correct_test_predictions = 0
for i in range(len(test_predicted_classes)):
  if  test_predicted_classes[i] == y_test[i]:
    correct_test_predictions += 1
test_accuracy = (correct_test_predictions / len(y_test) * 100)
print(f"Test Accuracy: {test_accuracy:.2f}%")

print(correct_train_predictions)
print(correct_test_predictions)


Training Set Results:
Training Accuracy: 9.87%

Test Set Results:
Test Accuracy: 9.80%
5923
980


In [11]:
print(x_train.shape, y_train.shape)

(60000, 28, 28) (60000, 1)


Tensorflow

In [15]:
import tensorflow as tf
from tensorflow import keras


model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(32, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [17]:
model.fit(x_train, y_train, epochs=100)

Epoch 1/100
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.8041 - loss: 0.6973
Epoch 2/100
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9489 - loss: 0.1753
Epoch 3/100
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9613 - loss: 0.1309
Epoch 4/100
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9674 - loss: 0.1094
Epoch 5/100
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9718 - loss: 0.0931
Epoch 6/100
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9759 - loss: 0.0787
Epoch 7/100
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9775 - loss: 0.0709
Epoch 8/100
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9800 - loss: 0.0629
Epoch 9/100
[1m

<keras.src.callbacks.history.History at 0x7ea550207550>

In [18]:
model.evaluate(x_test, y_test)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9706 - loss: 0.3524


[0.33195847272872925, 0.9700999855995178]