Bulid the Neural Network from Scratch

In [2]:
pip install keras


Collecting keras
  Downloading keras-3.5.0-py3-none-any.whl.metadata (5.8 kB)
Collecting absl-py (from keras)
  Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting rich (from keras)
  Downloading rich-13.8.1-py3-none-any.whl.metadata (18 kB)
Collecting namex (from keras)
  Downloading namex-0.0.8-py3-none-any.whl.metadata (246 bytes)
Collecting h5py (from keras)
  Downloading h5py-3.11.0-cp312-cp312-win_amd64.whl.metadata (2.5 kB)
Collecting optree (from keras)
  Downloading optree-0.12.1-cp312-cp312-win_amd64.whl.metadata (48 kB)
     ---------------------------------------- 0.0/48.7 kB ? eta -:--:--
     ------------------------- -------------- 30.7/48.7 kB 1.3 MB/s eta 0:00:01
     ------------------------------- ------ 41.0/48.7 kB 487.6 kB/s eta 0:00:01
     ------------------------------- ------ 41.0/48.7 kB 487.6 kB/s eta 0:00:01
     -------------------------------------- 48.7/48.7 kB 307.6 kB/s eta 0:00:00
Collecting ml-dtypes (from keras)
  Downloading ml_


[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:

import numpy as np
from keras.datasets import mnist
from keras.utils import to_categorical


def softmax(z):
    exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)


def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def sigmoid_derivative(z):
    return sigmoid(z) * (1 - sigmoid(z))


def cross_entropy_loss(predictions, labels):

    epsilon = 1e-10
    predictions = np.clip(predictions, epsilon, 1 - epsilon)
    return -np.mean(np.sum(labels * np.log(predictions), axis=1))

def cross_entropy_derivative(predictions, labels):
    return predictions - labels

# Xavier Initialization for weights
def xavier_initialization(shape):
    return np.random.randn(*shape) * np.sqrt(1 / shape[0])

# Adam optimizer
class AdamOptimizer:
    def __init__(self, learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.m = None
        self.v = None
        self.t = 0

    def update(self, w, dw):
        if self.m is None:
            self.m = np.zeros_like(dw)
            self.v = np.zeros_like(dw)
        self.t += 1
        self.m = self.beta1 * self.m + (1 - self.beta1) * dw
        self.v = self.beta2 * self.v + (1 - self.beta2) * (dw ** 2)
        m_hat = self.m / (1 - self.beta1 ** self.t)
        v_hat = self.v / (1 - self.beta2 ** self.t)
        return w - self.learning_rate * m_hat / (np.sqrt(v_hat) + self.epsilon)

# Neural Network Class
class NeuralNetwork:
    def __init__(self, layers, learning_rate=0.001, regularization=0.01):
        self.layers = layers
        self.learning_rate = learning_rate
        self.regularization = regularization
        self.weights = [xavier_initialization((layers[i], layers[i+1])) for i in range(len(layers) - 1)]
        self.biases = [np.zeros((1, layers[i+1])) for i in range(len(layers) - 1)]
        self.optimizer_w = [AdamOptimizer(learning_rate) for _ in range(len(self.weights))]
        self.optimizer_b = [AdamOptimizer(learning_rate) for _ in range(len(self.biases))]

    def feedforward(self, X):
        activations = [X]
        z_values = []
        for w, b in zip(self.weights, self.biases):
            z = np.dot(activations[-1], w) + b
            z_values.append(z)
            if len(z_values) == len(self.weights):  # Final layer
                activations.append(softmax(z))  # Use softmax for output layer
            else:
                activations.append(sigmoid(z))  # Use sigmoid for hidden layers
        return activations, z_values

    def backpropagation(self, X, y, activations, z_values):
        dw = [0] * len(self.weights)
        db = [0] * len(self.biases)

        delta = cross_entropy_derivative(activations[-1], y)

        dw[-1] = np.dot(activations[-2].T, delta) + self.regularization * self.weights[-1]
        db[-1] = np.sum(delta, axis=0, keepdims=True)

        # Backpropagate through the rest of the layers
        for l in range(2, len(self.layers)):
            delta = np.dot(delta, self.weights[-l + 1].T) * sigmoid_derivative(z_values[-l])
            dw[-l] = np.dot(activations[-l - 1].T, delta) + self.regularization * self.weights[-l]
            db[-l] = np.sum(delta, axis=0, keepdims=True)

        return dw, db

    def train(self, X, y, epochs, batch_size, X_test, y_test):
        for epoch in range(epochs):
            shuffled_indices = np.random.permutation(X.shape[0])
            X_shuffled = X[shuffled_indices]
            y_shuffled = y[shuffled_indices]

            for i in range(0, X.shape[0], batch_size):
                X_batch = X_shuffled[i:i + batch_size]
                y_batch = y_shuffled[i:i + batch_size]

                activations, z_values = self.feedforward(X_batch)
                dw, db = self.backpropagation(X_batch, y_batch, activations, z_values)


                for idx in range(len(self.weights)):
                    self.weights[idx] = self.optimizer_w[idx].update(self.weights[idx], dw[idx])
                    self.biases[idx] = self.optimizer_b[idx].update(self.biases[idx], db[idx])


            activations, _ = self.feedforward(X)
            loss = cross_entropy_loss(activations[-1], y)
            predictions = np.argmax(activations[-1], axis=1)
            accuracy = np.mean(predictions == np.argmax(y, axis=1))

            # Test accuracy
            test_activations, _ = self.feedforward(X_test)
            test_predictions = np.argmax(test_activations[-1], axis=1)
            test_accuracy = np.mean(test_predictions == np.argmax(y_test, axis=1))

            print(f'Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}, Accuracy: {accuracy * 100:.2f}%, Test Accuracy: {test_accuracy * 100:.2f}%')

    def predict(self, X):
        activations, _ = self.feedforward(X)
        return np.argmax(activations[-1], axis=1)

# Load and preprocess the MNIST dataset
def load_data():
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    X_train = X_train.reshape(X_train.shape[0], -1) / 255.0
    X_test = X_test.reshape(X_test.shape[0], -1) / 255.0
    y_train = to_categorical(y_train, 10)  # One-hot encoding
    y_test = to_categorical(y_test, 10)
    return X_train, y_train, X_test, y_test


if __name__ == "__main__":
    X_train, y_train, X_test, y_test = load_data()
    nn = NeuralNetwork(layers=[784, 64, 10], learning_rate=0.001, regularization=0.01)
    nn.train(X_train, y_train, epochs=10, batch_size=64, X_test=X_test, y_test=y_test)

    # Predict on test set
    predictions = nn.predict(X_test)
    accuracy = np.mean(predictions == np.argmax(y_test, axis=1))
    print(f'Final Test Accuracy: {accuracy * 100:.2f}%')


ModuleNotFoundError: No module named 'tensorflow'

# Build with Pytorch

In [3]:

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader


class NeuralNet(nn.Module):
    def __init__(self):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(784, 64)
        self.fc2 = nn.Linear(64, 10)

    def forward(self, x):
        x = torch.sigmoid(self.fc1(x))
        x = self.fc2(x)
        return x


transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)


model = NeuralNet()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


def train_model(num_epochs):
    for epoch in range(num_epochs):
        for images, labels in train_loader:

            images = images.view(-1, 28 * 28)


            outputs = model(images)
            loss = criterion(outputs, labels)


            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}')


def test_model():
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.view(-1, 28 * 28)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print(f'Test Accuracy: {100 * correct / total:.2f}%')


train_model(10)
test_model()


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:01<00:00, 8379405.59it/s] 


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 2030399.99it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz





Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 13526093.09it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 6644760.64it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






Epoch 1/10, Loss: 0.1858
Epoch 2/10, Loss: 0.3162
Epoch 3/10, Loss: 0.1671
Epoch 4/10, Loss: 0.0692
Epoch 5/10, Loss: 0.0226
Epoch 6/10, Loss: 0.0820
Epoch 7/10, Loss: 0.0493
Epoch 8/10, Loss: 0.0870
Epoch 9/10, Loss: 0.1022
Epoch 10/10, Loss: 0.1869
Test Accuracy: 96.52%


#pytorch with improvements

In [13]:

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader


def load_mnist_data():
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
    train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
    test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

    train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

    return train_loader, test_loader


class NeuralNet(nn.Module):
    def __init__(self):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(784, 64)
        self.dropout = nn.Dropout(0.3)
        self.fc2 = nn.Linear(64, 10)


        nn.init.kaiming_normal_(self.fc1.weight)
        nn.init.kaiming_normal_(self.fc2.weight)

    def forward(self, x):
        x = x.view(-1, 28 * 28)
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x


model = NeuralNet()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0005)


def train_model(num_epochs, train_loader):
    model.train()
    for epoch in range(num_epochs):
        for images, labels in train_loader:

            outputs = model(images)
            loss = criterion(outputs, labels)


            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}')


def test_model(test_loader):
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print(f'Test Accuracy: {100 * correct / total:.2f}%')


train_loader, test_loader = load_mnist_data()

train_model(10, train_loader)
test_model(test_loader)


Epoch 1/10, Loss: 0.5006
Epoch 2/10, Loss: 0.2899
Epoch 3/10, Loss: 0.1764
Epoch 4/10, Loss: 0.4622
Epoch 5/10, Loss: 0.3744
Epoch 6/10, Loss: 0.0722
Epoch 7/10, Loss: 0.3570
Epoch 8/10, Loss: 0.2022
Epoch 9/10, Loss: 0.0488
Epoch 10/10, Loss: 0.0629
Test Accuracy: 95.96%


## Build the Model using Tensorflow

In [8]:

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import mnist

(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train.reshape(-1, 28*28) / 255.0
X_test = X_test.reshape(-1, 28*28) / 255.0

y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)


model = models.Sequential()
model.add(layers.Dense(64, activation='sigmoid', input_shape=(784,)))
model.add(layers.Dense(10, activation='softmax'))
# RMSprop optmizer
model.compile(optimizer='RMSprop', loss='categorical_crossentropy', metrics=['accuracy'])


model.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_test, y_test))

test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print(f'Test Accuracy: {test_acc * 100:.2f}%')


Epoch 1/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.7993 - loss: 0.8627 - val_accuracy: 0.9183 - val_loss: 0.2862
Epoch 2/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9229 - loss: 0.2735 - val_accuracy: 0.9354 - val_loss: 0.2209
Epoch 3/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9405 - loss: 0.2095 - val_accuracy: 0.9443 - val_loss: 0.1852
Epoch 4/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9501 - loss: 0.1747 - val_accuracy: 0.9504 - val_loss: 0.1619
Epoch 5/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9551 - loss: 0.1547 - val_accuracy: 0.9551 - val_loss: 0.1479
Epoch 6/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9617 - loss: 0.1316 - val_accuracy: 0.9596 - val_loss: 0.1343
Epoch 7/10
[1m938/938[0m 

# tensor flow with improvements (BONUS)

In [10]:

import tensorflow as tf
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.datasets import mnist

(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train.reshape(-1, 28*28) / 255.0
X_test = X_test.reshape(-1, 28*28) / 255.0


y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

# Build the Neural Network model using Keras with adjustments
model = models.Sequential()

#  Use He initialization and reduced L2 regularization
model.add(layers.Dense(64, activation='relu', input_shape=(784,),
                       kernel_initializer='he_normal',
                       kernel_regularizer=regularizers.l2(0.0001)))  # Reduced L2 regularization

# Dropout
model.add(layers.Dropout(0.3))


model.add(layers.Dense(10, activation='softmax',
                       kernel_initializer='glorot_uniform'))

#adam optimizer
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
              loss='categorical_crossentropy', metrics=['accuracy'])


model.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_test, y_test))


test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print(f'Test Accuracy: {test_acc * 100:.2f}%')


Epoch 1/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.7406 - loss: 0.8984 - val_accuracy: 0.9249 - val_loss: 0.2816
Epoch 2/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9094 - loss: 0.3377 - val_accuracy: 0.9406 - val_loss: 0.2248
Epoch 3/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9222 - loss: 0.2874 - val_accuracy: 0.9498 - val_loss: 0.1935
Epoch 4/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9339 - loss: 0.2449 - val_accuracy: 0.9558 - val_loss: 0.1744
Epoch 5/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - accuracy: 0.9398 - loss: 0.2269 - val_accuracy: 0.9592 - val_loss: 0.1600
Epoch 6/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.9461 - loss: 0.2104 - val_accuracy: 0.9614 - val_loss: 0.1494
Epoch 7/10
[1m938/938[0m 