In [1]:
from keras.datasets import mnist
import numpy as np

# Fetch the MNIST training and test splits (images and labels)
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Scale by 1/255 and shift by -0.5 so pixel values lie between -0.5 and 0.5
train_images = train_images / 255 - 0.5
test_images = test_images / 255 - 0.5

# Collapse each 28x28 image into a single 784-element row vector
train_images = train_images.reshape(-1, 28 * 28)
test_images = test_images.reshape(-1, 28 * 28)

# Report the resulting array shapes as a sanity check
print("Training images shape:", train_images.shape)  # expected: (60000, 784)
print("Test images shape:", test_images.shape)  # expected: (10000, 784)




Training images shape: (60000, 784)
Test images shape: (10000, 784)


# The new algorithm combines Mini-Batch Stochastic Gradient Descent (MB-SGD) with the Adaptive Moment Estimation (Adam) optimization technique.

In [2]:
def relu(x):
    """Rectified linear unit: element-wise ``max(x, 0)``.

    Negative entries become 0; non-negative entries pass through unchanged.
    """
    floor = 0
    activated = np.maximum(x, floor)
    return activated

def softmax(x):
    """Row-wise softmax, used as the output-layer activation.

    Each row of ``x`` is turned into a vector of positive values summing to 1.
    The row maximum is subtracted before exponentiating so that large inputs
    do not overflow ``np.exp``; this shift does not change the result.
    """
    row_peak = np.max(x, axis=1, keepdims=True)
    exps = np.exp(x - row_peak)
    normalizer = np.sum(exps, axis=1, keepdims=True)
    return exps / normalizer

def deriv_relu(x):
    """Gradient mask of ReLU for backpropagation.

    Returns the element-wise comparison ``x > 0``: True where the input was
    strictly positive (slope 1), False elsewhere (slope 0).
    """
    positive_mask = x > 0
    return positive_mask

class NeuralNetwork:
    """Two-layer fully connected classifier: ReLU hidden layer, softmax output.

    Parameters are updated with plain gradient descent on the cross-entropy
    loss. Labels are integer class indices (they are used to index columns of
    the predicted probability matrix), not one-hot vectors.
    """

    def __init__(self, input_size, hidden_size, output_size, seed=None):
        """Create the weight matrices and zero-filled biases.

        Args:
            input_size: Number of features per input sample.
            hidden_size: Number of units in the hidden layer.
            output_size: Number of output classes.
            seed: Optional integer seed. When given, the weights are drawn
                from a private ``np.random.RandomState(seed)`` so that
                construction is reproducible. When ``None`` (default) the
                global NumPy RNG is used, exactly as before.
        """
        rng = np.random if seed is None else np.random.RandomState(seed)

        # Standard-normal draws scaled by sqrt(2 / fan_in) (He initialization).
        # weights1 is drawn before weights2 to keep the original draw order.
        self.bias1 = np.zeros((1, hidden_size))
        self.weights1 = rng.randn(input_size, hidden_size) * np.sqrt(2. / input_size)
        self.weights2 = rng.randn(hidden_size, output_size) * np.sqrt(2. / hidden_size)
        self.bias2 = np.zeros((1, output_size))

    def forward(self, x):
        """Run a forward pass and cache the intermediate activations.

        Args:
            x: Input array of shape (m, input_size).

        Returns:
            Array of shape (m, output_size) with per-class probabilities.
            ``z1``, ``a1``, ``z2`` and ``a2`` are stored on the instance for
            use by ``backprop``.
        """
        self.z1 = np.dot(x, self.weights1) + self.bias1        # First linear step
        self.a1 = relu(self.z1)                                # Activation function
        self.z2 = np.dot(self.a1, self.weights2) + self.bias2  # Second linear step
        self.a2 = softmax(self.z2)                             # Output activation function
        return self.a2

    def compute_loss(self, y_true, y_pred):
        """Mean cross-entropy of predicted probabilities against integer labels.

        Args:
            y_true: 1-D integer array of class indices, length m.
            y_pred: Array of shape (m, n_classes) with predicted probabilities.

        Returns:
            The average negative log-probability assigned to the true class.
        """
        m = y_true.shape[0]
        true_class_prob = y_pred[np.arange(m), y_true]
        # Clip away exact zeros so an underflowed probability yields a large
        # finite loss instead of inf (and a divide-by-zero warning in np.log).
        log_likelihood = -np.log(np.clip(true_class_prob, 1e-12, 1.0))
        return np.sum(log_likelihood) / m

    def backprop(self, x, y_true, learning_rate=0.01):
        """Compute gradients from the cached forward pass and apply one update.

        ``forward(x)`` must have been called on the same ``x`` beforehand so
        that ``z1``, ``a1`` and ``a2`` are populated.

        Args:
            x: Input array of shape (m, input_size) used in the forward pass.
            y_true: 1-D integer array of class indices, length m.
            learning_rate: Gradient-descent step size. Passed explicitly
                instead of being read from a module-level variable.
        """
        m = y_true.shape[0]

        # Gradients for output layer. Work on a copy: the original aliased
        # self.a2, so the in-place updates below corrupted the cached
        # probabilities (and the array returned by forward()).
        delta_z2 = self.a2.copy()
        delta_z2[np.arange(m), y_true] -= 1  # Derivative of cross-entropy with softmax
        delta_z2 /= m
        dw2 = np.dot(self.a1.T, delta_z2)
        db2 = np.sum(delta_z2, axis=0, keepdims=True)

        # Gradients for hidden layer (uses weights2 before it is updated)
        delta_a1 = np.dot(delta_z2, self.weights2.T)
        delta_z1 = delta_a1 * deriv_relu(self.z1)  # Element-wise multiplication
        dw1 = np.dot(x.T, delta_z1)
        db1 = np.sum(delta_z1, axis=0, keepdims=True)  # keep (1, hidden) like bias1

        # Update weights and biases
        self.weights1 -= learning_rate * dw1
        self.bias1 -= learning_rate * db1
        self.weights2 -= learning_rate * dw2
        self.bias2 -= learning_rate * db2

    def predict(self, x):
        """Return the most probable class index for each row of ``x``."""
        a2 = self.forward(x)
        return np.argmax(a2, axis=1)  # Return the index of highest probability

    def train(self, x, y, epochs, learning_rate):
        """Train with full-batch gradient descent.

        Every epoch runs one forward pass and one parameter update over the
        whole of ``x``. Loss and training accuracy are printed every 5 epochs;
        the loss is measured before that epoch's update and the accuracy
        after it.

        Args:
            x: Training inputs of shape (m, input_size).
            y: 1-D integer array of class indices, length m.
            epochs: Number of passes over the data.
            learning_rate: Gradient-descent step size.
        """
        for epoch in range(epochs):
            y_pred = self.forward(x)                # Forward pass
            loss = self.compute_loss(y, y_pred)     # Compute loss
            self.backprop(x, y, learning_rate)      # Backpropagation with the given step size

            if epoch % 5 == 0:  # Print loss and accuracy every 5 epochs
                predictions = self.predict(x)
                accuracy = np.mean(predictions == y)
                print(f"Epoch: {epoch}, Loss: {loss:.4f}, Accuracy: {accuracy:.4f}")

In [3]:
# Seed NumPy's global RNG so the random weight initialization (and therefore
# the whole training run) is reproducible on Restart & Run All
np.random.seed(42)

# Hyperparameters
learning_rate = 0.01  # gradient-descent step size
epochs = 500          # number of training iterations passed to train()
hidden_size = 128     # units in the hidden layer

# Initialize the neural network: 784 input features, 10 output classes
nn = NeuralNetwork(784, hidden_size, 10)

# Train the neural network using the training data
nn.train(train_images, train_labels, epochs, learning_rate)


Epoch: 0, Loss: 2.4128, Accuracy: 0.0824
Epoch: 5, Loss: 2.2679, Accuracy: 0.1704
Epoch: 10, Loss: 2.1851, Accuracy: 0.2277
Epoch: 15, Loss: 2.1204, Accuracy: 0.2784
Epoch: 20, Loss: 2.0641, Accuracy: 0.3250
Epoch: 25, Loss: 2.0126, Accuracy: 0.3673
Epoch: 30, Loss: 1.9639, Accuracy: 0.4051
Epoch: 35, Loss: 1.9171, Accuracy: 0.4369
Epoch: 40, Loss: 1.8717, Accuracy: 0.4661
Epoch: 45, Loss: 1.8278, Accuracy: 0.4929
Epoch: 50, Loss: 1.7851, Accuracy: 0.5172
Epoch: 55, Loss: 1.7438, Accuracy: 0.5405
Epoch: 60, Loss: 1.7039, Accuracy: 0.5617
Epoch: 65, Loss: 1.6653, Accuracy: 0.5813
Epoch: 70, Loss: 1.6280, Accuracy: 0.5996
Epoch: 75, Loss: 1.5921, Accuracy: 0.6151
Epoch: 80, Loss: 1.5575, Accuracy: 0.6294
Epoch: 85, Loss: 1.5242, Accuracy: 0.6427
Epoch: 90, Loss: 1.4922, Accuracy: 0.6545
Epoch: 95, Loss: 1.4613, Accuracy: 0.6657
Epoch: 100, Loss: 1.4316, Accuracy: 0.6761
Epoch: 105, Loss: 1.4031, Accuracy: 0.6848
Epoch: 110, Loss: 1.3755, Accuracy: 0.6933
Epoch: 115, Loss: 1.3491, Accurac

In [4]:
# Predict a class index for every test image
predictions = nn.predict(test_images)

# Accuracy is the fraction of predictions that match the true labels
is_correct = predictions == test_labels
accuracy = np.mean(is_correct)

# Report the result
print("Test accuracy:", accuracy)


Test accuracy: 0.858
