In [1]:
import numpy as np  # For matrix and numerical operations
from tensorflow.keras.datasets import mnist  # To load the MNIST dataset

In [2]:
import numpy as np
from tensorflow.keras.datasets import mnist

# Load the MNIST train/test splits
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Turn every 28x28 image into one 784-element row, then divide by 255 so
# pixel values land in [0, 1]
X_train = X_train.reshape(len(X_train), -1) / 255.0
X_test = X_test.reshape(len(X_test), -1) / 255.0

# One-hot encode the labels
def one_hot_encode(y, num_classes=10):
    """Convert integer class labels into a one-hot matrix.

    Parameters
    ----------
    y : array-like of int
        Class labels. Any array-like is accepted (list, 1-D array, or a
        column vector); it is flattened before encoding.
    num_classes : int, default 10
        Number of columns in the result. Every label must satisfy
        ``0 <= label < num_classes``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(number_of_labels, num_classes)`` with a single
        1 in each row at the column given by that row's label.

    Raises
    ------
    ValueError
        If any label is negative or ``>= num_classes``.
    """
    # Flatten so lists and (n, 1) column vectors index one cell per row
    # instead of failing on `.size` or broadcasting against `arange`.
    labels = np.asarray(y).ravel()

    if labels.size == 0:
        return np.zeros((0, num_classes))

    # Negative indices would silently wrap around to the last columns and
    # encode the wrong class, so reject anything outside the valid range.
    if labels.min() < 0 or labels.max() >= num_classes:
        raise ValueError(
            f"labels must be in [0, {num_classes - 1}], "
            f"got values in [{labels.min()}, {labels.max()}]"
        )

    encoded = np.zeros((labels.size, num_classes))
    encoded[np.arange(labels.size), labels] = 1
    return encoded

# Swap the integer label vectors for their one-hot matrices
y_train, y_test = one_hot_encode(y_train), one_hot_encode(y_test)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [14]:
# Layer widths of the network: input -> hidden 1 -> hidden 2 -> output
input_size = 784
hidden1_size = 256
hidden2_size = 64
output_size = 10

In [15]:
# Model parameters. The seed makes the random draws repeatable; the weight
# matrices are drawn in the order W1, W2, W3, which fixes their values.
np.random.seed(42)

# Weights: standard-normal samples scaled by 0.1
W1 = 0.1 * np.random.randn(input_size, hidden1_size)
W2 = 0.1 * np.random.randn(hidden1_size, hidden2_size)
W3 = 0.1 * np.random.randn(hidden2_size, output_size)

# Biases: one zero-initialised row per layer
b1 = np.zeros((1, hidden1_size))
b2 = np.zeros((1, hidden2_size))
b3 = np.zeros((1, output_size))

# Activation functions
def relu(x):
    """Element-wise rectifier: return ``max(0, x)`` for every entry of ``x``."""
    rectified = np.maximum(0, x)
    return rectified

def relu_derivative(x):
    """Element-wise ReLU gradient: 1 where ``x > 0`` and 0 everywhere else.

    Entries equal to zero get a gradient of 0.
    """
    is_positive = x > 0
    return np.where(is_positive, 1, 0)

def softmax(x):
    """Row-wise softmax of a 2-D array.

    Each row's maximum is subtracted before exponentiating so that large
    inputs do not overflow; this leaves the result unchanged.

    Returns an array of the same shape as ``x`` whose rows sum to 1.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    exp_x = np.exp(x - row_max)
    row_total = np.sum(exp_x, axis=1, keepdims=True)
    return exp_x / row_total

# Forward propagation
def forward_propagation(X):
    """Run a batch through the network and return the softmax output.

    The network is two ReLU layers followed by a softmax layer, using the
    module-level parameters ``W1, b1, W2, b2, W3, b3``.

    Side effect: every pre-activation (``Z1, Z2, Z3``) and activation
    (``A1, A2, A3``) is written to a module-level global, replacing whatever
    the previous call stored there. ``backward_propagation`` reads these
    globals, so it must be called after a forward pass on the same batch.

    Parameters
    ----------
    X : array with one sample per row and as many columns as ``W1`` has rows.

    Returns
    -------
    ``A3``, the softmax output for each row of ``X``.
    """
    global Z1, A1, Z2, A2, Z3, A3

    # Hidden layer 1
    Z1 = X @ W1 + b1
    A1 = relu(Z1)

    # Hidden layer 2
    Z2 = A1 @ W2 + b2
    A2 = relu(Z2)

    # Output layer
    Z3 = A2 @ W3 + b3
    A3 = softmax(Z3)

    return A3

# Backward propagation
def backward_propagation(X, y, output):
    """Back-propagate through the network and take one gradient-descent step.

    Reads the activations cached in module-level globals by
    ``forward_propagation`` (``Z1, A1, Z2, A2``), so it has to be called right
    after a forward pass on the same ``X``. The step size is the module-level
    ``learning_rate``. The parameters ``W1, b1, W2, b2, W3, b3`` are updated
    in place; nothing is returned.

    Parameters
    ----------
    X : the batch that was fed to ``forward_propagation``.
    y : targets with the same shape as ``output``.
    output : the value returned by ``forward_propagation(X)``.
    """
    batch_size = X.shape[0]

    # Output layer: the error signal is taken directly as (output - y)
    delta3 = output - y
    grad_W3 = (A2.T @ delta3) / batch_size
    grad_b3 = np.sum(delta3, axis=0, keepdims=True) / batch_size

    # Second hidden layer: push the error back through W3 (still the
    # pre-update weights) and gate it with the ReLU derivative
    delta2 = (delta3 @ W3.T) * relu_derivative(Z2)
    grad_W2 = (A1.T @ delta2) / batch_size
    grad_b2 = np.sum(delta2, axis=0, keepdims=True) / batch_size

    # First hidden layer: same pattern, through W2
    delta1 = (delta2 @ W2.T) * relu_derivative(Z1)
    grad_W1 = (X.T @ delta1) / batch_size
    grad_b1 = np.sum(delta1, axis=0, keepdims=True) / batch_size

    # All gradients are computed before any parameter changes. The in-place
    # subtraction mutates the module-level arrays directly.
    updates = (
        (W1, grad_W1), (b1, grad_b1),
        (W2, grad_W2), (b2, grad_b2),
        (W3, grad_W3), (b3, grad_b3),
    )
    for parameter, gradient in updates:
        parameter -= learning_rate * gradient

# Training: every epoch is a single gradient step on the full training set
epochs = 20
learning_rate = 0.1

for epoch in range(epochs):
    # Predictions with the current parameters
    output = forward_propagation(X_train)

    # Cross-entropy of those predictions (i.e. measured before this epoch's
    # update); the 1e-8 keeps log() away from zero
    sample_log_likelihood = np.sum(y_train * np.log(output + 1e-8), axis=1)
    loss = -np.mean(sample_log_likelihood)

    # Gradient step on all weights and biases
    backward_propagation(X_train, y_train, output)

    print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss}")



Epoch 1/20, Loss: 2.4182752054605934
Epoch 2/20, Loss: 2.3023976364103453
Epoch 3/20, Loss: 2.228589369923908
Epoch 4/20, Loss: 2.1650178320797373
Epoch 5/20, Loss: 2.1052081799549525
Epoch 6/20, Loss: 2.046502230239699
Epoch 7/20, Loss: 1.9876206551384195
Epoch 8/20, Loss: 1.9278937030102299
Epoch 9/20, Loss: 1.8670939799564765
Epoch 10/20, Loss: 1.8052113270818206
Epoch 11/20, Loss: 1.7424944686387975
Epoch 12/20, Loss: 1.679378925725385
Epoch 13/20, Loss: 1.6163256710837985
Epoch 14/20, Loss: 1.5539188894487852
Epoch 15/20, Loss: 1.4927364441391557
Epoch 16/20, Loss: 1.4332208149399888
Epoch 17/20, Loss: 1.3758321768311206
Epoch 18/20, Loss: 1.3209183542895189
Epoch 19/20, Loss: 1.2687163049324492
Epoch 20/20, Loss: 1.2193862285001775


In [16]:
# Accuracy on the test split.
# Note: this forward pass also overwrites the Z*/A* globals cached by
# forward_propagation with test-set activations.
output_test = forward_propagation(X_test)

# Most probable class per sample vs. the class marked in the one-hot labels
predictions = output_test.argmax(axis=1)
labels = y_test.argmax(axis=1)

accuracy = (predictions == labels).mean() * 100
print(f"Test Accuracy: {accuracy}%")

Test Accuracy: 74.4%


In [None]:
# Second training run with a smaller step size.
# The parameters are not re-initialised in this cell, so training continues
# from whatever values W1..b3 currently hold.
epochs = 100
learning_rate = 0.01

for epoch in range(epochs):
    # Forward pass over the full training set
    output = forward_propagation(X_train)

    # Gradient step on all weights and biases (does not modify `output`)
    backward_propagation(X_train, y_train, output)

    # Cross-entropy of the predictions made before the step above
    sample_log_likelihood = np.sum(y_train * np.log(output + 1e-8), axis=1)
    loss = -np.mean(sample_log_likelihood)

    print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss}")

# Persist the trained parameters: one .npy file per array ...
for _path, _array in (
    ('W1.npy', W1),
    ('b1.npy', b1),
    ('W2.npy', W2),
    ('b2.npy', b2),
    ('W3.npy', W3),
    ('b3.npy', b3),
):
    np.save(_path, _array)

# ... plus a single .npz archive holding all six, keyed by parameter name,
# for easier reloading
np.savez('model_parameters.npz', W1=W1, b1=b1, W2=W2, b2=b2, W3=W3, b3=b3)

print("Model training complete. Parameters saved successfully.")


In [None]:
# Load the saved model parameters.
# The archive stores each array under the keyword name passed to np.savez
# (W1, b1, W2, b2, W3, b3). Keys are case-sensitive, so the lookup must use
# 'W3' and not 'w3'. The `with` block closes the underlying file once the
# arrays have been read; the arrays themselves stay valid afterwards.
with np.load('model_parameters.npz') as model_parameters:
    W1 = model_parameters['W1']
    b1 = model_parameters['b1']
    W2 = model_parameters['W2']
    b2 = model_parameters['b2']
    W3 = model_parameters['W3']
    b3 = model_parameters['b3']

print("Model loaded successfully!")

# Define the testing function
def test_model(X_test, y_test):
    """
    Evaluate the network on labelled data and print the accuracy.

    Runs a forward pass with the module-level parameters ``W1..b3`` using
    only local variables, so the ``Z*/A*`` globals written by
    ``forward_propagation`` are left untouched.

    Parameters
    ----------
    X_test : samples, one per row.
    y_test : one-hot labels; the column holding the largest value in each
        row is taken as the true class.

    Prints the percentage of rows whose predicted class matches the label,
    formatted to two decimals. Returns nothing.
    """
    # Forward pass: two ReLU layers, then softmax
    hidden1 = relu(X_test @ W1 + b1)
    hidden2 = relu(hidden1 @ W2 + b2)
    outputs = softmax(hidden2 @ W3 + b3)

    # Highest-probability class vs. the class encoded in the one-hot label
    predicted = np.argmax(outputs, axis=1)
    true_labels = np.argmax(y_test, axis=1)

    # Share of matching rows, as a percentage
    hits = predicted == true_labels
    accuracy = np.mean(hits) * 100

    print(f"Test Accuracy: {accuracy:.2f}%")

# Evaluate the parameters loaded above on the MNIST test split
# (y_test is one-hot encoded, which is what test_model expects)
test_model(X_test, y_test)
