<a href="https://colab.research.google.com/github/bertankofon/CharacterRecognition/blob/main/MiniProject1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Preparation

In [1]:
import time
import numpy as np
from keras.datasets import mnist

# Fetch MNIST: 60,000 training and 10,000 test images together with their digit labels
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Rescale pixels from [0, 255] to [-0.5, 0.5] and flatten every 28x28 image
# into a single row of 784 values, in one step per split.
train_images = ((train_images / 255) - 0.5).reshape((-1, 784))
test_images = ((test_images / 255) - 0.5).reshape((-1, 784))

print("Training images shape:", train_images.shape) # Should be (60000, 784)
print("Test images shape:", test_images.shape) # Should be (10000, 784)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Training images shape: (60000, 784)
Test images shape: (10000, 784)


# Model Implementation and Training

Case 1: Use tanh for all activations.

Case 2: Use ReLU for the hidden layer activation and sigmoid for the output layer activation.

Define Activation Functions and their derivatives

In [2]:
# Tanh activation and its derivative
def tanh(x):
    """Apply the hyperbolic tangent element-wise (thin wrapper around ``np.tanh``)."""
    activated = np.tanh(x)
    return activated

def tanh_derivative(x):
    """Return the derivative of tanh at ``x``, i.e. ``1 - tanh(x)**2``, element-wise."""
    squashed = np.tanh(x)
    return 1 - squashed**2

# ReLU activation and its derivative
def relu(x):
    """Rectified linear unit: keep positive entries of ``x`` and clamp the rest to zero."""
    floor = 0
    return np.maximum(floor, x)

def relu_derivative(x):
    """Return the ReLU slope: 1.0 where ``x`` is strictly positive, 0.0 elsewhere (including at 0)."""
    positive_mask = x > 0
    return positive_mask.astype(float)

# Define the sigmoid activation function
def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated element-wise without overflow.

    The textbook formula calls ``exp(-x)``, which overflows to ``inf`` (and emits a
    RuntimeWarning) for large negative ``x``. Here the exponential is only ever taken
    of a non-positive number, so it stays within ``(0, 1]``:

    * for ``x >= 0``: ``1 / (1 + exp(-x))``
    * for ``x <  0``: ``exp(x) / (1 + exp(x))`` (algebraically the same value)

    Parameters
    ----------
    x : scalar or array-like
        Pre-activation values.

    Returns
    -------
    Scalar for scalar input, otherwise an array of the same shape as ``x``,
    with every value in ``[0, 1]``.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # exp of a non-positive number: cannot overflow
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with an empty tuple turns a 0-d result back into a scalar and
    # leaves arrays of any other rank untouched.
    return result[()]

# Derivative of the sigmoid function for backpropagation
def sigmoid_derivative(x):
    """Return the sigmoid's derivative at ``x``: ``sigmoid(x) * (1 - sigmoid(x))``."""
    activation = sigmoid(x)
    return activation * (1 - activation)



**Case 1: Use tanh for all activations.**

Define hyperparameters for case 1

In [None]:
# Gradient-descent step size. Values to try: 1e-2, 5e-2, 9e-2 (0.01, 0.05, 0.09).
learning_rate = 0.01

# Number of hidden units N. Values to try: 300, 500 or 1000.
hidden_size = 300


In [5]:
# Fix NumPy's global RNG so the initial weights are identical on every run
np.random.seed(42)

# Layer dimensions
input_size = 784  # one input per pixel of a flattened 28x28 MNIST image
num_classes = 10  # one output per digit 0-9

# Hidden layer: standard-normal weights scaled down by 0.01, biases at zero
W1 = 0.01 * np.random.randn(input_size, hidden_size)
b1 = np.zeros((1, hidden_size))

# Output layer: same initialisation scheme
W2 = 0.01 * np.random.randn(hidden_size, num_classes)
b2 = np.zeros((1, num_classes))



# Forward pass using tanh
def forward_pass_tanh(X, W1, b1, W2, b2):
    """Propagate ``X`` through the two-layer network with tanh on both layers.

    Returns the tuple ``(Z1, A1, Z2, A2)``: hidden pre-activation, hidden
    activation, output pre-activation and output activation, in that order.
    """
    hidden_pre = np.dot(X, W1) + b1
    hidden_act = tanh(hidden_pre)
    output_pre = np.dot(hidden_act, W2) + b2
    output_act = tanh(output_pre)
    return hidden_pre, hidden_act, output_pre, output_act

# Convert labels to the desired target outputs for case 1 with tanh
def convert_labels_for_tanh(Y, num_classes=10):
    """Encode integer class labels as +1/-1 target rows for a tanh output layer.

    Parameters
    ----------
    Y : array-like of int
        Class labels, one per sample; flattened before use.
    num_classes : int, optional
        Number of columns in the target matrix (default 10).

    Returns
    -------
    numpy.ndarray of float, shape ``(number of labels, num_classes)``
        Row ``i`` is -1 everywhere except column ``Y[i]``, which is +1, so the
        targets lie inside the range of tanh.
    """
    labels = np.asarray(Y).ravel()
    targets = np.full((labels.size, num_classes), -1.0)
    # A single vectorised assignment replaces the per-sample Python loop:
    # row i, column labels[i] is set to +1.
    targets[np.arange(labels.size), labels] = 1
    return targets

# Training targets for the tanh network: one row per training label,
# +1 in the true digit's column and -1 in every other column
train_labels_tanh = convert_labels_for_tanh(train_labels)

# Mean Squared Error Loss function
def mse_loss(Y, A2):
    """Squared-error loss: the sum of squared differences between targets ``Y``
    and outputs ``A2`` over every entry, divided by the number of rows of ``Y``."""
    squared_residuals = (Y - A2) ** 2
    sample_count = Y.shape[0]
    return np.sum(squared_residuals) / sample_count



# Backward pass
def backward_pass_tanh(X, Y, Z1, A1, W2, A2):
    """Backpropagate the squared-error loss through the tanh/tanh network.

    The loss is ``sum((Y - A2)**2) / m`` with ``m = X.shape[0]``.

    The tanh slopes are computed from the activations themselves
    (``tanh'(z) = 1 - tanh(z)**2 = 1 - a**2``). The output layer's
    pre-activation ``Z2`` is not a parameter of this function, so reading it
    here would silently depend on a notebook-global variable of that name.

    Parameters
    ----------
    X : ndarray, shape (m, n_inputs)
        Input batch.
    Y : ndarray, shape (m, n_outputs)
        Target values.
    Z1 : ndarray
        Hidden pre-activation. Unused (``A1`` carries the same information);
        kept so existing calls keep working.
    A1 : ndarray, shape (m, n_hidden)
        Hidden activation, ``tanh(Z1)``.
    W2 : ndarray, shape (n_hidden, n_outputs)
        Output-layer weights.
    A2 : ndarray, shape (m, n_outputs)
        Network output, ``tanh(Z2)``.

    Returns
    -------
    tuple ``(dW1, db1, dW2, db2)``
        Gradients of the loss; the bias gradients have shape ``(1, n)``.
    """
    m = X.shape[0]

    # Gradient of the loss with respect to the network output
    dA2 = 2 * (A2 - Y) / m

    # Through the output tanh: slope expressed via the activation A2
    dZ2 = dA2 * (1 - np.square(A2))

    # Output-layer parameter gradients
    dW2 = np.dot(A1.T, dZ2)
    db2 = np.sum(dZ2, axis=0, keepdims=True)

    # Back through the output weights and the hidden tanh
    dA1 = np.dot(dZ2, W2.T)
    dZ1 = dA1 * (1 - np.square(A1))

    # Hidden-layer parameter gradients
    dW1 = np.dot(X.T, dZ1)
    db1 = np.sum(dZ1, axis=0, keepdims=True)

    return dW1, db1, dW2, db2

# Parameters update function
def update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate):
    """Take one gradient-descent step on all four parameters.

    Each parameter has ``learning_rate`` times its gradient subtracted with the
    in-place ``-=`` operator, so array arguments are modified in place. The
    updated parameters are returned as the tuple ``(W1, b1, W2, b2)``.
    """
    stepped = []
    for param, grad in ((W1, dW1), (b1, db1), (W2, dW2), (b2, db2)):
        param -= learning_rate * grad
        stepped.append(param)
    return tuple(stepped)

# Function to predict labels for a dataset
def predict(X, W1, b1, W2, b2):
    """Classify each row of ``X`` with the tanh network.

    Runs ``forward_pass_tanh`` and returns, per row, the index of the output
    unit with the highest activation.
    """
    output_activations = forward_pass_tanh(X, W1, b1, W2, b2)[-1]
    return np.argmax(output_activations, axis=1)

# Function to calculate accuracy
def calculate_accuracy(y_true, y_pred):
    """Return the fraction of positions where ``y_pred`` equals ``y_true``."""
    matches = y_true == y_pred
    return np.mean(matches)

# Function to compute the test error
def compute_test_error(X, y, W1, b1, W2, b2):
    """Return the misclassification rate (``1 - accuracy``) of the network's
    predictions for ``X`` measured against the true labels ``y``."""
    predicted = predict(X, W1, b1, W2, b2)
    return 1 - calculate_accuracy(y, predicted)

# Per-epoch metric histories; the training loop appends one value to each per epoch
train_errors, test_errors, accuracies = [], [], []





Training for case 1

In [None]:
# Training loop for case 1: full-batch gradient descent on the tanh network
num_epochs = 10  # CHANGE

# Start timing the training process
# NOTE(review): time.time() measures elapsed wall-clock time, although the final
# message labels it "CPU time" - confirm which one the report needs.
start_time = time.time()

for epoch in range(num_epochs):
    # Forward pass over the whole training set
    Z1, A1, Z2, A2 = forward_pass_tanh(train_images, W1, b1, W2, b2)

    # Compute loss
    train_loss = mse_loss(train_labels_tanh, A2)
    train_errors.append(train_loss)

    # Compute training accuracy from the activations obtained above. The weights
    # have not changed since the forward pass, so this gives the same predictions
    # as predict(train_images, ...) without a second pass over all training images.
    train_pred = np.argmax(A2, axis=1)
    train_accuracy = calculate_accuracy(train_labels, train_pred)
    accuracies.append(train_accuracy)

    # Compute test error (fraction of misclassified test images)
    test_error = compute_test_error(test_images, test_labels, W1, b1, W2, b2)
    test_errors.append(test_error)

    # Output the metrics (measured before this epoch's parameter update)
    print(f'Epoch {epoch + 1}/{num_epochs}, Training Loss: {train_loss}, Training Accuracy: {train_accuracy}, Test Error: {test_error}')

    # Backward pass
    dW1, db1, dW2, db2 = backward_pass_tanh(train_images, train_labels_tanh, Z1, A1, W2, A2)

    # Update parameters
    W1, b1, W2, b2 = update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate)

# End timing the training process
end_time = time.time()

# Calculate and print the total time spent in the training loop
cpu_time = end_time - start_time
print(f'Total CPU time for training: {cpu_time:.2f} seconds')

Epoch 1/10, Training Loss: 3.999999997138255, Training Accuracy: 0.09863333333333334, Test Error: 0.9042
Epoch 2/10, Training Loss: 3.9999999971382523, Training Accuracy: 0.09863333333333334, Test Error: 0.9042
Epoch 3/10, Training Loss: 3.999999997138252, Training Accuracy: 0.09863333333333334, Test Error: 0.9042
Epoch 4/10, Training Loss: 3.9999999971382505, Training Accuracy: 0.09863333333333334, Test Error: 0.9042
Epoch 5/10, Training Loss: 3.999999997138249, Training Accuracy: 0.09863333333333334, Test Error: 0.9042
Epoch 6/10, Training Loss: 3.999999997138247, Training Accuracy: 0.09863333333333334, Test Error: 0.9042
Epoch 7/10, Training Loss: 3.999999997138246, Training Accuracy: 0.09863333333333334, Test Error: 0.9042
Epoch 8/10, Training Loss: 3.999999997138245, Training Accuracy: 0.09863333333333334, Test Error: 0.9042
Epoch 9/10, Training Loss: 3.999999997138243, Training Accuracy: 0.09863333333333334, Test Error: 0.9042
Epoch 10/10, Training Loss: 3.9999999971382425, Train

**Case 2: Use ReLU for the hidden layer activation and sigmoid for the output layer activation.**

Define hyperparameters for case 2

In [None]:
# Gradient-descent step size. Values to try: 1e-2, 5e-2, 9e-2 (0.01, 0.05, 0.09).
learning_rate = 0.01

# Number of hidden units N. Values to try: 300, 500 or 1000.
hidden_size = 300

In [None]:
# Fix NumPy's global RNG so the initial weights are identical on every run
np.random.seed(42)

# Layer dimensions
input_size = 784  # one input per pixel of a flattened 28x28 MNIST image
num_classes = 10  # one output per digit 0-9

# Hidden layer: standard-normal weights scaled down by 0.01, biases at zero
W1 = 0.01 * np.random.randn(input_size, hidden_size)
b1 = np.zeros((1, hidden_size))

# Output layer: same initialisation scheme
W2 = 0.01 * np.random.randn(hidden_size, num_classes)
b2 = np.zeros((1, num_classes))



# Forward pass using ReLU and sigmoid
def forward_pass_relu_sigmoid(X, W1, b1, W2, b2):
    """Propagate ``X`` through the two-layer network: ReLU hidden layer, sigmoid output.

    Returns the tuple ``(Z1, A1, Z2, A2)``: hidden pre-activation, hidden
    activation, output pre-activation and output activation, in that order.
    """
    hidden_pre = np.dot(X, W1) + b1
    hidden_act = relu(hidden_pre)
    output_pre = np.dot(hidden_act, W2) + b2
    output_act = sigmoid(output_pre)
    return hidden_pre, hidden_act, output_pre, output_act

# Convert labels to the desired target outputs for case 2 with sigmoid
def convert_labels_for_sigmoid(Y, num_classes=10):
    """One-hot encode integer class labels as 0/1 target rows for a sigmoid output layer.

    Parameters
    ----------
    Y : array-like of int
        Class labels, one per sample; flattened before use.
    num_classes : int, optional
        Number of columns in the target matrix (default 10).

    Returns
    -------
    numpy.ndarray of float, shape ``(number of labels, num_classes)``
        Row ``i`` is 0 everywhere except column ``Y[i]``, which is 1, so the
        targets lie inside the range of the sigmoid.
    """
    labels = np.asarray(Y).ravel()
    targets = np.zeros((labels.size, num_classes))
    # A single vectorised assignment replaces the per-sample Python loop:
    # row i, column labels[i] is set to 1.
    targets[np.arange(labels.size), labels] = 1
    return targets


# Training targets for the sigmoid network: one-hot rows with 1 in the
# true digit's column and 0 in every other column
train_labels_sigmoid = convert_labels_for_sigmoid(train_labels)


# Mean Squared Error Loss function
def mse_loss(Y, A2):
    """Squared-error loss: the sum of squared differences between targets ``Y``
    and outputs ``A2`` over every entry, divided by the number of rows of ``Y``."""
    squared_residuals = (Y - A2) ** 2
    sample_count = Y.shape[0]
    return np.sum(squared_residuals) / sample_count


# Backward pass using ReLU and sigmoid
def backward_pass_relu_sigmoid(X, Y, Z1, A1, W2, A2):
    """Backpropagate the squared-error loss through the ReLU/sigmoid network.

    The loss is ``sum((Y - A2)**2) / m`` with ``m = X.shape[0]``; the returned
    values are its exact gradients, including the factor 2 that comes from
    differentiating the square.

    The sigmoid slope is computed from the output activation
    (``sigmoid'(z) = a * (1 - a)``). The output layer's pre-activation ``Z2`` is
    not a parameter of this function, so reading it here would silently depend
    on a notebook-global variable of that name.

    Parameters
    ----------
    X : ndarray, shape (m, n_inputs)
        Input batch.
    Y : ndarray, shape (m, n_outputs)
        Target values.
    Z1 : ndarray, shape (m, n_hidden)
        Hidden pre-activation; its sign determines the ReLU slope.
    A1 : ndarray, shape (m, n_hidden)
        Hidden activation.
    W2 : ndarray, shape (n_hidden, n_outputs)
        Output-layer weights.
    A2 : ndarray, shape (m, n_outputs)
        Network output (sigmoid activation).

    Returns
    -------
    tuple ``(dW1, db1, dW2, db2)``
        Gradients of the loss; the bias gradients have shape ``(1, n)``.
    """
    m = X.shape[0]

    # Output layer: d(loss)/dZ2, with the 1/m factor applied to each gradient below
    dZ2 = 2 * (A2 - Y) * (A2 * (1 - A2))
    dW2 = np.dot(A1.T, dZ2) / m
    db2 = np.sum(dZ2, axis=0, keepdims=True) / m

    # Hidden layer: ReLU passes the gradient only where its input was positive
    dA1 = np.dot(dZ2, W2.T)
    dZ1 = dA1 * (Z1 > 0)
    dW1 = np.dot(X.T, dZ1) / m
    db1 = np.sum(dZ1, axis=0, keepdims=True) / m

    return dW1, db1, dW2, db2

# Parameters update function
def update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate):
    """Take one gradient-descent step on all four parameters.

    Each parameter has ``learning_rate`` times its gradient subtracted with the
    in-place ``-=`` operator, so array arguments are modified in place. The
    updated parameters are returned as the tuple ``(W1, b1, W2, b2)``.
    """
    stepped = []
    for param, grad in ((W1, dW1), (b1, db1), (W2, dW2), (b2, db2)):
        param -= learning_rate * grad
        stepped.append(param)
    return tuple(stepped)



Epoch 1, cost: 2.484944395132934
Epoch 2, cost: 2.3418036565392546
Epoch 3, cost: 2.1770137719519593
Epoch 4, cost: 1.9563515856664788
Epoch 5, cost: 1.6787903895018392
Epoch 6, cost: 1.3940655779708835
Epoch 7, cost: 1.1761348090037211
Epoch 8, cost: 1.0482565469151446
Epoch 9, cost: 0.9821058962071338
Epoch 10, cost: 0.9478471339374868


Training for Case 2

In [None]:
# Training loop for case 2: full-batch gradient descent on the ReLU/sigmoid network
num_epochs = 10  # CHANGE
for epoch in range(num_epochs):
    # Forward pass over the entire training set
    Z1, A1, Z2, A2 = forward_pass_relu_sigmoid(train_images, W1, b1, W2, b2)

    # Compute the loss against the 0/1 targets and report it for this epoch
    cost = mse_loss(train_labels_sigmoid, A2)
    print(f'Epoch {epoch + 1}, mse_error: {cost}')

    # Backward pass: gradients for both layers' weights and biases
    dW1, db1, dW2, db2 = backward_pass_relu_sigmoid(train_images, train_labels_sigmoid, Z1, A1, W2, A2)

    # Update parameters with one gradient-descent step
    # (update_parameters modifies the arrays in place and also returns them)
    W1, b1, W2, b2 = update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate)