<a href="https://colab.research.google.com/github/Yug3737/Character-Recognition/blob/main/Digit_Recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from tensorflow.keras.datasets import mnist

# Load the MNIST images and integer class labels (train and test splits)
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize the input data to [0, 1] (division also promotes the pixels to float)
x_train = x_train / 255.0
x_test = x_test / 255.0

# One-hot encode the labels: row k of the 10x10 identity matrix encodes class k
y_train = np.eye(10)[y_train]
y_test = np.eye(10)[y_test]

# Flatten each image into a single row vector. The sample count is read from
# the array and the feature count is inferred (-1), so nothing here breaks if
# a subset of the data or a different split is used.
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)

# Activation functions and derivatives
def relu(x):
    """Rectified linear unit: element-wise max(0, x)."""
    rectified = np.maximum(0, x)
    return rectified

def relu_derivative(x):
    """Element-wise ReLU gradient: 1.0 where x is strictly positive, else 0.0."""
    positive_mask = x > 0
    return positive_mask.astype(float)

def softmax(x):
    """Softmax along axis 1, so every row of the result sums to 1."""
    # Subtracting each row's maximum leaves the result unchanged mathematically
    # but keeps np.exp from overflowing on large inputs.
    row_peak = np.max(x, axis=1, keepdims=True)
    unnormalized = np.exp(x - row_peak)
    row_total = unnormalized.sum(axis=1, keepdims=True)
    return unnormalized / row_total

# Neural Network class
class NeuralNetwork:
    """Fully connected classifier: two ReLU hidden layers and a softmax output.

    Trained with plain mini-batch gradient descent on categorical
    cross-entropy. Only ``hidden_sizes[0]`` and ``hidden_sizes[1]`` are read;
    any further entries are ignored.
    """

    def __init__(self, input_size, hidden_sizes, output_size, learning_rate=0.01):
        """Create the three weight matrices and bias rows.

        Args:
            input_size: Number of features per input sample.
            hidden_sizes: Sequence whose first two entries are the widths of
                the first and second hidden layers.
            output_size: Number of output classes.
            learning_rate: Step size used by ``backward`` for every parameter
                update. Defaults to 0.01.
        """
        # Weights are small Gaussian values (std 0.01); biases start at zero.
        self.weights1 = np.random.randn(input_size, hidden_sizes[0]) * 0.01
        self.bias1 = np.zeros((1, hidden_sizes[0]))

        self.weights2 = np.random.randn(hidden_sizes[0], hidden_sizes[1]) * 0.01
        self.bias2 = np.zeros((1, hidden_sizes[1]))

        self.weights3 = np.random.randn(hidden_sizes[1], output_size) * 0.01
        self.bias3 = np.zeros((1, output_size))

        self.learning_rate = learning_rate

    # Forward propagation
    def forward(self, X):
        """Run a batch through the network and return class probabilities.

        Pre- and post-activation values of every layer are stored on the
        instance because ``backward`` reads them.

        Args:
            X: Array of shape (n_samples, input_size).

        Returns:
            Array of shape (n_samples, output_size) of softmax probabilities.
        """
        self.preact1 = np.dot(X, self.weights1) + self.bias1
        self.postact1 = relu(self.preact1)

        self.preact2 = np.dot(self.postact1, self.weights2) + self.bias2
        self.postact2 = relu(self.preact2)

        self.preact3 = np.dot(self.postact2, self.weights3) + self.bias3
        self.postact3 = softmax(self.preact3)

        return self.postact3

    # Backpropagation
    def backward(self, X, y, y_pred):
        """Apply one gradient-descent step using the activations cached by ``forward``.

        Must be called right after ``forward(X)`` on the same batch, since it
        reads the cached ``preact*`` / ``postact*`` attributes.

        Args:
            X: The batch that was passed to ``forward``.
            y: Target rows with the same shape as ``y_pred`` (one-hot labels
                in this file).
            y_pred: Output of ``forward(X)``.
        """
        m = y.shape[0]  # Number of samples; gradients are averaged over the batch

        # Output layer: for softmax combined with cross-entropy the gradient
        # with respect to the logits is simply (prediction - target).
        d_preact3 = y_pred - y
        d_weights3 = np.dot(self.postact2.T, d_preact3) / m
        d_bias3 = np.sum(d_preact3, axis=0, keepdims=True) / m

        # Second hidden layer
        d_postact2 = np.dot(d_preact3, self.weights3.T)
        d_preact2 = d_postact2 * relu_derivative(self.preact2)
        d_weights2 = np.dot(self.postact1.T, d_preact2) / m
        d_bias2 = np.sum(d_preact2, axis=0, keepdims=True) / m

        # First hidden layer
        d_postact1 = np.dot(d_preact2, self.weights2.T)
        d_preact1 = d_postact1 * relu_derivative(self.preact1)
        d_weights1 = np.dot(X.T, d_preact1) / m
        d_bias1 = np.sum(d_preact1, axis=0, keepdims=True) / m

        # Update weights and biases. All gradients above were computed from
        # the pre-update weights, so the update order does not matter.
        self.weights3 -= self.learning_rate * d_weights3
        self.bias3 -= self.learning_rate * d_bias3
        self.weights2 -= self.learning_rate * d_weights2
        self.bias2 -= self.learning_rate * d_bias2
        self.weights1 -= self.learning_rate * d_weights1
        self.bias1 -= self.learning_rate * d_bias1

    # Training function
    def train(self, X, y, epochs, batch_size):
        """Fit the network with shuffled mini-batch gradient descent.

        After every epoch the cross-entropy over all of ``X`` is printed and
        recorded.

        Args:
            X: Array of shape (n_samples, input_size).
            y: Target rows aligned with ``X`` (one-hot labels in this file).
            epochs: Number of passes over the data.
            batch_size: Samples per gradient step; the last batch of an epoch
                may be smaller.

        Returns:
            List with one float per epoch: the full-data loss after that epoch.
        """
        n_samples = X.shape[0]
        losses = []

        for epoch in range(epochs):
            # Shuffle an index array rather than the data itself, so no full
            # copy of X and y is materialised every epoch.
            indices = np.arange(n_samples)
            np.random.shuffle(indices)

            # Mini-batch gradient descent
            for start in range(0, n_samples, batch_size):
                batch_indices = indices[start:start + batch_size]
                X_batch = X[batch_indices]
                y_batch = y[batch_indices]

                # Forward and backward propagation
                y_pred = self.forward(X_batch)
                self.backward(X_batch, y_batch, y_pred)

            # Compute loss for the epoch; 1e-8 guards against log(0)
            y_pred = self.forward(X)
            loss = -np.mean(np.sum(y * np.log(y_pred + 1e-8), axis=1))  # Categorical cross-entropy
            losses.append(float(loss))
            print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss:.4f}")

        return losses

    # Predict function
    def predict(self, X):
        """Return the index of the most probable class for each row of ``X``."""
        y_pred = self.forward(X)
        return np.argmax(y_pred, axis=1)

# Initialize and train the network
input_size = 784  # Flattened image size
hidden_sizes = [128, 64]  # Number of neurons in hidden layers
output_size = 10# Number of output classes
epochs = 100
batch_size = 64

nn = NeuralNetwork(input_size, hidden_sizes, output_size)
nn.train(x_train, y_train, epochs=epochs, batch_size=batch_size)

# Evaluate the model on the test data
y_test_pred = nn.predict(x_test)
y_test_true = np.argmax(y_test, axis=1)

# Calculate accuracy
accuracy = np.mean(y_test_pred == y_test_true)
print(f"Test Accuracy: {accuracy * 100:.2f}%")


Epoch 1/100, Loss: 2.3009
Epoch 2/100, Loss: 2.2992
Epoch 3/100, Loss: 2.2892
Epoch 4/100, Loss: 1.8888
Epoch 5/100, Loss: 0.7884
Epoch 6/100, Loss: 0.6371
Epoch 7/100, Loss: 0.5636
Epoch 8/100, Loss: 0.5034
Epoch 9/100, Loss: 0.4621
Epoch 10/100, Loss: 0.4173
Epoch 11/100, Loss: 0.3852
Epoch 12/100, Loss: 0.3548
Epoch 13/100, Loss: 0.3306
Epoch 14/100, Loss: 0.3066
Epoch 15/100, Loss: 0.2867
Epoch 16/100, Loss: 0.2664
Epoch 17/100, Loss: 0.2497
Epoch 18/100, Loss: 0.2348
Epoch 19/100, Loss: 0.2195
Epoch 20/100, Loss: 0.2071
Epoch 21/100, Loss: 0.1967
Epoch 22/100, Loss: 0.1862
Epoch 23/100, Loss: 0.1797
Epoch 24/100, Loss: 0.1755
Epoch 25/100, Loss: 0.1635
Epoch 26/100, Loss: 0.1579
Epoch 27/100, Loss: 0.1507
Epoch 28/100, Loss: 0.1436
Epoch 29/100, Loss: 0.1410
Epoch 30/100, Loss: 0.1338
Epoch 31/100, Loss: 0.1286
Epoch 32/100, Loss: 0.1236
Epoch 33/100, Loss: 0.1213
Epoch 34/100, Loss: 0.1162
Epoch 35/100, Loss: 0.1139
Epoch 36/100, Loss: 0.1096
Epoch 37/100, Loss: 0.1063
Epoch 38/1