**Import Libraries**

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.datasets import mnist

**Code**

In [2]:
class SimpleANN:
    """A minimal one-hidden-layer neural network implemented with NumPy.

    The hidden layer uses ReLU, the output layer uses softmax, and training is
    full-batch gradient descent on the cross-entropy cost.

    Data is laid out column-wise: ``X`` has shape ``(input_size, m)`` and the
    one-hot targets ``Y`` have shape ``(output_size, m)``, where ``m`` is the
    number of samples.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01):
        """Create the parameters of the network.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output classes.
            learning_rate: Step size used by ``backward_propagation``.
        """
        self.learning_rate = learning_rate
        # Small random weights (drawn from NumPy's global RNG) and zero biases.
        self.weights_input_hidden = np.random.randn(hidden_size, input_size) * 0.01
        self.bias_hidden = np.zeros((hidden_size, 1))
        self.weights_hidden_output = np.random.randn(output_size, hidden_size) * 0.01
        self.bias_output = np.zeros((output_size, 1))

    def sigmoid(self, z):
        """Return the element-wise logistic function ``1 / (1 + exp(-z))``.

        Not used by ``forward_propagation``; kept as a utility.
        """
        return 1 / (1 + np.exp(-z))

    def sigmoid_derivative(self, z):
        """Return the element-wise derivative of ``sigmoid`` evaluated at ``z``."""
        s = self.sigmoid(z)  # evaluate once instead of twice
        return s * (1 - s)

    def relu(self, z):
        """Return ``max(0, z)`` element-wise."""
        return np.maximum(0, z)

    def relu_derivative(self, z):
        """Return 1.0 where ``z > 0`` and 0.0 elsewhere."""
        return (z > 0).astype(float)

    def softmax(self, z):
        """Return the softmax of ``z`` taken over axis 0 (one distribution per column).

        Each column is shifted by its own maximum before exponentiation.
        Shifting by the global maximum instead lets a column whose logits are
        all far below that maximum underflow to zeros, which turns the
        normalisation into 0/0 and yields NaN.
        """
        shifted = z - np.max(z, axis=0, keepdims=True)
        exp_z = np.exp(shifted)
        return exp_z / exp_z.sum(axis=0, keepdims=True)

    def forward_propagation(self, X):
        """Run a forward pass and cache the intermediate values.

        Args:
            X: Inputs of shape ``(input_size, m)``.

        Returns:
            Class probabilities of shape ``(output_size, m)``.

        Side effects:
            Stores ``Z1``, ``A1``, ``Z2`` and ``A2`` on the instance; they are
            read by ``backward_propagation``.
        """
        self.Z1 = np.dot(self.weights_input_hidden, X) + self.bias_hidden
        self.A1 = self.relu(self.Z1)
        self.Z2 = np.dot(self.weights_hidden_output, self.A1) + self.bias_output
        self.A2 = self.softmax(self.Z2)
        return self.A2

    def compute_cost(self, Y_hat, Y):
        """Return the mean cross-entropy between predictions and targets.

        Args:
            Y_hat: Predicted probabilities, shape ``(output_size, m)``.
            Y: Target distribution (e.g. one-hot), same shape as ``Y_hat``.
        """
        m = Y.shape[1]
        # The 1e-8 offset keeps log() finite when a predicted probability is 0.
        cost = -np.sum(Y * np.log(Y_hat + 1e-8)) / m
        return np.squeeze(cost)

    def backward_propagation(self, X, Y):
        """Compute gradients and apply one gradient-descent step in place.

        Relies on the values cached by the most recent ``forward_propagation``
        call, so that call must have been made with the same ``X``.

        Args:
            X: Inputs of shape ``(input_size, m)``.
            Y: One-hot targets of shape ``(output_size, m)``.
        """
        m = X.shape[1]

        # Output layer: for softmax combined with cross-entropy the gradient
        # with respect to Z2 is simply (prediction - target).
        dZ2 = self.A2 - Y
        dW2 = (1 / m) * np.dot(dZ2, self.A1.T)
        db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)

        # Hidden layer: push the error back through the (pre-update) output
        # weights, then through the ReLU.
        dA1 = np.dot(self.weights_hidden_output.T, dZ2)
        dZ1 = dA1 * self.relu_derivative(self.Z1)
        dW1 = (1 / m) * np.dot(dZ1, X.T)
        db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)

        self.weights_hidden_output -= self.learning_rate * dW2
        self.bias_output -= self.learning_rate * db2
        self.weights_input_hidden -= self.learning_rate * dW1
        self.bias_hidden -= self.learning_rate * db1

    def accuracy(self, Y_hat, Y):
        """Return the percentage of columns whose arg-max class matches the target."""
        predictions = np.argmax(Y_hat, axis=0)
        targets = np.argmax(Y, axis=0)
        return np.mean(predictions == targets) * 100

    def train(self, X_train, Y_train, X_val, Y_val, epochs=10, print_every=1):
        """Train with full-batch gradient descent and print progress.

        Args:
            X_train: Training inputs, shape ``(input_size, m_train)``.
            Y_train: One-hot training targets, shape ``(output_size, m_train)``.
            X_val: Validation inputs, shape ``(input_size, m_val)``.
            Y_val: One-hot validation targets, shape ``(output_size, m_val)``.
            epochs: Number of passes over the training set.
            print_every: Print metrics every this many epochs (default: every
                epoch). ``0`` or ``None`` disables printing.
        """
        for epoch in range(1, epochs + 1):
            Y_hat_train = self.forward_propagation(X_train)
            cost = self.compute_cost(Y_hat_train, Y_train)
            self.backward_propagation(X_train, Y_train)

            if print_every and epoch % print_every == 0:
                # Cost and train accuracy come from the forward pass made
                # before this epoch's update; validation accuracy is measured
                # after it.
                train_accuracy = self.accuracy(Y_hat_train, Y_train)
                # This overwrites the cached activations, which is safe because
                # the next epoch re-runs the forward pass before backprop.
                Y_hat_val = self.forward_propagation(X_val)
                val_accuracy = self.accuracy(Y_hat_val, Y_val)
                print(f"Epoch {epoch}, Cost: {cost:.4f}, Train Accuracy: {train_accuracy:.2f}%, Validation Accuracy: {val_accuracy:.2f}%")

    def predict(self, X):
        """Return the predicted class index for every column of ``X``."""
        Y_hat = self.forward_propagation(X)
        predictions = np.argmax(Y_hat, axis=0)
        return predictions

In [3]:
# # Load MNIST dataset
# (X_train_full, Y_train_full), (X_test_full, Y_test_full) = mnist.load_data()
# X_train_full = X_train_full.reshape(X_train_full.shape[0], -1).T / 255.0  # Flatten and normalize
# X_test_full = X_test_full.reshape(X_test_full.shape[0], -1).T / 255.0
# Y_train_one_hot = np.eye(10)[Y_train_full]  # One-hot encode labels
# Y_test_one_hot = np.eye(10)[Y_test_full]
# X_train, X_val, Y_train, Y_val = train_test_split(X_train_full.T, Y_train_one_hot, test_size=0.2, random_state=42)

# Random Numbers Dataset
# 150 two-feature samples; the labels are drawn independently of the features,
# so this run only exercises the training pipeline.
np.random.seed(1)
X = np.transpose(np.random.randn(2, 150))  # rows are samples for train_test_split
Y = np.random.randint(0, 3, 150)
Y_one_hot = np.eye(3)[Y]

# Keep 70% for training, then halve the remainder into validation and test.
X_train, X_temp, Y_train, Y_temp = train_test_split(
    X, Y_one_hot, test_size=0.3, random_state=42
)
X_val, X_test, Y_val, Y_test = train_test_split(
    X_temp, Y_temp, test_size=0.5, random_state=42
)

# SimpleANN expects one sample per column, so transpose every split.
X_train, X_val, X_test = (part.T for part in (X_train, X_val, X_test))
Y_train, Y_val, Y_test = (part.T for part in (Y_train, Y_val, Y_test))

ann = SimpleANN(input_size=2, hidden_size=5, output_size=3, learning_rate=0.1)
ann.train(X_train, Y_train, X_val, Y_val, epochs=10)

# Score the held-out test split with the trained network.
Y_hat_test = ann.forward_propagation(X_test)
test_accuracy = ann.accuracy(Y_hat_test, Y_test)
print(f"Test Accuracy: {test_accuracy:.2f}%")

Epoch 1, Cost: 1.0986, Train Accuracy: 35.24%, Validation Accuracy: 22.73%
Epoch 2, Cost: 1.0983, Train Accuracy: 38.10%, Validation Accuracy: 22.73%
Epoch 3, Cost: 1.0979, Train Accuracy: 38.10%, Validation Accuracy: 22.73%
Epoch 4, Cost: 1.0977, Train Accuracy: 38.10%, Validation Accuracy: 22.73%
Epoch 5, Cost: 1.0974, Train Accuracy: 38.10%, Validation Accuracy: 22.73%
Epoch 6, Cost: 1.0971, Train Accuracy: 38.10%, Validation Accuracy: 22.73%
Epoch 7, Cost: 1.0969, Train Accuracy: 38.10%, Validation Accuracy: 22.73%
Epoch 8, Cost: 1.0967, Train Accuracy: 38.10%, Validation Accuracy: 22.73%
Epoch 9, Cost: 1.0964, Train Accuracy: 38.10%, Validation Accuracy: 22.73%
Epoch 10, Cost: 1.0962, Train Accuracy: 38.10%, Validation Accuracy: 22.73%
Test Accuracy: 21.74%
