In [None]:
from keras.datasets import mnist
from sklearn.metrics import confusion_matrix

In [None]:
import numpy as np
import pandas as pd

def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise.

    The textbook formula evaluates ``np.exp(-x)``, which overflows (and emits
    a RuntimeWarning) for large negative ``x``. This version only ever
    exponentiates a non-positive number, so the intermediate stays in (0, 1].

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        Values in [0, 1] with the same shape as ``x``; a NumPy scalar when
        ``x`` is a scalar.
    """
    x = np.asarray(x)
    # exp(-|x|) <= 1, so neither branch below can overflow.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- same function.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d result to a scalar; for n-d arrays it is a no-op.
    return result[()]

def softmax(x):
    """Apply softmax independently to every column of ``x``.

    The per-column maximum is subtracted before exponentiating; this leaves
    the result unchanged but keeps the exponentials from overflowing.

    Args:
        x: Array whose axis 0 indexes the entries being normalised.

    Returns:
        Array of the same shape as ``x`` whose entries sum to 1 along axis 0.
    """
    column_peak = np.max(x, axis=0, keepdims=True)
    shifted_exp = np.exp(x - column_peak)
    normaliser = np.sum(shifted_exp, axis=0, keepdims=True)
    return shifted_exp / normaliser

def forward_propagation(weights_input_hidden, weights_hidden_output, biases_input_hidden, biases_hidden_output, x):
    """Run one forward pass through the two-layer network.

    Args:
        weights_input_hidden: Weight matrix applied to ``x``.
        weights_hidden_output: Weight matrix applied to the hidden activations.
        biases_input_hidden: Bias added to the hidden pre-activations.
        biases_hidden_output: Bias added to the output pre-activations.
        x: Input batch; the callers in this file pass one sample per column.

    Returns:
        Tuple ``(hidden_input, hidden_output, final_input, final_output)``:
        the hidden pre-activation, its sigmoid, the output pre-activation,
        and its column-wise softmax.
    """
    # Hidden layer: affine map followed by the sigmoid activation.
    z_hidden = np.dot(weights_input_hidden, x) + biases_input_hidden
    a_hidden = sigmoid(z_hidden)

    # Output layer: affine map followed by softmax over axis 0.
    z_out = np.dot(weights_hidden_output, a_hidden) + biases_hidden_output
    return z_hidden, a_hidden, z_out, softmax(z_out)

def backward_propagation(weights_input_hidden, weights_hidden_output, biases_input_hidden, biases_hidden_output, x, y, hidden_input, hidden_output, final_input, final_output, learning_rate):
    """Do one gradient-descent step and report the batch loss.

    The output-layer error term ``final_output - y`` is the gradient of
    softmax combined with cross-entropy, so the reported cost is the mean
    cross-entropy over the batch (it was previously a squared-error sum that
    did not match the gradient actually being followed).

    The four parameter arrays are updated IN PLACE with ``-=``. Callers that
    keep references to the arrays they passed in will observe the updated
    values; the same objects are also returned for convenience.

    Args:
        weights_input_hidden: Input-to-hidden weights (modified in place).
        weights_hidden_output: Hidden-to-output weights (modified in place).
        biases_input_hidden: Hidden-layer biases (modified in place).
        biases_hidden_output: Output-layer biases (modified in place).
        x: Input batch with one sample per column (``x.shape[1]`` samples).
        y: One-hot targets laid out like ``final_output``.
        hidden_input: Unused; accepted to keep the signature stable.
        hidden_output: Sigmoid activations of the hidden layer.
        final_input: Unused; accepted to keep the signature stable.
        final_output: Softmax probabilities produced by the forward pass.
        learning_rate: Step size applied to every gradient.

    Returns:
        Tuple ``(cost, weights_input_hidden, weights_hidden_output,
        biases_input_hidden, biases_hidden_output)``.
    """
    m = x.shape[1]  # Number of samples in the batch

    # Mean cross-entropy; clip so that log() never sees an exact zero.
    safe_probs = np.clip(final_output, 1e-12, 1.0)
    cost = -np.sum(y * np.log(safe_probs)) / m

    # Error terms. delta_hidden must use the output weights from BEFORE the
    # update below, so it is computed first.
    delta_output = final_output - y
    delta_hidden = np.dot(weights_hidden_output.T, delta_output) * hidden_output * (1 - hidden_output)

    # Batch-averaged gradients
    grad_weights_hidden_output = np.dot(delta_output, hidden_output.T) / m
    grad_biases_hidden_output = np.sum(delta_output, axis=1, keepdims=True) / m
    grad_weights_input_hidden = np.dot(delta_hidden, x.T) / m
    grad_biases_input_hidden = np.sum(delta_hidden, axis=1, keepdims=True) / m

    # In-place updates: keep `-=` (not `w = w - ...`) so the caller's arrays change.
    weights_hidden_output -= learning_rate * grad_weights_hidden_output
    biases_hidden_output -= learning_rate * grad_biases_hidden_output
    weights_input_hidden -= learning_rate * grad_weights_input_hidden
    biases_input_hidden -= learning_rate * grad_biases_input_hidden

    return cost, weights_input_hidden, weights_hidden_output, biases_input_hidden, biases_hidden_output

def train(weights_input_hidden, weights_hidden_output, biases_input_hidden, biases_hidden_output, X, y, epochs, learning_rate, batch_size):
    """Train the network with mini-batch gradient descent.

    Batches are taken in order (no shuffling) as consecutive row slices of
    ``X`` and ``y``; the final batch of an epoch may be shorter than
    ``batch_size``. Each slice is transposed so that samples become columns
    before being handed to the forward/backward passes. The loss of every
    batch is printed.

    Args:
        weights_input_hidden: Input-to-hidden weights.
        weights_hidden_output: Hidden-to-output weights.
        biases_input_hidden: Hidden-layer biases.
        biases_hidden_output: Output-layer biases.
        X: Training inputs, one sample per row.
        y: One-hot targets, one sample per row.
        epochs: Number of full passes over ``X``.
        learning_rate: Step size forwarded to ``backward_propagation``.
        batch_size: Number of rows per mini-batch.

    Returns:
        Tuple ``(weights_input_hidden, weights_hidden_output,
        biases_input_hidden, biases_hidden_output)`` after training.
        ``backward_propagation`` in this file updates its arrays in place, so
        these are the same objects that were passed in; callers that ignore
        the return value still see the trained parameters.
    """
    for epoch in range(epochs):
        for i in range(0, X.shape[0], batch_size):
            # Rows -> columns: the network code expects one sample per column.
            x_batch = X[i:i + batch_size].T
            y_batch = y[i:i + batch_size].T

            hidden_input, hidden_output, final_input, final_output = forward_propagation(weights_input_hidden, weights_hidden_output, biases_input_hidden, biases_hidden_output, x_batch)

            cost, weights_input_hidden, weights_hidden_output, biases_input_hidden, biases_hidden_output = backward_propagation(weights_input_hidden, weights_hidden_output, biases_input_hidden, biases_hidden_output, x_batch, y_batch, hidden_input, hidden_output, final_input, final_output, learning_rate)

            print("Epoch:", epoch, "| Iteration:", i, "| Loss:", cost)

    return weights_input_hidden, weights_hidden_output, biases_input_hidden, biases_hidden_output

def predict(weights_input_hidden, weights_hidden_output, biases_input_hidden, biases_hidden_output, X):
    """Return the predicted class index for every row of ``X``.

    ``X`` is transposed so samples become columns, pushed through the forward
    pass, and the index of the largest output along axis 0 is taken for each
    sample.
    """
    *_, class_scores = forward_propagation(
        weights_input_hidden,
        weights_hidden_output,
        biases_input_hidden,
        biases_hidden_output,
        X.T,
    )
    return class_scores.argmax(axis=0)

# End-to-end script: load MNIST, train the two-layer network, evaluate on the test set.

# Load the data, flatten every image into one row and divide pixel values by 255
# (presumably mapping 0-255 intensities into [0, 1] -- confirm against the dataset).
(train_X, train_y), (test_X, test_y) = mnist.load_data()
train_X = train_X.reshape(train_X.shape[0], -1) / 255.0
test_X = test_X.reshape(test_X.shape[0], -1) / 255.0

# Convert integer labels to one-hot rows (one column per distinct label value).
# NOTE(review): test_y_one_hot is not referenced again in the visible code.
train_y_one_hot = pd.get_dummies(train_y).values
test_y_one_hot = pd.get_dummies(test_y).values

# Hold out the last 20% of the training rows as a validation set (no shuffling).
# NOTE(review): val_X / val_y_one_hot are not used anywhere below.
split_idx = int(0.8 * train_X.shape[0])
train_X, val_X = train_X[:split_idx], train_X[split_idx:]
train_y_one_hot, val_y_one_hot = train_y_one_hot[:split_idx], train_y_one_hot[split_idx:]

# Initialize weights (small random values) and biases (zeros).
# input_size must equal train_X.shape[1]; 784 is presumably 28 * 28 flattened pixels.
input_size = 784
hidden_size = 128
output_size = 10
weights_input_hidden = np.random.randn(hidden_size, input_size) * 0.01
weights_hidden_output = np.random.randn(output_size, hidden_size) * 0.01
biases_input_hidden = np.zeros((hidden_size, 1))
biases_hidden_output = np.zeros((output_size, 1))

# Train the neural network.
# The return value of train() is not used here: the arrays passed in are updated
# in place (backward_propagation applies `-=`), so the four names above hold the
# trained parameters once this call returns.
epochs = 100
learning_rate = 0.1
batch_size = 64
train(weights_input_hidden, weights_hidden_output, biases_input_hidden, biases_hidden_output, train_X, train_y_one_hot, epochs, learning_rate, batch_size)

# Predict a class index for every test image
predictions = predict(weights_input_hidden, weights_hidden_output, biases_input_hidden, biases_hidden_output, test_X)

# Accuracy = fraction of test images whose predicted index equals the integer label
accuracy = np.mean(predictions == test_y)
print("Test Accuracy:", accuracy)


# Confusion matrix: true labels are passed first, predictions second
conf_matrix = confusion_matrix(test_y, predictions)

print("Confusion Matrix:")
print(conf_matrix)


Streaming output truncated to the last 5000 lines.
Epoch: 93 | Iteration: 16768 | Loss: 0.39565422584758636
Epoch: 93 | Iteration: 16832 | Loss: 0.4675080930301407
Epoch: 93 | Iteration: 16896 | Loss: 0.3883000177527065
Epoch: 93 | Iteration: 16960 | Loss: 0.3291251781563637
Epoch: 93 | Iteration: 17024 | Loss: 0.31784632896320586
Epoch: 93 | Iteration: 17088 | Loss: 0.6048128625243274
Epoch: 93 | Iteration: 17152 | Loss: 0.4446983248567203
Epoch: 93 | Iteration: 17216 | Loss: 0.5395938319832034
Epoch: 93 | Iteration: 17280 | Loss: 0.014365136811070116
Epoch: 93 | Iteration: 17344 | Loss: 0.44985589860612196
Epoch: 93 | Iteration: 17408 | Loss: 0.1400194234675026
Epoch: 93 | Iteration: 17472 | Loss: 0.2148879446198782
Epoch: 93 | Iteration: 17536 | Loss: 0.6858312927003232
Epoch: 93 | Iteration: 17600 | Loss: 0.21218880734272716
Epoch: 93 | Iteration: 17664 | Loss: 0.4071716504186482
Epoch: 93 | Iteration: 17728 | Loss: 1.576655583408189
Epoch: 93 | Iteration: 17792 | Los