In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch

In [4]:
class NeuralNetwork:

    @staticmethod
    def sigmoid(x):
        
        return 1 / (1 + np.exp(-x))

    @staticmethod
    def sigmoid_prime(x):
        a = NeuralNetwork.sigmoid(x)
        return a * (1 - a)

    @staticmethod
    def softmax(x):
        # Stability fix: subtract max for numerical stability
        exp_shifted = np.exp(x - np.max(x, axis=0, keepdims=True))
        return exp_shifted / np.sum(exp_shifted, axis=0, keepdims=True)

    @staticmethod
    def cross_entropy_loss(y_true, y_pred):
        # Add epsilon to avoid log(0)
        eps = 1e-12
        return -np.mean(y_true * np.log(y_pred + eps))

    @staticmethod
    def cross_entropy_derivative(y_true, y_pred):
        y_true = y_true.astype(np.float32)
        return y_pred - y_true

    def __init__(self, layer_sizes):
        self.costs = []
        self.iters = []
        self.weights = []
        self.biases = []
        self.Layers = len(layer_sizes)

        for k in range(self.Layers - 1):
            # Xavier initialization for sigmoid
            self.weights.append(np.random.randn(layer_sizes[k + 1], layer_sizes[k]) * np.sqrt(1. / layer_sizes[k]))
            self.biases.append(np.zeros((layer_sizes[k + 1], 1)))

    def forward(self, X):
        self.activations = [X]
        self.Z = []

        for i, (w, b) in enumerate(zip(self.weights, self.biases)):
            z = np.dot(w, self.activations[-1]) + b
            self.Z.append(z)

            if i == self.Layers - 2:
                a = NeuralNetwork.softmax(z)
            else:
                a = NeuralNetwork.sigmoid(z)

            self.activations.append(a)

        return self.activations[-1]

    def backward(self, X, y):
        dw = [None] * (self.Layers - 1)
        db = [None] * (self.Layers - 1)
        dz = [None] * (self.Layers)

        y_pred = self.activations[-1]
        cost = NeuralNetwork.cross_entropy_loss(y, y_pred)

        # Last layer derivative
        dz[-1] = NeuralNetwork.cross_entropy_derivative(y, y_pred)

        for l in reversed(range(self.Layers - 1)):
            a_prev = self.activations[l]
            dz_current = dz[l + 1]

            dw[l] = np.dot(dz_current, a_prev.T) / X.shape[1]
            db[l] = np.sum(dz_current, axis=1, keepdims=True) / X.shape[1]

            if l != 0:
                da_prev = np.dot(self.weights[l].T, dz_current)
                dz[l] = da_prev * NeuralNetwork.sigmoid_prime(self.Z[l - 1])

        return cost, dw, db

    def train(self, X, y, alpha=0.1, epochs=1000):
        for i in range(epochs):
            self.forward(X)
            cost, dw, db = self.backward(X, y)

            if i % (epochs // 10) == 0 or i == epochs - 1:
                self.costs.append(cost)
                self.iters.append(i)
                print(f"Epoch {i}, Cost: {cost:.4f}")

            for j in range(self.Layers - 1):
                self.weights[j] -= alpha * dw[j]
                self.biases[j] -= alpha * db[j]

    def predict(self, X):
        return self.forward(X)

    def evaluate(self, X, y_true):
        y_pred = self.forward(X)
        pred_labels = np.argmax(y_pred, axis=0)
        true_labels = np.argmax(y_true, axis=0)
        accuracy = np.mean(pred_labels == true_labels)
        return accuracy

In [5]:
import struct

def load_images(filename):
    with open(filename, 'rb') as f:
        magic, num, rows, cols = struct.unpack(">IIII", f.read(16))
        images = np.frombuffer(f.read(), dtype=np.uint8).reshape(num, rows, cols)
        return images

def load_labels(filename):
    with open(filename, 'rb') as f:
        magic, num = struct.unpack(">II", f.read(8))
        labels = np.frombuffer(f.read(), dtype=np.uint8)
        return labels

train_images = load_images('input/train-images.idx3-ubyte')/255
train_labels = load_labels('input/train-labels.idx1-ubyte')
test_images = load_images('input/t10k-images.idx3-ubyte')/255
test_labels = load_labels('input/t10k-labels.idx1-ubyte')


In [6]:
flat_train_images = train_images.reshape(60000, 28*28).T
flat_test_images = test_images.reshape(10000, 28*28).T


In [8]:
def one_hot(y, num_classes=10):
    return np.eye(num_classes)[y].T  # shape: (10, batch_size)

y_train = one_hot(train_labels)
y_test = one_hot(test_labels)

In [9]:
model = NeuralNetwork([784, 64, 16, 10])
model.train(flat_train_images, y_train, alpha=3.5, epochs=500)
acc = model.evaluate(flat_test_images, y_test)
print(f"Test Accuracy: {acc * 100:.2f}%")

Epoch 0, Cost: 0.2446
Epoch 50, Cost: 0.0868
Epoch 100, Cost: 0.0432
Epoch 150, Cost: 0.0301
Epoch 200, Cost: 0.0249
Epoch 250, Cost: 0.0225
Epoch 300, Cost: 0.0186
Epoch 350, Cost: 0.0166
Epoch 400, Cost: 0.0148
Epoch 450, Cost: 0.0134
Epoch 499, Cost: 0.0122
Test Accuracy: 95.82%


In [None]:
acc = model.evaluate(flat_train_images, y_train)
print(f"Train Accuracy: {acc * 100:.2f}%")
acc = model.evaluate(flat_test_images, y_test)
print(f"Test Accuracy: {acc * 100:.2f}%")

Train Accuracy: 99.30%
Test Accuracy: 97.08%


In [8]:
model = NeuralNetwork([784, 64, 16, 10])
model.train(flat_train_images, y_train, alpha=3.5, epochs=3000)
acc = model.evaluate(flat_test_images, y_test)
print(f"Test Accuracy: {acc * 100:.2f}%")

Epoch 0, Cost: 0.2360
Epoch 300, Cost: 0.0191
Epoch 600, Cost: 0.0113
Epoch 900, Cost: 0.0076
Epoch 1200, Cost: 0.0248
Epoch 1500, Cost: 0.0071
Epoch 1800, Cost: 0.0049
Epoch 2100, Cost: 0.0038
Epoch 2400, Cost: 0.0526
Epoch 2700, Cost: 0.0073
Epoch 2999, Cost: 0.0048
Test Accuracy: 96.97%


In [33]:
# enabled CUDA

from PIL import Image

# Load the image
image_path = 'my2.png'  # replace with your file path
image = Image.open(image_path).convert('L')  # Convert to grayscale

# Resize to 28x28 if not already
image = image.resize((28, 28))

# Convert to NumPy array and flatten
image_array = np.array(image)
flattened_array = image_array.flatten()

# Normalize pixel values to 0-1 (optional)
flattened_array = flattened_array / 255.0

print(flattened_array)
print("Shape:", flattened_array.reshape((784, 1)).shape)


[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         

In [34]:
model.predict(flattened_array.reshape((784, 1))).argmax()

np.int64(2)

In [32]:
flattened_array.reshape((784, 1))

array([[0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.   