In [None]:
# Using AI as well

# Basic Neural Network from Scratch
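This notebook implements a simple neural network from scratch using NumPy. It follows the classic tutorial structure: load MNIST, preprocess (scale the pixel values and one-hot encode the labels), define a 1-hidden-layer network (sigmoid hidden layer and softmax output, rather than a single logistic unit), train with full-batch gradient descent, and evaluate on a held-out test set.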

In [1]:
# Imports
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

np.random.seed(1)


In [3]:
# Load MNIST (via openml) and preprocess
mnist = fetch_openml('mnist_784', version=1, as_frame=False)

# Pixels are stored as float32 and divided by 255; labels are parsed as int64.
X = mnist.data.astype(np.float32) / 255.0
y = mnist.target.astype(np.int64)

# Multiclass setup (softmax output): hold out 10000 examples for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=10000,
    random_state=1,
)

# Column-major layout: after the transpose each column is one example (n_x, m).
X_train, X_test = X_train.T, X_test.T
m_train = np.shape(X_train)[1]

# One-hot encode the labels (digits 0..9): pick rows of the identity matrix
# by label, then transpose so each result has shape (num_classes, m).
num_classes = 10
Y_train, Y_test = (
    np.eye(num_classes)[labels].T for labels in (y_train, y_test)
)


In [None]:
# Utility functions

def sigmoid(Z):
    """Element-wise logistic function 1 / (1 + exp(-Z)), computed stably.

    The naive form evaluates exp(-Z), which overflows for large negative Z
    (emitting a RuntimeWarning, or raising under np.errstate(over="raise")).
    Here only exp(-|Z|) is evaluated, which always lies in (0, 1].

    Parameters
    ----------
    Z : array_like
        Pre-activation values of any shape.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as Z holding values in [0, 1].
    """
    Z = np.asarray(Z)
    # exp of a non-positive number can underflow to 0 but never overflow.
    decay = np.exp(-np.abs(Z))
    # Z >= 0: 1 / (1 + e^-Z).  Z < 0: the same value written as e^Z / (1 + e^Z).
    return np.where(Z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

def softmax(Z):
    """Column-wise softmax.

    Z has shape (num_classes, m); every column is normalized independently
    so that its entries sum to 1. The per-column maximum is subtracted
    before exponentiating so that np.exp never sees a positive argument.
    """
    col_max = np.max(Z, axis=0, keepdims=True)
    unnormalized = np.exp(Z - col_max)
    partition = np.sum(unnormalized, axis=0, keepdims=True)
    return unnormalized / partition

def compute_loss(Y, A):
    """Multiclass cross-entropy, averaged over the m columns of Y.

    Y holds the targets and A the predicted probabilities; both are indexed
    as (class, example). A is clipped to [1e-12, 1 - 1e-12] before the log
    so that a probability of exactly 0 cannot produce -inf.
    """
    eps = 1e-12
    probs = np.clip(A, eps, 1.0 - eps)
    total = np.sum(Y * np.log(probs))
    scale = 1.0 / Y.shape[1]
    return -scale * total


In [None]:
# Initialize parameters for a simple 1-hidden-layer network
# (sigmoid hidden layer -> softmax output) and train it with full-batch
# gradient descent on (X_train, Y_train).

# Re-seed the global RNG so that re-running this cell on its own reproduces
# the same initial weights. The imports cell seeds with the same value, so a
# fresh Restart & Run All is unaffected as long as nothing in between draws
# from the global NumPy RNG (no such draw is visible in this notebook).
np.random.seed(1)

n_x = X_train.shape[0]   # input features per example
n_h = 64                 # hidden units
n_y = num_classes        # output classes

# Small random weights break the symmetry between hidden units; biases start at 0.
W1 = np.random.randn(n_h, n_x) * 0.01
b1 = np.zeros((n_h, 1))
W2 = np.random.randn(n_y, n_h) * 0.01
b2 = np.zeros((n_y, 1))

learning_rate = 0.5
num_epochs = 500

m = m_train

# Training loop (full-batch gradient descent)
for epoch in range(num_epochs):
    # Forward
    Z1 = W1.dot(X_train) + b1
    A1 = sigmoid(Z1)
    Z2 = W2.dot(A1) + b2
    A2 = softmax(Z2)

    # Loss (evaluated with the parameters from before this epoch's update)
    loss = compute_loss(Y_train, A2)

    # Backprop
    # For softmax + cross-entropy the gradient w.r.t. Z2 is A2 - Y;
    # the 1/m averaging is applied when forming the parameter gradients.
    dZ2 = A2 - Y_train                    # (n_y, m)
    dW2 = (1.0 / m) * dZ2.dot(A1.T)
    db2 = (1.0 / m) * np.sum(dZ2, axis=1, keepdims=True)

    dA1 = W2.T.dot(dZ2)
    dZ1 = dA1 * A1 * (1 - A1)            # sigmoid derivative
    dW1 = (1.0 / m) * dZ1.dot(X_train.T)
    db1 = (1.0 / m) * np.sum(dZ1, axis=1, keepdims=True)

    # Update
    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2

    if epoch % 50 == 0:
        print(f"Epoch {epoch}, loss = {loss:.4f}")

# The loss computed inside the loop predates the last parameter update.
# Run one more forward pass so the reported value (and Z1/A1/Z2/A2) match
# the final W1, b1, W2, b2.
Z1 = W1.dot(X_train) + b1
A1 = sigmoid(Z1)
Z2 = W2.dot(A1) + b2
A2 = softmax(Z2)
loss = compute_loss(Y_train, A2)

print('Training complete, final loss =', loss)


In [None]:
# Evaluate on test set: one forward pass through the trained network
Z1_test = np.dot(W1, X_test) + b1
A1_test = sigmoid(Z1_test)
Z2_test = np.dot(W2, A1_test) + b2
A2_test = softmax(Z2_test)

# Ground-truth labels, and for each column (example) the row index with the
# largest softmax output as the predicted class.
true = y_test
preds = A2_test.argmax(axis=0)

print('Confusion matrix:', confusion_matrix(true, preds), sep='\n')
print('\nClassification report:', classification_report(true, preds), sep='\n')
