In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [3]:
np.random.seed(42)

In [15]:
"""
ny: nbr of neurons in each layer

w1, b1 for input layer
w2, b2 for hidden layer
"""

def init_params(nx, nh, ny):

    return {
        'w1': np.random.normal(0, 0.3, (nx, nh)),
        'w2': np.random.normal(0, 0.3, (nh, ny)), 
        'b1': np.zeros((1, nh)),
        'b2': np.zeros((1, ny)),
    }

In [17]:
def softmax(x):
    tmp = np.exp(x - np.max(x))
    return tmp / np.sum(tmp)

def forward(params, x):
    a1 = x
    z1 = np.dot(params['w1'], a1.T) + params['b1']
    a2 = np.tanh(z1)
    z2 = np.dot(params['w2'], a2) + params['b2']
    a3 = softmax(z2)

    int_values = {
        'z1': z1,
        'a2': a2,
        'z2': z2,
        'a3': a3,
    }

    return a3, int_values

In [25]:
def loss_accuracy(yhat,y):
    rows = y.shape[0]
    loss = (-np.sum(y * np.log(yhat))) / rows 

    pred = np.argmax(yhat, axis=0)
    labels = np.argmax(y, axis=0)
    accuracy = np.mean(pred == labels)

    return loss, accuracy    

In [26]:
def backward(x, params, outputs, y):
    cols = x.shape[1]
    
    d_z2 = oytputs['a3'] - y
    d_w2 = np.dot(d_z2, outputs['a2'].T) / cols
    d_b2 = np.sum(d_z2, axis=1, keepdims=True) / cols
    
    d_z1 = np.dot(params['w2'].T, d_z2) * (1 - np.power(outputs['a2'], 2))
    d_w1 = np.dot(d_z1, x.T) / cols
    d_b1 = np.sum(d_z1, axis=1, keepdims=True) / cols
    
    gradients = {
        "d_w1": d_w1,
        "d_b1": d_b1,
        "d_w2": d_w2,
        "d_b2": d_b2
    }
    
    return gradients

In [28]:
def sgd(params, grads, eta):
    
    params["w1"] -= eta * grads["d_w1"]
    params["w2"] -= eta * grads["d_w2"]
    params["b1"] -= eta * grads["d_b1"]
    params["b2"] -= eta * grads["d_b2"]
    
    return params

## trainig steps

In [40]:
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# Load MNIST dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(X_train.shape[0], -1).T / 255.0
X_test = X_test.reshape(X_test.shape[0], -1).T / 255.0
Y_train = to_categorical(y_train, 10).T
Y_test = to_categorical(y_test, 10).T

# Initialize parameters
nx = X_train.shape[0]
nh = 128
ny = 10
params = init_params(nx, nh, ny)

# Training parameters
epochs = 50
batch_size = 128
eta = 0.1
n_batches = X_train.shape[1] // batch_size

# Training loop
loss_history = []
accuracy_history = []

for epoch in range(epochs):
    # Shuffle data
    permutation = np.random.permutation(X_train.shape[1])
    X_train_shuffled = X_train[:, permutation]
    Y_train_shuffled = Y_train[:, permutation]
    
    for i in range(n_batches):
        # Get mini-batch
        start = i * batch_size
        end = start + batch_size
        X_batch = X_train_shuffled[:, start:end]
        Y_batch = Y_train_shuffled[:, start:end]
        
        # Forward propagation
        Yhat, outputs = forward(params, X_batch)
        
        # Compute loss and accuracy
        loss, acc = loss_accuracy(Yhat, Y_batch)
        
        # Backpropagation
        grads = backward(X_batch, params, outputs, Y_batch)
        
        # Update parameters
        params = sgd(params, grads, eta)
    
    # Evaluate on the entire training set
    Yhat_train, _ = forward(params, X_train)
    train_loss, train_acc = loss_accuracy(Yhat_train, Y_train)
    loss_history.append(train_loss)
    accuracy_history.append(train_acc)
    
    print(f"Epoch {epoch+1}/{epochs}, Loss: {train_loss:.4f}, Accuracy: {train_acc:.4f}")

# Plot loss and accuracy
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(loss_history, label='Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Loss over Epochs')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(accuracy_history, label='Training Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Accuracy over Epochs')
plt.legend()

plt.show()

ModuleNotFoundError: No module named 'tensorflow.keras'

## hand written digit recognition dataset mnist

In [30]:
mnist = pd.read_csv('./mnist.csv')
mnist.head()

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
0,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [31]:
mnist.describe()

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,...,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,4.4434,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.1793,0.1636,0.0526,0.0006,0.0,0.0,0.0,0.0,0.0,0.0
std,2.895865,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,5.674149,5.736072,2.420004,0.06,0.0,0.0,0.0,0.0,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,253.0,253.0,156.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0


In [32]:
mnist.isna().sum()

label    0
1x1      0
1x2      0
1x3      0
1x4      0
        ..
28x24    0
28x25    0
28x26    0
28x27    0
28x28    0
Length: 785, dtype: int64

In [33]:
y = mnist['label']
x = mnist.drop(['label'],axis=1)

In [34]:
y = pd.get_dummies(y).T.to_numpy()
x = x.to_numpy()

In [35]:
nx = x.shape[1]
nh = 4
ny = y.shape[0]
params = init_params(nx, nh, ny)

num_epochs = 5000
eta = 0.1

loss_history = []
accuracy_history = []
for i in range(num_epochs):
    y_hat, cache = forward(params, x)
    
    loss,accuracy = loss_accuracy(y_hat,y)
    
    loss_history.append(loss)
    accuracy_history.append(accuracy)
    
    result = backward(params, cache, x, y)
    
    params = sgd(params, result, eta)

plt.plot(loss_history)
plt.title("loss")
plt.xlabel("epoch")
plt.ylabel("loss")
plt.show()

plt.plot(accuracy_history)
plt.title("accuracy")
plt.xlabel("epoch")
plt.ylabel("accuracy")
plt.show()


ValueError: shapes (784,4) and (784,10000) not aligned: 4 (dim 1) != 784 (dim 0)