# Fully Connected Neural Network from Scratch (NumPy Only) — Experiments

## Imports

In [1]:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torchvision
from torchvision import transforms
import os

# Make sure the output directory exists before any plot or CSV is written into "results/".
os.makedirs("results", exist_ok=True)


## Load MNIST Dataset

In [2]:

# Every sample is passed through ToTensor when it is read from the dataset.
transform = transforms.ToTensor()

# The train=False split of MNIST is what this notebook calls "validation".
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform=transform,
    download=True,
)
val_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform=transform,
    download=True,
)

# Loaders are only used to iterate over the data in batches of 64;
# the training loader shuffles, the validation loader keeps dataset order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
)
val_loader = torch.utils.data.DataLoader(
    dataset=val_dataset,
    batch_size=64,
    shuffle=False,
)

print("Train:", len(train_dataset), "Validation:", len(val_dataset))


Train: 60000 Validation: 10000


## Convert Torch to NumPy

In [3]:

def torch_to_numpy(loader):
    """Drain an iterable of ``(images, labels)`` tensor batches into NumPy arrays.

    Each image batch is flattened to 2-D (one row per sample, all remaining
    dimensions collapsed into columns) before the batches are stacked.

    Returns:
        A tuple ``(X, y)``: the row-stacked flattened images and the
        concatenated labels, in the order the loader yielded them.
    """
    feature_chunks = []
    label_chunks = []
    for batch_images, batch_labels in loader:
        pixels = batch_images.cpu().numpy()
        # Keep the batch axis, collapse everything else into one feature axis.
        feature_chunks.append(pixels.reshape(batch_images.shape[0], -1))
        label_chunks.append(batch_labels.cpu().numpy())
    X = np.vstack(feature_chunks)
    y = np.hstack(label_chunks)
    return X, y

# Materialize both splits as NumPy arrays with one flattened image per row.
# train_loader was built with shuffle=True, so the training rows come out in a shuffled order.
X_train, y_train = torch_to_numpy(train_loader)
X_val, y_val = torch_to_numpy(val_loader)

def one_hot(y, c=10):
    """One-hot encode an array of integer class labels.

    Args:
        y: NumPy array of integer labels.
        c: Number of classes, i.e. the width of the encoded matrix.

    Returns:
        Float array of shape ``(y.size, c)`` holding a single 1 per row, in
        the column given by that row's label.
    """
    n_samples = y.size
    row_idx = np.arange(n_samples)
    encoded = np.zeros((n_samples, c))
    # Fancy indexing pairs row i with column y[i].
    encoded[row_idx, y] = 1
    return encoded

# One-hot versions of the integer labels (default c=10 classes), used as targets during training.
y_train_oh = one_hot(y_train)
y_val_oh = one_hot(y_val)


## Activation Functions

In [4]:

def relu(z):
    """Rectified linear unit: element-wise maximum of 0 and ``z``."""
    return np.maximum(0, z)


def relu_derivative(z):
    """Derivative of ReLU with respect to ``z``: 1.0 where ``z > 0``, else 0.0."""
    positive = np.greater(z, 0)
    return positive.astype(float)

def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, evaluated element-wise.

    The naive formula overflows in ``exp(-z)`` for large negative ``z``.
    Here ``exp`` is only ever applied to a non-positive number, so the
    intermediate values stay finite for any finite input.

    Args:
        z: Scalar or array of pre-activations.

    Returns:
        Values in ``[0, 1]`` with the same shape as ``z``.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # always in (0, 1], cannot overflow
    # z >= 0: 1 / (1 + e^{-z});  z < 0: e^{z} / (1 + e^{z}) -- the same function.
    out = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Hand back a scalar for scalar input instead of a 0-d array.
    return out[()] if out.ndim == 0 else out


def sigmoid_derivative(z):
    """Derivative of the logistic function with respect to ``z``: ``s * (1 - s)``."""
    s = sigmoid(z)
    return s * (1 - s)

def tanh(z):
    """Hyperbolic tangent, applied element-wise."""
    return np.tanh(z)


def tanh_derivative(z):
    """Derivative of tanh with respect to ``z``: ``1 - tanh(z)^2``."""
    t = np.tanh(z)
    return 1 - t ** 2

def softmax(z):
    """Row-wise softmax of a 2-D array of scores.

    The per-row maximum is subtracted before exponentiating, so the largest
    exponent in every row is exactly 0.

    Returns:
        Array of the same shape as ``z`` whose rows sum to 1.
    """
    row_max = np.max(z, axis=1, keepdims=True)
    exp_shifted = np.exp(z - row_max)
    row_total = np.sum(exp_shifted, axis=1, keepdims=True)
    return exp_shifted / row_total

def get_activation(name):
    """Look up a hidden-layer activation and its derivative by name.

    Args:
        name: One of ``"relu"``, ``"sigmoid"`` or ``"tanh"``.

    Returns:
        A pair ``(activation, derivative)`` of callables.

    Raises:
        ValueError: If ``name`` is not a supported activation. Without this
            the function would return ``None`` and callers that unpack the
            pair would fail with an unrelated-looking ``TypeError``.
    """
    if name == "relu": return relu, relu_derivative
    if name == "sigmoid": return sigmoid, sigmoid_derivative
    if name == "tanh": return tanh, tanh_derivative
    raise ValueError(
        f"Unknown activation {name!r}; expected 'relu', 'sigmoid' or 'tanh'"
    )


## Neural Network Class

In [5]:

class NeuralNetwork:
    """Fully connected feed-forward classifier trained with gradient descent.

    With ``L = len(layers) - 1`` weight layers, parameters are stored in
    ``self.params`` under ``'W0'..'W{L-1}'`` and ``'b0'..'b{L-1}'``.
    ``forward`` records the activations ``'A0'..'A{L}'`` and the hidden
    pre-activations ``'Z1'..'Z{L-1}'`` in ``self.cache``; ``backward`` reads
    them and writes the gradients to ``self.grads``.
    """

    def __init__(self, layers, activation='relu', output_activation='softmax', lr=0.05,
                 weight_init=0.01):
        """Build the network and draw its initial parameters.

        Args:
            layers: Layer widths, input first and output last (at least two).
            activation: Hidden activation name, resolved by ``get_activation``.
            output_activation: ``"softmax"`` applies softmax to the last
                layer; any other value applies the hidden activation instead.
            lr: Step size used by ``update_parameters``.
            weight_init: Either a number (standard-normal draws are multiplied
                by it; the default 0.01 is the original behavior) or one of
                ``"he"`` / ``"xavier"`` for fan-in scaled initialization.

        Raises:
            ValueError: If ``layers`` has fewer than two entries or
                ``weight_init`` is an unknown scheme name.
        """
        # Check this first: a network without any weight layer cannot run forward.
        if len(layers) < 2:
            raise ValueError("layers must contain at least an input and an output size")
        self.layers = layers
        self.lr = lr
        self.weight_init = weight_init
        self.params = {}
        self.cache = {}
        self.grads = {}
        self.act, self.act_der = get_activation(activation)
        self.output_activation = output_activation
        self.init_params()

    def _weight_std(self, fan_in):
        """Return the factor applied to standard-normal weights of a layer with ``fan_in`` inputs."""
        scheme = self.weight_init
        if isinstance(scheme, str):
            if scheme == "he":
                return np.sqrt(2.0 / fan_in)
            if scheme == "xavier":
                return np.sqrt(1.0 / fan_in)
            raise ValueError(
                f"Unknown weight_init {scheme!r}; expected a number, 'he' or 'xavier'"
            )
        return float(scheme)

    def init_params(self):
        """(Re)initialize all weights with scaled normal draws and all biases with zeros."""
        for i in range(len(self.layers)-1):
            fan_in, fan_out = self.layers[i], self.layers[i+1]
            self.params['W'+str(i)] = np.random.randn(fan_in, fan_out) * self._weight_std(fan_in)
            self.params['b'+str(i)] = np.zeros((1, fan_out))

    def forward(self, X):
        """Run a forward pass on ``X`` (one sample per row) and return the output activations.

        Intermediate values are stored in ``self.cache`` for ``backward``.
        """
        self.cache['A0'] = X
        L = len(self.layers)-1

        # Hidden layers: affine transform followed by the hidden activation.
        for i in range(L-1):
            Z = self.cache['A'+str(i)] @ self.params['W'+str(i)] + self.params['b'+str(i)]
            A = self.act(Z)
            self.cache['Z'+str(i+1)] = Z
            self.cache['A'+str(i+1)] = A

        # Output layer.
        ZL = self.cache['A'+str(L-1)] @ self.params['W'+str(L-1)] + self.params['b'+str(L-1)]
        AL = softmax(ZL) if self.output_activation=="softmax" else self.act(ZL)
        self.cache['A'+str(L)] = AL
        return AL

    def compute_loss(self, Y_pred, Y_true):
        """Mean cross-entropy between predicted probabilities and one-hot targets."""
        eps = 1e-9  # keeps log() finite when a predicted probability is exactly 0
        return -np.mean(np.sum(Y_true*np.log(Y_pred+eps), axis=1))

    def backward(self, Y_true):
        """Backpropagate from the most recent ``forward`` pass and store ``self.grads``.

        Gradients are averaged over the batch and stored as ``'dW{i}'`` /
        ``'db{i}'``.

        NOTE: the output error is taken as ``A_L - Y_true``, which is the
        cross-entropy gradient for a softmax output. The same expression is
        used even when ``output_activation`` is not ``"softmax"``.
        """
        grads = {}
        L = len(self.layers)-1
        m = Y_true.shape[0]

        dZ = self.cache['A'+str(L)] - Y_true

        for i in reversed(range(L)):
            A_prev = self.cache['A'+str(i)]
            grads['dW'+str(i)] = A_prev.T @ dZ / m
            grads['db'+str(i)] = np.sum(dZ, axis=0, keepdims=True) / m

            if i > 0:
                # Push the error through W_i, then through the activation of hidden layer i.
                dA = dZ @ self.params['W'+str(i)].T
                dZ = dA * self.act_der(self.cache['Z'+str(i)])

        self.grads = grads

    def update_parameters(self):
        """Take one gradient-descent step of size ``lr`` using the gradients from ``backward``.

        Raises:
            RuntimeError: If no gradients are available yet.
        """
        grads = getattr(self, "grads", None)
        if not grads:
            raise RuntimeError("update_parameters() called before backward()")
        for k in self.params:
            self.params[k] -= self.lr * grads['d'+k]

    def predict(self, X):
        """Return the index of the highest-scoring class for every row of ``X``."""
        return np.argmax(self.forward(X), axis=1)

    def evaluate(self, X, Y):
        """Return the accuracy of ``predict(X)`` against one-hot targets ``Y``."""
        return np.mean(self.predict(X) == np.argmax(Y, axis=1))


## Training Function

In [6]:

def train_model(model, Xtr, Ytr, Xv, Yv, epochs=5, batch_size=256):
    """Train ``model`` with mini-batch gradient descent and record per-epoch metrics.

    Every epoch the training rows are reshuffled (via NumPy's global RNG),
    the model is updated once per mini-batch, and loss and accuracy are then
    measured on the full training and validation sets. Accuracy is derived
    from the same predictions that are used for the loss, so each set needs
    only one full forward pass per epoch.

    Args:
        model: Object providing ``forward``, ``backward``,
            ``update_parameters`` and ``compute_loss``.
        Xtr, Ytr: Training inputs and one-hot targets (one sample per row).
        Xv, Yv: Validation inputs and one-hot targets.
        epochs: Number of passes over the training data.
        batch_size: Mini-batch size; must be positive.

    Returns:
        Dict with the lists ``'train_loss'``, ``'val_loss'``, ``'train_acc'``
        and ``'val_acc'``, one entry per epoch.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        # A negative step would make the batch loop empty and silently train nothing.
        raise ValueError("batch_size must be a positive integer")

    def accuracy(Y_pred, Y_true):
        # Fraction of rows whose arg-max class matches the one-hot target.
        return np.mean(np.argmax(Y_pred, axis=1) == np.argmax(Y_true, axis=1))

    history = {'train_loss':[], 'val_loss':[], 'train_acc':[], 'val_acc':[]}
    n = Xtr.shape[0]

    for e in range(epochs):
        # Indexing with the permutation makes shuffled copies; the caller's arrays are untouched.
        perm = np.random.permutation(n)
        Xtr, Ytr = Xtr[perm], Ytr[perm]

        for i in range(0, n, batch_size):
            xb = Xtr[i:i+batch_size]
            yb = Ytr[i:i+batch_size]
            model.forward(xb)
            model.backward(yb)
            model.update_parameters()

        train_pred = model.forward(Xtr)
        val_pred = model.forward(Xv)

        history['train_loss'].append(model.compute_loss(train_pred, Ytr))
        history['val_loss'].append(model.compute_loss(val_pred, Yv))
        history['train_acc'].append(accuracy(train_pred, Ytr))
        history['val_acc'].append(accuracy(val_pred, Yv))

        print(f"Epoch {e+1} | TrainAcc={history['train_acc'][-1]:.3f} | ValAcc={history['val_acc'][-1]:.3f}")

    return history


## Experiments

In [7]:

# Architectures / activations to compare; "layers" lists the layer widths from input to output.
experiments = [
    {"layers":[784,128,10], "activation":"relu"},
    {"layers":[784,128,64,10], "activation":"relu"},
    {"layers":[784,256,128,10], "activation":"relu"},
    {"layers":[784,128,10], "activation":"sigmoid"},
    {"layers":[784,128,10], "activation":"tanh"},
]

EPOCHS = 5
SEED = 42  # base seed; the experiment index is added so every run is reproducible on its own
results = []


def _save_curves(history, metric, label, title, path):
    """Plot the train/validation curve of one metric against the epoch number and save it to ``path``."""
    train_values = history['train_' + metric]
    val_values = history['val_' + metric]
    epoch_axis = range(1, len(train_values) + 1)

    fig, ax = plt.subplots()
    ax.plot(epoch_axis, train_values, label="Train")
    ax.plot(epoch_axis, val_values, label="Val")
    ax.set(title=title, xlabel="Epoch", ylabel=label)
    ax.legend()
    fig.savefig(path)
    # Close explicitly so figures do not pile up in memory across experiments.
    plt.close(fig)


for idx, exp in enumerate(experiments):
    print("\nRunning:", exp)

    # Weight initialization and batch shuffling both draw from NumPy's global RNG;
    # seeding it here makes each experiment repeatable.
    np.random.seed(SEED + idx)

    model = NeuralNetwork(exp["layers"], activation=exp["activation"], output_activation="softmax", lr=0.05)
    history = train_model(model, X_train, y_train_oh, X_val, y_val_oh, epochs=EPOCHS)

    exp_name = f"exp_{idx}_{exp['activation']}_{len(exp['layers'])-2}hidden"

    _save_curves(history, "loss", "Loss", exp_name+" Loss", f"results/{exp_name}_loss.png")
    _save_curves(history, "acc", "Accuracy", exp_name+" Accuracy", f"results/{exp_name}_accuracy.png")

    results.append({
        "Experiment": exp_name,
        "Layers": exp["layers"],
        "Activation": exp["activation"],
        "Final Train Acc": history["train_acc"][-1],
        "Final Val Acc": history["val_acc"][-1]
    })

# One row per experiment; persisted next to the plots and displayed as the cell output.
results_df = pd.DataFrame(results)
results_df.to_csv("results/experiment_results.csv", index=False)
results_df



Running: {'layers': [784, 128, 10], 'activation': 'relu'}
Epoch 1 | TrainAcc=0.815 | ValAcc=0.822
Epoch 2 | TrainAcc=0.875 | ValAcc=0.881
Epoch 3 | TrainAcc=0.894 | ValAcc=0.897
Epoch 4 | TrainAcc=0.902 | ValAcc=0.905
Epoch 5 | TrainAcc=0.909 | ValAcc=0.913

Running: {'layers': [784, 128, 64, 10], 'activation': 'relu'}
Epoch 1 | TrainAcc=0.112 | ValAcc=0.114
Epoch 2 | TrainAcc=0.112 | ValAcc=0.114
Epoch 3 | TrainAcc=0.374 | ValAcc=0.378
Epoch 4 | TrainAcc=0.742 | ValAcc=0.749
Epoch 5 | TrainAcc=0.812 | ValAcc=0.818

Running: {'layers': [784, 256, 128, 10], 'activation': 'relu'}
Epoch 1 | TrainAcc=0.112 | ValAcc=0.114
Epoch 2 | TrainAcc=0.370 | ValAcc=0.377
Epoch 3 | TrainAcc=0.754 | ValAcc=0.765
Epoch 4 | TrainAcc=0.813 | ValAcc=0.822
Epoch 5 | TrainAcc=0.861 | ValAcc=0.861

Running: {'layers': [784, 128, 10], 'activation': 'sigmoid'}
Epoch 1 | TrainAcc=0.158 | ValAcc=0.159
Epoch 2 | TrainAcc=0.551 | ValAcc=0.562
Epoch 3 | TrainAcc=0.715 | ValAcc=0.722
Epoch 4 | TrainAcc=0.786 | ValAc

Unnamed: 0,Experiment,Layers,Activation,Final Train Acc,Final Val Acc
0,exp_0_relu_1hidden,"[784, 128, 10]",relu,0.908983,0.9126
1,exp_1_relu_2hidden,"[784, 128, 64, 10]",relu,0.811633,0.8175
2,exp_2_relu_2hidden,"[784, 256, 128, 10]",relu,0.861067,0.8612
3,exp_3_sigmoid_1hidden,"[784, 128, 10]",sigmoid,0.820517,0.8247
4,exp_4_tanh_1hidden,"[784, 128, 10]",tanh,0.909283,0.9127


## View Saved Files

In [8]:

# os.listdir returns entries in arbitrary order; sort so the listing is the same on every run.
sorted(os.listdir("results"))


['experiment_results.csv',
 'exp_0_relu_1hidden_accuracy.png',
 'exp_0_relu_1hidden_loss.png',
 'exp_1_relu_2hidden_accuracy.png',
 'exp_1_relu_2hidden_loss.png',
 'exp_2_relu_2hidden_accuracy.png',
 'exp_2_relu_2hidden_loss.png',
 'exp_3_sigmoid_1hidden_accuracy.png',
 'exp_3_sigmoid_1hidden_loss.png',
 'exp_4_tanh_1hidden_accuracy.png',
 'exp_4_tanh_1hidden_loss.png']