# Neural Network Implementation from Scratch

This notebook implements a fully connected neural network using only NumPy. It defines a modular, object-oriented structure with dedicated classes for Linear layers, Sigmoid activation, and Mean Squared Error loss. The code covers the full training pipeline: the forward pass, manual backpropagation using the chain rule, matrix calculus for gradient computation, and a custom training loop that fits non-linear datasets.

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
class Linear:
    """Fully connected layer computing ``input @ weights + bias``.

    The layer is its own optimiser: ``backward`` applies a plain gradient
    descent step to ``weights`` and ``bias`` and returns the gradient with
    respect to the layer input.

    Args:
        input_dim: Number of input features.
        output_dim: Number of output features.
        lr: Step size of the gradient descent update applied in ``backward``.
    """

    def __init__(self, input_dim, output_dim, lr=0.001):
        self.weights = np.random.randn(input_dim, output_dim)
        self.bias = np.random.randn(output_dim)
        self.input = None  # input of the latest forward pass, needed by backward
        self.lr = lr

    def forward(self, input):
        """Return ``input @ weights + bias`` and remember the input.

        A 1-D input is treated as a column of single-feature samples.
        """
        self.input = input.reshape(-1, 1) if input.ndim == 1 else input
        return self.input @ self.weights + self.bias

    def backward(self, grad):
        """Update the parameters and return the gradient w.r.t. the input.

        Args:
            grad: Gradient of the loss w.r.t. this layer's output, with one
                row per sample of the latest forward pass.

        Returns:
            Gradient of the loss w.r.t. the input of the latest forward pass.
        """
        # The upstream gradient has to be computed with the weights that
        # produced the forward output, i.e. BEFORE they are updated below.
        grad_input = grad @ self.weights.T

        self.weights -= self.lr * (self.input.T @ grad)
        self.bias -= self.lr * np.sum(grad, axis=0)

        return grad_input


In [None]:
class Sigmoid:
    """Element-wise logistic activation layer (no trainable parameters)."""

    def __init__(self):
        # Input of the latest forward pass; backward re-evaluates the sigmoid on it.
        self.input = None

    def sigmoid(self, x):
        """Return ``1 / (1 + exp(-x))`` element-wise."""
        return 1 / (1 + np.exp(-x))

    def forward(self, input):
        """Apply the sigmoid; a 1-D input is first reshaped into a column."""
        if len(input.shape) == 1:
            input = input.reshape(-1, 1)
        self.input = input
        return self.sigmoid(input)

    def backward(self, grad):
        """Scale ``grad`` by the sigmoid derivative ``s * (1 - s)`` at the stored input."""
        activation = self.sigmoid(self.input)
        return grad * activation * (1 - activation)

In [None]:
class MSE:
    """Mean squared error loss, averaged over every element of the prediction."""

    def __init__(self):
        self.input = None   # predictions of the latest forward call
        self.labels = None  # targets of the latest forward call

    def forward(self, input, labels):
        """Return ``mean((input - labels) ** 2)`` and cache both arrays.

        1-D predictions or labels are reshaped into columns so that they line
        up element by element instead of broadcasting against each other.
        """
        self.input = input.reshape(-1, 1) if len(input.shape) == 1 else input
        self.labels = labels.reshape(-1, 1) if len(labels.shape) == 1 else labels
        return np.mean((self.input - self.labels) ** 2)

    def backward(self):
        """Return the gradient of the forward loss w.r.t. the predictions.

        ``forward`` averages over all elements, so the divisor is the total
        element count (``size``), not just the number of rows; the two only
        coincide for single-column predictions.
        """
        return 2 * (self.input - self.labels) / self.input.size

# Regression Problem

In [None]:
class NNReg:
    """Regression network: Linear(1, 64) -> Sigmoid -> Linear(64, 64) -> Sigmoid -> Linear(64, 1).

    The output of the last linear layer is returned as-is; ``aout`` is
    created but takes no part in ``forward`` or ``backward``.
    """

    def __init__(self):
        self.z1, self.a1 = Linear(1, 64), Sigmoid()
        self.z2, self.a2 = Linear(64, 64), Sigmoid()
        self.zout, self.aout = Linear(64, 1), Sigmoid()

    def forward(self, input):
        """Run ``input`` through the layers and return the raw output of ``zout``."""
        hidden1 = self.a1.forward(self.z1.forward(input))
        hidden2 = self.a2.forward(self.z2.forward(hidden1))
        return self.zout.forward(hidden2)

    def backward(self, grad):
        """Send ``grad`` back through the layers, last to first; returns nothing."""
        for layer in (self.zout, self.a2, self.z2, self.a1, self.z1):
            grad = layer.backward(grad)

In [None]:
# Segment 1: x in [10, 20], an oscillating curve plus Gaussian noise (std 0.1).
X1 = np.linspace(10, 20, 300)
y_true1 = -X1 * X1 ** np.sin(np.pi * X1) * 0.01
y1 = y_true1 + np.random.randn(len(y_true1)) * 0.1

plt.scatter(X1, y1, s=2)
plt.plot(X1, y_true1, c='orange')
plt.show()

# Segment 2: x in [-10, 10], a sine wave plus Gaussian noise (std 0.1).
X2 = np.linspace(-10, 10, 300)
y_true2 = np.sin(0.5 * np.pi * X2)
y2 = y_true2 + np.random.randn(len(y_true2)) * 0.1

# Join both segments (segment 2 first, so X is increasing up to the overlap
# at x = 10). np.concatenate is used instead of np.concat because the
# latter only exists in NumPy >= 2.0.
X = np.concatenate((X2, X1))
y_true = np.concatenate((y_true2, y_true1))
y = np.concatenate((y2, y1))

plt.scatter(X, y, s=2)
plt.plot(X, y_true, c='orange')
plt.show()

In [None]:
# Loss function and network for the regression task.
criterion = MSE()
model = NNReg()

# Standardise the inputs and stretch them by a factor of 2; the targets are
# used unchanged.
x_mean, x_std = X.mean(), X.std()
X_norm = (X - x_mean) / x_std * 2
y_norm = y

# Evaluation grid over [-15, 25], scaled with the same statistics as X.
x_new = (np.linspace(-15, 25, 200) - x_mean) / x_std * 2

# One prediction curve over x_new for every logged epoch.
history = []

epochs = 100000
for epoch in range(epochs):
    # Full-batch step: forward pass, loss, then backward pass through the model.
    o = model.forward(X_norm)
    loss = criterion.forward(o, y_norm)
    loss_grad = criterion.backward()
    model.backward(loss_grad)

    # Only every 2000th epoch is logged and snapshotted.
    if epoch % 2000 != 0:
        continue

    print(f"Epoch {epoch}: Loss {loss}")
    y_new = model.forward(x_new)
    history.append(y_new.copy())



In [None]:
%matplotlib widget
import matplotlib.pyplot as plt
from ipywidgets import IntSlider, Play, jslink, HBox, Output
from IPython.display import display

plt.close('all')
fig, ax = plt.subplots(figsize=(6, 4))

# Training data in normalised coordinates, with some vertical head-room.
ax.scatter(X_norm, y_norm, c='orange', s=3, label='Data')
ax.set_ylim(np.min(y_norm) - 1, np.max(y_norm) + 1)

ax.set_title("Training Step: 0")

# Empty line artist; update_view fills it with the selected snapshot.
line, = ax.plot([], [], '-', color='green', linewidth=2, label='Prediction')

def update_view(change):
    """Show snapshot ``change['new']`` of ``history`` on the prediction line."""
    i = change['new']

    line.set_data(x_new, history[i])
    # Snapshots were stored every 2000 epochs by the training loop.
    ax.set_title(f"Training Step: {i * 2000}")

    fig.canvas.draw_idle()

ax.legend(loc='upper left')
play = Play(min=0, max=len(history)-1, step=1, interval=50) # Faster interval works now!
slider = IntSlider(min=0, max=len(history)-1, step=1)
jslink((play, 'value'), (slider, 'value'))

slider.observe(update_view, names='value')

# The observer only fires when the value changes, so draw the first snapshot
# explicitly; otherwise the plot shows no prediction until the slider moves.
if history:
    update_view({'new': 0})

display(HBox([play, slider]))

# Bonus: Binary Classification Loss

This is a bonus section not included in the video tutorial. I used the Binary Cross-Entropy (BCE) loss function to build a classification network. Compared to MSE, the only differences are the values returned by the forward pass and the backward pass, which correspond to the BCE function and its derivative. I also changed the network's input dimension from 1 to 2 because we are using a 2-dimensional dataset, and added a sigmoid activation layer after $Z_{out}$.

In [None]:
class BCE:
    """Binary cross-entropy loss for predictions in ``[0, 1]``.

    Args:
        eps: Predictions are clipped to ``[eps, 1 - eps]`` before any
            logarithm or division, so saturated outputs (exactly 0 or 1)
            give large but finite values instead of ``inf``/``nan``.
    """

    def __init__(self, eps=1e-12):
        self.input = None   # predictions of the latest forward call
        self.labels = None  # targets of the latest forward call
        self.eps = eps

    def _clipped(self):
        """Return the cached predictions, kept strictly inside (0, 1)."""
        return np.clip(self.input, self.eps, 1 - self.eps)

    def forward(self, input, labels):
        """Return the mean of ``-y*log(p) - (1-y)*log(1-p)`` over all elements.

        1-D predictions or labels are reshaped into columns so that they line
        up element by element instead of broadcasting against each other.
        """
        self.input = input.reshape(-1, 1) if len(input.shape) == 1 else input
        self.labels = labels.reshape(-1, 1) if len(labels.shape) == 1 else labels
        p = self._clipped()
        return np.mean(- self.labels * np.log(p) - (1 - self.labels) * np.log(1 - p))

    def backward(self):
        """Return the element-wise derivative ``-y/p + (1-y)/(1-p)``.

        Note that this is NOT divided by the number of elements, i.e. it is
        the gradient of the summed loss rather than of the mean returned by
        ``forward``; the scaling is kept as in the original implementation.
        """
        p = self._clipped()
        return - self.labels / p + ((1 - self.labels) / (1 - p))

In [None]:
class NNClass:
    """Binary classifier: Linear(2, 64) -> Sigmoid -> Linear(64, 64) -> Sigmoid -> Linear(64, 1) -> Sigmoid."""

    def __init__(self):
        self.z1, self.a1 = Linear(2, 64), Sigmoid()
        self.z2, self.a2 = Linear(64, 64), Sigmoid()
        self.zout, self.aout = Linear(64, 1), Sigmoid()

    def forward(self, input):
        """Run ``input`` through all six layers and return the output of ``aout``."""
        hidden1 = self.a1.forward(self.z1.forward(input))
        hidden2 = self.a2.forward(self.z2.forward(hidden1))
        return self.aout.forward(self.zout.forward(hidden2))

    def backward(self, grad):
        """Send ``grad`` back through the layers, last to first; returns nothing."""
        for layer in (self.aout, self.zout, self.a2, self.z2, self.a1, self.z1):
            grad = layer.backward(grad)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Seed NumPy's global random generator so the np.random draws that follow
# (spiral noise, shuffling) are repeatable.
np.random.seed(42)

def generate_spiral(n_points, noise=0.2):
    """Generate a two-class, two-arm spiral dataset in 2-D.

    Each arm has a radius growing linearly from 0.2 to 5.0 over an angle
    range of ``3 * pi``; the second arm is rotated by ``pi``. Gaussian noise
    scaled by ``noise`` is added to the angle, drawn from NumPy's global
    random generator.

    Args:
        n_points: Total number of samples. Class 0 gets ``n_points // 2``
            samples and class 1 gets the rest, so an odd total leaves no
            unfilled row behind.
        noise: Standard deviation of the angular noise.

    Returns:
        Tuple ``(X, y)``: ``X`` has shape ``(n_points, 2)`` and ``y`` has
        shape ``(n_points,)`` with labels 0.0 and 1.0, grouped by class.
    """
    X = np.zeros((n_points, 2))
    y = np.zeros(n_points)

    half = n_points // 2
    # Row range occupied by each arm; the second one absorbs an odd remainder.
    arm_rows = ((0, half), (half, n_points))

    for label, (start, stop) in enumerate(arm_rows):
        count = stop - start
        r = np.linspace(0.2, 5.0, count)
        t = np.linspace(0, 3 * np.pi, count) + (label * np.pi) + (np.random.randn(count) * noise)

        X[start:stop] = np.c_[r * np.sin(t), r * np.cos(t)]
        y[start:stop] = label
    return X, y

# Build the spiral dataset; labels become a column vector.
X, y = generate_spiral(400, noise=0.25)
y = y.reshape(-1, 1)

# Shuffle samples and labels with one shared permutation.
indices = np.arange(400)
np.random.shuffle(indices)
X, y = X[indices], y[indices]

print(f"Shape of X: {X.shape}")
print(f"Shape of y: {y.shape}")

# Scatter plot with one colour per class.
plt.figure(figsize=(7, 6))
labels_flat = y.flatten()
for cls, colour, legend_text in ((0, 'blue', 'Class 0'), (1, 'red', 'Class 1')):
    pts = X[labels_flat == cls]
    plt.scatter(pts[:, 0], pts[:, 1], s=20, c=colour, label=legend_text)
plt.title("Complex Multi-Turn Spiral")
plt.legend()
plt.show()

In [None]:
# Loss function and network for the classification task.
criterion = BCE()
model = NNClass()

# Per-feature standardisation of the inputs.
x_mean, x_stdv = X.mean(axis=0), X.std(axis=0)
X_norm = (X - x_mean) / x_stdv

# Bounding box of the raw data, padded by 1 on every side.
(x0_min, x1_min), (x0_max, x1_max) = X.min(axis=0) - 1, X.max(axis=0) + 1

# 100 x 100 grid over the box, normalised with the same statistics as X.
xx, yy = np.meshgrid(np.linspace(x0_min, x0_max, 100), np.linspace(x1_min, x1_max, 100))
X_grid = np.column_stack((xx.ravel(), yy.ravel()))
X_plot = (X_grid - x_mean) / x_stdv

epochs = 10_000
history = []  # model output on the grid for every 10th epoch
print('started training')
for epoch in range(epochs):
    # Full-batch step: forward pass, loss, then backward pass through the model.
    z_out = model.forward(X_norm)
    y_pred = z_out >= 0.5
    loss = criterion.forward(z_out, y)
    grad_loss = criterion.backward()
    model.backward(grad_loss)

    # Only every 10th epoch is logged and snapshotted.
    if epoch % 10 != 0:
        continue

    print(f'Epoch {epoch}: Loss {loss}')
    z_plot = model.forward(X_plot)
    y_plot = z_plot >= 0.5
    history.append(z_plot)



In [None]:
%matplotlib widget
import matplotlib.pyplot as plt
from ipywidgets import IntSlider, Play, jslink, HBox
from IPython.display import display

plt.close('all')
fig, ax = plt.subplots(figsize=(6, 4))

# Grid coordinates (normalised) in the 100 x 100 layout expected by contourf.
grid_x = X_plot[:, 0].reshape(100, 100)
grid_y = X_plot[:, 1].reshape(100, 100)

# Has to match the snapshot interval of the training loop (`epoch % 10 == 0`),
# otherwise the step shown in the title is wrong.
SNAPSHOT_EVERY = 10

def update_view(change):
    """Redraw the data and the model output for snapshot ``change['new']``."""
    i = change['new']
    ax.clear()

    ax.scatter(X_norm[:, 0], X_norm[:, 1], c=y.flatten(), cmap='bwr', edgecolors='k')

    Z = history[i].reshape(100, 100)
    ax.contourf(grid_x, grid_y, Z, levels=20, cmap='bwr', alpha=0.4)

    ax.set_title(f"Training Step: {i * SNAPSHOT_EVERY}")
    fig.canvas.draw_idle()

play = Play(min=0, max=len(history)-1, step=1, interval=100)
slider = IntSlider(min=0, max=len(history)-1, step=1)
jslink((play, 'value'), (slider, 'value'))

slider.observe(update_view, names='value')

# The observer only fires on changes, so draw the first snapshot explicitly.
update_view({'new': 0})

display(HBox([play, slider]))