# Lab Assignment: Building a Neural Network from Scratch

### Objective
This lab guides you through the implementation of a simple feedforward neural network from scratch. 
By completing this lab, you will:
- Initialize a neural network with weights and biases.
- Compute the weighted sum at each node.
- Apply activation functions for node outputs.
- Perform forward propagation to compute predictions.
- Implement backpropagation to compute gradients.
- Update weights using gradients to minimize the loss.

## Step 1: Initialize the Network

In [162]:
import numpy as np
np.random.seed(42) # For reproducibility
def initialize_network(input_size, hidden_layers, output_size):
    # Your code should contain the weights and biases
    # provide your code
    network = {}
    layer_sizes = [input_size] + hidden_layers + [output_size]  # Combine all layer sizes

    for i in range(len(layer_sizes) - 1):
        weights = np.random.randn(layer_sizes[i+1], layer_sizes[i]) * 0.01  # Xavier/Glorot initialization for sigmoid
        biases = np.zeros((layer_sizes[i+1], 1))

        network[f'weights{i+1}'] = weights # weights matrix for the layer
        network[f'biases{i+1}'] = biases # bias vector for the layer

    return network

 
    

# Initialize a network with 3 inputs, 2 hidden layers (4 and 3 nodes), and 1 output node
network = initialize_network(3, [4, 3], 1)
print("Cole Manchester + Initialized Network:", network)

Cole Manchester + Initialized Network: {'weights1': array([[ 0.00496714, -0.00138264,  0.00647689],
       [ 0.0152303 , -0.00234153, -0.00234137],
       [ 0.01579213,  0.00767435, -0.00469474],
       [ 0.0054256 , -0.00463418, -0.0046573 ]]), 'biases1': array([[0.],
       [0.],
       [0.],
       [0.]]), 'weights2': array([[ 0.00241962, -0.0191328 , -0.01724918, -0.00562288],
       [-0.01012831,  0.00314247, -0.00908024, -0.01412304],
       [ 0.01465649, -0.00225776,  0.00067528, -0.01424748]]), 'biases2': array([[0.],
       [0.],
       [0.]]), 'weights3': array([[-0.00544383,  0.00110923, -0.01150994]]), 'biases3': array([[0.]])}


## Step 2: Compute Weighted Sum

In [214]:
def compute_weighted_sum(inputs, weights, biases):
    # Please numpy dot to calcuate the compute weighted with linear 
    # provide your code
    if inputs.ndim == 1:  
        inputs = inputs.reshape(-1, 1)  # Ensure column vector
    
    elif inputs.shape[0] == 1:  
        inputs = inputs.T  # Convert row vector (1,3) → (3,1)
    
    return np.dot(weights, inputs) + biases 
    #return np.sum(inputs * weights) + biases
    #use numpy dot function
    
    
network = {
    'weights': np.random.randn(4, 3),  # 4 neurons, 3 input features
    'biases': np.zeros((4, 1))         # 4 neurons, 1 bias per neuron
}#test for the sum values

# Test weighted sum
inputs = np.array([[0.5, 0.2, 0.1]])
layer = network   #set as dictionary so no [0] needed  # First layer
Z = compute_weighted_sum(inputs, layer['weights'], layer['biases'])
print("Cole + Weighted Sum:", Z)

Cole + Weighted Sum: [[-1.00200758]
 [ 0.55444362]
 [-0.03492299]
 [-0.15251886]]


## Step 3: Compute Node Activation

In [217]:
def sigmoid(Z):
#     provide your code
    return 1/(1+np.exp(-Z))
 
def sigmoid_derivative(A):
    # provide your code
    return A*(1-A)

# Compute activation for the weighted sum
A = sigmoid(Z)
print("Cole + Activation:", A)

Cole + Activation: [[0.26854689]
 [0.63516593]
 [0.49127014]
 [0.46194403]]


## Step 4: Perform Forward Propagation

In [220]:
import numpy as np

def forward_propagation(inputs, network):
   # performs forward propagation through the network
    inputs = np.array(inputs)  # convert to numpy array if not already
    if inputs.shape[0] == 1:
        inputs = inputs.T  # convert to column vector if single example

    activations = [inputs]
    current_input = inputs
    layer_num = 1

    while f'weights{layer_num}' in network and f'biases{layer_num}' in network:
        print(f"--- Layer {layer_num} ---")  # indicate the layer number -- 

        weights = network[f'weights{layer_num}']
        biases = network[f'biases{layer_num}']

        print("Current Input Shape:", current_input.shape)
        print("Weights Shape:", weights.shape)
        print("Biases Shape:", biases.shape)

        z = compute_weighted_sum(current_input, weights, biases)
        print("Z (Weighted Sum) Shape:", z.shape)

        a = sigmoid(z)
        print("A (Activations) Shape:", a.shape)

        activations.append(a)
        current_input = a  # update for next layer
        layer_num += 1

    return activations

# Example input & network info
inputs = np.array([[0.5, 0.2, 0.1]]) # 1 input, 3 features
network = {
    'weights1': np.random.randn(4, 3),
    'biases1': np.zeros((4, 1)),
    'weights2': np.random.randn(2, 4),
    'biases2': np.zeros((2, 1))
}


# Perform forward propagation
activations = forward_propagation(inputs, network)
print(activations)
print("Cole + Final Output:", activations[-1])

--- Layer 1 ---
Current Input Shape: (3, 1)
Weights Shape: (4, 3)
Biases Shape: (4, 1)
Z (Weighted Sum) Shape: (4, 1)
A (Activations) Shape: (4, 1)
--- Layer 2 ---
Current Input Shape: (4, 1)
Weights Shape: (2, 4)
Biases Shape: (2, 1)
Z (Weighted Sum) Shape: (2, 1)
A (Activations) Shape: (2, 1)
[array([[0.5],
       [0.2],
       [0.1]]), array([[0.57716488],
       [0.38718866],
       [0.52699662],
       [0.30447332]]), array([[0.56506212],
       [0.39580708]])]
Cole + Final Output: [[0.56506212]
 [0.39580708]]


## Step 5: Backpropagation

In [223]:
def backpropagation(network, activations, y_true):
    learning_rate = 0.6
    gradients = {}  # Store gradients for weight and bias updates
    m = Y.shape[1]  # Number of training examples
    dA = activations[-1] - Y  # Output layer error

    for layer in reversed(range(1, len(activations))):
        A_prev = activations[layer - 1]
        W = network[f'weights{layer}']
        
        dZ = dA * sigmoid_derivative(activations[layer])  # apply derivative
        dW = np.dot(dZ, A_prev.T) / m  # weight gradient
        db = np.sum(dZ, axis=1, keepdims=True) / m  # bias gradient
        # store gradients
        gradients[f'dW{layer}'] = dW
        gradients[f'db{layer}'] = db
        # update weights and biases
        network[f'weights{layer}'] -= learning_rate * dW
        network[f'biases{layer}'] -= learning_rate * db

        # cmpute error for previous layer
        dA = np.dot(W.T, dZ)  # Backpropagate error

    return network

# Example setup
Y = np.array([[1], [0]])  # True labels (2 neurons output)
network = {
    'weights1': np.random.randn(4, 3),
    'biases1': np.zeros((4, 1)),
    'weights2': np.random.randn(2, 4),
    'biases2': np.zeros((2, 1))
}

# Perform forward propagation
inputs = np.array([[0.5, 0.2, 0.1]])
# Compute gradients
y_true = np.array([[1]])  # Example target output
gradients = backpropagation(network, activations, y_true)
print("Cole + Gradients:", gradients)

Cole + Gradients: {'weights1': array([[-0.03304352,  0.05761835,  2.46193618],
       [-0.18822995,  0.30319975, -0.03388557],
       [-1.1471225 ,  1.15144503,  0.75624414],
       [ 0.7938667 , -0.90825355,  1.40336126]]), 'biases1': array([[-0.0130593 ],
       [ 0.00826203],
       [ 0.04311107],
       [ 0.00566951]]), 'weights2': array([[-1.36483401,  0.61168983,  2.22425508, -0.97100862],
       [-0.59907659,  0.0776618 , -0.53340532, -1.56795535]]), 'biases2': array([[ 0.06413601],
       [-0.05679289]])}


## Step 6: Update Weights

In [226]:
def update_weights(network, gradients, learning_rate):
    # Hints: weights -= learning_rate * 'dW'
    # Hints: biases -= learning_rate * 'db'
    # Provide your code 
    for layer in range(1, len(network) + 1):  # iterate through layers
        network[f'layer{layer}']['weights'] -= learning_rate * gradients[f'dW{layer}']
        network[f'layer{layer}']['biases'] -= learning_rate * gradients[f'db{layer}']

# test Network
network = {
    'layer1': {'weights': np.array([[0.1, 0.2], [0.3, 0.4]]), 'biases': np.array([[0.5], [0.6]])},
    'layer2': {'weights': np.array([[0.7, 0.8], [0.9, 1.0]]), 'biases': np.array([[1.1], [1.2]])}
}

gradients = {
    'dW1': np.array([[0.01, 0.02], [0.03, 0.04]]),
    'db1': np.array([[0.05], [0.06]]),
    'dW2': np.array([[0.07, 0.08], [0.09, 0.10]]),
    'db2': np.array([[0.11], [0.12]])
}

learning_rate = 0.6
update_weights(network, gradients, learning_rate)
print("Updated Network:", network)



# Update weights with a learning rate of 0.1
update_weights(network, gradients, learning_rate=0.1)
print("Cole + Updated Network:", network)

Updated Network: {'layer1': {'weights': array([[0.094, 0.188],
       [0.282, 0.376]]), 'biases': array([[0.47 ],
       [0.564]])}, 'layer2': {'weights': array([[0.658, 0.752],
       [0.846, 0.94 ]]), 'biases': array([[1.034],
       [1.128]])}}
Cole + Updated Network: {'layer1': {'weights': array([[0.093, 0.186],
       [0.279, 0.372]]), 'biases': array([[0.465],
       [0.558]])}, 'layer2': {'weights': array([[0.651, 0.744],
       [0.837, 0.93 ]]), 'biases': array([[1.023],
       [1.116]])}}


## Step 7: Visualizing Loss Changes

In [229]:
# Use MSE to compute the loss 
import numpy as np
def compute_loss(y_true, y_pred):
    # provide your code
    y_true = np.array(y_true).reshape(-1)
    y_pred = np.array(y_pred).reshape(-1)
    #back to numpy
    return np.mean((y_true - y_pred) ** 2)  # Mean Squared Error (MSE)



# a good test case:
y_true = [1, 2, 3, 4, 5]
y_pred = [1.1, 1.8, 3.2, 3.9, 5.2]

loss = compute_loss(y_true, y_pred)
print(f"MSE Loss: {loss}")


y_true = np.array([1, 2, 3, 4, 5])
y_pred = np.array([1.1, 1.8, 3.2, 3.9, 5.2])

loss = compute_loss(y_true, y_pred)
print(f"MSE Loss: {loss}")

MSE Loss: 0.028000000000000032
MSE Loss: 0.028000000000000032


In [231]:
import matplotlib.pyplot as plt
import numpy as np
# Training Loop
losses = []
inputs = np.array([[0.5, 0.2, 0.1]])
y_true = np.array([[1]])
learning_rate = 0.1

for iteration in range(100):
    # provide your code
    # Hints: forward_propagation function with inputs network
    #        compute_loss for y_true and activations[-1]
    #        add loss to losses
     

    # gradients = backpropagation function
    # update_weights
    activations = forward_propagation(inputs,network)
    y_pred = activations[-1]
    loss = compute_loss(y_true, y_pred)
    #prediction is the final output 
    losses.append(loss)
    gradients = backpropagation(activations, y_true, network)
    #should give us a gradient!
    update_weights(network, gradients, learning_rate)
    
# Plot Loss and rerun all cells
plt.plot(losses)
plt.title("Cole + Loss Before and After Weight Updates")
plt.xlabel("Iterations")
plt.ylabel("Loss")
plt.show()


TypeError: list indices must be integers or slices, not str

### Step 8: Visualizing Gradients Changes (Graduate students)

Please pick a weight and plot the gradient change

You need to point which weight you pick and label it on your graph.

In [235]:
# Your code