**CSE473s Computational Intelligence: Build Your Own Neural Network Library**


First, we need to ensure our Python environment can find and import all the modules we built in the lib/ directory.

We import all core components: Layers, Activations, Loss, Optimizer, and the orchestrating Network class.


In [13]:
import numpy as np
import sys
import os


# --- 1. Robust Function to Find Project Root ---
def find_project_root():
    """
    Locate the project root: the nearest directory, starting at the current
    working directory and walking upward, that contains a 'lib' sub-directory.

    Returns:
        str | None: Absolute path of the first matching directory, or None
        when the filesystem root is reached without finding one.
    """
    current_path = os.path.abspath(os.getcwd())

    while True:
        # Require a real directory: a plain file that merely happens to be
        # named 'lib' cannot hold the package. os.path.isdir() also answers
        # False instead of raising when an ancestor directory is unreadable,
        # so the upward walk keeps going.
        if os.path.isdir(os.path.join(current_path, 'lib')):
            return current_path

        parent_path = os.path.dirname(current_path)

        # dirname() of a filesystem root is the root itself, so an unchanged
        # path means there is nowhere left to climb.
        if parent_path == current_path:
            return None

        current_path = parent_path

# --- 2. Execute Path Setup and Imports ---
print("Attempting to configure path and import library components...")
project_root = find_project_root()

if project_root is not None:
    # Register the directory that holds 'lib' exactly once so that the
    # 'lib.*' imports below can be resolved.
    if project_root not in sys.path:
        sys.path.append(project_root)
        print(f"Project root added to path: {project_root}")

    # The custom components can only be imported after the path is in place.
    try:
        from lib.Network import Network
        from lib.Layers import Dense, Layer
        from lib.Activation import Tanh, Sigmoid
        from lib.Loss import MeanSquaredError as MSE
        from lib.Optimizer import SGD
    except ImportError as import_error:
        # Report the failure instead of raising; later cells will fail on the
        # missing names if this branch is taken.
        print(f"CRITICAL IMPORT ERROR: Could not import library components. Error: {import_error}")
        print("Suggestion: If this fails, double-check the capitalization of files inside your 'lib' folder (e.g., Activation.py vs. activations.py).")
    else:
        print("All library components imported successfully!")
else:
    print("FATAL ERROR: Could not find the 'lib' directory anywhere in the parent paths.")
    print("Please ensure your folder structure is correct: project_root/lib/...")

# Print arrays with six decimals and without scientific notation.
np.set_printoptions(precision=6, suppress=True)

Attempting to configure path and import library components...
All library components imported successfully!


**Section 1: Gradient Checking (Proving Backpropagation)**  

The goal of Gradient Checking (or numerical gradient verification) is to prove that the complex mathematical derivative calculated by your layer.backward() method is numerically correct. We compare the Analytical Gradient (your backward method) against the Numerical Gradient (calculated using the finite difference approximation). If they are nearly identical, your backpropagation is correct.

1. Finite Difference Approximation Function: The numerical gradient is approximated with the central difference formula: $$\frac{\partial L}{\partial w} \approx \frac{L(w + \epsilon) - L(w - \epsilon)}{2\epsilon}$$ where $\epsilon$ is a small number (e.g., $10^{-7}$).


In [14]:
def _central_difference(param, evaluate_loss, epsilon):
    """
    Estimate dLoss/dparam entry by entry with the central difference formula.

    Args:
        param (np.ndarray): Parameter array. Each entry is perturbed in place
            and restored before the next entry is visited.
        evaluate_loss (callable): Zero-argument function returning the loss
            for the parameters as they currently stand.
        epsilon (float): The small perturbation value.

    Returns:
        np.ndarray: Array shaped like `param` holding the estimated gradient.
    """
    grad = np.zeros_like(param)

    # np.ndindex yields every index tuple of the array (and nothing for an
    # empty array), so no read/write iterator over the parameter is needed.
    for idx in np.ndindex(param.shape):
        original_value = param[idx]
        try:
            # L(w + epsilon)
            param[idx] = original_value + epsilon
            loss_plus = evaluate_loss()

            # L(w - epsilon), measured from the original value
            param[idx] = original_value - epsilon
            loss_minus = evaluate_loss()
        finally:
            # Restore the entry even if a forward/loss call raised, so the
            # caller's model is never left with a perturbed parameter.
            param[idx] = original_value

        grad[idx] = (loss_plus - loss_minus) / (2 * epsilon)

    return grad


def numerical_gradient(model, X, Y_true, epsilon=1e-7):
    """
    Calculates the numerical gradient for all trainable parameters in the network.

    A layer is treated as trainable when it has a 'W' attribute; its 'b'
    attribute is checked alongside it. The unperturbed loss is printed once
    before the sweep starts.

    NOTE(review): every evaluation calls model.forward() and
    model.loss_fn.loss(); any state those calls cache on the model is left
    reflecting the last evaluation -- confirm before reusing cached values.

    Args:
        model (Network): The network object being checked. Must expose
            `layers`, `forward(X)` and `loss_fn.loss(Y_true, Y_pred)`.
        X (np.ndarray): Input data.
        Y_true (np.ndarray): Target data.
        epsilon (float): The small perturbation value.

    Returns:
        dict: Maps the index of each trainable layer in `model.layers` to
        {'W_num': ..., 'b_num': ...}, the numerical gradients of its W and b.
    """
    def current_loss():
        return model.loss_fn.loss(Y_true, model.forward(X))

    # Loss at the unperturbed parameters, reported for reference only.
    base_loss = current_loss()
    print(f"Base Loss (L(w)): {base_loss:.8f}")

    numerical_grads = {}
    for i, layer in enumerate(model.layers):
        if not hasattr(layer, 'W'):
            continue  # activation layers and other parameter-free layers
        numerical_grads[i] = {
            'W_num': _central_difference(layer.W, current_loss, epsilon),
            'b_num': _central_difference(layer.b, current_loss, epsilon),
        }

    return numerical_grads


2. Running the Check: We will use a minimal network and the XOR data to perform the check.

In [15]:
# 1. Gradient-check inputs: a single sample keeps things simple (batches also work)
X_check = np.array([[1.0, 0.0]])
Y_check = np.array([[1.0]])

# 2. Minimal network: 2 inputs -> Dense(3) -> Tanh -> Dense(1) -> Sigmoid
np.random.seed(42)
check_model = Network()
check_model.add(Dense(2, 3, seed=None))
check_model.add(Tanh())
check_model.add(Dense(3, 1, seed=None))
check_model.add(Sigmoid())

check_model.compile(MSE(), SGD(learning_rate=0.01))

# 3. Analytical gradients: one forward/backward pass fills dW/db on each Dense layer
Y_pred_check = check_model.forward(X_check)
loss_value = check_model.loss_fn.loss(Y_check, Y_pred_check)
# gradient() is called without arguments; presumably the loss() call above
# cached what it needs -- confirm in lib/Loss.
dLoss_dY = check_model.loss_fn.gradient()
check_model.backward(dLoss_dY)

analytical_grads = {}
for i, layer in enumerate(check_model.layers):
    if not hasattr(layer, 'W'):
        continue
    analytical_grads[i] = {'W_ana': layer.dW, 'b_ana': layer.db}

# 4. Numerical gradients from the finite-difference sweep
numerical_grads = numerical_gradient(check_model, X_check, Y_check)


# 5. Compare both estimates for the first Dense layer (index 0 in the network)
def _relative_difference(analytical, numerical):
    """Return ||a - n|| / (||a|| + ||n||) for two gradient arrays."""
    gap = np.linalg.norm(analytical - numerical)
    return gap / (np.linalg.norm(analytical) + np.linalg.norm(numerical))


print("\n--- Gradient Check Results (Dense Layer 1: W) ---")
W1_ana = analytical_grads[0]['W_ana']
W1_num = numerical_grads[0]['W_num']
difference_W1 = _relative_difference(W1_ana, W1_num)

print("Analytical dW1:\n", W1_ana)
print("\nNumerical dW1:\n", W1_num)
print(f"\nRelative Difference (W1): {difference_W1:.10f}")


print("\n--- Gradient Check Results (Dense Layer 1: b) ---")
b1_ana = analytical_grads[0]['b_ana']
b1_num = numerical_grads[0]['b_num']
difference_b1 = _relative_difference(b1_ana, b1_num)

print("Analytical db1:\n", b1_ana)
print("\nNumerical db1:\n", b1_num)
print(f"\nRelative Difference (b1): {difference_b1:.10f}")

# Success criterion: both relative differences must stay below 1e-7
is_success = difference_W1 < 1e-7 and difference_b1 < 1e-7
print("\n--- VERIFICATION ---")
print(f"Gradient Check Successful: {is_success} (Target diff < 1e-7)")

# Only the first Dense layer is compared here; the second Dense layer
# (index 2) should be checked the same way in the final notebook.

Network compiled successfully.
Base Loss (L(w)): 0.03190939

--- Gradient Check Results (Dense Layer 1: W) ---
Analytical dW1:
 [[ 0.047918 -0.018312 -0.009219]
 [ 0.        0.        0.      ]]

Numerical dW1:
 [[ 0.047918 -0.018312 -0.009219]
 [ 0.        0.        0.      ]]

Relative Difference (W1): 0.0000000021

--- Gradient Check Results (Dense Layer 1: b) ---
Analytical db1:
 [[ 0.047918 -0.018312 -0.009219]]

Numerical db1:
 [[ 0.047918 -0.018312 -0.009219]]

Relative Difference (b1): 0.0000000021

--- VERIFICATION ---
Gradient Check Successful: True (Target diff < 1e-7)


3. Analysis and Conclusion: The relative difference between the analytical gradient calculated by backward() and the numerical gradient calculated by the approximation function is extremely small (about $2 \times 10^{-9}$ in the run above, well below the $10^{-7}$ target). This proves that the implementation of backpropagation in the Dense and activation layers is mathematically correct.

**Section 2: The XOR Problem (Training and Results)**

This section demonstrates the core functionality of your library by solving the classic non-linear XOR problem.
1. Data and Model Definition
We define the standard XOR dataset and build the $2 \to 4 \to 1$ network architecture.
2. Training Execution
We run the training loop and observe the loss decrease.

In [19]:
# 1. XOR data set: inputs encoded as -1/+1, targets as 0/1
X_data = np.array([
    [-1, -1],
    [-1, 1],
    [1, -1],
    [1, 1],
])
Y_true = np.array([
    [0],
    [1],
    [1],
    [0],
])

# 2. Hyperparameters and model
EPOCHS = 10000
LEARNING_RATE = 0.1

XOR_Model = Network()

# Layer stack: 2 inputs -> Dense(4) -> Tanh -> Dense(1) -> Sigmoid -> 1 output
XOR_Model.add(Dense(2, 4, seed=None))
XOR_Model.add(Tanh())
XOR_Model.add(Dense(4, 1, seed=None))
XOR_Model.add(Sigmoid())

# 3. Attach the loss function and the optimizer
opt = SGD(learning_rate=LEARNING_RATE)
XOR_Model.compile(MSE(), opt)

print("XOR Model compiled successfully.")
print(f"Training for {EPOCHS} iterations with Learning Rate: {LEARNING_RATE}")

# 4. Train on the four XOR samples
XOR_Model.train(X_data, Y_true, EPOCHS)

print("\n--- Training Finished ---")

Network compiled successfully.
XOR Model compiled successfully.
Training for 10000 iterations with Learning Rate: 0.1
iteration 1000/10000, Loss: 0.01897486
iteration 2000/10000, Loss: 0.00364021
iteration 3000/10000, Loss: 0.00180485
iteration 4000/10000, Loss: 0.00116567
iteration 5000/10000, Loss: 0.00085265
iteration 6000/10000, Loss: 0.00066884
iteration 7000/10000, Loss: 0.00054688
iteration 8000/10000, Loss: 0.00046273
iteration 9000/10000, Loss: 0.00039979
iteration 10000/10000, Loss: 0.00035147
iteration 10000/10000, Final Loss: 0.00035147

--- Training Finished ---


3. Final Predictions and Evaluation
We run a final forward pass on the training data to confirm the network successfully learned the XOR logic.

In [20]:
# Forward pass over the training inputs, then round each output to 0 or 1
predictions = XOR_Model.forward(X_data)
rounded_predictions = np.round(predictions)

# Percentage of rounded outputs that equal the targets
accuracy = np.mean(rounded_predictions == Y_true) * 100

# Additional inputs that are not all part of the training set
newData = np.array([[5, 5], [-11, 6], [1, 1], [3, -3], [-1, -1]])
prediction_n = XOR_Model.forward(newData)
rounded_predictions_n = np.round(prediction_n)

divider = "-" * 50

# Table 1: training samples
print("\n--- Final Predictions ---")
print("Input (X) | True Label (Y) | Prediction (Y_pred) | Rounded")
print(divider)
for x, y_true, y_pred, y_round in zip(X_data, Y_true, predictions, rounded_predictions):
    # One row per sample: input, true label, raw prediction, rounded prediction
    print(f"  {x}    |    {y_true[0]}       |    {y_pred[0]:.4f}       |   {int(y_round[0])}")
print("\n--- Overall Metrics ---")
print(f"Final Accuracy: {accuracy:.2f}%\n")

# Table 2: new inputs (no true labels are available for these)
print("\n--- New Data ---")
print("Input (X) |Prediction (Y_pred) |Rounded")
print(divider)
for x_n, y_pred_n, y_round_n in zip(newData, prediction_n, rounded_predictions_n):
    # One row per sample: input, raw prediction, rounded prediction
    print(f"  {x_n}     |    {y_pred_n[0]:.4f}    |   {int(y_round_n[0])}")

# Final verdict, based solely on the training-set accuracy
print(
    "Verification: The network achieved 100% accuracy on the XOR problem, confirming all components are working correctly."
    if accuracy == 100.0
    else "Verification: Training did not reach 100%. Check hyper-parameters."
)


--- Final Predictions ---
Input (X) | True Label (Y) | Prediction (Y_pred) | Rounded
--------------------------------------------------
  [-1 -1]    |    0       |    0.0271       |   0
  [-1  1]    |    1       |    0.9712       |   1
  [ 1 -1]    |    1       |    0.9751       |   1
  [1 1]    |    0       |    0.0252       |   0

--- Overall Metrics ---
Final Accuracy: 100.00%


--- New Data ---
Input (X) |Prediction (Y_pred) |Rounded
--------------------------------------------------
  [5 5]     |    0.0254    |   0
  [-11   6]     |    0.6199    |   1
  [1 1]     |    0.0252    |   0
  [ 3 -3]     |    0.9612    |   1
  [-1 -1]     |    0.0271    |   0
Verification: The network achieved 100% accuracy on the XOR problem, confirming all components are working correctly.
