In [1]:
import numpy as np
%load_ext nb_js_diagrammers

---

In [2]:
%%mermaid_magic -h 850
graph LR
    %% Input Layer
    subgraph IL[Input Layer]
        I1[x₁]
        I2[x₂]
    end

    %% Hidden Layer with corrected numbering (top to bottom)
    subgraph HL[Hidden Layer]
        direction TB
        H4[h₄<br>ReLU]
        H3[h₃<br>ReLU]
        H2[h₂<br>ReLU]
        H1[h₁<br>ReLU]
    end

    %% Output Layer with corrected numbering (top to bottom)
    subgraph OL[Output Layer]
        direction TB
        O2[y₂<br>Softmax]
        O1[y₁<br>Softmax]
    end

    %% Connections from Input to Hidden Layer
    I1 --> H1
    I1 --> H2
    I1 --> H3
    I1 --> H4
    I2 --> H1
    I2 --> H2
    I2 --> H3
    I2 --> H4

    %% Connections from Hidden to Output Layer
    H1 --> O1
    H1 --> O2
    H2 --> O1
    H2 --> O2
    H3 --> O1
    H3 --> O2
    H4 --> O1
    H4 --> O2

    %% Annotations
    classDef inputClass fill:#f9f,stroke:#333,stroke-width:2px
    classDef hiddenClass fill:#bbf,stroke:#333,stroke-width:2px
    classDef outputClass fill:#bfb,stroke:#333,stroke-width:2px
    
    class I1,I2 inputClass
    class H1,H2,H3,H4 hiddenClass
    class O1,O2 outputClass

    %% Add annotations for more details
    annInput[Input Features<br>2 nodes]
    annHidden[Hidden Layer<br>4 nodes<br>ReLU Activation]
    annOutput[Output Layer<br>2 nodes<br>Softmax Activation]

    annInput -.-> IL
    annHidden -.-> HL
    annOutput -.-> OL

---

In [3]:
# Set random seed for reproducibility
np.random.seed(42)

def generate_sample_data(n_samples=6):
    """
    Generate random sample data with 2 features
    
    Args:
        n_samples: Number of samples to generate
        
    Returns:
        X: Input features array of shape (n_samples, 2)
    """
    X = np.random.randint(1, 11, size=(n_samples, 2))
    return X

def initialize_weights(input_size=2, hidden_size=4, output_size=2):
    """
    Initialize weights and biases for the neural network
    
    Args:
        input_size: Number of input features
        hidden_size: Number of nodes in hidden layer
        output_size: Number of output nodes
        
    Returns:
        Dictionary containing weights and biases
    """
    params = {
        # Input to hidden layer weights
        'W1': np.random.randn(input_size, hidden_size) * 0.01,
        
        # Hidden layer bias
        'b1': np.zeros((1, hidden_size)),
        # Hidden to output layer weights
        'W2': np.random.randn(hidden_size, output_size) * 0.01,
        # Output layer bias
        'b2': np.zeros((1, output_size))
    }
    return params


def relu(z):
    """
    ReLU activation function
    
    Args:
        z: Input to the activation function
        
    Returns:
        ReLU of input: max(0, z)
    """
    return np.maximum(0, z)


def softmax(z):
    """
    Softmax activation function
    
    Args:
        z: Input to the activation function, shape (batch_size, n_classes)
        
    Returns:
        Softmax probabilities with same shape as input
    """
    # Subtract max for numerical stability (prevents overflow)
    exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)


def forward_propagation(X, params):
    """
    Perform forward propagation through the network
    
    Args:
        X: Input features array
        params: Dictionary containing weights and biases
        
    Returns:
        Dictionary containing activations and intermediate values
    """
    # Extract weights and biases
    W1, b1 = params['W1'], params['b1']
    W2, b2 = params['W2'], params['b2']
    
    # Hidden layer calculations
    Z1 = np.dot(X, W1) + b1           # Linear transformation
    A1 = relu(Z1)                     # ReLU Activation
    
    # Output layer calculations
    Z2 = np.dot(A1, W2) + b2  # Linear transformation
    A2 = softmax(Z2)        # Softmax activation
    
    cache = {
        'Z1': Z1, 'A1': A1,
        'Z2': Z2, 'A2': A2
    }
    
    return cache


def print_step_by_step(X, params, cache):
    """
    Print detailed step-by-step calculations for feed forward neural network
    
    Args:
        X: Input features
        params: Network parameters
        cache: Activation values from forward propagation
    """
    print("\nStep-by-Step Calculations:")
    print("-" * 50)
    
    # Input layer
    print("\nInput Layer:")
    print(f"Input features (X):\n\n{X}")
    
    # Hidden layer calculations
    print("\nHidden Layer Calculations:")
    print(f"Weights (W1):\n\n{params['W1']}")
    print(f"Biases (b1):\n\n{params['b1']}")
    
    print(f"Weights (W1) shape: {params['W1'].shape}")
    print(f"Biases (b1) shape: {params['b1'].shape}")
    print()
    
    print(f"Linear transformation (Z1 = X·W1 + b1):\n\n{cache['Z1']}")
    
    print()
    print(f"Activation (A1 = relu(Z1)):\n\n{cache['A1']}")
    print()
    
    # Output layer calculations
    print("\nOutput Layer Calculations:")
    print(f"Weights (W2):\n\n{params['W2']}")
    print(f"Biases (b2):\n\n{params['b2']}")
    print()
    
    print(f"Linear transformation (Z2 = A1·W2 + b2):\n\n{cache['Z2']}")
    
    print()
    print(f"Final Output (A2 = softmax(Z2)):\n\n{cache['A2']}")

In [17]:
def generate_target_data(n_samples=6, n_classes=2):
    """
    Generate target variables for classification
    
    Args:
        n_samples: Number of samples
        n_classes: Number of classes (output nodes)
        
    Returns:
        y: One-hot encoded target variables
    """
    # Generate random class labels (0 to n_classes-1)
    y_labels = np.random.randint(0, n_classes, size=n_samples)
    
    # Convert to one-hot encoding
    y = np.zeros((n_samples, n_classes))
    y[np.arange(n_samples), y_labels] = 1
    return y, y_labels

def predict(X, params):
    """
    Make predictions using the trained network
    
    Args:
        X: Input features
        params: Network parameters
        
    Returns:
        predictions: Predicted class labels
        probabilities: Class probabilities
    """
    # Forward pass
    cache = forward_propagation(X, params)
    
    # Get probabilities from output layer
    probabilities = cache['A2']
    
    # Get predicted class (argmax of probabilities)
    predictions = np.argmax(probabilities, axis=1)
    
    return predictions, probabilities


def evaluate_predictions(predictions, y_true, probabilities):
    """
    Print evaluation metrics for the predictions
    
    Args:
        predictions: Predicted class labels
        y_true: True class labels
        probabilities: Predicted probabilities
    """
    print("\nPrediction Results:")
    print("-" * 50)
    print("\nPredicted Probabilities:")
    print(probabilities)
    print("\nPredicted Classes:", predictions)
    print("True Classes:", y_true)
    
    # Calculate accuracy
    accuracy = np.mean(predictions == y_true)
    print(f"\nAccuracy: {accuracy:.2%}")

In [11]:
# Generate sample data
# np.random.seed(1010)
X = generate_sample_data()

# Initialize network parameters
params = initialize_weights()

In [15]:
# Perform forward propagation
cache = forward_propagation(X, params)

In [16]:
# Print detailed calculations
print_step_by_step(X, params, cache)


Step-by-Step Calculations:
--------------------------------------------------

Input Layer:
Input features (X):

[[10  2]
 [ 8  2]
 [ 6  1]
 [ 2  1]
 [10  2]
 [ 4  9]]

Hidden Layer Calculations:
Weights (W1):

[[ 0.00248006  0.00298038  0.00283817 -0.00471223]
 [ 0.00952028 -0.00638603 -0.01260901 -0.00558495]]
Biases (b1):

[[0. 0. 0. 0.]]
Weights (W1) shape: (2, 4)
Biases (b1) shape: (1, 4)

Linear transformation (Z1 = X·W1 + b1):

[[ 0.04384115  0.01703171  0.00316365 -0.05829219]
 [ 0.03888103  0.01107096 -0.00251268 -0.04886774]
 [ 0.02440063  0.01149623  0.00441999 -0.03385833]
 [ 0.0144804  -0.00042528 -0.00693267 -0.01500941]
 [ 0.04384115  0.01703171  0.00316365 -0.05829219]
 [ 0.09560276 -0.04555278 -0.1021284  -0.06911348]]

Activation (A1 = relu(Z1)):

[[0.04384115 0.01703171 0.00316365 0.        ]
 [0.03888103 0.01107096 0.         0.        ]
 [0.02440063 0.01149623 0.00441999 0.        ]
 [0.0144804  0.         0.         0.        ]
 [0.04384115 0.01703171 0.00316365 

In [18]:
# Generate target variables
y, y_true = generate_target_data()

# Make predictions
predictions, probabilities = predict(X, params)

# Evaluate predictions
evaluate_predictions(predictions, y_true, probabilities)


Prediction Results:
--------------------------------------------------

Predicted Probabilities:
[[0.49996604 0.50003396]
 [0.49999097 0.50000903]
 [0.49996622 0.50003378]
 [0.50000828 0.49999172]
 [0.49996604 0.50003396]
 [0.50005466 0.49994534]]

Predicted Classes: [1 1 1 0 1 0]
True Classes: [1 0 1 1 0 1]

Accuracy: 33.33%
