# Task 2.1: NumPy Broadcasting Lab - SOLUTIONS

**Module:** 2 - Python for AI/ML  

This notebook contains solutions to all exercises from the NumPy Broadcasting Lab.

---

In [None]:
import numpy as np
import time

# Notebook banner: the title line followed by a 50-character rule.
print("Solutions Notebook for NumPy Broadcasting Lab", "=" * 50, sep="\n")

---

## Exercise 1: Zero-Mean Normalization

**Task:** Normalize each row to have zero mean.

In [None]:
# SOLUTION - Exercise 1
# Fixed seed so the printed numbers are the same on every run.
np.random.seed(42)
data = np.random.randn(5, 4)

print("Original data:")
print(data)
means_before = np.mean(data, axis=1)
print(f"\nRow means before: {np.round(means_before, 4)}")

# keepdims=True leaves the reduced axis in place with length 1, so the
# (5, 1) column of means broadcasts across the 4 columns of the (5, 4) data.
row_means = np.mean(data, axis=1, keepdims=True)
normalized = np.subtract(data, row_means)

means_after = np.mean(normalized, axis=1)
print(f"\nRow means shape: {row_means.shape}")
print(f"Row means after: {np.round(means_after, 10)}")
print("\n✅ All row means are now ~0!")

### Alternative Solution (without keepdims)

In [None]:
# Alternative: restore the reduced axis by hand with reshape
# (indexing with np.newaxis gives the same column vector).
row_means_v2 = data.mean(axis=1).reshape(-1, 1)  # (5,) -> (5, 1)
normalized_v2 = np.subtract(data, row_means_v2)

same_result = np.allclose(normalized, normalized_v2)
print(f"Same result? {same_result}")

---

## Exercise 2: Cosine Similarity Matrix

**Task:** Compute cosine similarity between all pairs of embeddings.

In [None]:
# SOLUTION - Exercise 2
np.random.seed(42)
embeddings = np.random.randn(100, 64).astype(np.float32)

print(f"Embeddings shape: {embeddings.shape}")

# Step 1: Euclidean length of every row, kept as a column for broadcasting
norms = np.linalg.norm(embeddings, axis=1, keepdims=True)  # Shape: (100, 1)
print(f"Norms shape: {norms.shape}")

# Step 2: Scale each row to unit length; (100, 64) / (100, 1) broadcasts
normalized_embeddings = np.divide(embeddings, norms)

# Sanity check: the first few rows should now have norm 1
unit_norms = np.linalg.norm(normalized_embeddings, axis=1)
print(f"Normalized embedding norms: {np.round(unit_norms[:5], 4)}")

# Step 3: All pairwise dot products in a single matrix multiply.
# For unit vectors the dot product is exactly the cosine similarity.
similarity_matrix = np.matmul(normalized_embeddings, normalized_embeddings.T)

self_similarity = similarity_matrix.diagonal()
print(f"\nSimilarity matrix shape: {similarity_matrix.shape}")
print(f"Diagonal (self-similarity): {np.round(self_similarity[:5], 4)}")
print(f"All diagonal values are 1.0? {np.allclose(self_similarity, 1.0)}")

# A few off-diagonal entries: row 0 against rows 1-4
print(f"\nSample similarities [0, 1:5]: {np.round(similarity_matrix[0, 1:5], 4)}")

### Alternative: Compact Helper Function

In [None]:
# Compact helper: normalize the rows, then take all pairwise dot products
def cosine_similarity_matrix(X):
    """Compute pairwise cosine similarity between the rows of ``X``.

    Args:
        X: Array-like of shape (n_samples, n_features), one vector per row.

    Returns:
        Array of shape (n_samples, n_samples) whose entry [i, j] is the
        cosine similarity of rows i and j. An all-zero row has no direction,
        so its similarity to every row (itself included) is 0 instead of NaN.
    """
    X = np.asarray(X)
    norms = np.linalg.norm(X, axis=1, keepdims=True)  # (n_samples, 1)
    # Dividing an all-zero row by 1 leaves it all-zero, which avoids the 0/0
    # NaN (and RuntimeWarning) while leaving every non-zero row untouched.
    norms[norms == 0] = 1
    X_norm = X / norms
    return X_norm @ X_norm.T

# Cross-check the helper against the step-by-step similarity matrix.
sim_v2 = cosine_similarity_matrix(embeddings)
helper_agrees = np.allclose(similarity_matrix, sim_v2)
print(f"Same result? {helper_agrees}")

---

## Challenge: Mini Neural Network Forward Pass

**Task:** Implement the forward pass of a 2-layer network using only matrix multiplication and broadcasting (no explicit Python loops).

In [None]:
# SOLUTION - Challenge
# Seed NumPy's global RNG so the random inputs and weights drawn later in
# this cell are reproducible from run to run.
np.random.seed(42)

def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``.

    Negative entries become 0 and positive entries pass through unchanged;
    the result has the same shape as ``x``.
    """
    floor = 0
    rectified = np.maximum(floor, x)
    return rectified

def softmax(x, axis=-1):
    """Numerically stable softmax along ``axis``.

    Args:
        x: Array-like of scores (logits); lists are accepted as well as
            NumPy arrays.
        axis: Axis that is normalized to sum to 1. Defaults to the last axis.

    Returns:
        Array with the same shape as ``x`` whose entries along ``axis`` are
        non-negative and sum to 1.
    """
    x = np.asarray(x)
    # Subtracting the per-slice maximum does not change the result
    # mathematically, but it keeps np.exp from overflowing on large scores.
    shifted = x - x.max(axis=axis, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / exp_x.sum(axis=axis, keepdims=True)

def forward(x, w1, b1, w2, b2):
    """
    Run a two-layer network: linear -> ReLU -> linear -> softmax.

    Args:
        x: Input (batch_size, 784)
        w1: First layer weights (784, 256)
        b1: First layer bias (256,)
        w2: Second layer weights (256, 10)
        b2: Second layer bias (10,)

    Returns:
        Probability distribution over 10 classes (batch_size, 10)
    """
    # Hidden layer. The (256,) bias broadcasts across the batch axis:
    # (batch, 784) @ (784, 256) + (256,) -> (batch, 256)
    hidden_pre = np.matmul(x, w1) + b1
    hidden = relu(hidden_pre)

    # Output layer, same broadcasting pattern:
    # (batch, 256) @ (256, 10) + (10,) -> (batch, 10)
    logits = np.matmul(hidden, w2) + b2
    return softmax(logits)

# Test the network
# Random float32 inputs, small random weights, and zero biases.
batch_size = 32
x = np.random.randn(batch_size, 784).astype(np.float32)
w1 = 0.01 * np.random.randn(784, 256).astype(np.float32)
b1 = np.zeros((256,), dtype=np.float32)
w2 = 0.01 * np.random.randn(256, 10).astype(np.float32)
b2 = np.zeros((10,), dtype=np.float32)

# Forward pass
probs = forward(x, w1, b1, w2, b2)

row_totals = probs.sum(axis=1)
first_row = probs[0]

print(f"Input shape: {x.shape}")
print(f"Output shape: {probs.shape}")
print(f"\nProbabilities sum to 1? {np.allclose(row_totals, 1.0)}")
print(f"\nSample output (first batch item): {np.round(first_row, 4)}")
print(f"Predicted class: {first_row.argmax()}")

print("\n✅ Mini neural network forward pass complete!")

### Extended Solution: With Intermediate Values

In [None]:
def forward_verbose(x, w1, b1, w2, b2):
    """Run the two-layer forward pass, printing the shape of every intermediate.

    Takes the same arguments as the plain forward pass and returns the
    softmax probabilities.
    """
    print("Step-by-step forward pass:")
    print(f"  Input x:      {x.shape}")

    # Layer 1, split in two so the bias broadcast shows up as its own step
    pre_bias = np.matmul(x, w1)
    print(f"  x @ w1:       {pre_bias.shape}")

    z1 = pre_bias + b1  # Broadcasting: (batch, 256) + (256,)
    print(f"  + b1:         {z1.shape}")

    h = relu(z1)
    # Share of entries in h that are strictly positive after ReLU
    active_pct = (h > 0).sum() / h.size * 100
    print(f"  ReLU:         {h.shape}, active neurons: {active_pct:.1f}%")

    # Layer 2
    logits = np.matmul(h, w2) + b2
    print(f"  h @ w2 + b2:  {logits.shape}")

    probs = softmax(logits)
    print(f"  Softmax:      {probs.shape}")

    return probs

# Trace the forward pass on the challenge inputs and weights; the return
# value is bound to `_`, i.e. it is intentionally left unused here.
_ = forward_verbose(x, w1, b1, w2, b2)

---

## Key Takeaways

1. **Use `keepdims=True`** when you need to broadcast back to original shape
2. **Cosine similarity** becomes a simple matrix multiply after normalization
3. **Neural network layers** are just matrix multiplies + broadcasting for bias
4. **ReLU** works element-wise, while **softmax** normalizes along the last axis; both preserve the input shape

---

**End of Solutions**