# Task 5.4 Solutions: Testing Suite

This notebook contains solutions to the exercises from notebook 04.

---

In [None]:
import numpy as np
import sys
sys.path.insert(0, '..')

from micrograd_plus import Tensor, Linear, ReLU, Sequential
from micrograd_plus.utils import numerical_gradient

## Exercise 1 Solution: Enhanced Gradient Checker

Implement a comprehensive gradient checker with detailed error reporting.

In [None]:
def enhanced_gradient_check(f, x, eps=1e-5, rtol=1e-3, atol=1e-5, verbose=True):
    """
    Compare the autograd gradient of ``f`` at ``x`` with a finite-difference estimate.

    Args:
        f: Callable taking a Tensor and returning a scalar Tensor
        x: Input tensor, expected to have requires_grad=True; its gradient
           is zeroed and then overwritten by this call
        eps: Perturbation forwarded to numerical_gradient
        rtol: Relative tolerance used by the np.allclose comparison
        atol: Absolute tolerance used by the np.allclose comparison
        verbose: When True, print a summary of the comparison

    Returns:
        dict with keys 'passed', 'max_abs_error', 'max_rel_error',
        'mean_abs_error', 'mean_rel_error', 'worst_index',
        'analytical' and 'numerical'
    """
    # Autograd side: drop any stale gradient, then backpropagate through f(x)
    x.zero_grad()
    f(x).backward()
    grad_auto = x.grad.copy()

    # Finite-difference side: adapt f to map a raw array to a Python scalar
    def scalar_of(values):
        out = f(Tensor(values))
        return out.data.item()

    grad_fd = numerical_gradient(scalar_of, x.data.copy(), eps)

    # Element-wise discrepancies; the 1e-8 keeps the ratio finite when the
    # numerical gradient is exactly zero
    diff = np.abs(grad_auto - grad_fd)
    ratio = diff / (np.abs(grad_fd) + 1e-8)

    ok = np.allclose(grad_auto, grad_fd, rtol=rtol, atol=atol)

    # Location of the largest absolute discrepancy, as a multi-dimensional index
    worst = np.unravel_index(np.argmax(diff), diff.shape)

    result = {
        'passed': ok,
        'max_abs_error': np.max(diff),
        'max_rel_error': np.max(ratio),
        'mean_abs_error': np.mean(diff),
        'mean_rel_error': np.mean(ratio),
        'worst_index': worst,
        'analytical': grad_auto,
        'numerical': grad_fd
    }

    if not verbose:
        return result

    if ok:
        status = "PASSED"
    else:
        status = "FAILED"
    print(f"Gradient Check: {status}")
    print(f"  Max Absolute Error: {result['max_abs_error']:.2e}")
    print(f"  Max Relative Error: {result['max_rel_error']:.2e}")
    print(f"  Mean Absolute Error: {result['mean_abs_error']:.2e}")
    print(f"  Mean Relative Error: {result['mean_rel_error']:.2e}")
    if not ok:
        # Only a failing check shows the offending element
        print(f"  Worst element at index: {worst}")
        print(f"    Analytical: {grad_auto[worst]:.6e}")
        print(f"    Numerical:  {grad_fd[worst]:.6e}")

    return result

In [None]:
# Demo: run the enhanced gradient checker on two example functions
print("Testing Enhanced Gradient Checker")
print("=" * 50)

# Each case is (heading, input values, function under test).
# Case 1 is a simple function (should pass); case 2 is a more complex one.
demo_cases = [
    ("\nTest 1: f(x) = x^2",
     [1.0, 2.0, 3.0],
     lambda t: (t ** 2).sum()),
    ("\nTest 2: f(x) = sigmoid(x^2 + 2x)",
     [0.5, 1.0, 1.5],
     lambda t: (t ** 2 + t * 2).sigmoid().sum()),
]

for heading, values, fn in demo_cases:
    print(heading)
    x = Tensor(values, requires_grad=True)
    result = enhanced_gradient_check(fn, x)

---

## Exercise 2 Solution: Layer-wise Testing Framework

Create a comprehensive test for any layer.

In [None]:
class LayerTester:
    """
    Comprehensive testing framework for neural network layers.

    Tests:
    1. Forward pass runs and reports the output shape
    2. Backward pass populates the input gradient
    3. Gradients agree with a finite-difference estimate
    4. Parameters are exposed and change under a gradient step
    5. Train/eval mode behavior

    Every test method returns a dict with a boolean 'passed' entry. If the
    layer raises, the test reports {'passed': False, 'error': <message>}
    instead of aborting the remaining tests.
    """

    def __init__(self, layer, input_shape, seed=42):
        """
        Args:
            layer: Callable layer/model; the tests also call its
                   parameters(), train() and eval() methods
            input_shape: Shape tuple of the random inputs fed to the layer
            seed: Seed applied to NumPy's global RNG at construction time
        """
        self.layer = layer
        self.input_shape = input_shape
        self.seed = seed
        np.random.seed(seed)

    def _random_input(self, requires_grad=False):
        """Draw a float32 standard-normal Tensor of shape ``input_shape``."""
        data = np.random.randn(*self.input_shape).astype(np.float32)
        if requires_grad:
            return Tensor(data, requires_grad=True)
        return Tensor(data)

    def test_forward_shape(self):
        """Test that forward pass produces output; reports its shape."""
        try:
            y = self.layer(self._random_input())
            return {'passed': True, 'output_shape': y.shape}
        except Exception as e:
            return {'passed': False, 'error': str(e)}

    def test_backward_exists(self):
        """Test that backward pass computes a gradient for the input."""
        try:
            x = self._random_input(requires_grad=True)
            y = self.layer(x)
            y.sum().backward()
            has_grad = x.grad is not None
            return {'passed': has_grad, 'has_input_grad': has_grad}
        except Exception as e:
            return {'passed': False, 'error': str(e)}

    def test_gradient_numerical(self, eps=1e-5, atol=1e-4):
        """
        Test input gradients against numerical computation.

        Args:
            eps: Perturbation forwarded to enhanced_gradient_check
            atol: Absolute tolerance forwarded to enhanced_gradient_check
        """
        try:
            x = self._random_input(requires_grad=True)

            def f(t):
                return self.layer(t).sum()

            result = enhanced_gradient_check(f, x, eps=eps, atol=atol, verbose=False)
            return {'passed': result['passed'], 'max_error': result['max_abs_error']}
        except Exception as e:
            # Same failure reporting as the forward/backward tests
            return {'passed': False, 'error': str(e)}

    def test_parameters(self):
        """
        Test that parameters are tracked and updatable.

        A layer without parameters passes trivially. A layer with parameters
        passes only if a manual gradient step actually changes at least one
        of them. The original parameter values are restored afterwards so the
        layer under test is not left modified.
        """
        try:
            params = self.layer.parameters()
            if len(params) == 0:
                return {'passed': True, 'num_params': 0, 'params_updated': False}

            original = [p.data.copy() for p in params]
            try:
                x = self._random_input(requires_grad=True)
                y = self.layer(x)
                y.sum().backward()

                # Manual update
                for p in params:
                    if p.grad is not None:
                        p.data -= 0.01 * p.grad

                changed = any(
                    not np.allclose(orig, p.data)
                    for orig, p in zip(original, params)
                )
            finally:
                # Undo the in-place step, even if the update itself failed midway
                for orig, p in zip(original, params):
                    p.data[...] = orig

            # Parameters that never move mean the layer is not trainable
            return {'passed': changed, 'num_params': len(params), 'params_updated': changed}
        except Exception as e:
            return {'passed': False, 'error': str(e)}

    def test_train_eval_modes(self):
        """
        Test train/eval mode behavior.

        Passes whenever the layer can run in both modes; 'outputs_differ' is
        informational only. The layer's mode is restored afterwards.
        """
        try:
            x = self._random_input()

            # NOTE(review): assumes the layer records its mode in a boolean
            # `training` attribute; if it does not, train mode is restored.
            # Confirm against the layer base class.
            was_training = getattr(self.layer, 'training', True)
            try:
                self.layer.train()
                train_output = self.layer(x).data.copy()

                self.layer.eval()
                eval_output = self.layer(x).data.copy()
            finally:
                # Do not leave the layer stuck in eval mode for later code
                if was_training:
                    self.layer.train()
                else:
                    self.layer.eval()

            # Check if outputs differ (e.g., dropout)
            outputs_differ = not np.allclose(train_output, eval_output)
        except Exception as e:
            return {'passed': False, 'error': str(e)}

        return {
            'passed': True,
            'outputs_differ': outputs_differ,
            'note': 'Outputs should differ if layer has dropout/batchnorm'
        }

    def run_all_tests(self):
        """
        Run all tests and return summary.

        Returns:
            dict mapping each test name to its result dict, plus an
            'all_passed' boolean that is True only if every test passed
        """
        results = {
            'forward_shape': self.test_forward_shape(),
            'backward_exists': self.test_backward_exists(),
            'gradient_numerical': self.test_gradient_numerical(),
            'parameters': self.test_parameters(),
            'train_eval_modes': self.test_train_eval_modes()
        }

        all_passed = all(r['passed'] for r in results.values())
        results['all_passed'] = all_passed

        return results

    def print_report(self):
        """Run tests, print a formatted report, and return the results dict."""
        results = self.run_all_tests()

        print(f"\nLayer Test Report: {self.layer.__class__.__name__}")
        print("=" * 50)

        for test_name, result in results.items():
            # 'all_passed' is the aggregate flag, not an individual test
            if test_name == 'all_passed':
                continue
            status = "PASS" if result['passed'] else "FAIL"
            print(f"\n{test_name}: {status}")
            for key, value in result.items():
                if key != 'passed':
                    print(f"  {key}: {value}")

        print("\n" + "=" * 50)
        final_status = "ALL TESTS PASSED" if results['all_passed'] else "SOME TESTS FAILED"
        print(f"Final Status: {final_status}")

        return results

In [None]:
# Exercise the LayerTester framework on a single Linear layer
print("Testing LayerTester Framework", "=" * 50, sep="\n")

# Linear(10, 5) checked with random inputs of shape (4, 10)
linear = Linear(10, 5)
tester = LayerTester(layer=linear, input_shape=(4, 10))
tester.print_report()

In [None]:
# Run the same report on a small Sequential model (Linear -> ReLU -> Linear)
layers = [Linear(10, 20), ReLU(), Linear(20, 5)]
model = Sequential(*layers)

tester = LayerTester(layer=model, input_shape=(4, 10))
tester.print_report()

---

## Exercise 3 Solution: Performance Benchmarking

Create a benchmarking utility for measuring performance.

In [None]:
import time

class PerformanceBenchmark:
    """
    Benchmark forward/backward pass performance.

    Each benchmark performs ``num_warmup`` untimed iterations followed by
    ``num_runs`` timed iterations (wall-clock, via time.perf_counter) and
    reports mean/std/min/max in milliseconds.
    """

    def __init__(self, model, input_shape, num_warmup=5, num_runs=20):
        """
        Args:
            model: Callable model to benchmark
            input_shape: Shape tuple of the random float32 inputs
            num_warmup: Number of untimed warmup iterations
            num_runs: Number of timed iterations (must be at least 1)

        Raises:
            ValueError: If num_runs is smaller than 1
        """
        # Fail fast: with no timed runs the statistics are undefined and
        # np.min/np.max would raise an obscure error later
        if num_runs < 1:
            raise ValueError(f"num_runs must be at least 1, got {num_runs}")
        self.model = model
        self.input_shape = input_shape
        self.num_warmup = num_warmup
        self.num_runs = num_runs

    @staticmethod
    def _summarize(times):
        """Turn per-run durations in seconds into statistics in milliseconds."""
        return {
            'mean_ms': np.mean(times) * 1000,
            'std_ms': np.std(times) * 1000,
            'min_ms': np.min(times) * 1000,
            'max_ms': np.max(times) * 1000
        }

    def _random_input(self, requires_grad=False):
        """Draw a float32 standard-normal Tensor of shape ``input_shape``."""
        data = np.random.randn(*self.input_shape).astype(np.float32)
        if requires_grad:
            return Tensor(data, requires_grad=True)
        return Tensor(data)

    def benchmark_forward(self):
        """Benchmark forward pass on one fixed random input."""
        x = self._random_input()

        # Warmup
        for _ in range(self.num_warmup):
            _ = self.model(x)

        # Benchmark
        times = []
        for _ in range(self.num_runs):
            start = time.perf_counter()
            _ = self.model(x)
            end = time.perf_counter()
            times.append(end - start)

        return self._summarize(times)

    def benchmark_backward(self):
        """
        Benchmark backward pass.

        The forward pass, including the ``sum()`` reduction that produces the
        scalar to differentiate, runs outside the timed region so only
        ``backward()`` is measured.
        """
        # Warmup
        for _ in range(self.num_warmup):
            x = self._random_input(requires_grad=True)
            loss = self.model(x).sum()
            loss.backward()

        # Benchmark
        times = []
        for _ in range(self.num_runs):
            x = self._random_input(requires_grad=True)
            loss = self.model(x).sum()

            start = time.perf_counter()
            loss.backward()
            end = time.perf_counter()
            times.append(end - start)

        return self._summarize(times)

    def benchmark_full_step(self):
        """
        Benchmark full training step (forward + backward + optimizer step).

        Note that this updates the model's parameters, because a real Adam
        optimizer step is applied on every iteration.
        """
        from micrograd_plus import Adam

        optimizer = Adam(self.model.parameters(), lr=0.001)

        # Warmup
        for _ in range(self.num_warmup):
            x = self._random_input(requires_grad=True)
            y = self.model(x)
            optimizer.zero_grad()
            y.sum().backward()
            optimizer.step()

        # Benchmark
        times = []
        for _ in range(self.num_runs):
            x = self._random_input(requires_grad=True)

            start = time.perf_counter()
            y = self.model(x)
            optimizer.zero_grad()
            y.sum().backward()
            optimizer.step()
            end = time.perf_counter()

            times.append(end - start)

        return self._summarize(times)

    def run_all(self):
        """Run all benchmarks and return their results keyed by name."""
        return {
            'forward': self.benchmark_forward(),
            'backward': self.benchmark_backward(),
            'full_step': self.benchmark_full_step()
        }

    def print_report(self):
        """Run all benchmarks, print a formatted report, and return the results."""
        results = self.run_all()

        print(f"\nPerformance Benchmark Report")
        print(f"Model: {self.model.__class__.__name__}")
        print(f"Input shape: {self.input_shape}")
        print(f"Runs: {self.num_runs} (warmup: {self.num_warmup})")
        print("=" * 50)

        for benchmark, result in results.items():
            print(f"\n{benchmark.upper()}:")
            print(f"  Mean:  {result['mean_ms']:.3f} ms")
            print(f"  Std:   {result['std_ms']:.3f} ms")
            print(f"  Range: [{result['min_ms']:.3f}, {result['max_ms']:.3f}] ms")

        return results

In [None]:
# Benchmark a three-layer MLP (784 -> 256 -> 128 -> 10) on inputs of shape (32, 784)
mlp_layers = [
    Linear(784, 256),
    ReLU(),
    Linear(256, 128),
    ReLU(),
    Linear(128, 10),
]
model = Sequential(*mlp_layers)

benchmark = PerformanceBenchmark(model=model, input_shape=(32, 784))
benchmark.print_report()

---

## Exercise 4 Solution: Test Coverage Analysis

Implement a simple test coverage tracker.

In [None]:
class CoverageTracker:
    """
    Simple coverage tracker for operations tested.

    Each category has a class-level list of everything that should be tested
    (named after the category in upper case) and a per-instance set of the
    names that have been marked as tested. Coverage counts only names that
    appear in the category's list, so a misspelled name cannot inflate it.
    """

    # All operations that should be tested
    TENSOR_OPS = [
        '__add__', '__sub__', '__mul__', '__truediv__', '__pow__',
        '__neg__', '__matmul__',
        'sum', 'mean', 'max', 'min',
        'reshape', 'flatten', 'transpose', 'squeeze', 'unsqueeze',
        'relu', 'sigmoid', 'tanh', 'softmax', 'log_softmax',
        'exp', 'log', 'sqrt', 'abs'
    ]

    LAYERS = [
        'Linear', 'ReLU', 'Sigmoid', 'Tanh', 'Softmax',
        'Dropout', 'BatchNorm', 'LayerNorm', 'Embedding', 'Sequential'
    ]

    LOSSES = [
        'MSELoss', 'CrossEntropyLoss', 'BCELoss', 'L1Loss', 'HuberLoss'
    ]

    OPTIMIZERS = [
        'SGD', 'Adam', 'AdamW', 'RMSprop'
    ]

    def __init__(self):
        # Names marked as tested, keyed by category
        self.tested = {
            'tensor_ops': set(),
            'layers': set(),
            'losses': set(),
            'optimizers': set()
        }

    def mark_tested(self, category, name):
        """
        Mark an operation as tested.

        Unknown categories are ignored. Names are recorded as given; names
        missing from the category's list are reported under 'unknown' by
        get_coverage and never count towards coverage.
        """
        if category in self.tested:
            self.tested[category].add(name)

    def get_coverage(self, category):
        """
        Get coverage for a category.

        Args:
            category: Category name such as 'tensor_ops' or 'layers'

        Returns:
            dict with
              'tested': listed items that were marked, in list order
              'not_tested': listed items that were not marked, in list order
              'coverage': fraction of listed items marked (1.0 if the
                          category lists nothing)
              'unknown': marked names that are not in the category's list
        """
        all_items = getattr(self, category.upper(), [])
        marked = self.tested.get(category, set())

        # Iterate the canonical list so results are deterministic and a
        # marked-but-unlisted name cannot push coverage above 100%
        covered = [item for item in all_items if item in marked]
        missing = [item for item in all_items if item not in marked]
        unknown = sorted(marked.difference(all_items), key=str)

        return {
            'tested': covered,
            'not_tested': missing,
            'coverage': len(covered) / len(all_items) if all_items else 1.0,
            'unknown': unknown
        }

    def print_report(self):
        """Print per-category and overall coverage."""
        print("\nTest Coverage Report")
        print("=" * 50)

        total_items = 0
        total_tested = 0

        categories = ['tensor_ops', 'layers', 'losses', 'optimizers']
        for category in categories:
            coverage = self.get_coverage(category)
            missing = coverage['not_tested']
            tested = len(coverage['tested'])
            # 'tested' and 'not_tested' partition the category's list
            total = tested + len(missing)

            total_items += total
            total_tested += tested

            pct = coverage['coverage'] * 100
            print(f"\n{category.upper()}: {tested}/{total} ({pct:.1f}%)")

            if missing:
                # Show at most five names; an ellipsis marks a truncated list
                shown = ', '.join(missing[:5])
                suffix = "..." if len(missing) > 5 else ""
                print(f"  Not tested: {shown}{suffix}")

            if coverage['unknown']:
                # Surfaces typos that would otherwise be silently lost
                names = ', '.join(str(item) for item in coverage['unknown'])
                print(f"  Unrecognized: {names}")

        overall = total_tested / total_items if total_items > 0 else 0
        print("\n" + "=" * 50)
        print(f"OVERALL COVERAGE: {total_tested}/{total_items} ({overall*100:.1f}%)")

In [None]:
# Demo: feed the coverage tracker a simulated set of test runs
tracker = CoverageTracker()

# Names pretended to be covered, grouped by tracker category
simulated_runs = {
    'tensor_ops': ['__add__', '__sub__', '__mul__', 'sum', 'mean', 'relu', 'sigmoid'],
    'layers': ['Linear', 'ReLU', 'Sequential'],
    'losses': ['MSELoss', 'CrossEntropyLoss'],
    'optimizers': ['SGD', 'Adam'],
}

for category, names in simulated_runs.items():
    for name in names:
        tracker.mark_tested(category, name)

tracker.print_report()

---

## Key Takeaways

1. **Gradient Checking**: Always verify analytical gradients against numerical approximations

2. **Layer Testing**: Comprehensive tests should cover output shapes, gradient correctness, parameter updates, and train/eval modes

3. **Benchmarking**: Include warmup runs, measure multiple iterations, and report summary statistics (mean, standard deviation, min, max)

4. **Coverage Tracking**: Know what you've tested and what's missing

5. **Reproducibility**: Always set random seeds for consistent test results