# Les 11: Labo - Oplossingen

**Mathematical Foundations - IT & Artificial Intelligence**

---

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from abc import ABC, abstractmethod

# Print arrays with 4 decimals and suppress scientific notation for small values.
np.set_printoptions(precision=4, suppress=True)
# Seed NumPy's global RNG so the random draws in the cells below are repeatable.
np.random.seed(42)

print("Libraries geladen!")

In [None]:
# Base classes
class Layer(ABC):
    """Abstract base for every network layer.

    Each instance carries a ``params`` dict, a ``grads`` dict and a
    ``training`` flag that starts out ``True``.  Subclasses must implement
    :meth:`forward` and :meth:`backward`.
    """

    def __init__(self):
        self.training = True
        self.params = {}
        self.grads = {}

    @abstractmethod
    def forward(self, x):
        """Compute the layer output for input ``x`` (implemented by subclasses)."""

    @abstractmethod
    def backward(self, dout):
        """Propagate the upstream gradient ``dout`` (implemented by subclasses)."""

    def __call__(self, x):
        """Make the layer callable; delegates to :meth:`forward`."""
        return self.forward(x)

    def train(self):
        """Switch the layer to training mode."""
        self.training = True

    def eval(self):
        """Switch the layer to evaluation mode."""
        self.training = False

class Loss(ABC):
    """Abstract base for loss functions.

    Subclasses implement :meth:`forward` (loss value) and :meth:`backward`
    (gradient with respect to the predictions).
    """

    @abstractmethod
    def forward(self, y_pred, y_true):
        """Return the loss for predictions ``y_pred`` against targets ``y_true``."""

    @abstractmethod
    def backward(self):
        """Return the gradient of the most recent forward pass."""

    def __call__(self, y_pred, y_true):
        """Make the loss callable; delegates to :meth:`forward`."""
        return self.forward(y_pred, y_true)

---

## Oefening 1: Linear Layer - Oplossingen

In [None]:
# Opdracht 1a
class Linear(Layer):
    """Fully connected layer computing ``x @ W + b``.

    ``W`` has shape ``(in_features, out_features)`` and is drawn with He
    initialisation; ``b`` has shape ``(out_features,)`` and starts at zero.
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        # He initialisation: standard deviation sqrt(2 / fan_in).
        he_std = np.sqrt(2.0 / in_features)
        self.params['W'] = np.random.randn(in_features, out_features) * he_std
        self.params['b'] = np.zeros(out_features)

    def forward(self, x):
        """Cache ``x`` for the backward pass and return ``x @ W + b``."""
        self.x = x
        weights, bias = self.params['W'], self.params['b']
        return x @ weights + bias

    def backward(self, dout):
        """Store batch-averaged parameter gradients and return the input gradient.

        NOTE(review): the loss classes in this file also divide their
        gradient by the batch size, so the stored parameter gradients end up
        scaled by 1/n twice -- confirm this convention is intended.
        """
        batch_size = self.x.shape[0]
        self.grads['W'] = (self.x.T @ dout) / batch_size
        self.grads['b'] = np.mean(dout, axis=0)
        return dout @ self.params['W'].T

# Smoke test: push a random batch of 4 samples through a 3 -> 2 layer.
linear = Linear(3, 2)
x = np.random.randn(4, 3)
out = linear(x)  # Layer.__call__ forwards to forward()
print(f"Input shape: {x.shape}")
print(f"Output shape: {out.shape}")

In [None]:
# Opdracht 1b - Gradient check
def gradient_check(layer, x, eps=1e-5, tol=1e-5):
    """Compare a layer's analytical parameter gradients with finite differences.

    The scalar objective is ``sum(layer.forward(x))``, i.e. ``backward`` is
    called with an all-ones upstream gradient.  The numerical derivative is
    divided by ``x.shape[0]`` because ``Linear.backward`` stores
    batch-averaged parameter gradients.

    Every entry of every array in ``layer.params`` is checked (weights and
    biases alike), not only ``W``.

    Args:
        layer: Object exposing ``params``, ``grads``, ``forward`` and
            ``backward``.
        x: Input batch; its first axis is treated as the batch axis.
        eps: Step size of the central difference.
        tol: Largest tolerated absolute difference between the numerical and
            the analytical gradient.

    Returns:
        ``True`` when all entries agree, ``False`` at the first mismatch
        (which is also printed).
    """
    # Analytical gradients for the objective sum(out).
    out = layer.forward(x)
    layer.backward(np.ones_like(out))
    batch_size = x.shape[0]

    for name, param in layer.params.items():
        analytical = layer.grads[name]
        for index in np.ndindex(param.shape):
            old_val = param[index]
            try:
                param[index] = old_val + eps
                out_plus = np.sum(layer.forward(x))

                param[index] = old_val - eps
                out_minus = np.sum(layer.forward(x))
            finally:
                # Always restore the entry, even if forward() raised.
                param[index] = old_val

            numerical_grad = (out_plus - out_minus) / (2 * eps) / batch_size
            analytical_grad = analytical[index]

            if abs(numerical_grad - analytical_grad) > tol:
                position = ",".join(str(i) for i in index)
                print(f"Gradient mismatch at {name}[{position}]: numerical={numerical_grad:.6f}, analytical={analytical_grad:.6f}")
                return False

    print("Gradient check passed!")
    return True

# Run the check on a fresh 3 -> 2 Linear layer with a random batch of 4 samples.
gradient_check(Linear(3, 2), np.random.randn(4, 3))

---

## Oefening 2: Activatiefuncties - Oplossingen

In [None]:
# Opdracht 2a
class ReLU(Layer):
    """Rectified linear unit: ``max(0, x)`` element-wise."""

    def forward(self, x):
        """Return ``max(0, x)`` and remember which entries were positive."""
        positive = x > 0
        self.mask = positive
        return np.maximum(0, x)

    def backward(self, dout):
        """Let the gradient through only where the input was positive."""
        gate = self.mask
        return dout * gate

class Sigmoid(Layer):
    """Logistic sigmoid activation, ``1 / (1 + exp(-x))``."""

    def forward(self, x):
        """Return the sigmoid of ``x`` and cache it for the backward pass."""
        # Clip to [-500, 500] first so exp() stays finite for float64 inputs.
        clipped = np.clip(x, -500, 500)
        self.out = 1 / (1 + np.exp(-clipped))
        return self.out

    def backward(self, dout):
        """Scale ``dout`` by the derivative ``out * (1 - out)``."""
        activation = self.out
        return dout * activation * (1 - activation)

# Test: ReLU on a small integer vector around zero.
x = np.array([-2, -1, 0, 1, 2])
relu = ReLU()
relu_out = relu.forward(x)
print(f"ReLU({x}) = {relu_out}")

In [None]:
# Opdracht 2b - ELU
class ELU(Layer):
    """Exponential linear unit.

    ``f(x) = x`` for ``x > 0`` and ``alpha * (exp(x) - 1)`` otherwise.

    Args:
        alpha: Scale of the negative branch.
    """

    def __init__(self, alpha=1.0):
        super().__init__()
        self.alpha = alpha

    def forward(self, x):
        """Return ELU(x); caches ``x`` and the positive mask for backward."""
        self.x = x
        self.mask = x > 0
        # np.where evaluates both branches for every element, so the exponent
        # is clamped to <= 0: large positive inputs would otherwise overflow
        # in exp() even though that branch is discarded for them.
        # expm1 keeps exp(x) - 1 accurate for x close to zero.
        negative_branch = self.alpha * np.expm1(np.minimum(x, 0))
        return np.where(self.mask, x, negative_branch)

    def backward(self, dout):
        """Multiply ``dout`` by f'(x): 1 for ``x > 0``, else ``alpha * exp(x)``."""
        # Same clamping as in forward to avoid overflow on positive entries.
        negative_slope = self.alpha * np.exp(np.minimum(self.x, 0))
        return dout * np.where(self.mask, 1, negative_slope)

# Visualise ELU and its derivative on [-3, 3].
x = np.linspace(-3, 3, 100)
elu = ELU(alpha=1.0)
y = elu(x)
dy = elu.backward(np.ones_like(x))  # all-ones upstream gradient yields f'(x)

plt.figure(figsize=(10, 5))
for values, fmt, label in ((y, 'b-', 'ELU(x)'), (dy, 'r--', "ELU'(x)")):
    plt.plot(x, values, fmt, linewidth=2, label=label)
# Thin black axes through the origin.
plt.axhline(y=0, color='k', linewidth=0.5)
plt.axvline(x=0, color='k', linewidth=0.5)
plt.xlabel('x')
plt.ylabel('y')
plt.title('ELU Activation Function')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

---

## Oefening 3: Loss Functions - Oplossingen

In [None]:
# Opdracht 3a
class MSELoss(Loss):
    """Mean squared error, averaged over every element of the prediction."""

    def forward(self, y_pred, y_true):
        """Return ``mean((y_pred - y_true) ** 2)`` and cache both arguments."""
        self.y_pred = y_pred
        self.y_true = y_true
        return np.mean((y_pred - y_true) ** 2)

    def backward(self):
        """Return the gradient of the forward loss with respect to ``y_pred``.

        ``forward`` averages over *all* elements, so the derivative divides by
        the total element count.  Dividing by the number of rows only (as was
        done before) is correct just for 1-D or single-column predictions and
        overestimates the gradient for multi-column outputs.
        """
        diff = self.y_pred - self.y_true
        return 2 * diff / np.size(diff)

class CrossEntropyLoss(Loss):
    """Softmax followed by negative log-likelihood, averaged over the batch.

    ``logits`` is indexed as ``(sample, class)`` and ``y_true`` holds one
    integer class index per sample.
    """

    def forward(self, logits, y_true):
        """Return the mean cross-entropy; caches the softmax probabilities."""
        self.y_true = y_true
        n_samples = logits.shape[0]

        # Softmax: subtract the row maximum before exponentiating so exp()
        # never sees a positive argument.
        row_max = np.max(logits, axis=1, keepdims=True)
        unnormalised = np.exp(logits - row_max)
        self.probs = unnormalised / np.sum(unnormalised, axis=1, keepdims=True)

        # Probability assigned to the correct class of each sample;
        # the 1e-10 offset guards against log(0).
        picked = self.probs[np.arange(n_samples), y_true]
        return np.mean(-np.log(picked + 1e-10))

    def backward(self):
        """Return ``(probs - one_hot(y_true)) / n`` for the cached batch."""
        n_samples = self.probs.shape[0]
        rows = np.arange(n_samples)
        grad = self.probs.copy()
        grad[rows, self.y_true] -= 1
        return grad / n_samples

# Test: two samples, three classes.
logits = np.array([[2.0, 1.0, 0.1], [0.5, 2.0, 0.3]])
y_true = np.array([0, 1])

ce_loss = CrossEntropyLoss()
loss = ce_loss(logits, y_true)  # Loss.__call__ forwards to forward()
grad = ce_loss.backward()

print(f"Loss: {loss:.4f}")
print(f"Probs:\n{ce_loss.probs}")
print(f"Gradient:\n{grad}")

---

## Oefening 4: SGD Optimizer - Oplossingen

In [None]:
# Opdracht 4a
class SGD:
    """Stochastic gradient descent with optional momentum and weight decay.

    ``layers`` is a sequence of objects exposing ``params`` and ``grads``
    dictionaries keyed by parameter name.
    """

    def __init__(self, layers, lr=0.01, momentum=0, weight_decay=0):
        self.layers = layers
        self.lr, self.momentum, self.weight_decay = lr, momentum, weight_decay
        # Momentum buffers, keyed by (layer index, parameter name).
        self.velocity = {}

    def step(self):
        """Update every parameter that currently has a stored gradient."""
        for layer_idx, layer in enumerate(self.layers):
            for name, value in layer.params.items():
                if name in layer.grads:
                    self._update(layer, (layer_idx, name), value)

    def _update(self, layer, key, value):
        """Apply one in-place update to the parameter identified by ``key``."""
        name = key[1]
        grad = layer.grads[name]

        # Weight decay: add weight_decay * parameter to the gradient.
        if self.weight_decay > 0:
            grad = grad + self.weight_decay * value

        # Plain gradient step when momentum is disabled.
        if not self.momentum > 0:
            layer.params[name] -= self.lr * grad
            return

        # Momentum: the buffer starts at zero the first time a key is seen.
        previous = self.velocity.get(key)
        if previous is None:
            previous = np.zeros_like(value)
        self.velocity[key] = self.momentum * previous - self.lr * grad
        layer.params[name] += self.velocity[key]

    def zero_grad(self):
        """Discard all stored gradients by giving each layer an empty dict."""
        for layer in self.layers:
            layer.grads = {}

---

## Oefening 5: Sequential Model - Oplossingen

In [None]:
# Opdracht 5a
class Sequential:
    """Container that chains layers: forward in order, backward in reverse."""

    def __init__(self, layers):
        self.layers = layers

    def forward(self, x):
        """Feed ``x`` through every layer in order and return the result."""
        activation = x
        for layer in self.layers:
            activation = layer.forward(activation)
        return activation

    def backward(self, dout):
        """Send ``dout`` through the layers from last to first."""
        grad = dout
        for layer in reversed(self.layers):
            grad = layer.backward(grad)
        return grad

    def __call__(self, x):
        """Make the model callable; delegates to :meth:`forward`."""
        return self.forward(x)

    def _set_training(self, flag):
        """Write ``flag`` to the ``training`` attribute of every layer."""
        for layer in self.layers:
            layer.training = flag

    def train(self):
        """Put all layers into training mode."""
        self._set_training(True)

    def eval(self):
        """Put all layers into evaluation mode."""
        self._set_training(False)

    def parameters(self):
        """Return the layers whose ``params`` dict is non-empty."""
        trainable = []
        for layer in self.layers:
            if layer.params:
                trainable.append(layer)
        return trainable

# Test: 10 -> 5 -> 2 network applied to a random batch of 3 samples.
model = Sequential([
    Linear(10, 5),
    ReLU(),
    Linear(5, 2),
])

x = np.random.randn(3, 10)
out = model.forward(x)
print(f"Output shape: {out.shape}")

---

## Oefening 6: Train MNIST - Oplossingen

In [None]:
from sklearn.datasets import fetch_openml

print("MNIST laden...")
mnist = fetch_openml('mnist_784', version=1, as_frame=False, parser='auto')

# Divide pixel values by 255 (presumably raw range 0..255 -- verify) and
# convert the labels to integers.
X = mnist.data / 255.0
y = mnist.target.astype(int)

# The first 60000 rows form the training split, the remainder the test split.
X_train, y_train = X[:60000], y[:60000]
X_test, y_test = X[60000:], y[60000:]

print(f"Training: {X_train.shape}")

In [None]:
# Build the model: a 784 -> 256 -> 128 -> 10 fully connected network with
# ReLU activations between the linear layers.
model = Sequential([
    Linear(784, 256),
    ReLU(),
    Linear(256, 128),
    ReLU(),
    Linear(128, 10)
])

criterion = CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=0.1, momentum=0.9)

# Training
batch_size = 128
n_epochs = 5
# Integer division: samples beyond the last full batch are skipped each epoch.
n_batches = len(X_train) // batch_size

for epoch in range(n_epochs):
    # Fresh random sample order for every epoch.
    idx = np.random.permutation(len(X_train))
    epoch_loss = 0
    
    for batch in range(n_batches):
        start = batch * batch_size
        X_batch = X_train[idx[start:start+batch_size]]
        y_batch = y_train[idx[start:start+batch_size]]
        
        # Forward
        logits = model(X_batch)
        loss = criterion(logits, y_batch)
        epoch_loss += loss
        
        # Backward: clear stored gradients, backpropagate from the loss,
        # then apply the optimizer update.
        optimizer.zero_grad()
        dout = criterion.backward()
        model.backward(dout)
        optimizer.step()
    
    # Evaluate: accuracy on the full test split after each epoch.
    logits = model(X_test)
    preds = np.argmax(logits, axis=1)
    acc = np.mean(preds == y_test)
    
    # Report the mean batch loss of this epoch next to the test accuracy.
    print(f"Epoch {epoch+1}: Loss={epoch_loss/n_batches:.4f}, Test Acc={acc:.4f}")

---

## Oefening 7: Dropout - Oplossingen

In [None]:
# Opdracht 7a
class Dropout(Layer):
    """Inverted dropout.

    In training mode each element is zeroed with probability ``p`` and the
    survivors are scaled by ``1 / (1 - p)``; in evaluation mode the input is
    returned unchanged.

    Args:
        p: Dropout probability, must lie in ``[0, 1]``.

    Raises:
        ValueError: If ``p`` is outside ``[0, 1]``.
    """

    def __init__(self, p=0.5):
        super().__init__()
        if not 0 <= p <= 1:
            raise ValueError(f"dropout probability must be in [0, 1], got {p}")
        self.p = p  # Dropout probability

    def forward(self, x):
        """Apply a fresh random mask in training mode; identity otherwise."""
        if not self.training:
            return x

        if self.p >= 1:
            # Everything is dropped; building the mask directly avoids the
            # 0 / 0 = NaN that the rescaling below would produce.
            self.mask = np.zeros(x.shape)
        else:
            # Keep probability is (1 - p); dividing by it keeps the expected
            # activation unchanged.
            keep = np.random.random(x.shape) > self.p
            self.mask = keep / (1 - self.p)
        return x * self.mask

    def backward(self, dout):
        """Apply the mask from the last training forward pass to ``dout``."""
        if not self.training:
            return dout
        return dout * self.mask

# Test: the same all-ones input in training and in evaluation mode.
dropout = Dropout(p=0.5)
x = np.ones((3, 4))

dropout.train()
out_train = dropout(x)  # Layer.__call__ forwards to forward()
print(f"Training (met dropout):\n{out_train}")

dropout.eval()
out_eval = dropout(x)
print(f"\nEvaluation (geen dropout):\n{out_eval}")

---

## Oefening 8: Experimenten - Oplossingen

In [None]:
# Vergelijk activatiefuncties
def train_and_evaluate(model, n_epochs=3, n_batches=100):
    """Train ``model`` on a subset of MNIST and record test accuracy per epoch.

    Reads the notebook-level globals ``X_train``, ``y_train``, ``X_test``,
    ``y_test`` and ``batch_size``.  The model is switched to training mode
    for the updates and to evaluation mode for the accuracy measurement, so
    mode-dependent layers such as ``Dropout`` behave correctly; it is left in
    training mode on return.

    Args:
        model: Model exposing ``parameters()``, ``backward()``, ``train()``,
            ``eval()`` and ``__call__`` (e.g. ``Sequential``).
        n_epochs: Number of passes over the training subset.
        n_batches: Mini-batches per epoch (a subset, for speed).

    Returns:
        List with the accuracy on the first 1000 test samples after each
        epoch.
    """
    criterion = CrossEntropyLoss()
    optimizer = SGD(model.parameters(), lr=0.1, momentum=0.9)

    accs = []
    for _ in range(n_epochs):
        model.train()
        idx = np.random.permutation(len(X_train))
        for batch in range(n_batches):
            start = batch * batch_size
            if start >= len(idx):
                # Ran out of samples; an empty batch would give a NaN loss.
                break
            batch_idx = idx[start:start + batch_size]
            X_batch = X_train[batch_idx]
            y_batch = y_train[batch_idx]

            # The forward pass through the criterion caches what its
            # backward() needs; the loss value itself is not used here.
            logits = model(X_batch)
            criterion(logits, y_batch)

            optimizer.zero_grad()
            model.backward(criterion.backward())
            optimizer.step()

        # Evaluate on the first 1000 test samples with the model in eval mode.
        model.eval()
        logits = model(X_test[:1000])
        acc = np.mean(np.argmax(logits, axis=1) == y_test[:1000])
        accs.append(acc)

    model.train()
    return accs

results = {}


def _two_layer_mlp(activation):
    """Return a 784 -> 128 -> 10 network with ``activation`` in between."""
    return Sequential([Linear(784, 128), activation, Linear(128, 10)])


# Re-seed before each run so every model starts from the same random state.

# ReLU
np.random.seed(42)
model_relu = _two_layer_mlp(ReLU())
results['ReLU'] = train_and_evaluate(model_relu)

# Sigmoid
np.random.seed(42)
model_sigmoid = _two_layer_mlp(Sigmoid())
results['Sigmoid'] = train_and_evaluate(model_sigmoid)

# ELU
np.random.seed(42)
model_elu = _two_layer_mlp(ELU())
results['ELU'] = train_and_evaluate(model_elu)

# Plot one accuracy curve per activation function.
plt.figure(figsize=(10, 5))
for name, accs in results.items():
    plt.plot(accs, 'o-', label=name)
plt.xlabel('Epoch')
plt.ylabel('Test Accuracy')
plt.title('Vergelijking Activatiefuncties')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

---

## Bonusoefening: Conv2D - Oplossing

In [None]:
class Conv2D(Layer):
    """Simple 2D convolution (no padding, stride=1).

    Weights have shape ``(out_channels, in_channels, kernel_size,
    kernel_size)`` and use He initialisation; the bias has one entry per
    output channel and starts at zero.

    Args:
        in_channels: Number of channels of the input.
        out_channels: Number of filters.
        kernel_size: Side length of the square kernel.
    """

    def __init__(self, in_channels, out_channels, kernel_size):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size

        # He initialisation with fan_in = in_channels * k * k.
        std = np.sqrt(2.0 / (in_channels * kernel_size * kernel_size))
        self.params['W'] = np.random.randn(out_channels, in_channels, kernel_size, kernel_size) * std
        self.params['b'] = np.zeros(out_channels)

    def forward(self, x):
        """Convolve ``x`` of shape ``(batch, in_channels, H, W)``.

        Returns:
            Array of shape ``(batch, out_channels, H - k + 1, W - k + 1)``.

        Raises:
            ValueError: If the channel axis of ``x`` does not match
                ``in_channels`` (broadcasting would otherwise hide it).
        """
        batch, channels, H, W = x.shape
        if channels != self.in_channels:
            raise ValueError(
                f"expected {self.in_channels} input channels, got {channels}"
            )
        self.x = x
        k = self.kernel_size
        H_out = H - k + 1
        W_out = W - k + 1
        weights, bias = self.params['W'], self.params['b']

        out = np.zeros((batch, self.out_channels, H_out, W_out))

        for i in range(H_out):
            for j in range(W_out):
                region = x[:, :, i:i+k, j:j+k]  # (batch, in_ch, k, k)
                # Contract (in_ch, k, k) against all filters at once
                # -> (batch, out_ch).
                out[:, :, i, j] = np.tensordot(
                    region, weights, axes=([1, 2, 3], [1, 2, 3])
                ) + bias

        return out

    def backward(self, dout):
        """Store batch-averaged parameter gradients and return the input gradient.

        Both ``grads['W']`` and ``grads['b']`` are divided by the batch size,
        matching ``Linear.backward`` (the bias gradient used to be a plain
        sum while the weight gradient was averaged).

        Args:
            dout: Upstream gradient, ``(batch, out_channels, H_out, W_out)``.

        Returns:
            Gradient with respect to the cached input, same shape as it.
        """
        batch = self.x.shape[0]
        k = self.kernel_size
        H_out, W_out = dout.shape[2], dout.shape[3]
        weights = self.params['W']

        grad_W = np.zeros_like(weights)
        dx = np.zeros_like(self.x)

        for i in range(H_out):
            for j in range(W_out):
                region = self.x[:, :, i:i+k, j:j+k]  # (batch, in_ch, k, k)
                d = dout[:, :, i, j]                 # (batch, out_ch)
                # Sum over the batch axis -> (out_ch, in_ch, k, k).
                grad_W += np.tensordot(d, region, axes=([0], [0]))
                # Sum over the filter axis -> (batch, in_ch, k, k).
                dx[:, :, i:i+k, j:j+k] += np.tensordot(d, weights, axes=([1], [0]))

        self.grads['W'] = grad_W / batch
        self.grads['b'] = np.sum(dout, axis=(0, 2, 3)) / batch
        return dx

# Test: 1 input channel, 2 filters, 3x3 kernel on 2 images of 5x5 pixels.
conv = Conv2D(1, 2, 3)
x = np.random.randn(2, 1, 5, 5)
out = conv(x)  # Layer.__call__ forwards to forward()
print(f"Input shape: {x.shape}")
print(f"Output shape: {out.shape}")

---

**Mathematical Foundations** | Les 11 Oplossingen | IT & Artificial Intelligence

---