# Les 7: Labo - Oplossingen

**Mathematical Foundations - IT & Artificial Intelligence**

---

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Print NumPy floats with 4 decimals and without scientific notation.
np.set_printoptions(precision=4, suppress=True)
# Seed NumPy's global random generator so the random draws in this notebook
# are reproducible.
np.random.seed(42)

print("Libraries geladen!")

---

## Oefening 1: Computational Graph Tekenen - Oplossingen

### 1. f(x, y) = (x + y) * (x - y)

```
x ──┬──[+]── a ──┐
    │            [*]── f
y ──┴──[-]── b ──┘

Tussenresultaten:
- a = x + y
- b = x - y
- f = a * b
```

### 2. g(x, w, b) = σ(wx + b)

```
x ──[*w]── z1 ──[+b]── z2 ──[σ]── g

Tussenresultaten:
- z1 = w * x
- z2 = z1 + b
- g = σ(z2)
```

### 3. h(x, W1, b1, W2, b2) = W2 @ ReLU(W1 @ x + b1) + b2

```
x ──[@W1]── z1 ──[+b1]── z2 ──[ReLU]── a1 ──[@W2]── z3 ──[+b2]── h

Tussenresultaten:
- z1 = W1 @ x
- z2 = z1 + b1
- a1 = ReLU(z2)
- z3 = W2 @ a1
- h = z3 + b2
```

---

## Oefening 2: Lokale Gradiënten - Oplossingen

In [None]:
# Opdracht 2a
class Add:
    """Addition node of a computational graph: z = x + y."""

    def forward(self, x, y):
        """Return the sum of the two inputs."""
        total = x + y
        return total

    def backward(self, dout):
        """Route the upstream gradient ``dout`` to both inputs.

        Both local derivatives (dz/dx and dz/dy) are 1, so each input
        receives ``dout`` multiplied by 1.

        Returns:
            Tuple ``(dx, dy)``.
        """
        grad_x, grad_y = (dout * local for local in (1, 1))
        return grad_x, grad_y

class Multiply:
    """Multiplication node of a computational graph: z = x * y."""

    def forward(self, x, y):
        """Cache both operands for the backward pass and return their product."""
        self.x, self.y = x, y
        return x * y

    def backward(self, dout):
        """Return ``(dx, dy)`` for the upstream gradient ``dout``.

        Local derivatives: dz/dx = y and dz/dy = x, using the operands
        cached by the last ``forward`` call.
        """
        grad_x = dout * self.y
        grad_y = dout * self.x
        return grad_x, grad_y

class Power:
    """Power node of a computational graph: z = x ** n for a fixed exponent n."""

    def __init__(self, n):
        self.n = n

    def forward(self, x):
        """Cache the input and return it raised to the power ``n``."""
        self.x = x
        result = x ** self.n
        return result

    def backward(self, dout):
        """Return dL/dx using the local derivative dz/dx = n * x^(n-1)."""
        exponent = self.n
        return dout * exponent * self.x ** (exponent - 1)

# Smoke test: run each node forward once, then backward with an upstream
# gradient of 1 so the printed values are the local derivatives themselves.
add = Add()
z = add.forward(3, 5)
dx, dy = add.backward(1)
print(f"Add: 3 + 5 = {z}, ∂z/∂x = {dx}, ∂z/∂y = {dy}")

# Multiply caches its operands in forward(); backward(1) returns (y, x).
mul = Multiply()
z = mul.forward(3, 5)
dx, dy = mul.backward(1)
print(f"Mul: 3 * 5 = {z}, ∂z/∂x = {dx}, ∂z/∂y = {dy}")

# Power(3) computes x**3; backward(1) returns 3 * x**2 for the cached x.
pow3 = Power(3)
z = pow3.forward(2)
dx = pow3.backward(1)
print(f"Pow: 2^3 = {z}, ∂z/∂x = {dx}")

In [None]:
# Opdracht 2b
class Sigmoid:
    """Sigmoid activation node that caches its output for the backward pass."""

    def forward(self, x):
        """Compute sigma(x), store it in ``self.out`` and return it.

        The input is clipped to [-500, 500] before it is passed to ``np.exp``.
        """
        clipped = np.clip(x, -500, 500)
        self.out = 1 / (1 + np.exp(-clipped))
        return self.out

    def backward(self, dout):
        """Return ``dout * sigma'(x)`` with sigma'(x) = sigma(x) * (1 - sigma(x))."""
        cached = self.out
        return dout * cached * (1 - cached)

class Tanh:
    """Hyperbolic-tangent activation node that caches its output."""

    def forward(self, x):
        """Compute tanh(x), store it in ``self.out`` and return it."""
        activated = np.tanh(x)
        self.out = activated
        return activated

    def backward(self, dout):
        """Return ``dout * tanh'(x)`` with tanh'(x) = 1 - tanh(x)^2."""
        squared = self.out ** 2
        return dout * (1 - squared)

# Test: analytic derivative of the sigmoid at x = 1 (upstream gradient 1).
sig = Sigmoid()
y = sig.forward(1.0)
dx = sig.backward(1.0)
print(f"Sigmoid(1) = {y:.4f}, σ'(1) = {dx:.4f}")

# Numerical verification with a central difference.
# Both evaluations go through the separate instance `sig2`, so the output
# cached inside `sig` (which a later sig.backward() relies on) is not
# overwritten by the perturbed forward passes.
h = 1e-5
sig2 = Sigmoid()
num_grad = (sig2.forward(1 + h) - sig2.forward(1 - h)) / (2*h)
print(f"Numerieke gradiënt: {num_grad:.4f}")

---

## Oefening 3: Backprop met de Hand - Oplossingen

In [None]:
# Opdracht 3a
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)) of a scalar or array.

    The input is clipped to [-500, 500] before exponentiation so that
    ``np.exp`` cannot overflow for large negative inputs. This is the same
    range the ``Sigmoid`` class in this file uses; for inputs inside the
    range the result is unchanged.
    """
    return 1 / (1 + np.exp(-np.clip(x, -500, 500)))

# Given values: one input, two (weight, bias) pairs and the regression target.
x = 2
w1, b1 = 0.5, 0.1
w2, b2 = -0.3, 0.2
y_target = 0.5

print("=== FORWARD PASS ===")
# Hidden unit: h = ReLU(w1*x + b1)
z1 = w1 * x + b1
print(f"z1 = w1*x + b1 = {w1}*{x} + {b1} = {z1}")

h = max(0, z1)  # ReLU
print(f"h = ReLU(z1) = {h}")

# Output unit: y = sigmoid(w2*h + b2)
z2 = w2 * h + b2
print(f"z2 = w2*h + b2 = {w2}*{h} + {b2} = {z2}")

y = sigmoid(z2)
print(f"y = σ(z2) = {y:.6f}")

# Squared-error loss: L = (y - y_target)²
L = (y - y_target) ** 2
print(f"L = (y - y_target)² = ({y:.6f} - {y_target})² = {L:.6f}")

In [None]:
# Manual backward pass: apply the chain rule from the loss back to each
# parameter, reusing x, h, z1, y, w2 and y_target from the forward-pass cell.
print("\n=== BACKWARD PASS ===")

# ∂L/∂y = 2(y - y_target)
dL_dy = 2 * (y - y_target)
print(f"∂L/∂y = 2(y - y_target) = 2*({y:.6f} - {y_target}) = {dL_dy:.6f}")

# ∂L/∂z2 = ∂L/∂y * ∂y/∂z2 = ∂L/∂y * σ'(z2) = ∂L/∂y * y*(1-y)
dy_dz2 = y * (1 - y)
dL_dz2 = dL_dy * dy_dz2
print(f"∂y/∂z2 = σ(z2)*(1-σ(z2)) = {y:.6f}*(1-{y:.6f}) = {dy_dz2:.6f}")
print(f"∂L/∂z2 = {dL_dz2:.6f}")

# ∂L/∂w2 = ∂L/∂z2 * ∂z2/∂w2 = ∂L/∂z2 * h
dL_dw2 = dL_dz2 * h
print(f"∂L/∂w2 = ∂L/∂z2 * h = {dL_dz2:.6f} * {h} = {dL_dw2:.6f}")

# ∂L/∂b2 = ∂L/∂z2 * 1
dL_db2 = dL_dz2
print(f"∂L/∂b2 = {dL_db2:.6f}")

# ∂L/∂h = ∂L/∂z2 * ∂z2/∂h = ∂L/∂z2 * w2
dL_dh = dL_dz2 * w2
print(f"∂L/∂h = ∂L/∂z2 * w2 = {dL_dz2:.6f} * {w2} = {dL_dh:.6f}")

# ∂L/∂z1 = ∂L/∂h * ∂h/∂z1 = ∂L/∂h * ReLU'(z1)
# ReLU'(z1) = 1 if z1 > 0, otherwise 0
relu_grad = 1 if z1 > 0 else 0
dL_dz1 = dL_dh * relu_grad
# NOTE(review): the message below always prints "> 0", which is only accurate
# while z1 is positive (true for the given values).
print(f"ReLU'(z1) = {relu_grad} (want z1 = {z1} > 0)")
print(f"∂L/∂z1 = {dL_dz1:.6f}")

# ∂L/∂w1 = ∂L/∂z1 * x
dL_dw1 = dL_dz1 * x
print(f"∂L/∂w1 = ∂L/∂z1 * x = {dL_dz1:.6f} * {x} = {dL_dw1:.6f}")

# ∂L/∂b1 = ∂L/∂z1 * 1
dL_db1 = dL_dz1
print(f"∂L/∂b1 = {dL_db1:.6f}")

In [None]:
# Numerieke verificatie
def full_forward(x, w1, b1, w2, b2, y_target):
    """Return the squared-error loss of the two-unit network for one input.

    Computes h = ReLU(w1*x + b1), y = sigmoid(w2*h + b2) and returns
    (y - y_target)**2. Used as the scalar function for finite differences.
    """
    hidden = max(0, w1 * x + b1)
    prediction = sigmoid(w2 * hidden + b2)
    return (prediction - y_target) ** 2

# Step size for the central differences. Note that this rebinds the global
# `h`, which held the hidden activation in the forward-pass cell.
h = 1e-5
print("\n=== NUMERIEKE VERIFICATIE ===")

# Central difference in w1, all other arguments fixed.
num_dw1 = (full_forward(x, w1+h, b1, w2, b2, y_target) - full_forward(x, w1-h, b1, w2, b2, y_target)) / (2*h)
print(f"∂L/∂w1: analytisch={dL_dw1:.6f}, numeriek={num_dw1:.6f}")

# Central difference in w2, all other arguments fixed.
num_dw2 = (full_forward(x, w1, b1, w2+h, b2, y_target) - full_forward(x, w1, b1, w2-h, b2, y_target)) / (2*h)
print(f"∂L/∂w2: analytisch={dL_dw2:.6f}, numeriek={num_dw2:.6f}")

---

## Oefening 4: Gradient Checking - Oplossingen

In [None]:
# Opdracht 4a
def gradient_check(f, x, analytic_grad, h=1e-5):
    """Compare an analytic derivative with a central-difference estimate.

    Args:
        f: function of one argument.
        x: point at which the derivative is checked.
        analytic_grad: derivative value to verify.
        h: step size of the central difference.

    Returns:
        Tuple ``(relative_error, numeric_grad)``. The relative error is the
        absolute difference divided by the larger magnitude of the two
        gradients, with a floor of 1e-8 on the denominator.
    """
    upper = f(x + h)
    lower = f(x - h)
    numeric_grad = (upper - lower) / (2 * h)

    # The floor keeps the division defined when both gradients are zero.
    scale = max(abs(analytic_grad), abs(numeric_grad), 1e-8)
    relative_error = abs(analytic_grad - numeric_grad) / scale

    return relative_error, numeric_grad

# Test with f(x) = x², whose derivative is known in closed form.
f = lambda x: x**2
x = 3.0
analytic = 2 * x  # f'(x) = 2x

# gradient_check returns (relative_error, numeric_grad), in that order.
error, numeric = gradient_check(f, x, analytic)
print(f"f(x) = x², x = {x}")
print(f"Analytische gradiënt: {analytic}")
print(f"Numerieke gradiënt: {numeric:.6f}")
print(f"Relatieve fout: {error:.2e}")

In [None]:
# Opdracht 4b - Sigmoid verificatie
# Check the Sigmoid node's backward pass against a numerical derivative of
# the standalone sigmoid() function at several points.
sig = Sigmoid()

for x_test in [-2, 0, 1, 3]:
    # Forward and backward through the node (forward caches the output)
    y = sig.forward(x_test)
    analytic = sig.backward(1.0)  # dout = 1
    
    # Numerical check via central difference on sigmoid()
    f = lambda x: sigmoid(x)
    error, numeric = gradient_check(f, x_test, analytic)
    
    print(f"x={x_test:2}: analytic={analytic:.6f}, numeric={numeric:.6f}, error={error:.2e}")

---

## Oefening 5: Layer Class met Backward - Oplossingen

In [None]:
# Opdracht 5a
class LinearLayer:
    """Fully connected (affine) layer computing ``X @ W + b``.

    Attributes:
        W: weight matrix of shape (input_dim, output_dim).
        b: bias vector of shape (output_dim,).
        dW, db: gradients w.r.t. ``W`` and ``b``; ``None`` until
            ``backward`` has been called.
    """

    def __init__(self, input_dim, output_dim):
        # He initialisation: standard-normal weights scaled by sqrt(2 / fan_in).
        self.W = np.random.randn(input_dim, output_dim) * np.sqrt(2.0 / input_dim)
        self.b = np.zeros(output_dim)
        self.dW = None
        self.db = None

    def forward(self, X):
        """Return ``X @ W + b`` and cache ``X`` for the backward pass."""
        self.X = X  # Cache for backward
        return X @ self.W + self.b

    def backward(self, dout):
        """Backpropagate ``dout`` (the gradient w.r.t. this layer's output).

        Stores the exact chain-rule gradients in ``self.dW`` and ``self.db``
        and returns the gradient w.r.t. the layer input.

        ``dout`` must already carry whatever batch normalisation the loss
        uses. The loss gradients in this file divide by the batch size
        themselves, so dividing by N here as well would shrink every
        parameter gradient by an extra factor N and make it disagree with a
        numerical gradient check.
        """
        # Parameter gradients: sum the per-sample contributions.
        self.dW = self.X.T @ dout
        self.db = np.sum(dout, axis=0)

        # Gradient w.r.t. the input, passed on to the previous layer.
        dX = dout @ self.W.T
        return dX

# Shape test: a batch of 4 samples with 3 features through a 3 -> 2 layer.
layer = LinearLayer(3, 2)
X = np.random.randn(4, 3)  # 4 samples, 3 features
out = layer.forward(X)
print(f"Input shape: {X.shape}")
print(f"Output shape: {out.shape}")

# Backward test: feed a random upstream gradient with the output's shape.
dout = np.random.randn(4, 2)
dX = layer.backward(dout)
print(f"dX shape: {dX.shape}")
print(f"dW shape: {layer.dW.shape}")
print(f"db shape: {layer.db.shape}")

In [None]:
# Opdracht 5b
class ReLULayer:
    """Element-wise ReLU activation with a cached positivity mask."""

    def forward(self, X):
        """Return max(0, X) and remember which entries were strictly positive."""
        positive = X > 0
        self.mask = positive
        return np.maximum(0, X)

    def backward(self, dout):
        """Pass ``dout`` through where the input was positive, zero elsewhere."""
        gated = dout * self.mask
        return gated

# Test: negative entries are zeroed in the forward pass ...
relu = ReLULayer()
X = np.array([[-1, 2, -3], [4, -5, 6]])
out = relu.forward(X)
print(f"Input:\n{X}")
print(f"ReLU output:\n{out}")

# ... and an all-ones upstream gradient comes back as the 0/1 mask.
dout = np.ones_like(X)
dX = relu.backward(dout)
print(f"Backward (dout=1):\n{dX}")

---

## Oefening 6: Mini-netwerk Trainen - Oplossingen

In [None]:
# Opdracht 6a - XOR probleem
# Re-seed so the generated dataset is reproducible.
np.random.seed(42)
n_samples = 200

# 2-D standard-normal points.
X = np.random.randn(n_samples, 2)
# Label is 1.0 when both coordinates have the same sign test result
# (both > 0 or both not > 0), else 0.0; reshaped to a column vector.
y = ((X[:, 0] > 0) == (X[:, 1] > 0)).astype(float).reshape(-1, 1)

# Netwerk voor binaire classificatie
class XORNetwork:
    """Binary classifier 2 -> 16 -> 8 -> 1 with ReLU hidden layers and a sigmoid output."""

    def __init__(self):
        self.layer1 = LinearLayer(2, 16)
        self.relu1 = ReLULayer()
        self.layer2 = LinearLayer(16, 8)
        self.relu2 = ReLULayer()
        self.layer3 = LinearLayer(8, 1)

    def forward(self, X):
        """Run the network on ``X`` and return the sigmoid outputs.

        All intermediate results (z1, a1, z2, a2, z3, out) are stored on the
        instance; ``backward`` reads ``self.out``.
        """
        self.z1 = self.layer1.forward(X)
        self.a1 = self.relu1.forward(self.z1)
        self.z2 = self.layer2.forward(self.a1)
        self.a2 = self.relu2.forward(self.z2)
        self.z3 = self.layer3.forward(self.a2)
        self.out = 1 / (1 + np.exp(-self.z3))  # Sigmoid
        return self.out

    def backward(self, y_true):
        """Backpropagate the mean binary cross-entropy loss through all layers.

        Must be called after ``forward``. The layer gradients end up in the
        ``dW`` / ``db`` attributes of the three linear layers.
        """
        # For a sigmoid output combined with binary cross-entropy the
        # derivative w.r.t. the pre-activation z3 simplifies to
        # (y_pred - y_true); dividing by the batch size gives the gradient of
        # the mean loss. The sigmoid derivative is already part of this
        # expression and must not be multiplied in a second time.
        dz3 = (self.out - y_true) / len(y_true)

        da2 = self.layer3.backward(dz3)
        dz2 = self.relu2.backward(da2)
        da1 = self.layer2.backward(dz2)
        dz1 = self.relu1.backward(da1)
        self.layer1.backward(dz1)

    def update(self, lr):
        """Apply one gradient-descent step with learning rate ``lr``."""
        for layer in [self.layer1, self.layer2, self.layer3]:
            layer.W -= lr * layer.dW
            layer.b -= lr * layer.db

    def predict(self, X):
        """Return hard 0.0/1.0 predictions by thresholding the output at 0.5."""
        return (self.forward(X) > 0.5).astype(float)

In [None]:
# Train the XOR network with full-batch gradient descent.
model = XORNetwork()
lr = 1.0
losses = []

for epoch in range(1000):
    # Forward pass on the whole dataset
    out = model.forward(X)
    
    # Loss (binary cross-entropy); 1e-10 keeps log() away from log(0)
    loss = -np.mean(y * np.log(out + 1e-10) + (1-y) * np.log(1-out + 1e-10))
    losses.append(loss)
    
    # Backward pass and parameter update
    model.backward(y)
    model.update(lr)
    
    # Report every 200 epochs; predict() runs another forward pass with the
    # already-updated weights.
    if epoch % 200 == 0:
        acc = np.mean(model.predict(X) == y)
        print(f"Epoch {epoch}: loss={loss:.4f}, acc={acc:.4f}")

print(f"\nFinale accuracy: {np.mean(model.predict(X) == y):.4f}")

In [None]:
# Visualise the decision boundary (left) and the loss curve (right).
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Decision boundary: evaluate the model on a 100 x 100 grid over [-3, 3]^2.
xx, yy = np.meshgrid(np.linspace(-3, 3, 100), np.linspace(-3, 3, 100))
grid = np.c_[xx.ravel(), yy.ravel()]
probs = model.forward(grid).reshape(xx.shape)

axes[0].contourf(xx, yy, probs, levels=20, cmap='RdBu', alpha=0.7)
# Overlay the training points: class 0 in blue, class 1 in red.
axes[0].scatter(X[y.flatten()==0, 0], X[y.flatten()==0, 1], c='blue', edgecolors='k')
axes[0].scatter(X[y.flatten()==1, 0], X[y.flatten()==1, 1], c='red', edgecolors='k')
axes[0].set_title('Decision Boundary')
axes[0].set_xlabel('x1')
axes[0].set_ylabel('x2')

# Loss curve: one value per training epoch.
axes[1].plot(losses)
axes[1].set_xlabel('Epoch')
axes[1].set_ylabel('Loss')
axes[1].set_title('Training Loss')
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

---

## Oefening 7: MNIST Trainen - Oplossingen

In [None]:
# Opdracht 7a
from sklearn.datasets import fetch_openml

print("MNIST laden...")
# Fetch the 'mnist_784' dataset from OpenML as NumPy arrays (as_frame=False).
mnist = fetch_openml('mnist_784', version=1, as_frame=False, parser='auto')
# Scale the features by 1/255 and convert the targets to integers.
X_data, y_data = mnist.data / 255.0, mnist.target.astype(int)

# First 60000 rows are used for training, the remaining rows for testing.
X_train, X_test = X_data[:60000], X_data[60000:]
y_train, y_test = y_data[:60000], y_data[60000:]

print(f"Training: {X_train.shape}, Test: {X_test.shape}")

In [None]:
# Softmax en Cross-Entropy
def softmax(x):
    """Return the row-wise softmax of a 2-D array of logits.

    The row maximum is subtracted before exponentiation, which leaves the
    result unchanged but keeps ``np.exp`` from overflowing.
    """
    shifted = x - np.max(x, axis=1, keepdims=True)
    exp_x = np.exp(shifted)
    row_sums = np.sum(exp_x, axis=1, keepdims=True)
    return exp_x / row_sums

def cross_entropy_loss(probs, y_true):
    """Return the mean negative log-probability of the true classes.

    Args:
        probs: array of shape (N, num_classes) with predicted probabilities.
        y_true: integer class indices of length N.

    A constant 1e-10 is added inside the logarithm to avoid log(0).
    """
    N = probs.shape[0]
    picked = probs[np.arange(N), y_true]
    log_likelihood = np.log(picked + 1e-10)
    return -np.sum(log_likelihood) / N

def cross_entropy_gradient(probs, y_true):
    """Return the gradient of the mean cross-entropy loss w.r.t. the logits.

    For softmax outputs this is (probs - one_hot(y_true)) / N. The input
    array ``probs`` is not modified.
    """
    N = probs.shape[0]
    one_hot = np.zeros_like(probs)
    one_hot[np.arange(N), y_true] = 1
    return (probs - one_hot) / N

# MNIST Classifier
class MNISTNet:
    """One-hidden-layer classifier: 784 -> hidden_dim -> 10 with ReLU and softmax."""

    def __init__(self, hidden_dim=128):
        self.layer1 = LinearLayer(784, hidden_dim)
        self.relu = ReLULayer()
        self.layer2 = LinearLayer(hidden_dim, 10)

    def forward(self, X):
        """Return class probabilities for ``X``.

        The intermediate values z1, a1, z2 and the probabilities are stored
        on the instance; ``backward`` reads ``self.probs``.
        """
        hidden_pre = self.layer1.forward(X)
        hidden = self.relu.forward(hidden_pre)
        logits = self.layer2.forward(hidden)
        self.z1, self.a1, self.z2 = hidden_pre, hidden, logits
        self.probs = softmax(logits)
        return self.probs

    def backward(self, y_true):
        """Backpropagate the cross-entropy gradient from the last forward pass."""
        grad_logits = cross_entropy_gradient(self.probs, y_true)
        grad_hidden = self.layer2.backward(grad_logits)
        grad_hidden_pre = self.relu.backward(grad_hidden)
        self.layer1.backward(grad_hidden_pre)

    def update(self, lr):
        """Apply one gradient-descent step with learning rate ``lr``."""
        for layer in (self.layer1, self.layer2):
            layer.W -= lr * layer.dW
            layer.b -= lr * layer.db

    def predict(self, X):
        """Return the index of the most probable class for every row of ``X``."""
        probabilities = self.forward(X)
        return np.argmax(probabilities, axis=1)

    def accuracy(self, X, y):
        """Return the fraction of rows in ``X`` whose prediction equals ``y``."""
        predictions = self.predict(X)
        return np.mean(predictions == y)

In [None]:
# Train the one-hidden-layer network with mini-batch gradient descent.
np.random.seed(42)
model = MNISTNet(hidden_dim=128)

lr = 0.5
batch_size = 128
n_epochs = 10

train_losses = []
test_accs = []

# Floor division: samples that do not fill a complete batch are skipped in
# every epoch.
n_batches = len(X_train) // batch_size

for epoch in range(n_epochs):
    # Shuffle the training set with a fresh permutation each epoch
    idx = np.random.permutation(len(X_train))
    X_shuf, y_shuf = X_train[idx], y_train[idx]
    
    epoch_loss = 0
    for batch in range(n_batches):
        start = batch * batch_size
        X_b = X_shuf[start:start+batch_size]
        y_b = y_shuf[start:start+batch_size]
        
        # Forward pass and loss for this mini-batch
        probs = model.forward(X_b)
        loss = cross_entropy_loss(probs, y_b)
        epoch_loss += loss
        
        # Backward pass and parameter update
        model.backward(y_b)
        model.update(lr)
    
    # Record the mean batch loss and the test accuracy after each epoch.
    train_losses.append(epoch_loss / n_batches)
    test_acc = model.accuracy(X_test, y_test)
    test_accs.append(test_acc)
    
    print(f"Epoch {epoch+1:2d}: Loss={train_losses[-1]:.4f}, Test Acc={test_acc:.4f}")

print(f"\nFinale test accuracy: {test_accs[-1]*100:.2f}%")

In [None]:
# Exercise 7b - hyperparameter experiments.
# Grid over hidden size and learning rate; reuses `n_batches` and
# `batch_size` from the training cell and rebinds the globals `lr` and `model`.
results = []

for hidden_dim in [64, 128, 256]:
    for lr in [0.1, 0.5, 1.0]:
        # Same seed for every configuration, so the runs start from the same
        # random state.
        np.random.seed(42)
        model = MNISTNet(hidden_dim=hidden_dim)
        
        # Train for 5 epochs
        for epoch in range(5):
            idx = np.random.permutation(len(X_train))
            for batch in range(n_batches):
                start = batch * batch_size
                X_b = X_train[idx[start:start+batch_size]]
                y_b = y_train[idx[start:start+batch_size]]
                model.forward(X_b)
                model.backward(y_b)
                model.update(lr)
        
        # Score each configuration by its accuracy on the test split.
        acc = model.accuracy(X_test, y_test)
        results.append((hidden_dim, lr, acc))
        print(f"hidden={hidden_dim}, lr={lr}: acc={acc:.4f}")

print("\nBeste configuratie:")
# Pick the (hidden_dim, lr, acc) tuple with the highest accuracy.
best = max(results, key=lambda x: x[2])
print(f"hidden={best[0]}, lr={best[1]}: acc={best[2]:.4f}")

---

## Bonusoefening: Tweede Hidden Layer - Oplossing

In [None]:
# Netwerk met 2 hidden layers
class DeepMNISTNet:
    """Two-hidden-layer classifier: 784 -> 256 -> 128 -> 10 with ReLU and softmax."""

    def __init__(self):
        self.layer1 = LinearLayer(784, 256)
        self.relu1 = ReLULayer()
        self.layer2 = LinearLayer(256, 128)
        self.relu2 = ReLULayer()
        self.layer3 = LinearLayer(128, 10)

    def forward(self, X):
        """Return class probabilities for ``X``.

        The intermediate values z1, a1, z2, a2, z3 and the probabilities are
        stored on the instance; ``backward`` reads ``self.probs``.
        """
        first_pre = self.layer1.forward(X)
        first_act = self.relu1.forward(first_pre)
        second_pre = self.layer2.forward(first_act)
        second_act = self.relu2.forward(second_pre)
        logits = self.layer3.forward(second_act)
        self.z1, self.a1 = first_pre, first_act
        self.z2, self.a2 = second_pre, second_act
        self.z3 = logits
        self.probs = softmax(logits)
        return self.probs

    def backward(self, y_true):
        """Backpropagate the cross-entropy gradient from the last forward pass."""
        grad = cross_entropy_gradient(self.probs, y_true)
        grad = self.layer3.backward(grad)
        grad = self.relu2.backward(grad)
        grad = self.layer2.backward(grad)
        grad = self.relu1.backward(grad)
        self.layer1.backward(grad)

    def update(self, lr):
        """Apply one gradient-descent step with learning rate ``lr``."""
        for linear in (self.layer1, self.layer2, self.layer3):
            linear.W -= lr * linear.dW
            linear.b -= lr * linear.db

    def predict(self, X):
        """Return the index of the most probable class for every row of ``X``."""
        probabilities = self.forward(X)
        return np.argmax(probabilities, axis=1)

    def accuracy(self, X, y):
        """Return the fraction of rows in ``X`` whose prediction equals ``y``."""
        predictions = self.predict(X)
        return np.mean(predictions == y)

# Train the two-hidden-layer network; reuses `n_batches` and `batch_size`
# from the earlier training cell.
np.random.seed(42)
deep_model = DeepMNISTNet()

lr = 0.5
for epoch in range(10):
    # Fresh permutation each epoch; batches are taken by indexing with idx.
    idx = np.random.permutation(len(X_train))
    for batch in range(n_batches):
        start = batch * batch_size
        X_b = X_train[idx[start:start+batch_size]]
        y_b = y_train[idx[start:start+batch_size]]
        deep_model.forward(X_b)
        deep_model.backward(y_b)
        deep_model.update(lr)
    
    # Report the test accuracy after every second epoch.
    if (epoch + 1) % 2 == 0:
        acc = deep_model.accuracy(X_test, y_test)
        print(f"Epoch {epoch+1}: Test Acc = {acc:.4f}")

print(f"\nDiep netwerk (784→256→128→10): {deep_model.accuracy(X_test, y_test)*100:.2f}%")

---

**Mathematical Foundations** | Les 7 Oplossingen | IT & Artificial Intelligence

---