# Les 6: Labo - Oplossingen

**Mathematical Foundations - IT & Artificial Intelligence**

---

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Print floats with 4 decimals and without scientific notation, and seed
# NumPy's global RNG so the random draws in this notebook are repeatable.
np.set_printoptions(precision=4, suppress=True)
np.random.seed(42)
print("Libraries geladen!")

---

## Oefening 1: Loss Functies - Oplossingen

In [None]:
# Opdracht 1a: MSE Loss
def mse_loss(y_pred, y_true):
    """Return the mean squared error between predictions and targets.

    The element-wise differences are squared and averaged over all elements.
    """
    residual = y_pred - y_true
    return np.mean(np.square(residual))

# Test: compare mse_loss against the value worked out by hand from the
# individual errors (0.1, 0.2, 0.1, 0 and 0.2).
y_true = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
y_pred = np.array([1.1, 2.2, 2.9, 4.0, 5.2])

print(f"MSE: {mse_loss(y_pred, y_true):.4f}")
print(f"Verwacht: {((0.1)**2 + (0.2)**2 + (0.1)**2 + 0 + (0.2)**2) / 5:.4f}")

In [None]:
# Opdracht 1b: MSE Gradiënt
def mse_gradient(y_pred, y_true):
    """Return the gradient of the MSE loss with respect to ``y_pred``.

    The MSE averages the squared error over *all* elements, so the gradient
    is ``2 / N * (y_pred - y_true)`` with ``N`` the total number of elements
    of the (broadcast) difference.

    Args:
        y_pred: Predictions; any array-like (array, list, scalar).
        y_true: Targets, broadcastable against ``y_pred``.

    Returns:
        Array with the shape of ``y_pred - y_true`` holding dMSE/dy_pred.
    """
    residual = np.asarray(y_pred) - np.asarray(y_true)
    # Use the total element count, not len(): len() only counts the first
    # axis and would mis-scale the gradient for multi-dimensional inputs.
    return (2 / residual.size) * residual

# Gradient of the MSE at the example predictions defined in the 1a test cell.
grad = mse_gradient(y_pred, y_true)
print(f"Gradiënt: {grad}")

In [None]:
# Opdracht 1c: Binary Cross-Entropy
def binary_cross_entropy(y_pred, y_true, epsilon=1e-15):
    """Return the mean binary cross-entropy of ``y_pred`` against ``y_true``.

    Predictions are clipped to ``[epsilon, 1 - epsilon]`` first so that
    neither logarithm is evaluated at zero.
    """
    probs = np.clip(y_pred, epsilon, 1 - epsilon)
    positive_term = y_true * np.log(probs)
    negative_term = (1 - y_true) * np.log(1 - probs)
    return -np.mean(positive_term + negative_term)

# Test: five binary labels with the predicted probability of class 1.
y_true_class = np.array([1, 0, 1, 1, 0])
y_pred_probs = np.array([0.9, 0.1, 0.8, 0.7, 0.3])

print(f"BCE: {binary_cross_entropy(y_pred_probs, y_true_class):.4f}")

---

## Oefening 2: 1D Gradient Descent - Oplossingen

In [None]:
# Opdracht 2a: 1D Gradient Descent
def gradient_descent_1d(f, df_dx, x_init, learning_rate, n_iterations):
    """Run plain gradient descent on a one-dimensional function.

    Args:
        f: The objective; accepted for a uniform call signature but not
            evaluated here.
        df_dx: Callable returning the derivative at a point.
        x_init: Starting point.
        learning_rate: Step size multiplied with the derivative.
        n_iterations: Number of update steps.

    Returns:
        Tuple ``(x, history)``: the final point and an array with the start
        point followed by every iterate (length ``n_iterations + 1``).
    """
    trajectory = [x_init]

    for _ in range(n_iterations):
        current = trajectory[-1]
        trajectory.append(current - learning_rate * df_dx(current))

    return trajectory[-1], np.array(trajectory)

# Functie en afgeleide
def f(x):
    """Evaluate the example objective x^2 - 4x + 5."""
    squared = x**2
    linear = 4*x
    return squared - linear + 5

def df_dx(x):
    """Evaluate 2x - 4, the derivative of x^2 - 4x + 5."""
    slope = 2*x
    return slope - 4

# Test: 50 steps from x = 6 with learning rate 0.1; `history` is reused by
# the visualisation cell for assignment 2b.
x_final, history = gradient_descent_1d(f, df_dx, x_init=6.0, learning_rate=0.1, n_iterations=50)
print(f"Gevonden minimum: x = {x_final:.6f}")
print(f"Verwacht minimum: x = 2")

In [None]:
# Assignment 2b: visualisation
x_range = np.linspace(-1, 7, 100)

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: the function with the gradient-descent path drawn on top
axes[0].plot(x_range, f(x_range), 'b-', linewidth=2, label='f(x) = x² - 4x + 5')
axes[0].plot(history, f(history), 'ro-', markersize=5, alpha=0.7, label='GD pad')
axes[0].plot(history[0], f(history[0]), 'g^', markersize=12, label='Start')
axes[0].plot(2, f(2), 'r*', markersize=15, label='Minimum')
axes[0].set(xlabel='x', ylabel='f(x)', title='Gradient Descent Pad')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# Right panel: function value per iteration
axes[1].plot(f(history), 'b-', linewidth=2)
axes[1].set(xlabel='Iteratie', ylabel='f(x)', title='Convergentie')
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Assignment 2c: learning-rate experiments
# For f(x) = x² - 4x + 5 the derivative is 2(x - 2), so each step maps
# (x - 2) to (1 - 2*lr) * (x - 2). The error shrinks only while
# |1 - 2*lr| < 1, i.e. for 0 < lr < 1.
learning_rates = [0.01, 0.1, 0.5, 1.0, 1.5]

plt.figure(figsize=(12, 5))

for lr in learning_rates:
    _, history = gradient_descent_1d(f, df_dx, x_init=6.0, learning_rate=lr, n_iterations=30)
    plt.plot(f(history), label=f'lr = {lr}')

plt.xlabel('Iteratie')
plt.ylabel('f(x)')
plt.title('Effect van Learning Rate')
plt.legend()
plt.grid(True, alpha=0.3)
# The lr=1.5 run grows without bound; clip the y-axis so the other curves
# stay readable.
plt.ylim(0, 20)
plt.show()

# Observations follow from the factor (1 - 2*lr): 0.98, 0.8, 0, -1 and -2.
print("Observaties:")
print("- lr=0.01: Zeer trage convergentie")
print("- lr=0.1: Goede, stabiele convergentie")
print("- lr=0.5: Exacte convergentie in 1 stap (speciaal geval voor deze kwadratische functie)")
print("- lr=1.0: Oscilleert eindeloos tussen x=6 en x=-2, convergeert niet")
print("- lr=1.5: Divergeert (de afstand tot het minimum verdubbelt elke stap)")

---

## Oefening 3: 2D Gradient Descent - Oplossingen

In [None]:
# Opdracht 3a: 2D Gradient Descent
def gradient_descent_2d(f, grad_f, xy_init, learning_rate, n_iterations):
    """Run plain gradient descent on a function of two variables.

    Args:
        f: The objective; accepted for a uniform call signature but not
            evaluated here.
        grad_f: Callable ``grad_f(x, y)`` returning the gradient as an array.
        xy_init: Starting point ``[x, y]``.
        learning_rate: Step size multiplied with the gradient.
        n_iterations: Number of update steps.

    Returns:
        Tuple ``(xy, history)``: the final point and an array whose rows are
        the start point followed by every iterate.
    """
    position = np.array(xy_init, dtype=float)
    path = [position.copy()]

    for _ in range(n_iterations):
        step = learning_rate * grad_f(position[0], position[1])
        position = position - step
        path.append(position.copy())

    return position, np.array(path)

# Functie en gradiënt
def f_2d(x, y):
    """Evaluate the paraboloid (x - 2)^2 + (y - 3)^2."""
    dx = x - 2
    dy = y - 3
    return dx**2 + dy**2

def grad_f_2d(x, y):
    """Return the gradient [2(x - 2), 2(y - 3)] of the paraboloid as an array."""
    partial_x = 2*(x - 2)
    partial_y = 2*(y - 3)
    return np.array([partial_x, partial_y])

# Test: 50 steps from (-1, 6); `history_2d` is reused by the contour plot
# for assignment 3b.
xy_final, history_2d = gradient_descent_2d(f_2d, grad_f_2d, xy_init=[-1, 6], 
                                            learning_rate=0.1, n_iterations=50)
print(f"Gevonden minimum: ({xy_final[0]:.4f}, {xy_final[1]:.4f})")
print(f"Verwacht minimum: (2, 3)")

In [None]:
# Assignment 3b: visualisation
# Evaluate f_2d on a 100x100 grid for the contour plot. Note that X, Y and Z
# are module-level names here; X is reassigned to the regression inputs in
# assignment 4a.
x_range = np.linspace(-3, 6, 100)
y_range = np.linspace(-1, 8, 100)
X, Y = np.meshgrid(x_range, y_range)
Z = f_2d(X, Y)

plt.figure(figsize=(10, 8))
plt.contour(X, Y, Z, levels=30, cmap='viridis')
plt.colorbar(label='f(x, y)')

# Draw the descent path, its start point and the known minimum at (2, 3)
plt.plot(history_2d[:, 0], history_2d[:, 1], 'ro-', markersize=4, linewidth=1.5, label='GD pad')
plt.plot(history_2d[0, 0], history_2d[0, 1], 'g^', markersize=12, label='Start')
plt.plot(2, 3, 'r*', markersize=15, label='Minimum')

plt.xlabel('x', fontsize=12)
plt.ylabel('y', fontsize=12)
plt.title('2D Gradient Descent', fontsize=14)
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

---

## Oefening 4: Lineaire Regressie - Oplossingen

In [None]:
# Assignment 4a: generate data
# Re-seed so the sample is the same regardless of earlier random draws.
np.random.seed(42)
n_samples = 50

# Inputs uniform on [0, 10]; targets follow y = 2.5x + 3 plus Gaussian noise
# with standard deviation 1.5.
X = np.random.uniform(0, 10, n_samples)
y = 2.5 * X + 3 + np.random.randn(n_samples) * 1.5

plt.figure(figsize=(8, 5))
plt.scatter(X, y, alpha=0.7)
plt.xlabel('X')
plt.ylabel('y')
plt.title('Synthetische data: y = 2.5x + 3 + ruis')
plt.grid(True, alpha=0.3)
plt.show()

In [None]:
# Opdracht 4b: Lineaire Regressie met GD
class LinearRegressionGD:
    """Univariate linear model ``w * X + b`` trained with batch gradient descent.

    Both parameters start at 0.0. Every epoch of ``fit`` appends the
    post-update MSE and the current ``w`` and ``b`` to ``loss_history``,
    ``w_history`` and ``b_history``; repeated ``fit`` calls continue from the
    current parameters and keep extending those lists.
    """

    def __init__(self, learning_rate=0.01):
        self.lr = learning_rate
        self.w, self.b = 0.0, 0.0
        # Per-epoch logs filled in by fit().
        self.loss_history, self.w_history, self.b_history = [], [], []

    def predict(self, X):
        """Return the model output for inputs ``X``."""
        return self.w * X + self.b

    def compute_loss(self, X, y):
        """Return the mean squared error of the current model on ``(X, y)``."""
        residual = self.predict(X) - y
        return np.mean(np.square(residual))

    def compute_gradients(self, X, y):
        """Return ``(dw, db)``, the MSE gradients with respect to ``w`` and ``b``."""
        residual = self.predict(X) - y
        scale = 2 / len(y)
        return scale * np.sum(residual * X), scale * np.sum(residual)

    def fit(self, X, y, n_epochs=100):
        """Run ``n_epochs`` full-batch updates and return ``self``."""
        for _ in range(n_epochs):
            grad_w, grad_b = self.compute_gradients(X, y)

            self.w -= self.lr * grad_w
            self.b -= self.lr * grad_b

            # Log the state reached after this epoch's update.
            self.loss_history.append(self.compute_loss(X, y))
            self.w_history.append(self.w)
            self.b_history.append(self.b)

        return self

# Train on the synthetic data from assignment 4a and compare the learned
# parameters with the ones used to generate it.
model = LinearRegressionGD(learning_rate=0.01)
model.fit(X, y, n_epochs=200)

print(f"Geleerd: w = {model.w:.4f}, b = {model.b:.4f}")
print(f"Werkelijk: w = 2.5, b = 3")

In [None]:
# Assignment 4c: visualisation of the fit, the loss curve and the parameters
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
x_line = np.linspace(0, 10, 100)

# Panel 1: data with the learned line and the true generating line
axes[0].scatter(X, y, alpha=0.7, label='Data')
axes[0].plot(x_line, model.predict(x_line), 'r-', linewidth=2,
             label=f'Fit: y = {model.w:.2f}x + {model.b:.2f}')
axes[0].plot(x_line, 2.5*x_line + 3, 'g--', alpha=0.5, label='Werkelijk')
axes[0].set(xlabel='X', ylabel='y', title='Lineaire Regressie')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# Panel 2: training loss per epoch
axes[1].plot(model.loss_history, 'b-', linewidth=2)
axes[1].set(xlabel='Epoch', ylabel='MSE Loss', title='Training Loss')
axes[1].grid(True, alpha=0.3)

# Panel 3: w and b per epoch, with dashed lines at the true values
axes[2].plot(model.w_history, label='w', linewidth=2)
axes[2].plot(model.b_history, label='b', linewidth=2)
axes[2].axhline(y=2.5, color='blue', linestyle='--', alpha=0.5)
axes[2].axhline(y=3, color='orange', linestyle='--', alpha=0.5)
axes[2].set(xlabel='Epoch', ylabel='Waarde', title='Parameter Evolutie')
axes[2].legend()
axes[2].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

---

## Oefening 5: Batch vs Mini-batch vs Stochastic GD - Oplossingen

In [None]:
# Opdracht 5a: Drie GD varianten

def batch_gradient_descent(X, y, learning_rate, n_epochs):
    """Fit ``y ≈ w * X + b`` with full-batch gradient descent.

    Each epoch performs a single update using the MSE gradient over all
    samples. The loss recorded for an epoch is the one measured with the
    parameters *before* that epoch's update.

    Returns:
        Tuple ``(w, b, loss_history)`` with one loss entry per epoch.
    """
    w, b = 0.0, 0.0
    n = len(y)
    loss_history = []

    for _ in range(n_epochs):
        residual = (w * X + b) - y

        # Logged from the pre-update predictions.
        loss_history.append(np.mean(residual**2))

        grad_w = (2/n) * np.sum(residual * X)
        grad_b = (2/n) * np.sum(residual)
        w -= learning_rate * grad_w
        b -= learning_rate * grad_b

    return w, b, loss_history


def minibatch_gradient_descent(X, y, learning_rate, n_epochs, batch_size=16):
    """Fit ``y ≈ w * X + b`` with mini-batch gradient descent.

    Every epoch draws a fresh permutation from NumPy's global RNG, walks over
    the shuffled data in chunks of ``batch_size`` (the last chunk may be
    shorter) and updates the parameters once per chunk. The loss over the
    full data set is recorded after each epoch.

    Returns:
        Tuple ``(w, b, loss_history)`` with one loss entry per epoch.
    """
    w, b = 0.0, 0.0
    n = len(y)
    loss_history = []

    for _ in range(n_epochs):
        order = np.random.permutation(n)
        X_perm, y_perm = X[order], y[order]

        for lo in range(0, n, batch_size):
            # Slicing past the end simply yields the shorter final batch.
            xb = X_perm[lo:lo + batch_size]
            yb = y_perm[lo:lo + batch_size]
            m = len(yb)

            residual = (w * xb + b) - yb
            grad_w = (2/m) * np.sum(residual * xb)
            grad_b = (2/m) * np.sum(residual)

            w -= learning_rate * grad_w
            b -= learning_rate * grad_b

        # Full-data loss with the parameters reached at the end of the epoch.
        loss_history.append(np.mean(((w * X + b) - y)**2))

    return w, b, loss_history


def stochastic_gradient_descent(X, y, learning_rate, n_epochs):
    """Fit ``y ≈ w * X + b`` with stochastic gradient descent (one sample per step).

    Every epoch visits all samples once in an order drawn from NumPy's global
    RNG and updates the parameters after each sample. The loss over the full
    data set is recorded after each epoch.

    Returns:
        Tuple ``(w, b, loss_history)`` with one loss entry per epoch.
    """
    w, b = 0.0, 0.0
    n = len(y)
    loss_history = []

    for _ in range(n_epochs):
        for idx in np.random.permutation(n):
            # Error is taken before either parameter changes, so both
            # updates use the same prediction.
            error = (w * X[idx] + b) - y[idx]

            grad_w = 2 * error * X[idx]
            grad_b = 2 * error

            w -= learning_rate * grad_w
            b -= learning_rate * grad_b

        # Full-data loss with the parameters reached at the end of the epoch.
        loss_history.append(np.mean(((w * X + b) - y)**2))

    return w, b, loss_history

In [None]:
# Assignment 5b: comparison
# Train all three variants on the same data for the same number of epochs.
# The mini-batch and SGD runs both draw from NumPy's global RNG, so their
# results depend on the order of these calls.
n_epochs = 100

w_batch, b_batch, loss_batch = batch_gradient_descent(X, y, 0.01, n_epochs)
w_mini, b_mini, loss_mini = minibatch_gradient_descent(X, y, 0.01, n_epochs, batch_size=8)
w_sgd, b_sgd, loss_sgd = stochastic_gradient_descent(X, y, 0.001, n_epochs)  # Smaller lr for SGD

print("Resultaten:")
print(f"Batch GD:      w = {w_batch:.4f}, b = {b_batch:.4f}")
print(f"Mini-batch GD: w = {w_mini:.4f}, b = {b_mini:.4f}")
print(f"SGD:           w = {w_sgd:.4f}, b = {b_sgd:.4f}")
print(f"Werkelijk:     w = 2.5, b = 3")

# Plot the per-epoch loss of the three runs in one figure
plt.figure(figsize=(10, 6))
plt.plot(loss_batch, 'b-', linewidth=2, label='Batch GD')
plt.plot(loss_mini, 'g-', linewidth=2, alpha=0.7, label='Mini-batch GD')
plt.plot(loss_sgd, 'r-', linewidth=1, alpha=0.5, label='SGD')
plt.xlabel('Epoch')
plt.ylabel('MSE Loss')
plt.title('Vergelijking GD Varianten')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

### Opdracht 5c - Antwoord

**Stabiliteit:** Batch GD is het meest stabiel (gladde curve). SGD is het meest ruisig. Mini-batch zit ertussenin.

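**Snelheid:** SGD en mini-batch convergeren vaak sneller per epoch, omdat ze meerdere parameter-updates per epoch uitvoeren (batch GD doet er slechts één), maar hun loss-curve is ruisiger. Batch GD is trager per epoch maar consistenter.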

**Eindresultaat:** Alle drie komen bij ongeveer dezelfde oplossing, mits goed getuned.

---

## Oefening 6: Learning Rate Scheduling - Oplossingen

In [None]:
# Opdracht 6a: GD met decay
def gradient_descent_with_decay(X, y, lr_init, decay, n_epochs):
    """Fit ``y ≈ w * X + b`` with full-batch gradient descent and a decaying step size.

    The learning rate used in epoch ``k`` (counting from 0) is
    ``lr_init / (1 + decay * k)``. The loss recorded for an epoch is the one
    measured with the parameters before that epoch's update.

    Returns:
        Tuple ``(w, b, loss_history, lr_history)``; both histories have one
        entry per epoch.
    """
    w, b = 0.0, 0.0
    n = len(y)
    loss_history, lr_history = [], []

    for epoch in range(n_epochs):
        lr = lr_init / (1 + decay * epoch)
        residual = (w * X + b) - y

        lr_history.append(lr)
        # Logged from the pre-update predictions.
        loss_history.append(np.mean(residual**2))

        grad_w = (2/n) * np.sum(residual * X)
        grad_b = (2/n) * np.sum(residual)
        w -= lr * grad_w
        b -= lr * grad_b

    return w, b, loss_history, lr_history

# Test on the regression data. Note that this binds the module-level names
# w, b, loss and lr (lr becomes the list of per-epoch learning rates).
w, b, loss, lr = gradient_descent_with_decay(X, y, lr_init=0.05, decay=0.01, n_epochs=200)
print(f"w = {w:.4f}, b = {b:.4f}")

In [None]:
# Assignment 6b: constant versus decaying learning rate
w_const, b_const, loss_const = batch_gradient_descent(X, y, 0.01, 200)
w_decay, b_decay, loss_decay, lr_decay = gradient_descent_with_decay(X, y, 0.05, 0.02, 200)

fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: loss per epoch for both runs
axes[0].plot(loss_const, 'b-', label='Constante lr=0.01')
axes[0].plot(loss_decay, 'r-', label='Decay lr (start=0.05)')
axes[0].set(xlabel='Epoch', ylabel='Loss', title='Loss Vergelijking')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# Right panel: the decaying schedule against the constant rate
axes[1].plot(lr_decay, 'r-')
axes[1].axhline(y=0.01, color='b', linestyle='--', label='Constante lr')
axes[1].set(xlabel='Epoch', ylabel='Learning Rate', title='Learning Rate Schedule')
axes[1].legend()
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("Met decay: start snel, verfijn later. Vaak betere convergentie.")

---

## Oefening 7: Logistische Regressie - Oplossingen

In [None]:
# Assignment 7a: generate data
# Re-seed so the two point clouds are the same on every run.
np.random.seed(42)
n_samples = 100

# Class 0: half of the samples, Gaussian with scale 0.8 around (-1, -1)
X0 = np.random.randn(n_samples // 2, 2) * 0.8 + np.array([-1, -1])
y0 = np.zeros(n_samples // 2)

# Class 1: the other half, Gaussian with scale 0.8 around (1, 1)
X1 = np.random.randn(n_samples // 2, 2) * 0.8 + np.array([1, 1])
y1 = np.ones(n_samples // 2)

# Stack both classes into one feature matrix and one label vector
X_class = np.vstack([X0, X1])
y_class = np.hstack([y0, y1])

plt.figure(figsize=(8, 6))
plt.scatter(X_class[y_class == 0, 0], X_class[y_class == 0, 1], c='blue', label='Klasse 0')
plt.scatter(X_class[y_class == 1, 0], X_class[y_class == 1, 1], c='red', label='Klasse 1')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.legend()
plt.title('Binaire classificatie data')
plt.grid(True, alpha=0.3)
plt.show()

In [None]:
# Opdracht 7b: Logistische Regressie
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)) element-wise.

    The input is clipped to [-500, 500] before exponentiation so that
    ``np.exp`` does not overflow.
    """
    clipped = np.clip(z, -500, 500)
    return 1 / (1 + np.exp(-clipped))

class LogisticRegressionGD:
    """Logistic regression trained with full-batch gradient descent.

    ``fit`` re-initialises the weight vector to zeros on every call; the bias
    and ``loss_history`` are not reset. The loss logged for an epoch is the
    cross-entropy of the predictions made before that epoch's update.
    """

    def __init__(self, learning_rate=0.1):
        self.lr = learning_rate
        self.w = None  # allocated in fit() once the feature count is known
        self.b = 0.0
        self.loss_history = []

    def predict_proba(self, X):
        """Return the predicted probability of class 1 for each row of ``X``."""
        return sigmoid(X @ self.w + self.b)

    def predict(self, X):
        """Return 0/1 labels by thresholding the probabilities at 0.5."""
        return (self.predict_proba(X) >= 0.5).astype(int)

    def fit(self, X, y, n_epochs=100):
        """Run ``n_epochs`` gradient-descent updates and return ``self``."""
        n_samples, n_features = X.shape
        self.w = np.zeros(n_features)
        tiny = 1e-15  # keeps both logarithms away from log(0)

        for _ in range(n_epochs):
            probs = self.predict_proba(X)
            residual = probs - y

            # Cross-entropy of the pre-update predictions.
            safe = np.clip(probs, tiny, 1 - tiny)
            self.loss_history.append(
                -np.mean(y * np.log(safe) + (1-y) * np.log(1-safe))
            )

            # Cross-entropy gradients with respect to the weights and bias.
            grad_w = (1/n_samples) * (X.T @ residual)
            grad_b = (1/n_samples) * np.sum(residual)

            self.w -= self.lr * grad_w
            self.b -= self.lr * grad_b

        return self

# Train on the two-class data from assignment 7a
log_model = LogisticRegressionGD(learning_rate=0.5)
log_model.fit(X_class, y_class, n_epochs=200)

# Evaluate on the training data itself (there is no separate test split here)
predictions = log_model.predict(X_class)
accuracy = np.mean(predictions == y_class)
print(f"Nauwkeurigheid: {accuracy * 100:.1f}%")
print(f"Weights: {log_model.w}")
print(f"Bias: {log_model.b:.4f}")

In [None]:
# Assignment 7c: decision boundary
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Grid covering the data with a margin of 1 on every side
x_min, x_max = X_class[:, 0].min() - 1, X_class[:, 0].max() + 1
y_min, y_max = X_class[:, 1].min() - 1, X_class[:, 1].max() + 1
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                     np.linspace(y_min, y_max, 100))

# Predicted probability at every grid point, shaped back to the grid
Z = log_model.predict_proba(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

# Left panel: probability surface, data points and the p = 0.5 contour
axes[0].contourf(xx, yy, Z, levels=50, cmap='RdBu', alpha=0.6)
axes[0].scatter(X_class[y_class == 0, 0], X_class[y_class == 0, 1], c='blue', edgecolor='k')
axes[0].scatter(X_class[y_class == 1, 0], X_class[y_class == 1, 1], c='red', edgecolor='k')
axes[0].contour(xx, yy, Z, levels=[0.5], colors='black', linewidths=2)
axes[0].set(xlabel='Feature 1', ylabel='Feature 2', title='Decision Boundary')

# Right panel: training loss per epoch
axes[1].plot(log_model.loss_history, 'b-', linewidth=2)
axes[1].set(xlabel='Epoch', ylabel='Cross-Entropy Loss', title='Training Loss')
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

---

## Oefening 8: Momentum - Oplossingen

In [None]:
# Opdracht 8a: GD met momentum
def gradient_descent_momentum_2d(f, grad_f, xy_init, learning_rate, momentum, n_iterations):
    """Run gradient descent with momentum on a function of two variables.

    Each step updates a velocity ``v = momentum * v - learning_rate * grad``
    and then moves the point by ``v``. The velocity starts at zero.

    Args:
        f: The objective; accepted for a uniform call signature but not
            evaluated here.
        grad_f: Callable ``grad_f(x, y)`` returning the gradient as an array.
        xy_init: Starting point ``[x, y]``.
        learning_rate: Step size multiplied with the gradient.
        momentum: Fraction of the previous velocity that is kept.
        n_iterations: Number of update steps.

    Returns:
        Tuple ``(xy, history)``: the final point and an array whose rows are
        the start point followed by every iterate.
    """
    position = np.array(xy_init, dtype=float)
    velocity = np.zeros(2)
    path = [position.copy()]

    for _ in range(n_iterations):
        grad = grad_f(position[0], position[1])
        velocity = momentum * velocity - learning_rate * grad
        position = position + velocity
        path.append(position.copy())

    return position, np.array(path)

In [None]:
# Opdracht 8b: Vergelijking op langgerekte vallei
def f_elongated(x, y):
    """Evaluate 0.1 x^2 + 2 y^2, a bowl that is much flatter along x than along y."""
    shallow = 0.1 * x**2
    steep = 2 * y**2
    return shallow + steep

def grad_f_elongated(x, y):
    """Return the gradient [0.2 x, 4 y] of 0.1 x^2 + 2 y^2 as an array."""
    partial_x = 0.2 * x
    partial_y = 4 * y
    return np.array([partial_x, partial_y])

# Starting point and shared settings for both runs
xy_init = [10, 1]
n_iter = 50
lr = 0.1

# Without momentum
xy_no_mom, hist_no_mom = gradient_descent_2d(f_elongated, grad_f_elongated, xy_init, lr, n_iter)

# With momentum
xy_mom, hist_mom = gradient_descent_momentum_2d(f_elongated, grad_f_elongated, xy_init, lr, 0.9, n_iter)

print(f"Zonder momentum: ({xy_no_mom[0]:.4f}, {xy_no_mom[1]:.4f})")
print(f"Met momentum:    ({xy_mom[0]:.4f}, {xy_mom[1]:.4f})")

# Visualisation
# NOTE: this rebinds the module-level names X, Y and Z to the plotting grid,
# so after this cell X no longer holds the regression inputs from
# assignment 4a.
x_range = np.linspace(-2, 12, 100)
y_range = np.linspace(-2, 2, 100)
X, Y = np.meshgrid(x_range, y_range)
Z = f_elongated(X, Y)

plt.figure(figsize=(12, 5))

# Left subplot: path of plain gradient descent; the star marks the minimum
plt.subplot(121)
plt.contour(X, Y, Z, levels=30, cmap='viridis')
plt.plot(hist_no_mom[:, 0], hist_no_mom[:, 1], 'ro-', markersize=3, label='Zonder momentum')
plt.plot(0, 0, 'g*', markersize=15)
plt.xlabel('x')
plt.ylabel('y')
plt.title('Zonder Momentum')
plt.legend()

# Right subplot: path of gradient descent with momentum 0.9
plt.subplot(122)
plt.contour(X, Y, Z, levels=30, cmap='viridis')
plt.plot(hist_mom[:, 0], hist_mom[:, 1], 'bo-', markersize=3, label='Met momentum (0.9)')
plt.plot(0, 0, 'g*', markersize=15)
plt.xlabel('x')
plt.ylabel('y')
plt.title('Met Momentum')
plt.legend()

plt.tight_layout()
plt.show()

print("\nMomentum helpt om sneller door de langgerekte vallei te navigeren!")

---

**Mathematical Foundations** | Les 6 Oplossingen | IT & Artificial Intelligence

---