# Les 9: Labo - Oplossingen

**Mathematical Foundations - IT & Artificial Intelligence**

---

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Print arrays with 4 decimals and suppress scientific notation for small values.
np.set_printoptions(precision=4, suppress=True)
# Seed NumPy's global RNG so the np.random.* calls in this notebook are reproducible.
np.random.seed(42)

print("Libraries geladen!")

---

## Oefening 1: Verwachtingswaarde - Oplossingen

In [None]:
# Exercise 1a: expected values of three distributions.

# 1. Discrete distribution: E[X] is the probability-weighted sum of outcomes.
x1 = np.array([1, 2, 3])
p1 = np.array([0.2, 0.5, 0.3])
weighted_outcomes = x1 * p1
E_X1 = np.sum(weighted_outcomes)
print(f"1. E[X] = {E_X1}")

# 2. Binomial(n=10, p=0.3): closed form E[X] = n * p.
n = 10
p = 0.3
E_X2 = n * p
print(f"2. E[Y] (Binomiaal) = n*p = {E_X2}")

# 3. Poisson(lambda=5): the expected value equals the rate parameter.
lam = 5
E_X3 = lam
print(f"3. E[Z] (Poisson) = λ = {E_X3}")

In [None]:
# Exercise 1b - E[X²] versus (E[X])²
# x1 (outcomes) and p1 (probabilities) are not assigned in this cell; they
# must already exist in the session (the exercise 1a cell assigns them).
E_X = np.sum(x1 * p1)
# Second moment: squared outcomes weighted by their probabilities.
E_X2_func = np.sum(x1**2 * p1)

print(f"E[X] = {E_X}")
print(f"E[X²] = {E_X2_func}")
print(f"(E[X])² = {E_X**2}")
print(f"\nE[X²] ≠ (E[X])² in het algemeen!")
# The gap between the two quantities is exactly the variance of X.
print(f"Het verschil is Var(X) = E[X²] - (E[X])² = {E_X2_func - E_X**2}")

---

## Oefening 2: Variantie - Oplossingen

In [None]:
# Exercise 2a: variances of the three distributions from exercise 1a.
# x1, p1, n, p and lam are not assigned in this cell; they must already exist
# in the session. The formula comments below assume n=10, p=0.3 and lam=5.

# 1. Discrete distribution: Var(X) = E[X²] - (E[X])²
E_X = np.sum(x1 * p1)
# NOTE: this rebinds E_X2, which the exercise 1a cell used for the binomial mean.
E_X2 = np.sum(x1**2 * p1)
Var_X1 = E_X2 - E_X**2
print(f"1. Var(X) = {Var_X1}")

# 2. Binomial(n=10, p=0.3)
# Var(X) = n*p*(1-p)
Var_X2 = n * p * (1 - p)
print(f"2. Var(Y) (Binomiaal) = n*p*(1-p) = {Var_X2}")

# 3. Poisson(λ=5)
# Var(X) = λ
Var_X3 = lam
print(f"3. Var(Z) (Poisson) = λ = {Var_X3}")

In [None]:
# Exercise 2b - empirical check of Var(aX + b) = a²·Var(X)
# 100000 draws from a normal distribution with mean 5 and standard deviation 2.
X = np.random.normal(5, 2, 100000)  # Var = 4
a, b = 3, 10
# Affine transform of every sample: scale by a, shift by b.
Y = a * X + b

print(f"Var(X) = {np.var(X):.4f}")
print(f"Var(aX + b) = Var({a}X + {b}) = {np.var(Y):.4f}")
print(f"a² · Var(X) = {a**2} · {np.var(X):.4f} = {a**2 * np.var(X):.4f}")
# The shift b moves every sample equally, so the spread is unchanged.
print(f"\nDe constante b heeft geen effect op de variantie!")

---

## Oefening 3: Steekproefgemiddelde - Oplossingen

In [None]:
# Opdracht 3a
def sample_mean(x):
    """Return the arithmetic mean of ``x``: the sum of its entries divided by ``len(x)``."""
    total = np.sum(x)
    count = len(x)
    return total / count

def sample_variance(x, ddof=1):
    """Return the variance of ``x`` with ``len(x) - ddof`` in the denominator.

    The default ``ddof=1`` applies Bessel's correction (divide by n - 1);
    ``ddof=0`` divides by n.
    """
    centre = sample_mean(x)
    squared_deviations = (x - centre) ** 2
    denominator = len(x) - ddof
    return np.sum(squared_deviations) / denominator

# Test: compare the hand-written estimators with NumPy's on a small data set.
data = np.array([2, 4, 4, 4, 5, 5, 7, 9])
print(f"Data: {data}")
print(f"Sample mean: {sample_mean(data)} (np: {np.mean(data)})")
# ddof=1 divides by n - 1, ddof=0 divides by n.
print(f"Sample variance (ddof=1): {sample_variance(data, 1):.4f} (np: {np.var(data, ddof=1):.4f})")
print(f"Sample variance (ddof=0): {sample_variance(data, 0):.4f} (np: {np.var(data, ddof=0):.4f})")

In [None]:
# Exercise 3b - bias of the sample variance.
# Draw many small N(0, 1) samples and average both variance estimators
# over all of them.
n_samples = 5
n_experiments = 10000
true_variance = 1.0  # variance of N(0, 1)

# One standard-normal sample of size n_samples per experiment.
draws = [np.random.randn(n_samples) for _ in range(n_experiments)]

var_biased = [np.var(d, ddof=0) for d in draws]    # divide by N
var_unbiased = [np.var(d, ddof=1) for d in draws]  # divide by N - 1

print(f"True variance: {true_variance}")
print(f"Mean of biased (N): {np.mean(var_biased):.4f}")
print(f"Mean of unbiased (N-1): {np.mean(var_unbiased):.4f}")
print(f"\nBessel's correctie (N-1) geeft een onvertekende schatter!")

---

## Oefening 4: Covariantie en Correlatie - Oplossingen

In [None]:
# Opdracht 4a
def covariance(x, y):
    """Return the sample covariance of ``x`` and ``y`` (denominator ``len(x) - 1``)."""
    dev_x = x - np.mean(x)
    dev_y = y - np.mean(y)
    cross_sum = np.sum(dev_x * dev_y)
    return cross_sum / (len(x) - 1)

def correlation(x, y):
    """Return the Pearson correlation of ``x`` and ``y``.

    This is the sample covariance divided by the product of the two sample
    standard deviations (both computed with ``ddof=1``).
    """
    spread = np.std(x, ddof=1) * np.std(y, ddof=1)
    return covariance(x, y) / spread

# Test: compare the hand-written functions with np.cov / np.corrcoef.
x = np.array([1, 2, 3, 4, 5])
y = np.array([2, 4, 5, 4, 5])

# np.cov and np.corrcoef return 2x2 matrices; entry [0, 1] is the x-y term.
print(f"Covariance: {covariance(x, y):.4f} (np: {np.cov(x, y)[0,1]:.4f})")
print(f"Correlation: {correlation(x, y):.4f} (np: {np.corrcoef(x, y)[0,1]:.4f})")

In [None]:
# Opdracht 4b - Visualiseer correlaties
def generate_correlated(n, rho):
    """Draw ``n`` pairs ``(x, y)`` of standard-normal-based data with target correlation ``rho``.

    ``x`` and an independent noise vector are drawn from NumPy's global RNG;
    ``y`` mixes them as ``rho * x + sqrt(1 - rho**2) * noise``.

    Returns:
        Tuple ``(x, y)`` of arrays of length ``n``.
    """
    x = np.random.randn(n)
    noise = np.random.randn(n)
    noise_weight = np.sqrt(1 - rho**2)
    return x, rho * x + noise_weight * noise

# Target correlations, one per subplot of the 2x2 grid.
correlations = [0.9, 0.5, 0, -0.7]
fig, axes = plt.subplots(2, 2, figsize=(10, 10))

# axes is a 2x2 array; flatten it so it pairs one-to-one with the targets.
for ax, rho in zip(axes.flatten(), correlations):
    # NOTE: this rebinds the globals x and y on every iteration.
    x, y = generate_correlated(200, rho)
    # Empirical correlation of this finite sample, shown next to the target.
    actual_corr = np.corrcoef(x, y)[0, 1]
    
    ax.scatter(x, y, alpha=0.5)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_title(f'Target ρ = {rho}, Actual = {actual_corr:.3f}')
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

---

## Oefening 5: Batch Normalization - Oplossingen

In [None]:
# Opdracht 5a
class BatchNorm:
    """Batch normalization along axis 0 with a learnable per-feature scale and shift.

    In training mode the batch mean and biased variance (``np.var`` default)
    are used for normalization and folded into exponential running averages.
    In inference mode the running averages are used instead.
    """

    def __init__(self, n_features, epsilon=1e-5, momentum=0.9):
        """Create the layer.

        Args:
            n_features: Length of the per-feature parameter and statistics vectors.
            epsilon: Added to the variance before the square root so the
                division never hits zero.
            momentum: Weight kept on the previous running statistic at each
                training update; the batch statistic gets ``1 - momentum``.
        """
        self.epsilon = epsilon
        self.momentum = momentum

        # Learnable scale (gamma) and shift (beta); identity transform at start.
        self.gamma = np.ones(n_features)
        self.beta = np.zeros(n_features)

        # Running statistics used when forward() is called with training=False.
        self.running_mean = np.zeros(n_features)
        self.running_var = np.ones(n_features)

    def forward(self, x, training=True):
        """Normalize ``x`` and apply the scale and shift.

        Args:
            x: Input array whose axis 0 is the batch axis.
            training: If True, normalize with the statistics of ``x`` and
                update the running averages; otherwise normalize with the
                running averages and leave them untouched.

        Returns:
            ``gamma * x_norm + beta``, same shape as ``x``.
        """
        # Remember the mode: backward() needs a different gradient for each.
        self._training = training

        if training:
            self.mu = np.mean(x, axis=0)
            self.var = np.var(x, axis=0)

            # Exponential moving average of the batch statistics.
            keep = self.momentum
            self.running_mean = keep * self.running_mean + (1 - keep) * self.mu
            self.running_var = keep * self.running_var + (1 - keep) * self.var
        else:
            self.mu = self.running_mean
            self.var = self.running_var

        # Cache the intermediates that backward() reads.
        self.x_centered = x - self.mu
        self.std = np.sqrt(self.var + self.epsilon)
        self.x_norm = self.x_centered / self.std

        out = self.gamma * self.x_norm + self.beta
        self.x = x
        return out

    def backward(self, dout):
        """Backpropagate through the most recent ``forward`` call.

        Stores the parameter gradients in ``self.dgamma`` and ``self.dbeta``.

        Args:
            dout: Gradient of the loss with respect to the output of ``forward``.

        Returns:
            Gradient of the loss with respect to the input ``x``.
        """
        batch_size = dout.shape[0]

        # Gradients for gamma and beta.
        self.dgamma = np.sum(dout * self.x_norm, axis=0)
        self.dbeta = np.sum(dout, axis=0)

        dx_norm = dout * self.gamma

        if not self._training:
            # Inference mode: mean and variance are the running averages,
            # which do not depend on x, so only the direct path contributes.
            return dx_norm / self.std

        # Training mode: x also influences the output through the batch
        # variance and the batch mean, so add those two paths.
        dvar = np.sum(
            dx_norm * self.x_centered * -0.5 * (self.var + self.epsilon) ** (-1.5),
            axis=0,
        )
        dmu = (
            np.sum(dx_norm * -1 / self.std, axis=0)
            + dvar * np.mean(-2 * self.x_centered, axis=0)
        )
        dx = (
            dx_norm / self.std
            + dvar * 2 * self.x_centered / batch_size
            + dmu / batch_size
        )

        return dx

In [None]:
# Exercise 5b - test BatchNorm
np.random.seed(42)
# 64 rows, 4 columns, scaled by 100 and shifted by 500: extreme mean and variance.
x = np.random.randn(64, 4) * 100 + 500

bn = BatchNorm(4)
# forward() defaults to training=True, so the statistics of x itself are used.
out = bn.forward(x)

print("Vóór BatchNorm:")
print(f"  Mean: {np.mean(x, axis=0)}")
print(f"  Var: {np.var(x, axis=0)}")

print("\nNa BatchNorm:")
print(f"  Mean: {np.mean(out, axis=0)}")
print(f"  Var: {np.var(out, axis=0)}")

---

## Oefening 6: Weight Initialisatie - Oplossingen

In [None]:
# Opdracht 6a
def xavier_init(n_in, n_out):
    """Return an ``(n_in, n_out)`` weight matrix with Xavier initialization.

    Entries are standard-normal draws from NumPy's global RNG scaled to
    standard deviation ``sqrt(2 / (n_in + n_out))``.
    """
    fan_sum = n_in + n_out
    scale = np.sqrt(2.0 / fan_sum)
    return np.random.standard_normal((n_in, n_out)) * scale

def he_init(n_in, n_out):
    """Return an ``(n_in, n_out)`` weight matrix with He initialization.

    Entries are standard-normal draws from NumPy's global RNG scaled to
    standard deviation ``sqrt(2 / n_in)``.
    """
    scale = np.sqrt(2.0 / n_in)
    return np.random.standard_normal((n_in, n_out)) * scale

# Test: empirical mean/variance of both initializations for a 256x256 layer.
W_xavier = xavier_init(256, 256)
W_he = he_init(256, 256)

print(f"Xavier: mean={np.mean(W_xavier):.4f}, var={np.var(W_xavier):.6f}")
print(f"He: mean={np.mean(W_he):.4f}, var={np.var(W_he):.6f}")
# Theoretical variances from the formulas, for comparison with the lines above.
print(f"\nTheoretisch: Xavier var = 2/(256+256) = {2/512:.6f}")
print(f"Theoretisch: He var = 2/256 = {2/256:.6f}")

In [None]:
# Opdracht 6b
def relu(x):
    """Apply the rectified linear unit element-wise: ``max(0, x)``."""
    floor = 0
    return np.maximum(floor, x)

# Push one random batch through a 10-layer ReLU network and record the
# activation variance after every layer, once per weight initialization.
n_layers = 10
n_neurons = 256
batch_size = 100

# Input batch; drawn once, before the per-initialization reseeding below,
# so every initialization sees the same input.
x = np.random.randn(batch_size, n_neurons)

# Three initializations, each a callable (n_in, n_out) -> weight matrix.
inits = {
    'N(0,1)': lambda n_in, n_out: np.random.randn(n_in, n_out),
    # Standard-normal draws multiplied by 0.01 (i.e. standard deviation 0.01).
    'N(0,0.01)': lambda n_in, n_out: np.random.randn(n_in, n_out) * 0.01,
    'He': he_init
}

# Maps initialization name -> list of variances (input first, then each layer).
results = {}

for name, init_fn in inits.items():
    # Reseed so each initialization scales the same underlying random draws.
    np.random.seed(42)
    weights = [init_fn(n_neurons, n_neurons) for _ in range(n_layers)]
    
    variances = [np.var(x)]
    h = x
    for W in weights:
        # Linear layer without bias followed by ReLU.
        h = relu(h @ W)
        variances.append(np.var(h))
    
    results[name] = variances

# Plot
plt.figure(figsize=(10, 6))
for name, variances in results.items():
    plt.plot(variances, 'o-', label=name)

plt.xlabel('Layer')
plt.ylabel('Variance')
plt.title('Activation Variance per Layer')
# Log scale: the variances differ by many orders of magnitude across inits.
plt.yscale('log')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

---

## Oefening 7: Standaardisatie - Oplossingen

In [None]:
# Opdracht 7a
class StandardScaler:
    """Standardize features to zero mean and unit standard deviation.

    Statistics are computed along axis 0 by ``fit``. A feature whose standard
    deviation is exactly zero (a constant column) is divided by 1 instead of
    0, so it maps to 0 rather than NaN or inf.
    """

    def fit(self, X):
        """Compute the per-feature mean and standard deviation of ``X``.

        Sets ``mean_``, ``std_`` (population std, ``ddof=0``) and ``scale_``
        (``std_`` with zeros replaced by 1).

        Returns:
            ``self``, so calls can be chained.
        """
        self.mean_ = np.mean(X, axis=0)
        self.std_ = np.std(X, axis=0)
        # Guard against division by zero for constant features.
        self.scale_ = np.where(self.std_ == 0, 1.0, self.std_)
        return self

    def transform(self, X):
        """Return ``(X - mean_) / scale_`` using the statistics from ``fit``."""
        return (X - self.mean_) / self.scale_

    def fit_transform(self, X):
        """Fit on ``X`` and return its standardized version."""
        self.fit(X)
        return self.transform(X)

    def inverse_transform(self, Z):
        """Map standardized data ``Z`` back to the original scale."""
        return Z * self.scale_ + self.mean_

# Test: three features with very different scales and offsets.
# The two 1-D arrays broadcast across the columns of the (100, 3) sample.
X = np.random.randn(100, 3) * np.array([10, 100, 1000]) + np.array([5, 50, 500])

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

print("Originele data:")
print(f"  Mean: {np.mean(X, axis=0)}")
print(f"  Std: {np.std(X, axis=0)}")

print("\nGestandaardiseerde data:")
print(f"  Mean: {np.mean(X_scaled, axis=0)}")
print(f"  Std: {np.std(X_scaled, axis=0)}")

# Verify that inverse_transform undoes transform (up to floating-point tolerance).
X_recovered = scaler.inverse_transform(X_scaled)
print(f"\nInverse transform correct: {np.allclose(X, X_recovered)}")

---

## Oefening 8: Wet van Grote Aantallen - Oplossingen

In [None]:
# Exercise 8a: law of large numbers for an exponential distribution.
# NOTE: this rebinds lam and n_samples, which earlier cells also assign.
lam = 2  # E[X] = 1/λ = 0.5
true_mean = 1 / lam

n_samples = 10000
# The first argument passed to np.random.exponential is 1/lam, not lam.
samples = np.random.exponential(1/lam, n_samples)

# Cumulative mean: running sum divided by the number of samples seen so far.
cumulative_mean = np.cumsum(samples) / np.arange(1, n_samples + 1)

plt.figure(figsize=(12, 5))
plt.plot(cumulative_mean, 'b-', alpha=0.7)
# Horizontal reference line at the theoretical mean.
plt.axhline(y=true_mean, color='r', linestyle='--', linewidth=2, label=f'E[X] = {true_mean}')
plt.xlabel('Aantal samples', fontsize=12)
plt.ylabel('Cumulatief gemiddelde', fontsize=12)
plt.title('Wet van Grote Aantallen: convergentie naar E[X]', fontsize=14)
# Log-scaled x axis.
plt.xscale('log')
plt.legend(fontsize=11)
plt.grid(True, alpha=0.3)
plt.show()

print(f"True mean: {true_mean}")
print(f"Sample mean (n={n_samples}): {np.mean(samples):.4f}")

---

## Bonusoefening: Variance Reduction - Oplossing

In [None]:
# Antithetic variates for E[e^U] with U ~ Uniform(0, 1).
# Exact value: E[e^U] = e - 1 ≈ 1.7183

true_value = np.e - 1
n_experiments = 1000
n_samples = 100

# Plain Monte Carlo: each estimate averages e^U over n_samples uniform draws.
estimates_standard = [
    np.mean(np.exp(np.random.uniform(0, 1, n_samples)))
    for _ in range(n_experiments)
]

# Antithetic variates: draw half as many uniforms and pair every u with
# 1 - u, so each estimate still uses n_samples function evaluations.
antithetic_draws = (
    np.random.uniform(0, 1, n_samples // 2) for _ in range(n_experiments)
)
estimates_antithetic = [
    np.mean((np.exp(u) + np.exp(1 - u)) / 2) for u in antithetic_draws
]

print(f"True value: {true_value:.4f}")
print(f"\nStandard MC:")
print(f"  Mean estimate: {np.mean(estimates_standard):.4f}")
print(f"  Variance: {np.var(estimates_standard):.6f}")
print(f"\nAntithetic variates:")
print(f"  Mean estimate: {np.mean(estimates_antithetic):.4f}")
print(f"  Variance: {np.var(estimates_antithetic):.6f}")
print(f"\nVariance reduction: {np.var(estimates_standard)/np.var(estimates_antithetic):.2f}x")

---

**Mathematical Foundations** | Les 9 Oplossingen | IT & Artificial Intelligence

---