# Les 8: Labo - Oplossingen

**Mathematical Foundations - IT & Artificial Intelligence**

---

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Print NumPy arrays with 4 decimals and without scientific notation.
np.set_printoptions(precision=4, suppress=True)
# Seed NumPy's global random generator so the random draws in the cells
# below are reproducible from run to run.
np.random.seed(42)

print("Libraries geladen!")

---

## Oefening 1: Basiskansrekening - Oplossingen

In [None]:
# Exercise 1a - Two dice
#
# Enumerate the full sample space of two fair six-sided dice and obtain
# every probability by counting the favourable outcomes.

# Sample space: every ordered pair (die 1, die 2)
faces = range(1, 7)
outcomes = [(first, second) for first in faces for second in faces]
n_total = len(outcomes)  # 36

# 1. P(total = 7)
sum_7 = [pair for pair in outcomes if sum(pair) == 7]
p_sum_7 = len(sum_7) / n_total
print(f"1. P(totaal = 7) = {len(sum_7)}/36 = {p_sum_7:.4f}")
print(f"   Combinaties: {sum_7}")

# 2. P(total = 12)
sum_12 = [pair for pair in outcomes if sum(pair) == 12]
p_sum_12 = len(sum_12) / n_total
print(f"\n2. P(totaal = 12) = {len(sum_12)}/36 = {p_sum_12:.4f}")

# 3. P(total > 9)
sum_gt_9 = [pair for pair in outcomes if sum(pair) > 9]
p_sum_gt_9 = len(sum_gt_9) / n_total
print(f"\n3. P(totaal > 9) = {len(sum_gt_9)}/36 = {p_sum_gt_9:.4f}")

# 4. P(at least one six): a pair qualifies when either die shows a 6
at_least_one_6 = [pair for pair in outcomes if 6 in pair]
p_at_least_6 = len(at_least_one_6) / n_total
print(f"\n4. P(minstens één 6) = {len(at_least_one_6)}/36 = {p_at_least_6:.4f}")

In [None]:
# Simulation check
# Roll each die n_sims times and compare the empirical frequencies with the
# exact probabilities computed in exercise 1a (p_sum_7, p_sum_12,
# p_sum_gt_9, p_at_least_6).
n_sims = 100000
die1 = np.random.randint(1, 7, n_sims)
die2 = np.random.randint(1, 7, n_sims)
totals = die1 + die2

# The mean of a boolean array is the fraction of True entries, i.e. the
# empirical probability of the event.
print("Simulatie verificatie (100,000 worpen):")
print(f"  P(totaal = 7): theorie={p_sum_7:.4f}, sim={np.mean(totals == 7):.4f}")
print(f"  P(totaal = 12): theorie={p_sum_12:.4f}, sim={np.mean(totals == 12):.4f}")
print(f"  P(totaal > 9): theorie={p_sum_gt_9:.4f}, sim={np.mean(totals > 9):.4f}")
print(f"  P(minstens één 6): theorie={p_at_least_6:.4f}, sim={np.mean((die1==6)|(die2==6)):.4f}")

In [None]:
# Exercise 1b - Heatmap
# Every (die 1, die 2) combination is equally likely, so the matrix is
# constant at 1/36.
prob_matrix = np.ones((6, 6)) / 36

plt.figure(figsize=(8, 6))
plt.imshow(prob_matrix, cmap='Blues')
plt.colorbar(label='Kans')

# Annotate each cell with the total of the two dice.
# i is the row (die 1, y-axis) and j the column (die 2, x-axis); both are
# zero-based, hence the +2 to obtain the actual total.
for i in range(6):
    for j in range(6):
        plt.text(j, i, f'{i+j+2}', ha='center', va='center', fontsize=12)

plt.xlabel('Dobbelsteen 2', fontsize=12)
plt.ylabel('Dobbelsteen 1', fontsize=12)
# Relabel the zero-based tick positions with the die faces 1..6.
plt.xticks(range(6), range(1, 7))
plt.yticks(range(6), range(1, 7))
plt.title('Totalen van twee dobbelstenen (elke cel heeft kans 1/36)', fontsize=12)
plt.show()

---

## Oefening 2: Discrete Verdelingen - Oplossingen

In [None]:
# Exercise 2a - Binomial distribution
# X ~ Binomial(n, p) counts the defective chips in a batch.
n = 100  # number of chips
p = 0.02  # defect rate per chip

# 1. P(X = 2)
p_exact_2 = stats.binom.pmf(2, n, p)
print(f"1. P(exact 2 defect) = {p_exact_2:.4f}")

# 2. P(X < 5) = P(X ≤ 4): the CDF is inclusive, so evaluate it at 4
p_less_5 = stats.binom.cdf(4, n, p)
print(f"2. P(minder dan 5 defect) = {p_less_5:.4f}")

# 3. P(X > 5) = 1 - P(X ≤ 5)
p_more_5 = 1 - stats.binom.cdf(5, n, p)
print(f"3. P(meer dan 5 defect) = {p_more_5:.4f}")

# Visualisation: bar chart of the PMF for k = 0..14
k = np.arange(0, 15)
pmf = stats.binom.pmf(k, n, p)

plt.figure(figsize=(10, 5))
plt.bar(k, pmf, color='steelblue', edgecolor='black')
plt.xlabel('Aantal defecte chips', fontsize=12)
plt.ylabel('Kans', fontsize=12)
plt.title(f'Binomiaal(n={n}, p={p})', fontsize=14)
plt.xticks(k)
plt.grid(axis='y', alpha=0.3)
plt.show()

In [None]:
# Opdracht 2b - Categorical sample
def categorical_sample(probs, n_samples=1):
    """Draw samples from a categorical distribution via inverse-CDF lookup.

    Args:
        probs: Sequence of non-negative category probabilities that sum
            to 1 (within floating-point tolerance).
        n_samples: Number of samples to draw.

    Returns:
        Integer array of ``n_samples`` category indices, each in
        ``[0, len(probs) - 1]``.

    Raises:
        ValueError: If any probability is negative or the probabilities
            do not sum to 1 (this also covers an empty ``probs``).
    """
    probs = np.asarray(probs, dtype=float)
    # Explicit raises instead of ``assert`` so validation survives ``python -O``.
    if np.any(probs < 0):
        raise ValueError("Kansen mogen niet negatief zijn")
    if not np.isclose(np.sum(probs), 1.0):
        raise ValueError("Kansen moeten sommeren tot 1")

    # Cumulative distribution. Dividing by the last entry makes it end at
    # exactly 1.0, so rounding error can never push a draw past the last
    # category (which would yield the out-of-range index len(probs)).
    cdf = np.cumsum(probs)
    cdf = cdf / cdf[-1]

    # Uniform draws in [0, 1)
    u = np.random.random(n_samples)

    # side="right" selects the first category whose cumulative probability
    # strictly exceeds u, so zero-probability categories are never chosen
    # (not even for u == 0).
    return np.searchsorted(cdf, u, side="right")

# Sanity check: with many draws the measured frequency of each category
# should approach its target probability.
probs = [0.1, 0.3, 0.4, 0.2]
samples = categorical_sample(probs, 10000)
measured = [np.mean(samples == category) for category in range(4)]

print("Categorische verdeling test:")
print(f"Theoretische kansen: {probs}")
print(f"Gemeten frequenties: {measured}")

---

## Oefening 3: Normale Verdeling - Oplossingen

In [None]:
# Exercise 3a - IQ scores
# IQ is modelled as Normal(mu, sigma); freeze the distribution once and
# query it for each question.
mu, sigma = 100, 15
iq_dist = stats.norm(mu, sigma)

# 1. P(IQ > 130): upper tail as the complement of the CDF
p_above_130 = 1 - iq_dist.cdf(130)
print(f"1. P(IQ > 130) = {p_above_130:.4f} = {p_above_130*100:.2f}%")

# 2. P(85 < IQ < 115): difference of the CDF at the interval's endpoints
cdf_upper = iq_dist.cdf(115)
cdf_lower = iq_dist.cdf(85)
p_between = cdf_upper - cdf_lower
print(f"2. P(85 < IQ < 115) = {p_between:.4f} = {p_between*100:.2f}%")

# 3. IQ exceeded by only 1% of people (99th percentile), via the inverse CDF
iq_99 = iq_dist.ppf(0.99)
print(f"3. 99e percentiel IQ = {iq_99:.1f}")

# 4. Expected number of people with IQ > 145 in a group of 1000
p_above_145 = 1 - iq_dist.cdf(145)
expected = 1000 * p_above_145
print(f"4. P(IQ > 145) = {p_above_145:.6f}")
print(f"   Verwacht uit 1000: {expected:.2f} mensen")

In [None]:
# Opdracht 3b - Normale PDF implementatie
def normal_pdf(x, mu, sigma):
    """Evaluate the density of Normal(mu, sigma) at x.

    Implements f(x) = exp(-(x - mu)^2 / (2 sigma^2)) / sqrt(2 pi sigma^2).
    ``x`` may be a scalar or a NumPy array; the result has the same shape.
    """
    variance = sigma**2
    deviation = x - mu
    normaliser = 1 / np.sqrt(2 * np.pi * variance)
    return normaliser * np.exp(-(deviation**2) / (2 * variance))

# Test: evaluate the hand-written PDF and SciPy's reference implementation
# on the same grid for the standard normal (mu=0, sigma=1).
x_test = np.linspace(-3, 3, 100)
my_pdf = normal_pdf(x_test, 0, 1)
scipy_pdf = stats.norm.pdf(x_test, 0, 1)

# Plot both curves on top of each other (solid blue vs dashed red).
plt.figure(figsize=(10, 5))
plt.plot(x_test, my_pdf, 'b-', linewidth=2, label='Mijn implementatie')
plt.plot(x_test, scipy_pdf, 'r--', linewidth=2, label='Scipy')
plt.xlabel('x')
plt.ylabel('f(x)')
plt.title('Vergelijking normale PDF implementaties')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

# Largest absolute pointwise difference between the two implementations.
print(f"Maximum verschil: {np.max(np.abs(my_pdf - scipy_pdf)):.2e}")

---

## Oefening 4: Centrale Limietstelling - Oplossingen

In [None]:
# Exercise 4a - Central limit theorem with the exponential distribution
# One subplot per sample size n: histogram of n_samples sample means with
# the normal approximation drawn on top.
fig, axes = plt.subplots(2, 3, figsize=(15, 8))

n_samples = 10000
lam = 1  # λ = 1

# NOTE: the loop variable n rebinds the module-level n set in exercise 2a,
# and samples/x are rebound at module level as well.
for ax, n in zip(axes.flatten(), [1, 2, 5, 10, 30, 100]):
    # Draw an (n_samples, n) matrix of exponential variates (scale = 1/λ)
    # and average each row, giving n_samples sample means of size n.
    samples = np.random.exponential(1/lam, (n_samples, n))
    means = np.mean(samples, axis=1)

    ax.hist(means, bins=50, density=True, alpha=0.7, color='steelblue', edgecolor='black')

    # Theoretical normal approximation
    # E[Exp(λ)] = 1/λ, Var[Exp(λ)] = 1/λ²
    # so the sample mean has mean 1/λ and standard deviation (1/λ)/sqrt(n).
    mu_theory = 1/lam
    sigma_theory = (1/lam) / np.sqrt(n)
    x = np.linspace(0, 3, 100)
    ax.plot(x, stats.norm.pdf(x, mu_theory, sigma_theory), 'r-', linewidth=2)

    ax.set_title(f'n = {n}')
    ax.set_xlim(0, 3)

plt.suptitle('CLT: gemiddelde van n exponentiële variabelen', fontsize=14)
plt.tight_layout()
plt.show()

---

## Oefening 5: Conditionele Kans - Oplossingen

In [None]:
# Exercise 5a - Spam filter with Bayes' rule
# Known quantities: the prior P(spam) and the likelihood of the word
# "gratis" appearing in each class of email.
P_spam = 0.3
P_not_spam = 1 - P_spam
P_gratis_given_spam, P_gratis_given_not_spam = 0.8, 0.1

# Joint probabilities P(gratis, spam) and P(gratis, not spam)
joint_spam = P_gratis_given_spam * P_spam
joint_not_spam = P_gratis_given_not_spam * P_not_spam

# Law of total probability: P(gratis) is the sum of the two joints
P_gratis = joint_spam + joint_not_spam

# Bayes: P(spam|gratis) = P(gratis, spam) / P(gratis)
P_spam_given_gratis = joint_spam / P_gratis

# The two posteriors are complementary
P_not_spam_given_gratis = 1 - P_spam_given_gratis

print("Spamfilter analyse:")
print(f"P(gratis) = {P_gratis:.4f}")
print(f"\n1. P(spam | 'gratis') = {P_spam_given_gratis:.4f} = {P_spam_given_gratis*100:.1f}%")
print(f"2. P(niet spam | 'gratis') = {P_not_spam_given_gratis:.4f} = {P_not_spam_given_gratis*100:.1f}%")

In [None]:
# Exercise 5b - Simulation check
# Generate labelled emails according to the probabilities from exercise 5a
# and estimate P(spam | 'gratis') from the simulated data.
n_emails = 100000

# Generate emails: each one is spam with probability P_spam
is_spam = np.random.random(n_emails) < P_spam

# Generate whether each email contains "gratis", using the likelihood that
# belongs to its class (one fresh uniform draw per email in that class)
has_gratis = np.zeros(n_emails, dtype=bool)
has_gratis[is_spam] = np.random.random(np.sum(is_spam)) < P_gratis_given_spam
has_gratis[~is_spam] = np.random.random(np.sum(~is_spam)) < P_gratis_given_not_spam

# Compute P(spam | gratis) empirically: among the emails containing
# "gratis", the fraction that is spam
emails_with_gratis = has_gratis
spam_with_gratis = is_spam & has_gratis

P_spam_given_gratis_sim = np.sum(spam_with_gratis) / np.sum(emails_with_gratis)

print("\nSimulatie verificatie:")
print(f"P(spam | 'gratis'): theorie={P_spam_given_gratis:.4f}, sim={P_spam_given_gratis_sim:.4f}")

---

## Oefening 6: Softmax en Kansen - Oplossingen

In [None]:
# Opdracht 6a - Softmax implementatie
def softmax(x, axis=None):
    """Numerically stable softmax.

    Args:
        x: Array-like of logits.
        axis: Axis along which to normalise. The default ``None``
            normalises over all elements (the behaviour for a single logit
            vector); pass e.g. ``axis=-1`` to normalise every row of a
            batch of logits independently.

    Returns:
        Array with the same shape as ``x`` whose entries sum to 1 along
        ``axis``.
    """
    x = np.asarray(x)
    # Subtracting the maximum leaves the result unchanged mathematically
    # but keeps np.exp from overflowing on large logits.
    # keepdims=True lets the reductions broadcast against x for any axis.
    exp_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)

# Test with several inputs, including extreme magnitudes that would
# overflow/underflow a naive exp() without the max-subtraction.
test_inputs = [
    [1, 2, 3],
    [0, 0, 0],
    [1000, 1001, 1002],  # Large values
    [-1000, -999, -998],  # Very negative values
]

# For each input, report whether every output is strictly positive and
# print the sum of the outputs.
# NOTE: the loop rebinds the module-level names x and p.
print("Softmax eigenschappen:")
for x in test_inputs:
    p = softmax(x)
    print(f"\nInput: {x}")
    print(f"Output: {p}")
    print(f"  Alle positief: {np.all(p > 0)}")
    print(f"  Som = {np.sum(p):.10f}")

In [None]:
# Exercise 6b - Temperature effect
# Apply softmax to the same logits divided by different temperatures T and
# show the resulting distributions side by side.
logits = np.array([1.0, 2.0, 0.5, 0.3])
temperatures = [0.5, 1.0, 2.0, 5.0]

fig, axes = plt.subplots(1, 4, figsize=(16, 4))

# One bar chart per temperature; the y-axis is fixed to [0, 1] so the
# panels are directly comparable.
for ax, T in zip(axes, temperatures):
    probs = softmax(logits / T)
    ax.bar(range(len(probs)), probs, color='steelblue', edgecolor='black')
    ax.set_ylim(0, 1)
    ax.set_xlabel('Klasse')
    ax.set_ylabel('Kans')
    ax.set_title(f'T = {T}')
    ax.set_xticks(range(len(probs)))

plt.suptitle(f'Softmax temperatuur effect (logits = {list(logits)})', fontsize=14)
plt.tight_layout()
plt.show()

# Summary of the observations printed for the student.
print("Observaties:")
print("- T < 1: Scherpere verdeling (meer confident)")
print("- T = 1: Standaard softmax")
print("- T > 1: Vlakkere verdeling (meer uniform)")
print("- T → 0: One-hot (argmax)")
print("- T → ∞: Uniform verdeling")

---

## Oefening 7: Classifier Calibratie - Oplossingen

In [None]:
# Exercise 7a - Calibration analysis
from sklearn.datasets import fetch_openml
from sklearn.neural_network import MLPClassifier

# Load MNIST and keep only the first 10000 samples.
# Features are divided by 255.0 (presumably to scale 0..255 pixel values to
# [0, 1] - confirm against the dataset); labels are converted to int.
print("MNIST laden...")
mnist = fetch_openml('mnist_784', version=1, as_frame=False, parser='auto')
X, y = mnist.data[:10000] / 255.0, mnist.target[:10000].astype(int)
# Fixed split: first 8000 samples for training, remaining 2000 for testing.
X_train, X_test = X[:8000], X[8000:]
y_train, y_test = y[:8000], y[8000:]

# Train a multi-layer perceptron with one hidden layer of 128 units
print("Model trainen...")
model = MLPClassifier(hidden_layer_sizes=(128,), max_iter=20, random_state=42)
model.fit(X_train, y_train)
print(f"Test accuracy: {model.score(X_test, y_test):.4f}")

# Obtain class probabilities; the confidence of a prediction is the largest
# class probability, and `correct` marks the predictions matching the label.
probs = model.predict_proba(X_test)
predictions = model.predict(X_test)
confidence = np.max(probs, axis=1)
correct = (predictions == y_test)

In [None]:
# Calibration plot and ECE
# Split [0, 1] into n_bins equal-width confidence bins and assign every
# test prediction to a bin.
n_bins = 10
bins = np.linspace(0, 1, n_bins + 1)
# np.digitize returns 1-based bin numbers, hence the -1; the clip keeps
# values on the outer edge (confidence == 1.0) inside the last bin.
bin_indices = np.digitize(confidence, bins) - 1
bin_indices = np.clip(bin_indices, 0, n_bins - 1)

# Per-bin statistics: mean accuracy, mean confidence and sample count.
bin_accuracies = []
bin_confidences = []
bin_counts = []

for i in range(n_bins):
    mask = (bin_indices == i)
    if np.sum(mask) > 0:
        bin_accuracies.append(np.mean(correct[mask]))
        bin_confidences.append(np.mean(confidence[mask]))
        bin_counts.append(np.sum(mask))
    else:
        # Empty bin: record accuracy 0 and the bin midpoint as placeholders
        # so the lists keep one entry per bin for plotting.
        bin_accuracies.append(0)
        bin_confidences.append((bins[i] + bins[i+1]) / 2)
        bin_counts.append(0)

# ECE (Expected Calibration Error): the |accuracy - confidence| gap of each
# bin, weighted by the fraction of samples in that bin. Empty bins are
# skipped, so their placeholder values do not contribute.
ece = 0
total_samples = len(y_test)
for i in range(n_bins):
    if bin_counts[i] > 0:
        ece += (bin_counts[i] / total_samples) * abs(bin_accuracies[i] - bin_confidences[i])

# Plot: per-bin accuracy as bars at the bin centres, with the diagonal
# (accuracy == confidence) as the perfect-calibration reference.
plt.figure(figsize=(10, 6))
bin_centers = (bins[:-1] + bins[1:]) / 2
plt.bar(bin_centers, bin_accuracies, width=0.08, alpha=0.7, label='Accuracy per bin')
plt.plot([0, 1], [0, 1], 'r--', linewidth=2, label='Perfect calibration')
plt.xlabel('Confidence', fontsize=12)
plt.ylabel('Accuracy', fontsize=12)
plt.title(f'Calibratie Plot (ECE = {ece:.4f})', fontsize=14)
plt.legend(fontsize=11)
plt.grid(True, alpha=0.3)
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.show()

print(f"Expected Calibration Error (ECE): {ece:.4f}")

---

## Oefening 8: Sampling - Oplossingen

In [None]:
# Opdracht 8a - Inverse transform sampling
def sample_exponential(lam, n_samples):
    """Draw samples from Exp(λ) using inverse transform sampling.

    The CDF F(x) = 1 - exp(-λx) is inverted to X = -ln(1 - U) / λ with
    U ~ Uniform[0, 1).

    Args:
        lam: Rate parameter λ; must be strictly positive.
        n_samples: Number of samples to draw.

    Returns:
        Array of ``n_samples`` finite, non-negative samples.

    Raises:
        ValueError: If ``lam`` is not strictly positive.
    """
    if np.any(np.asarray(lam) <= 0):
        raise ValueError("lam moet strikt positief zijn")

    u = np.random.uniform(0, 1, n_samples)
    # u lies in [0, 1): it can be exactly 0 but never 1. Using ln(1 - u)
    # keeps the logarithm's argument in (0, 1], so the result is always
    # finite, whereas ln(u) would give inf for u == 0. log1p(-u) evaluates
    # ln(1 - u) accurately for small u.
    return -np.log1p(-u) / lam

# Test: draw 10000 samples with λ = 2.
# NOTE: this rebinds the module-level names lam, samples and x.
lam = 2
samples = sample_exponential(lam, 10000)

# Compare with the theoretical PDF f(x) = λ·exp(-λx)
x = np.linspace(0, 4, 100)
pdf_theory = lam * np.exp(-lam * x)

# Density-normalised histogram of the samples with the exact PDF on top.
plt.figure(figsize=(10, 5))
plt.hist(samples, bins=50, density=True, alpha=0.7, label='Samples')
plt.plot(x, pdf_theory, 'r-', linewidth=2, label=f'Exp(λ={lam}) PDF')
plt.xlabel('x')
plt.ylabel('Dichtheid')
plt.title('Inverse Transform Sampling voor Exponentiële Verdeling')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

# The mean of Exp(λ) is 1/λ; the sample mean should be close to it.
print(f"Theoretisch gemiddelde: {1/lam}")
print(f"Sample gemiddelde: {np.mean(samples):.4f}")

---

## Bonusoefening: Naive Bayes - Oplossing

In [None]:
class NaiveBayes:
    """Bernoulli naive Bayes classifier for binary (0/1) features.

    After ``fit`` the following attributes are available:
      - ``classes``: sorted unique class labels
      - ``n_classes`` / ``n_features``: number of classes / features
      - ``class_prior``: P(class), shape (n_classes,)
      - ``feature_prob``: P(feature=1 | class), shape (n_classes, n_features)
    """

    def fit(self, X, y):
        """Learn P(class) and P(feature=1 | class) from the training data.

        Args:
            X: Array-like of shape (n_samples, n_features) with 0/1 entries.
            y: Array-like of n_samples class labels.

        Returns:
            The fitted estimator itself, so calls can be chained.
        """
        # Accept plain lists as well as arrays.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        self.classes = np.unique(y)
        self.n_classes = len(self.classes)
        self.n_features = X.shape[1]

        # P(class): relative frequency of every label
        self.class_prior = np.array([np.mean(y == c) for c in self.classes])

        # P(feature=1 | class) with Laplace smoothing
        self.feature_prob = np.zeros((self.n_classes, self.n_features))
        for i, c in enumerate(self.classes):
            X_c = X[y == c]
            # Laplace smoothing: (count + 1) / (total + 2) keeps every
            # probability strictly inside (0, 1), so the logarithms taken
            # in predict_proba are always finite.
            self.feature_prob[i] = (np.sum(X_c, axis=0) + 1) / (len(X_c) + 2)
        return self

    def predict_proba(self, X):
        """Compute P(class | features) for every sample.

        Args:
            X: Array-like of shape (n_samples, n_features) with 0/1 entries.

        Returns:
            Array of shape (n_samples, n_classes); every row sums to 1.
        """
        X = np.asarray(X, dtype=float)

        log_on = np.log(self.feature_prob)        # log P(feature=1 | class)
        log_off = np.log(1 - self.feature_prob)   # log P(feature=0 | class)

        # log P(class) + sum_j log P(feature_j | class) for all classes at
        # once: active features (X) pick log_on, inactive ones (1 - X) pick
        # log_off. The matrix products perform the sum over features.
        log_probs = X @ log_on.T + (1 - X) @ log_off.T + np.log(self.class_prior)

        # Normalise to probabilities; subtracting the row maximum first
        # prevents underflow in exp().
        log_probs -= np.max(log_probs, axis=1, keepdims=True)
        probs = np.exp(log_probs)
        probs /= np.sum(probs, axis=1, keepdims=True)
        return probs

    def predict(self, X):
        """Predict the most probable class label for every sample."""
        probs = self.predict_proba(X)
        return self.classes[np.argmax(probs, axis=1)]

# Evaluate on binarised MNIST: pixels brighter than 0.5 become 1.0, all
# others 0.0. The 8000/2000 split matches the one used for y_train/y_test.
X_binary = np.where(X > 0.5, 1.0, 0.0)
X_train_bin = X_binary[:8000]
X_test_bin = X_binary[8000:]

nb = NaiveBayes()
nb.fit(X_train_bin, y_train)

# Accuracy is the fraction of test predictions that match the labels.
predictions = nb.predict(X_test_bin)
accuracy = (predictions == y_test).mean()

print(f"Naive Bayes accuracy op MNIST: {accuracy:.4f}")

---

**Mathematical Foundations** | Les 8 Oplossingen | IT & Artificial Intelligence

---