# Lesson 9: Latent Models

*Teachers:* Fares Schulz, Lina Campanella

In this course we will cover:
1. 
2. 

In [None]:
import random
import matplotlib.pyplot as plt
import numpy as np
import torch.distributions as distribution
import seaborn as sns
import torch
from torch.utils.data import random_split

SEED = 42

def set_seed(seed=42):
    """Seed Python's `random`, NumPy's global RNG and PyTorch with the same value."""
    # Same order as before: stdlib first, then NumPy, then torch.
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
set_seed(SEED)

## Create data

In [None]:
from sklearn.datasets import make_blobs

# 400 two-dimensional points drawn around 4 blob centres; the true labels are discarded.
X, _ = make_blobs(n_samples=400, centers=4, cluster_std=0.80, random_state=0)

fig, ax = plt.subplots(figsize=(8, 6))

# X has exactly two columns, so its transpose unpacks into the x and y coordinates.
ax.scatter(*X.T, c='coral', s=50, edgecolor='w', alpha=0.8, zorder=2)
ax.grid(color='black', linewidth=0.5)

# Text elements
ax.set_title('Dataset with 4 unknown classes', color='black')
ax.set_xlabel('Feature 1', color='black')
ax.set_ylabel('Feature 2', color='black')
ax.tick_params(colors='black')

# Fixed viewport so the later figures are directly comparable
ax.set_xlim(left=-4, right=4.5)
ax.set_ylim(bottom=-2, top=11)
plt.show()

## Simple Gaussian does not fit well

In [None]:
from scipy.stats import multivariate_normal

# Fit one Gaussian by hand: sample mean and sample covariance of the data
mu = np.mean(X, axis=0)
sigma = np.cov(X, rowvar=False)

# Evaluation grid: data range padded by 1 (by 2 at the top of the y-axis), 100 x 100 nodes
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 2
xx, yy = np.meshgrid(
    np.linspace(x_min, x_max, 100),
    np.linspace(y_min, y_max, 100),
)

# Density of the fitted Gaussian at every grid node
pos = np.dstack((xx, yy))
rv = multivariate_normal(mu, sigma)
z = rv.pdf(pos)

# Figure with an explicit white background
fig, ax = plt.subplots(figsize=(8, 6))
fig.patch.set_facecolor('white')
ax.set_facecolor('white')

# The points carry zorder=2, so they stay visible above the filled contours
ax.scatter(*X.T, c='coral', s=40, edgecolor='w', alpha=0.8, zorder=2)
ax.contourf(xx, yy, z, cmap='Oranges', alpha=1)
ax.grid(alpha=0.3)

ax.set_title('Simple Gaussian Distribution Fit to Data', color='black')
ax.set_xlabel('Feature 1', color='black')
ax.set_ylabel('Feature 2', color='black')
ax.tick_params(colors='black')

ax.set_xlim(left=-4, right=4.5)
ax.set_ylim(bottom=-2, top=11)
fig.tight_layout()


## Instead of one Gaussian, use a mixture of Gaussians

In [None]:
from matplotlib.patches import Ellipse
import matplotlib.pyplot as plt

def plot_gmm_step(X, means, covariances, weights, responsibilities=None, title="", classes=False):
    """
    Visualize GMM components and optionally responsibilities

    Draws filled contours of the mixture density
    ``sum_k weights[k] * N(x | means[k], covariances[k])`` on a 100 x 100 grid
    spanning the data, and scatters the points of ``X`` on top.

    Parameters
    ----------
    X : array
        Data points; columns 0 and 1 are used as the plot coordinates.
    means, covariances, weights : sequences
        Per-component parameters, all indexed by the same component index.
    responsibilities : array, optional
        One row per point and one column per component. Only read when
        ``classes`` is truthy.
    title : str
        Figure title.
    classes : bool
        When truthy, each point is coloured by its most responsible component
        and sized by the square of that responsibility; otherwise all points
        are drawn in one colour.

    Raises
    ------
    ValueError
        If ``classes`` is truthy but ``responsibilities`` is None.
    """
    if classes and responsibilities is None:
        raise ValueError("responsibilities are required when classes=True")

    fig, ax = plt.subplots(figsize=(8, 6))

    # Create grid for plotting
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 2
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                         np.linspace(y_min, y_max, 100))
    pos = np.dstack((xx, yy))

    # Compute mixture PDF as the weighted sum of the component densities
    z = np.zeros_like(xx)
    for mean, covariance, weight in zip(means, covariances, weights):
        z += weight * multivariate_normal(mean, covariance).pdf(pos)

    colors = ['gold', 'darkorange', 'orangered', 'red']

    if classes:
        responsibilities = np.asarray(responsibilities)
        labels = responsibilities.argmax(axis=1)
        confidence = responsibilities.max(axis=1)
        # Scatter plot each cluster with size based on responsibility
        for i in range(len(means)):
            mask = labels == i
            ax.scatter(X[mask, 0], X[mask, 1],
                       c=colors[i % len(colors)],  # cycle the palette beyond 4 components
                       s=50 * confidence[mask] ** 2,
                       edgecolor='w', alpha=0.8, label=f'Class {i+1}', zorder=3)
        # The scatter labels are only visible once a legend is drawn
        ax.legend(loc='upper right')
    else:
        ax.scatter(X[:, 0], X[:, 1], c='coral', s=40,
                   edgecolor='w', alpha=0.8, zorder=3)

    # Plot contours (the points use zorder=3 and therefore stay on top)
    ax.contourf(xx, yy, z, cmap='Oranges', alpha=1)
    ax.set_xlabel('Feature 1', fontsize=12)
    ax.set_ylabel('Feature 2', fontsize=12)
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.grid(True, alpha=0.3)
    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
    fig.tight_layout()


# Initialize GMM parameters randomly
np.random.seed(42)
n_components = 4

# Deliberately poor starting point: standard-normal means, isotropic covariance 2*I,
# and equal mixing weights
initial_means = np.random.randn(n_components, 2)
initial_covariances = np.array([2 * np.eye(2)] * n_components)
initial_weights = np.full(n_components, 1 / n_components)

print("Step 0: Random Initialization")
plot_gmm_step(
    X,
    means=initial_means,
    covariances=initial_covariances,
    weights=initial_weights,
    title="Step 0: Random Initialization",
)

In [None]:
def e_step(X, means, covariances, weights):
    """
    Expectation step: compute responsibilities (soft assignments)

    For every point x_n and component c the responsibility is
    ``weights[c] * N(x_n | means[c], covariances[c])`` divided by the sum of
    that quantity over all components.

    The computation is done in log-space. Far from every component the
    densities underflow to exactly 0, and normalising them directly would
    divide 0 by 0 and return NaN rows.

    Parameters
    ----------
    X : array with one row per data point
    means, covariances, weights : per-component parameters

    Returns
    -------
    Array of shape (N, C) with N = X.shape[0] and C = len(means); the entries
    of each row sum to 1.
    """
    from scipy.special import logsumexp

    N = X.shape[0]  # number of data points
    C = len(means)  # number of components

    # A zero weight legitimately maps to -inf (zero responsibility); silence the warning.
    with np.errstate(divide='ignore'):
        log_weights = np.log(np.asarray(weights, dtype=float))

    # log(weight_c) + log N(x_n | mean_c, cov_c) for each point and component
    log_joint = np.empty((N, C))
    for c in range(C):
        rv = multivariate_normal(means[c], covariances[c])
        log_joint[:, c] = log_weights[c] + rv.logpdf(X)

    # Normalize to get responsibilities (posterior probabilities)
    log_evidence = logsumexp(log_joint, axis=1, keepdims=True)
    return np.exp(log_joint - log_evidence)

# Run E-step on the random initialization
responsibilities = e_step(
    X,
    means=initial_means,
    covariances=initial_covariances,
    weights=initial_weights,
)

print("\nStep 1: E-step - Assign points to clusters (soft assignment)")
# `classes` is left at its default, so the points are drawn in a single colour.
plot_gmm_step(
    X,
    initial_means,
    initial_covariances,
    initial_weights,
    responsibilities=responsibilities,
    title="Step 1: E-step - Compute Responsibilities",
)

In [None]:
def m_step(X, responsibilities):
    """
    Maximization step: re-estimate weights, means and covariances.

    Each component's parameters are the responsibility-weighted statistics of
    the data: its weight is its share of the total responsibility mass, its
    mean the weighted average of the points, and its covariance the weighted
    average of the outer products of the centred points.

    Returns
    -------
    (means, covariances, weights) with shapes (C, d), (C, d, d) and (C,).
    """
    n_points, n_dims = X.shape
    n_comp = responsibilities.shape[1]

    # Effective number of points assigned to each component
    Nk = responsibilities.sum(axis=0)
    weights = Nk / n_points

    means = np.zeros((n_comp, n_dims))
    covariances = np.zeros((n_comp, n_dims, n_dims))

    for c, mass in enumerate(Nk):
        resp_col = responsibilities[:, c:c+1]  # kept 2-D so it broadcasts across features

        means[c] = (resp_col * X).sum(axis=0) / mass

        centred = X - means[c]
        covariances[c] = (resp_col * centred).T @ centred / mass

    return means, covariances, weights

# Run M-step using the responsibilities from the E-step above
new_means, new_covariances, new_weights = m_step(X, responsibilities=responsibilities)

print("\nStep 2: M-step - Update parameters based on responsibilities")
plot_gmm_step(
    X,
    new_means,
    new_covariances,
    new_weights,
    responsibilities=responsibilities,
    title="Step 2: M-step - Updated Parameters",
)

In [None]:
def train_gmm(X, n_iterations=100, means=None, covariances=None, weights=None, classes=False,
              n_prior_iterations=0):
    """
    Run EM for ``n_iterations`` rounds, plot the result and return the parameters.

    Parameters
    ----------
    X : data array passed through to ``e_step`` / ``m_step``
    n_iterations : int
        Number of E-step + M-step rounds to run here.
    means, covariances, weights : initial parameters (required)
    classes : bool
        Forwarded to ``plot_gmm_step``.
    n_prior_iterations : int
        EM rounds already applied to the initial parameters before this call.
        Only used so the plot title reports the true total.

    Returns
    -------
    (means, covariances, weights) after the last M-step.

    Raises
    ------
    ValueError
        If any of the initial parameters is missing.
    """
    if means is None or covariances is None or weights is None:
        raise ValueError("train_gmm needs initial means, covariances and weights")

    responsibilities = None
    for _ in range(n_iterations):
        # E-step
        responsibilities = e_step(X, means, covariances, weights)

        # M-step
        means, covariances, weights = m_step(X, responsibilities)

    if responsibilities is None:
        # No round was run (n_iterations == 0): still give the plot something to show
        responsibilities = e_step(X, means, covariances, weights)

    # The title reports the rounds actually performed, not a hard-coded offset
    total_iterations = n_prior_iterations + n_iterations
    plot_gmm_step(X, means, covariances, weights, responsibilities, classes=classes,
                  title=f"Gaussian Mixture Model with {len(means)} components, {total_iterations} iterations")

    return means, covariances, weights

# Train GMM for a few iterations. The starting parameters already went through one
# manual E/M round above, hence n_prior_iterations=1.
final_means, final_covariances, final_weights = train_gmm(
    X, 99, new_means, new_covariances, new_weights, classes=False, n_prior_iterations=1)

In [None]:
def predict_gmm(X, means, covariances, weights):
    """
    Hard-assign every point to its most responsible component.

    Returns the index of the largest responsibility per row, together with
    the full responsibility matrix produced by ``e_step``.
    """
    posterior = e_step(X, means, covariances, weights)
    hard_labels = np.argmax(posterior, axis=1)
    return hard_labels, posterior

# `z` is reset here but not read by the plot below
z = np.zeros_like(xx)
colors = ['gold', 'darkorange', 'orangered', 'red' ]

fig, ax = plt.subplots(figsize=(8, 6))

# The prediction is the same for every class, so compute it once outside the loop
labels, responsibilities = predict_gmm(X, final_means, final_covariances, final_weights)

# Scatter plot each cluster with size based on responsibility
for i in range(len(final_means)):
    mask = labels == i
    size = 80 * (responsibilities[mask, :].max(1) ** 2)
    ax.scatter(X[mask, 0], X[mask, 1], c=colors[i % len(colors)], s=size,
               edgecolor='w', alpha=0.8, label=f'Class {i+1}')

# Axis cosmetics; the legend entries come from the scatter labels above
ax.set_xlabel('Feature 1', fontsize=12)
ax.set_ylabel('Feature 2', fontsize=12)
ax.set_title('Predicted GMM Clusters', fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3)
ax.legend(loc='upper right')
ax.set_xlim(-4,4.5)
ax.set_ylim(-2,11)
fig.tight_layout()

## Also possible with Scikit-learn's GaussianMixture class

In [None]:
from sklearn.mixture import GaussianMixture

# GaussianMixture is fitted with EM; by default it runs at most 100 iterations (max_iter=100)
gmm = GaussianMixture(n_components=4).fit(X)
labels = gmm.predict(X)

# Posterior probabilities are the same for every class, so compute them once
responsibilities = gmm.predict_proba(X)

colors = ['gold', 'darkorange', 'orangered', 'red' ]

fig, ax = plt.subplots(figsize=(8,6))

# One scatter per predicted class; marker size grows with the squared responsibility
for i in range(gmm.n_components):
    mask = labels == i
    size = 80 * (responsibilities[mask, :].max(1) ** 2)
    ax.scatter(X[mask, 0], X[mask, 1], c=colors[i % len(colors)], s=size,
               edgecolor='w', alpha=0.8, label=f'Class {i+1}')

ax.set_xlabel('Feature 1', fontsize=12)
ax.set_ylabel('Feature 2', fontsize=12)
ax.set_title('Predicted GMM Clusters with Scikit-learn', fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3)
ax.legend( loc='upper right')
ax.set_xlim(-4,4.5)
ax.set_ylim(-2,11)
fig.tight_layout()

## Fit with stretched data

In [None]:
# Apply a random 2x2 linear map (fixed seed) to turn the round blobs into stretched ones
rng = np.random.RandomState(13)
X_stretched = X @ rng.randn(2, 2)

fig, ax = plt.subplots(figsize=(8, 6))

# The transformed data still has two columns: unpack them as x and y
ax.scatter(*X_stretched.T, c='coral', s=60, edgecolor='w', alpha=0.8, zorder=2)
ax.grid(color='black', linewidth=0.5)

# Text elements
ax.set_title('Dataset with 4 unknown classes', color='black')
ax.set_xlabel('Feature 1', color='black')
ax.set_ylabel('Feature 2', color='black')
ax.tick_params(colors='black')

ax.set_xlim(left=-3, right=3)
ax.set_ylim(bottom=-2, top=5)
plt.show()

In [None]:
# Train GMM for more elliptical data, starting from a fresh random initialization
np.random.seed(0)
initial_means = np.random.randn(n_components, 2)
initial_covariances = np.array([3 * np.eye(2)] * n_components)
initial_weights = np.full(n_components, 1 / n_components)

final_means, final_covariances, final_weights = train_gmm(
    X_stretched,
    n_iterations=99,
    means=initial_means,
    covariances=initial_covariances,
    weights=initial_weights,
    classes=True,
)

## ELBO

In [None]:
# Elbo = log(p(x|theta))-D_KL(q(z)||p(z|x,theta))
def log_likelihood(X, means, covariances, weights):
    """
    Compute log-likelihood of data under GMM

    Returns ``sum_n log( sum_k weights[k] * N(x_n | means[k], covariances[k]) )``.

    The sum over components is evaluated in log-space, so points far from
    every component contribute a large negative but finite value instead of
    ``log(0) = -inf``. Each component's distribution object is built once and
    evaluated on all points together.
    """
    from scipy.special import logsumexp

    N = X.shape[0]
    K = len(means)

    # A zero weight legitimately maps to -inf (no contribution); silence the warning.
    with np.errstate(divide='ignore'):
        log_weights = np.log(np.asarray(weights, dtype=float))

    # log(weight_k) + log N(x_n | mean_k, cov_k) for each point and component
    log_joint = np.empty((N, K))
    for k in range(K):
        rv = multivariate_normal(means[k], covariances[k])
        log_joint[:, k] = log_weights[k] + rv.logpdf(X)

    # Per-point log-density of the mixture, summed over all points
    return logsumexp(log_joint, axis=1).sum()

def compute_elbo(X, means, covariances, weights, responsibilities):
    """
    Compute ELBO (Evidence Lower Bound)

    ``ELBO = sum_{n,k} r_nk * (log weights[k] + log N(x_n | means[k], covariances[k]))
             - sum_{n,k} r_nk * log r_nk``

    with ``r = responsibilities``. Entries with ``r_nk <= 1e-10`` are skipped
    in both sums (their limit contribution is 0).

    The component log-density comes from ``logpdf`` directly. Taking
    ``log(pdf + 1e-10)`` would floor it at about -23 and overstate the ELBO
    for points far from a component.
    """
    N = X.shape[0]
    K = len(means)
    resp = np.asarray(responsibilities, dtype=float)

    with np.errstate(divide='ignore'):
        log_weights = np.log(np.asarray(weights, dtype=float))

    # log p(x_n, z_n = k | theta) for each point and component
    log_joint = np.empty((N, K))
    for k in range(K):
        rv = multivariate_normal(means[k], covariances[k])
        log_joint[:, k] = log_weights[k] + rv.logpdf(X)

    # Keep only entries with non-negligible responsibility (avoids 0 * log 0)
    active = resp > 1e-10
    r = resp[active]

    # E_q[log p(x, z | theta)] - E_q[log q(z)]
    return np.sum(r * (log_joint[active] - np.log(r)))

# Visualization: sweep over mean of first component
# Evaluates the log-likelihood and two ELBO curves while the first coordinate of
# the (single) component mean is moved along a grid; results are collected in
# log_likelihoods, elbos_0 and elbos_1 for plotting.
np.random.seed(42)
# NOTE(review): this rebinds the module-level n_components that the GMM cells
# above set to 4; re-running those cells afterwards will use 1 component.
n_components = 1
initial_means = np.random.randn(n_components, 2) 
initial_covariances = np.array([np.eye(2) * 2 for _ in range(n_components)])
initial_weights = np.ones(n_components) / n_components

# Run one E-step and one M-step
# NOTE(review): with a single component e_step normalises every row over one
# column, so both responsibility arrays are presumably all ones and q does not
# change between the two E-steps - confirm this degenerate setup is intended.
responsibilities_0 = e_step(X, initial_means, initial_covariances, initial_weights)
means_1, covariances_1, weights_1 = m_step(X, responsibilities_0)
responsibilities_1 = e_step(X, means_1, covariances_1, weights_1)

# Sweep mean of first component along x-axis
mean_range = np.linspace(-3, 3, 50)
log_likelihoods = []
elbos_0 = []
elbos_1 = []

for mean_val in mean_range:
    # Test with varying first component mean
    # Only entry [0, 0] is overwritten; the second coordinate keeps the value
    # from initial_means / means_1 respectively.
    test_means_0 = initial_means.copy()
    test_means_0[0, 0] = mean_val

    test_means_1 = means_1.copy()
    test_means_1[0, 0] = mean_val

    # Compute log-likelihood (same for both since it's objective)
    # NOTE(review): ll is evaluated with test_means_0 / initial_covariances /
    # initial_weights, whereas elbo_1 below uses test_means_1 / covariances_1 /
    # weights_1, so that curve is computed at different parameters than this
    # log-likelihood - confirm the comparison in the right-hand plot is intended.
    ll = log_likelihood(X, test_means_0, initial_covariances, initial_weights)
    log_likelihoods.append(ll)

    # ELBO with initial q (E-step 0)
    elbo_0 = compute_elbo(X, test_means_0, initial_covariances, initial_weights, responsibilities_0)
    elbos_0.append(elbo_0)

    # ELBO with updated q (E-step 1)
    elbo_1 = compute_elbo(X, test_means_1, covariances_1, weights_1, responsibilities_1)
    elbos_1.append(elbo_1)

# Position of each parameter setting on the swept axis, and the ELBO value at
# the grid node closest to it (used for the round markers)
theta_0 = initial_means[0, 0]
theta_1 = means_1[0, 0]
marker_0 = elbos_0[np.argmin(np.abs(mean_range - theta_0))]
marker_1 = elbos_1[np.argmin(np.abs(mean_range - theta_1))]

# Legend texts shared by both panels
loglik_label = r'$\log p(\mathbf{x}|\theta)$'
elbo_0_label = r'ELBO[$q^{[0]}, \theta^{[0]}$]'
elbo_1_label = r'ELBO[$q^{[1]}, \theta^{[1]}$]'

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: log-likelihood and the ELBO of the initial iteration
ax1.plot(mean_range, log_likelihoods, 'k-', linewidth=2.5, label=loglik_label)
ax1.plot(mean_range, elbos_0, 'coral', linewidth=2, label=elbo_0_label)
ax1.axvline(theta_0, color='coral', linestyle='--', alpha=0.5)
ax1.scatter([theta_0], [marker_0], color='coral', s=100, zorder=5)

# Right panel: same curves (initial ELBO faded) plus the ELBO after the M-step
ax2.plot(mean_range, log_likelihoods, 'k-', linewidth=2.5, label=loglik_label)
ax2.plot(mean_range, elbos_0, 'coral', linewidth=2, alpha=0.5, label=elbo_0_label)
ax2.plot(mean_range, elbos_1, 'turquoise', linewidth=2, label=elbo_1_label)
ax2.axvline(theta_0, color='coral', linestyle='--', alpha=0.5)
ax2.axvline(theta_1, color='turquoise', linestyle='--', alpha=0.5)
ax2.scatter([theta_0], [marker_0], color='coral', s=100, zorder=5)
ax2.scatter([theta_1], [marker_1], color='turquoise', s=100, zorder=5)

# Decoration common to both panels; only the title differs
for panel, heading in ((ax1, 'E-step: Fix θ, optimize q'),
                       (ax2, 'M-step: Fix q, optimize θ')):
    panel.set_xlabel(r'$\theta$ (mean of component 1)', fontsize=12)
    panel.set_ylabel(r'$\log [ p(\mathbf{x}|\theta)]$', fontsize=12)
    panel.set_title(heading, fontsize=13, fontweight='bold')
    panel.legend(fontsize=11)
    panel.grid(alpha=0.3)

plt.tight_layout()
plt.show()