# Gaussian Mixture Models (GMM): Advanced Tutorial

**Gaussian Mixture Models (GMM)** are probabilistic models that assume the data is generated from a mixture of several Gaussian distributions with unknown parameters.
They are used for **unsupervised clustering**, **density estimation**, and **anomaly detection**.

## 1. Import Required Libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.mixture import GaussianMixture
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler

# Apply seaborn's 'whitegrid' theme globally so the figures below share it.
sns.set(style='whitegrid')


## 2. Generate Synthetic Data

In [None]:
# Sample 500 points around three blob centres (fixed seed for repeatability),
# then rescale the features with StandardScaler before clustering.
X, y_true = make_blobs(
    n_samples=500,
    centers=3,
    cluster_std=0.60,
    random_state=0,
)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# X has exactly two feature columns, so its transpose unpacks into the
# x- and y-coordinates of the scatter plot.
plt.scatter(*X.T, s=40)
plt.title("Generated Synthetic Data")
plt.show()


## 3. Fit Gaussian Mixture Model

In [None]:
# Three-component mixture; 'full' gives every component its own
# unconstrained covariance matrix.
gmm = GaussianMixture(
    n_components=3,
    covariance_type='full',
    random_state=42,
)

# fit() returns the fitted estimator, so the hard component assignment for
# each sample can be chained directly onto it.
labels = gmm.fit(X).predict(X)

# Colour every point by its assigned component.
plt.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap='viridis')
plt.title("GMM Clustering")
plt.show()


## 4. Compare with KMeans

In [None]:
from sklearn.cluster import KMeans

# n_init is pinned explicitly: its default changed between scikit-learn
# releases (10 -> 'auto'), which otherwise emits a FutureWarning on some
# versions and can silently change the result on others.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42).fit(X)

# Side-by-side view of the two hard clusterings of the same data.
# NOTE: cluster indices are arbitrary in both models, so the same group of
# points may be drawn in a different colour in each panel.
fig, axs = plt.subplots(1, 2, figsize=(12, 5))
axs[0].scatter(X[:, 0], X[:, 1], c=kmeans.labels_, cmap='viridis')
axs[0].set_title("KMeans Clustering")

axs[1].scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis')
axs[1].set_title("GMM Clustering")

plt.show()


## 5. Model Evaluation

In [None]:
# Silhouette is computed from the hard GMM assignments in `labels`.
silhouette = silhouette_score(X, labels)
print("GMM Silhouette Score:", silhouette)

# Information criteria of the fitted mixture on the training data
# (lower values are better when comparing candidate models).
aic = gmm.aic(X)
print("AIC:", aic)

bic = gmm.bic(X)
print("BIC:", bic)


## 6. Visualize Gaussian Contours

In [None]:
import matplotlib as mpl

def draw_ellipse(position, covariance, ax=None, **kwargs):
    """Draw the 1-, 2- and 3-sigma ellipses of a 2-D Gaussian.

    Parameters
    ----------
    position : array-like of length 2
        Centre of the ellipses (the component mean).
    covariance : array-like
        Either a full ``(2, 2)`` covariance matrix, a length-2 array of
        per-axis variances (diagonal covariance), or a single scalar
        variance shared by both axes (spherical covariance).
    ax : matplotlib Axes, optional
        Axes to draw on. Defaults to the current axes (``plt.gca()``).
    **kwargs
        Forwarded unchanged to ``matplotlib.patches.Ellipse``
        (e.g. ``alpha``, ``color``).

    Raises
    ------
    ValueError
        If ``covariance`` is neither ``(2, 2)`` nor broadcastable to two
        per-axis variances.
    """
    from matplotlib.patches import Ellipse

    if ax is None:
        ax = plt.gca()

    covariance = np.asarray(covariance)
    if covariance.shape == (2, 2):
        # The SVD of the covariance yields the principal axes (columns of U)
        # and the variances along them (s); the first axis fixes the tilt.
        U, s, _ = np.linalg.svd(covariance)
        angle = np.degrees(np.arctan2(U[1, 0], U[0, 0]))
        width, height = 2 * np.sqrt(s)
    else:
        # Axis-aligned case: a scalar variance is broadcast to both axes.
        angle = 0
        width, height = 2 * np.sqrt(np.broadcast_to(covariance, (2,)))

    # Ellipse takes full widths/heights, so nsig * 2 * sigma spans +/- nsig sigma.
    for nsig in range(1, 4):
        # `angle` must be passed by keyword: newer Matplotlib releases made it
        # keyword-only and raise TypeError on the positional form.
        ax.add_patch(Ellipse(position, nsig * width, nsig * height,
                             angle=angle, **kwargs))

plt.figure(figsize=(8, 6))

# zorder=2 keeps the points drawn above the ellipse patches.
plt.scatter(
    X[:, 0], X[:, 1],
    c=labels, s=30, cmap='viridis', zorder=2,
)

# Scale opacity by mixture weight so that the heaviest component is drawn
# at alpha = 0.2 and lighter components are proportionally fainter.
w_factor = 0.2 / gmm.weights_.max()
components = zip(gmm.means_, gmm.covariances_, gmm.weights_)
for mean, cov, weight in components:
    draw_ellipse(mean, cov, alpha=weight * w_factor)

plt.title("GMM with Gaussian Ellipses")
plt.show()


## 7. Summary

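- GMMs model each cluster as a Gaussian distribution with its own mean and covariance
- They support soft clustering: `predict_proba` returns the probability that a point belongs to each component
- Great for density estimation and anomaly detection
- AIC and BIC help select the number of components (lower values indicate a better trade-off between fit and model complexity)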