# Advanced Clustering Techniques

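This notebook demonstrates advanced clustering workflows: comparing GMM fits across several cluster counts, visualizing the resulting cluster assignments in a 2-D PCA projection, and letting the clusterer select the number of clusters automatically.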

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from seiscae.clustering import GMMClusterer
from seiscae.visualization import (
    plot_latent_space_umap,
    plot_cluster_sizes,
    plot_gmm_selection_metrics,
)

## Compare Different Cluster Numbers

In [None]:
# Read the feature array from disk and report its dimensions
features = np.load('../../results/features.npy')
print(f"Features shape: {features.shape}")

# Candidate cluster counts to compare
cluster_range = [3, 5, 7, 10, 15]

# Fit one clusterer per candidate k, keeping both the assignments and the
# fitted object so later cells can reuse them without refitting.
results = {}
for n_clusters in cluster_range:
    clusterer = GMMClusterer(n_clusters=n_clusters)
    labels = clusterer.fit_predict(features)
    results[n_clusters] = dict(labels=labels, clusterer=clusterer)

    # BIC is evaluated on the features after passing them through the
    # clusterer's own scaler.
    bic = clusterer.model.bic(clusterer.scaler.transform(features))
    print(f"k={n_clusters}: BIC={bic:.2f}")

## Visualize Different Clusterings

In [None]:
# Scatter-plot every clustering in `results` on a shared 2-D PCA projection,
# one panel per candidate k.
from sklearn.decomposition import PCA

# The projection depends only on `features`, not on k, so fit it once.
# Besides avoiding repeated work, this guarantees every panel shows the
# points at identical coordinates.
pca = PCA(n_components=2)
features_2d = pca.fit_transform(features)

# Size the grid from the number of candidates instead of hard-coding 2x3;
# for the five default candidates this is still a 2x3 grid of 15x10 inches.
n_cols = 3
n_rows = max(1, -(-len(cluster_range) // n_cols))  # ceiling division
fig, axes = plt.subplots(
    n_rows, n_cols, figsize=(15, 5 * n_rows), squeeze=False
)
axes = axes.flatten()

for ax, n_clusters in zip(axes, cluster_range):
    labels = results[n_clusters]['labels']

    # 'tab20' provides 20 distinct colours; 'tab10' would force several
    # clusters to share a colour once k exceeds 10 (e.g. k=15).
    ax.scatter(
        features_2d[:, 0], features_2d[:, 1],
        c=labels, cmap='tab20', s=10, alpha=0.6
    )
    ax.set_title(f'k={n_clusters}')
    ax.set_xlabel('PC1')
    ax.set_ylabel('PC2')

# Hide panels that have no clustering to show (the grid may be larger than
# the number of candidates).
for ax in axes[len(cluster_range):]:
    ax.set_visible(False)

plt.tight_layout()
plt.show()

## Automatic Cluster Selection

In [None]:
# Automatic selection: no fixed cluster count is given (n_clusters=None).
# NOTE(review): max_clusters=20 is presumably the upper bound of the search —
# confirm against GMMClusterer.
clusterer_auto = GMMClusterer(max_clusters=20, n_clusters=None)
labels_auto = clusterer_auto.fit_predict(features)

print(f"Automatically selected {clusterer_auto.n_clusters} clusters")

# Plot the metrics recorded during selection and save the figure to disk
selection_metrics = clusterer_auto.selection_metrics_
plot_gmm_selection_metrics(
    selection_metrics,
    save_path='../../results/gmm_selection.png',
)