# In [None]:
import numpy as np
import matplotlib.pyplot as plt
import umap
import matplotlib.colors as mcolors
from sklearn.datasets import make_blobs

# Function to create density-based heatmap
def create_density_heatmap(x, y, bins=300):
    """Bin 2-D points into a grid of counts laid out for ``plt.imshow``.

    Args:
        x: Horizontal coordinates of the points.
        y: Vertical coordinates of the points (paired element-wise with ``x``).
        bins: Bin specification forwarded unchanged to ``np.histogram2d``.

    Returns:
        A ``(grid, extent)`` pair. ``grid`` holds the per-bin counts with rows
        following the y bins and columns following the x bins. ``extent`` is
        the list ``[x_min, x_max, y_min, y_max]`` built from the outermost
        bin edges.

    Side effects:
        Prints the largest bin count to stdout.
    """
    counts, x_edges, y_edges = np.histogram2d(x, y, bins=bins)
    print(f"Heatmap max value: {counts.max()}")

    # histogram2d indexes its result as [x_bin, y_bin]; imshow wants
    # [row, column] == [y_bin, x_bin], hence the transpose.
    grid = counts.T

    extent = [x_edges[0], x_edges[-1], y_edges[0], y_edges[-1]]
    return grid, extent

# Generate synthetic data for testing
n_samples = 1000
n_features = 10
n_clusters = 4
# One seed shared by data generation and the embedding so that re-running
# the script reproduces the same plots.
random_seed = 42

X, y = make_blobs(
    n_samples=n_samples,
    n_features=n_features,
    centers=n_clusters,
    cluster_std=0.6,
    random_state=random_seed,
)

# Perform UMAP dimensionality reduction
# UMAP's layout optimisation is stochastic; without random_state the 2-D
# embedding (and every plot derived from it) changes from run to run even
# though the input blobs are seeded.
reducer = umap.UMAP(
    n_neighbors=15,
    min_dist=0.1,
    n_components=2,
    metric='euclidean',
    random_state=random_seed,
)
embedding_outputs = reducer.fit_transform(X)

# Assign each ground-truth label a fixed colour from the "tab10" palette,
# wrapping around if there are more labels than palette entries.
cmap_ground_truth = plt.get_cmap("tab10")
tab10_palette = cmap_ground_truth.colors
ground_truth_label_colors = {}
for label in np.unique(y):
    ground_truth_label_colors[label] = tab10_palette[label % len(tab10_palette)]

# Scatter plot of the UMAP embedding: one series per label so that every
# label gets its own colour and legend entry.
plt.figure(figsize=(8, 8))
for label, label_color in ground_truth_label_colors.items():
    mask = y == label
    label_points = embedding_outputs[mask]
    plt.scatter(
        label_points[:, 0],
        label_points[:, 1],
        color=label_color,
        label=f"Label {label}",
    )
plt.title("Scatter Plot of UMAP Embeddings with Ground Truth Labels")
plt.xlabel('UMAP 1')
plt.ylabel('UMAP 2')
plt.legend()
plt.show()

# Density-based heatmap of all embedded points (every cluster, not only one)
# The coordinates get their own names on purpose: unpacking into `x, y`
# would overwrite the ground-truth label array `y` returned by make_blobs,
# breaking any later (or re-run) code that still needs the labels.
umap_x, umap_y = embedding_outputs[:, 0], embedding_outputs[:, 1]
heatmap, extent = create_density_heatmap(umap_x, umap_y)
plt.figure(figsize=(8, 8))
# origin='lower' puts row 0 of the grid at the bottom so the y axis increases
# upwards, matching the orientation of the scatter plot.
plt.imshow(heatmap, extent=extent, origin='lower', cmap='hot', alpha=0.8)
plt.colorbar(label='Density')
plt.xlabel('UMAP 1')
plt.ylabel('UMAP 2')
plt.title("Density-Based Heatmap of UMAP Embeddings")
plt.show()
