In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture
from sklearn.cluster import AgglomerativeClustering


# Seed NumPy's global random state so NumPy-based randomness is repeatable.
np.random.seed(42)

# First level: fit Gaussian mixtures with 1..6 components and keep the one
# with the smallest BIC.
# NOTE(review): `data` is not defined in this cell; presumably it is created
# by an earlier cell -- confirm.
# `np.inf` is used instead of the `np.infty` alias, which NumPy 2.0 removed.
lowest_bic = np.inf
bic = []  # BIC of every candidate, in the order of n_components_range
n_components_range = range(1, 7)
best_gmm = None

for n_components in n_components_range:
    gmm = GaussianMixture(n_components=n_components, covariance_type='full', random_state=42)
    gmm.fit(data)
    bic_score = gmm.bic(data)
    bic.append(bic_score)
    # Keep the fitted model whose BIC is the smallest seen so far.
    if bic_score < lowest_bic:
        lowest_bic = bic_score
        best_gmm = gmm

# Report the number of components of the selected model.
print(f"{best_gmm.n_components}")


# First-level assignment: the mixture component predicted for every sample.
labels_first = best_gmm.predict(data)


# One centroid per mixture component: the mean of the samples assigned to it.
centroids = []
for i in range(best_gmm.n_components):
    members = data[labels_first == i]
    if len(members) == 0:
        # No sample was assigned to this component. The mean of an empty
        # selection is NaN, which the clustering step below cannot handle,
        # so fall back to the component's fitted mean.
        centroids.append(best_gmm.means_[i])
    else:
        centroids.append(members.mean(axis=0))
centroids = np.array(centroids)

# Second level: group the component centroids into two clusters.
if len(centroids) < 2:
    # AgglomerativeClustering requires at least two samples. When the BIC
    # search selected a single component there is nothing to merge, so the
    # lone centroid is put in group 0 and no clusterer is fitted.
    agglo = None
    labels_second = np.zeros(len(centroids), dtype=int)
else:
    agglo = AgglomerativeClustering(n_clusters=2)
    labels_second = agglo.fit_predict(centroids)

# Show the first ten first-level labels and the group of every centroid.
print(labels_first[:10])
print(labels_second)

# Visualization: plot the first two columns of the samples, colored by their
# first-level label, then overlay the centroids as large X markers colored by
# their second-level label.
sample_x, sample_y = data[:, 0], data[:, 1]
plt.scatter(
    sample_x,
    sample_y,
    c=labels_first,
    cmap='viridis',
    label='First-level clusters',
)
centroid_x, centroid_y = centroids[:, 0], centroids[:, 1]
plt.scatter(
    centroid_x,
    centroid_y,
    c=labels_second,
    cmap='coolwarm',
    marker='X',
    s=200,
    label='Second-level clusters (centroids)',
)
plt.legend()
plt.title('Two-level Clustering with Bayesian Criterion')
plt.show()
