In [20]:
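# Required libraries
# NOTE: `distutils` was removed from the standard library in Python 3.12. If an
# import below fails with "No module named 'distutils'", install `setuptools`
# (it ships a compatible `distutils` shim) or upgrade the package that still
# imports it.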
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import accuracy_score, confusion_matrix
from yellowbrick.cluster import KElbowVisualizer
from scipy.stats import mode
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Enable inline plotting for Jupyter notebooks
%matplotlib inline

# Generate a reproducible synthetic dataset: 300 points drawn around 4 blob centers
data, true_labels = make_blobs(n_samples=300, centers=4, cluster_std=0.6, random_state=0)

# Use the elbow method to pick the number of clusters. The k tuple is treated
# as a half-open range, so (1, 11) evaluates k = 1..10. KMeans is seeded (and
# n_init pinned) so the elbow curve, and therefore the chosen k, is the same
# on every run instead of depending on the random centroid initialisation.
elbow_visualizer = KElbowVisualizer(KMeans(n_init=10, random_state=0), k=(1, 11))
elbow_visualizer.fit(data)
# Finalize the figure (title, axis labels, legend) and render the elbow plot
elbow_visualizer.show()

# elbow_value_ is None when no elbow could be located on the curve; stop with
# a clear message rather than handing None to KMeans as n_clusters.
optimal_k = elbow_visualizer.elbow_value_
if optimal_k is None:
    raise ValueError(
        "Elbow method could not determine an optimal number of clusters; "
        "widen the k range or choose n_clusters manually."
    )

# Fit the final KMeans model with the selected number of clusters, using the
# same seed so the cluster assignment is reproducible as well.
kmeans_model = KMeans(n_clusters=optimal_k, n_init=10, random_state=0)
predicted_clusters = kmeans_model.fit_predict(data)

# KMeans cluster ids are arbitrary, so relabel each cluster with the
# ground-truth label that occurs most often among its members.
final_labels = np.zeros_like(predicted_clusters)
for cluster_id in range(optimal_k):
    members = predicted_clusters == cluster_id
    majority_label = mode(true_labels[members])[0]
    final_labels[members] = majority_label

# Score the relabelled clusters against the ground truth and report it
clustering_accuracy = accuracy_score(true_labels, final_labels)
print(f"Clustering accuracy with {optimal_k} clusters: {clustering_accuracy:.2f}")

# Build the confusion matrix over the ground-truth classes. The axes of the
# matrix are class labels, not cluster ids, so the tick labels are taken from
# the labels themselves: they then always match the matrix size (even when the
# elbow method picks a k different from the number of classes) and show the
# real label values instead of a 1-based renumbering.
class_labels = np.unique(true_labels)
conf_matrix = confusion_matrix(true_labels, final_labels, labels=class_labels)
tick_labels = class_labels.tolist()

# Draw the matrix as an annotated heatmap (rows: true, columns: predicted)
plt.figure(figsize=(8, 8))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    cbar=False,
    xticklabels=tick_labels,
    yticklabels=tick_labels,
)
plt.title("Confusion Matrix")
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.show()


ModuleNotFoundError: No module named 'distutils'