In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_score

In [None]:
# Synthetic dataset: 777 two-dimensional points generated around three blob centres.
# random_state is fixed so the same points are produced on every run.
n_samples, n_features, n_clusters = 777, 2, 3

data, labels = make_blobs(
    n_samples=n_samples,
    n_features=n_features,
    centers=n_clusters,
    random_state=42,
)

In [None]:
# Initialize K-Means with the desired number of clusters.
# random_state pins the centroid initialisation so the labels (and any plot
# built from them) are reproducible after Restart & Run All. n_init is passed
# explicitly because its default differs between scikit-learn releases.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)

# Fit the K-Means model to the data
kmeans.fit(data)

# Cluster index assigned to each row of `data`
cluster_assignments = kmeans.labels_

In [None]:
# Show how many points landed in each cluster (position i = cluster label i)
# instead of echoing the full per-sample label array.
np.bincount(cluster_assignments)

In [None]:
# Scatter the samples coloured by their assigned cluster, then overlay the
# fitted centroids as red crosses.
fig, ax = plt.subplots()
centers = kmeans.cluster_centers_
ax.scatter(data[:, 0], data[:, 1], c=cluster_assignments)
ax.scatter(centers[:, 0], centers[:, 1], c='red', marker='x', s=100, label='Cluster Centers')
ax.set_title('K-Means Clustering')
ax.legend()
plt.show()

Now, let's use the elbow method to determine the optimal number of clusters, recording the silhouette score for each k as a second check:

In [None]:
# Sweep k over the candidate cluster counts. For each fit, record the inertia
# (sum of squared distances of samples to their closest centroid, used for the
# elbow curve) and the mean silhouette score.
k_values = range(2, 11)
sse = []
silhouette_scores = {}

for k in k_values:
    # Loop-local name: the `kmeans` model and `cluster_assignments` produced by
    # the earlier cells are left untouched, so re-running those cells' plots
    # still shows the original fit. random_state/n_init are pinned so the curve
    # is reproducible.
    model_k = KMeans(n_clusters=k, n_init=10, random_state=42).fit(data)
    sse.append(model_k.inertia_)
    # Keys are the string form of k.
    silhouette_scores[str(k)] = silhouette_score(data, model_k.labels_)


# Plot the elbow curve
plt.plot(k_values, sse, marker='o')
plt.xlabel('Number of Clusters (k)')
plt.ylabel('Sum of Squared Distances')
plt.title('Elbow Method for Optimal k')
plt.grid()
plt.show()

In [None]:
# Mean silhouette score for each k tried in the sweep, keyed by str(k).
silhouette_scores