# Task 8: K-Means Clustering
AI/ML Internship - Harsh Patel

In [None]:
# Step 1: Import Required Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

In [None]:
# Step 2: Load the Iris data and preview the first rows
# Only the feature matrix (iris.data) goes into the frame; iris.target is not
# used anywhere in this cell.
iris = load_iris()
df = pd.DataFrame(
    data=iris.data,
    columns=iris.feature_names,
)
df.head()

In [None]:
# Step 3: Project the features onto two principal components
# Two components are kept so the result can be drawn on an x/y scatter plot.
component_names = ['PCA1', 'PCA2']
pca = PCA(n_components=len(component_names))
pca_data = pca.fit_transform(df)  # one row per sample, one column per component
pca_df = pd.DataFrame(pca_data, columns=component_names)

In [None]:
# Step 4: Elbow Method to Determine Optimal K
# Fit K-Means once for every K in 1..10 and record each model's inertia_;
# the K at which the curve stops dropping sharply is the candidate to use.
inertia = []
K_range = range(1, 11)
for k in K_range:
    # n_init is set explicitly because its default differs between
    # scikit-learn releases; pinning it keeps the curve comparable across
    # installations.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    # Select the two PCA columns explicitly: a later cell adds a 'Cluster'
    # column to pca_df, and fitting on the whole frame would treat those
    # labels as a third feature whenever this cell is re-run afterwards.
    kmeans.fit(pca_df[['PCA1', 'PCA2']])
    inertia.append(kmeans.inertia_)

plt.figure(figsize=(8, 5))
plt.plot(K_range, inertia, 'bo-')
plt.xlabel('Number of Clusters (K)')
plt.ylabel('Inertia')
plt.title('Elbow Method For Optimal K')
plt.grid(True)
plt.show()

In [None]:
# Step 5: Fit KMeans with Optimal K
# NOTE(review): K = 3 is presumably read off the elbow plot from Step 4 —
# revisit this value if that curve changes.
optimal_k = 3
# n_init is set explicitly because its default differs between scikit-learn
# releases; pinning it keeps the assigned labels stable across installations.
kmeans = KMeans(n_clusters=optimal_k, n_init=10, random_state=42)
# Cluster on the two PCA columns only. Passing the whole of pca_df would feed
# the 'Cluster' labels written below back in as a third feature every time
# this cell is re-run, silently changing both the labels and the centroids.
labels = kmeans.fit_predict(pca_df[['PCA1', 'PCA2']])
pca_df['Cluster'] = labels

In [None]:
# Step 6: Plot the clustered points together with the cluster centres
fig, ax = plt.subplots(figsize=(8, 6))

# Samples, coloured by the cluster label assigned to each row.
sns.scatterplot(
    data=pca_df,
    x='PCA1',
    y='PCA2',
    hue='Cluster',
    palette='viridis',
    s=100,
    ax=ax,
)

# Centres are drawn on the same axes; columns 0 and 1 are used as the x and y
# coordinates (assumes the model's first two features are PCA1 and PCA2).
centroids = kmeans.cluster_centers_
ax.scatter(
    centroids[:, 0],
    centroids[:, 1],
    c='red',
    s=200,
    marker='X',
    label='Centroids',
)

ax.set_title('K-Means Clustering Visualization (PCA Reduced)')
ax.legend()
plt.show()

In [None]:
# Step 7: Score the clustering with the silhouette coefficient
# Only the two PCA coordinates are passed in, so the 'Cluster' column stored
# on pca_df does not take part in the distance computation.
pca_features = pca_df[['PCA1', 'PCA2']]
sil_score = silhouette_score(pca_features, labels)
print(f"Silhouette Score: {sil_score:.2f}")