# Assignment 13 — Clustering Algorithms
K-Means & DBSCAN (Mall Customers / Iris)

## Import Libraries

In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, DBSCAN
from sklearn.datasets import load_iris
from sklearn.metrics import confusion_matrix


## Load Mall Customer Dataset
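(If `Mall_Customers.csv` is not in the working directory, update the path in the cell below; when the file cannot be found, a small random sample is generated instead.)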

In [None]:

# Load the Mall Customers data if the CSV is present; otherwise build a small
# synthetic sample so the rest of the notebook can still run.
try:
    df = pd.read_csv("Mall_Customers.csv")
except FileNotFoundError:
    # Only a missing file triggers the fallback. Parse errors, permission
    # problems, or a keyboard interrupt surface instead of being silently
    # replaced by random data.
    # A seeded generator keeps the fallback sample identical across re-runs.
    rng = np.random.default_rng(42)
    df = pd.DataFrame({
        "CustomerID": range(1, 21),
        "Age": rng.integers(20, 60, 20),
        "Annual Income": rng.integers(20, 100, 20),
        "Spending Score": rng.integers(10, 100, 20),
    })

# Commonly distributed copies of this CSV name the columns
# "Annual Income (k$)" and "Spending Score (1-100)". Normalise them to the
# short names the later cells select; rename() ignores keys that are absent,
# so this is a no-op for the fallback sample.
df = df.rename(columns={
    "Annual Income (k$)": "Annual Income",
    "Spending Score (1-100)": "Spending Score",
})

df.head()


## Select Features & Scale

In [None]:

# Cluster on income and spending score only.
feature_columns = ['Annual Income', 'Spending Score']
X = df[feature_columns]

# Standardise each feature so that neither one dominates the distance
# computations used by the clustering steps that follow.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

# Peek at the first few scaled rows.
X_scaled[:5]


## Apply K-Means

In [None]:

# Fit K-Means with 4 clusters on the scaled features; the fixed random_state
# makes the centroid initialisation repeatable.
kmeans = KMeans(n_init=10, random_state=42, n_clusters=4)
kmeans.fit(X_scaled)
labels_km = kmeans.labels_

# Scatter the customers in scaled feature space, coloured by assigned cluster.
fig, ax = plt.subplots()
ax.scatter(X_scaled[:, 0], X_scaled[:, 1], c=labels_km, cmap='tab10')
ax.set_title("K-Means Clusters (Mall Data)")
ax.set_xlabel("Income (scaled)")
ax.set_ylabel("Spending (scaled)")
plt.show()


## Elbow Method

In [None]:

# Candidate cluster counts; used for both the fits and the x-axis.
k_values = range(1, 8)

# Record the within-cluster sum of squares (KMeans.inertia_) for each k.
wcss = []
for k in k_values:
    km = KMeans(n_init=10, random_state=42, n_clusters=k).fit(X_scaled)
    wcss.append(km.inertia_)

# Plot WCSS against k; the "elbow" is where the curve stops dropping sharply.
fig, ax = plt.subplots()
ax.plot(k_values, wcss, marker='o')
ax.set_title("Elbow Method")
ax.set_xlabel("k")
ax.set_ylabel("WCSS")
plt.show()


## Apply DBSCAN

In [None]:

# Density-based clustering on the scaled mall features.
db = DBSCAN(eps=0.5, min_samples=4)
labels_db = db.fit_predict(X_scaled)

# DBSCAN assigns the label -1 to points it treats as noise.
noise_mask = labels_db == -1

fig, ax = plt.subplots()
# All points, coloured by DBSCAN label (same view as the original plot).
ax.scatter(X_scaled[:, 0], X_scaled[:, 1], c=labels_db, cmap='tab20')
# Overlay the noise points with a distinct marker and report their count so
# they can be told apart from a regular cluster colour.
ax.scatter(
    X_scaled[noise_mask, 0],
    X_scaled[noise_mask, 1],
    color='black',
    marker='x',
    label=f"noise ({int(noise_mask.sum())})",
)
ax.set_title("DBSCAN Clusters (Mall Data)")
# Same axes as the K-Means plot, so label them the same way.
ax.set_xlabel("Income (scaled)")
ax.set_ylabel("Spending (scaled)")
ax.legend()
plt.show()


## Iris Dataset — Clustering

In [None]:

# Load Iris as pandas objects: a feature DataFrame and an integer target Series.
iris = load_iris(as_frame=True)
X_iris = iris.data
y_true = iris.target

# Standardise the features before distance-based clustering.
X_iris_scaled = StandardScaler().fit_transform(X_iris)

# Three clusters, matching the three target classes in the dataset.
km_iris = KMeans(n_clusters=3, random_state=42, n_init=10)
iris_labels = km_iris.fit_predict(X_iris_scaled)

# Cross-tabulate true class against assigned cluster. K-Means cluster ids are
# arbitrary, so cluster 0 need not correspond to class 0: read this as a
# contingency table (which cluster each class mostly lands in), not as a
# confusion matrix whose diagonal means "correct". Labelled axes make the
# row/column meaning explicit.
pd.crosstab(
    y_true,
    iris_labels,
    rownames=["true species"],
    colnames=["k-means cluster"],
)


## DBSCAN on Iris

In [None]:

# Density-based clustering on the scaled Iris features.
db_iris = DBSCAN(min_samples=5, eps=0.7)
db_iris.fit(X_iris_scaled)
iris_db = db_iris.labels_

# Rows: true class; columns: DBSCAN label (a -1 column, if present, is noise).
pd.crosstab(index=y_true, columns=iris_db)


## Discussion
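Write your observations on:
- Elbow method: which value of k the curve suggests, and why
- Cluster patterns visible in the K-Means and DBSCAN plots
- DBSCAN noise points (label `-1`): how many there are and where they fall
- Which algorithm worked better on each dataset, and why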