In [None]:
## Cell 1: Imports

In [None]:
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler

In [None]:
## Cell 2: Load Data

In [None]:
# Read the mall-customer CSV from ../data (relative to the notebook's
# working directory) into a DataFrame.
data_path = "../data/Mall_Customers.csv"
df = pd.read_csv(data_path)

# Last expression of the cell: preview the first rows as rich output.
df.head()

In [None]:
## Cell 3: Feature Selection

In [None]:
# Clustering uses only these two columns of the loaded frame:
# annual income and spending score.
feature_columns = ['Annual Income (k$)', 'Spending Score (1-100)']
X = df[feature_columns]

In [None]:
## Cell 4: Standardize Features

In [None]:
# Fit a StandardScaler on the selected features, then apply it.
# The fitted `scaler` is kept because it is saved alongside the model later.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [None]:
## Cell 5: Elbow Method

In [None]:
# Elbow method: fit K-Means once per candidate cluster count and record the
# within-cluster sum of squares (the model's `inertia_`) for each fit.
#
# The candidate range is defined once so the fits and the plot's x-axis can
# never drift apart. The comprehension keeps the throw-away models local, so
# this cell no longer leaves a 10-cluster `kmeans` (or a stray `k`) in the
# notebook namespace where a later cell could pick it up by accident.
k_candidates = range(1, 11)
wcss = [
    KMeans(n_clusters=n_clusters, init='k-means++', random_state=42, n_init=10)
    .fit(X_scaled)
    .inertia_
    for n_clusters in k_candidates
]

# Plot WCSS against the number of clusters; the "elbow" is where the curve
# stops dropping sharply.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(k_candidates, wcss, marker='o')
ax.set_title('Elbow Method')
ax.set_xlabel('Number of clusters')
ax.set_ylabel('WCSS')
# One tick per candidate, so every cluster count is readable on the axis.
ax.set_xticks(list(k_candidates))
# Explicit True: a bare grid() call toggles the grid instead of enabling it.
ax.grid(True)
plt.show()

In [None]:
## Cell 6: Silhouette Score & Final Clustering

In [None]:
# Final model: K-Means with k = 5 clusters, fitted on the scaled features.
k = 5
kmeans = KMeans(
    n_clusters=k,
    init='k-means++',
    n_init=10,
    random_state=42,
)
kmeans.fit(X_scaled)

# Cluster label assigned to each row of X_scaled by the fitted model.
clusters = kmeans.labels_

# Silhouette score of the resulting assignment, reported to two decimals.
score = silhouette_score(X_scaled, clusters)
print("Silhouette Score:", round(score, 2))

In [None]:
## Cell 7: Plot Clusters

In [None]:
# Scatter the customers in scaled feature space, coloured by cluster label.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    x=X_scaled[:, 0],   # first feature column (annual income, scaled)
    y=X_scaled[:, 1],   # second feature column (spending score, scaled)
    hue=clusters,
    palette='Set1',
    s=100,
    ax=ax,
)
ax.set_title('Customer Segments')
ax.set_xlabel('Annual Income (scaled)')
ax.set_ylabel('Spending Score (scaled)')
ax.grid()
plt.show()

In [None]:
## Cell 8: Save Models

In [None]:
# Persist the fitted K-Means model and the fitted scaler under ../models so
# both can be reloaded together.
models_dir = Path("../models")

# joblib.dump does not create missing parent directories; without this the
# cell raises FileNotFoundError when ../models does not exist yet.
models_dir.mkdir(parents=True, exist_ok=True)

joblib.dump(kmeans, models_dir / "kmeans_model.pkl")
joblib.dump(scaler, models_dir / "scaler.pkl")