# In [None]:
# Importing libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans

# Load the customer dataset (adjust the filename if yours differs)
df = pd.read_csv("customer_data.csv")

# Quick look at the data: first rows, then column dtypes / non-null counts.
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
df.info()

# Select the two features used for clustering
X = df[['Annual Income (k$)', 'Spending Score (1-100)']]

# KMeans settings shared by the elbow sweep and the final model.
# n_init is pinned explicitly: its default changed from 10 to 'auto' in
# scikit-learn 1.4 (FutureWarning in 1.2-1.3), so relying on the default gives
# different results depending on the installed version.
kmeans_params = {'n_init': 10, 'random_state': 42}

# Elbow Method: fit one model per candidate K and record its inertia
inertia = []
k_range = range(1, 11)
for k in k_range:
    inertia.append(KMeans(n_clusters=k, **kmeans_params).fit(X).inertia_)

# Plot the elbow curve (inertia vs. number of clusters) and save it to disk
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(k_range, inertia, marker='o')
ax.set_title('Elbow Method for Optimal K')
ax.set_xlabel('Number of Clusters')
ax.set_ylabel('Inertia')
ax.grid(True)
# Save before show(): some backends clear the figure once it has been shown.
fig.savefig("elbow_method.png")
plt.show()

# Fit the final model with the K chosen from the elbow plot (e.g., 5) and
# store each row's cluster label in a new 'Cluster' column.
kmeans = KMeans(n_clusters=5, **kmeans_params)
df['Cluster'] = kmeans.fit_predict(X)

# Visualize the clusters: income vs. spending score, colored by cluster label
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    x='Annual Income (k$)', y='Spending Score (1-100)',
    hue='Cluster', data=df, palette='Set2', s=100, ax=ax,
)
ax.set_title("Customer Segments")
# Calling legend() again rebuilds the legend seaborn created, which drops its
# title on current seaborn versions; pass the title explicitly to keep it.
ax.legend(title='Cluster')
ax.grid(True)
plt.show()
