In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans


In [None]:
# Read the customer records into a DataFrame and preview the first rows.
# The input file must be named 'customers.csv' and sit in the working directory.
csv_path = "customers.csv"
data = pd.read_csv(csv_path)
data.head()


In [None]:
# Print the frame's structural summary, then the number of null entries
# found in each column.
data.info()
missing_per_column = data.isnull().sum()
print("\nMissing values in each column:\n", missing_per_column)


In [None]:
# Clustering is done on two features only: annual income and spending score.
feature_columns = ['Annual Income (k$)', 'Spending Score (1-100)']
x = data[feature_columns]
x.head()


In [None]:
# Elbow method: fit K-Means once per candidate k and record the resulting
# within-cluster sum of squares (scikit-learn exposes it as `inertia_`).
k_values = range(1, 11)  # shared by the fit loop and the plot's x-axis
wcss = []  # within-cluster sum of squares, one entry per k

for i in k_values:
    # n_init is pinned explicitly: scikit-learn changed its default from 10 to
    # 'auto' in 1.4, so leaving it unset makes the curve depend on the
    # installed version (and raises a FutureWarning on 1.2-1.3).
    kmeans = KMeans(n_clusters=i, init='k-means++', n_init=10, random_state=42)
    kmeans.fit(x)
    wcss.append(kmeans.inertia_)

# Plot Elbow Curve
plt.figure(figsize=(8, 5))
plt.plot(k_values, wcss, marker='o', linestyle='--')
plt.title("Elbow Method for Optimal K")
plt.xlabel("Number of Clusters")
plt.ylabel("WCSS")
plt.show()


In [None]:
# From the elbow curve, usually k=5 is optimal
# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto' in 1.4, so leaving it unset makes the cluster assignments depend on
# the installed version (and raises a FutureWarning on 1.2-1.3).
kmeans = KMeans(n_clusters=5, init='k-means++', n_init=10, random_state=42)
y_kmeans = kmeans.fit_predict(x)  # one integer cluster label per row of x

# Add cluster column to data (re-running this cell overwrites the column)
data['Cluster'] = y_kmeans
data.head()


In [None]:
# Scatter plot of the customers, one colour per K-Means cluster, with the
# fitted centroids overlaid.
plt.figure(figsize=(8, 6))
colors = ['red', 'blue', 'green', 'cyan', 'magenta']
points = x.values  # convert to an array once instead of on every iteration

# The cluster count comes from the fitted model rather than a hard-coded 5,
# so the plot stays in step with the n_clusters used for the fit; the palette
# wraps around if there are more clusters than colours.
for i in range(kmeans.n_clusters):
    in_cluster = y_kmeans == i
    # Legend labels are 1-based, whereas the labels in y_kmeans are 0-based.
    plt.scatter(points[in_cluster, 0],
                points[in_cluster, 1],
                s=80, c=colors[i % len(colors)], label=f'Cluster {i+1}')

# Plot cluster centers
plt.scatter(kmeans.cluster_centers_[:, 0],
            kmeans.cluster_centers_[:, 1],
            s=250, c='yellow', marker='*', label='Centroids')

plt.title('Customer Segments Based on Income & Spending Score')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1–100)')
plt.legend()
plt.show()


In [None]:
# Per-cluster mean of age, annual income and spending score.
summary_columns = ['Age', 'Annual Income (k$)', 'Spending Score (1-100)']
cluster_summary = data.groupby('Cluster')[summary_columns].mean()
cluster_summary


In [None]:
# Write the customer table to disk without the index column, then confirm.
output_csv = "clustered_customers.csv"
data.to_csv(output_csv, index=False)
print("Clustered customer data saved as clustered_customers.csv ✅")
