# Customer Segmentation using K-Means Clustering
Segment customers by behavior (here, annual income and spending score) using unsupervised learning with K-Means.

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
# Use seaborn's "whitegrid" style for the plots drawn below.
sns.set(style="whitegrid")


In [None]:

# Build a reproducible synthetic customer table.
np.random.seed(42)
n_customers = 200

# Draw income first and spending score second so the seeded random stream is
# consumed in a fixed order.
annual_income = np.random.normal(60, 15, n_customers).astype(int)
spending_score = np.random.randint(1, 101, n_customers)

data = pd.DataFrame({
    'Annual Income (k$)': annual_income,
    'Spending Score (1-100)': spending_score,
})
data.head()


In [None]:

# Basic EDA: summary statistics plus a raw income-vs-spending scatter plot.
print(data.describe())

# Draw on an explicit figure and close it afterwards. With a non-interactive
# backend plt.show() does not close the figure, so later pyplot calls would
# otherwise keep drawing on top of this scatter plot.
fig, ax = plt.subplots()
sns.scatterplot(x='Annual Income (k$)', y='Spending Score (1-100)', data=data, ax=ax)
ax.set_title("Customer Distribution")
fig.savefig("customer_distribution.png")
plt.show()
plt.close(fig)


In [None]:

# Standardise the clustering features so income and spending score are on a
# comparable scale before distances are computed.
# The feature columns are selected explicitly: later cells add 'Cluster',
# 'PCA1' and 'PCA2' to `data`, and re-running this cell on the whole frame
# would feed those derived columns into the scaler.
feature_columns = ['Annual Income (k$)', 'Spending Score (1-100)']
scaler = StandardScaler()
scaled_data = scaler.fit_transform(data[feature_columns])


In [None]:

# Elbow Method to determine optimal k
# Fit K-Means for k = 1..10 and record the within-cluster sum of squares
# (the fitted model's inertia_). n_init is pinned because its default changed
# between scikit-learn releases, which would otherwise alter the curve (and
# emit a FutureWarning on some versions).
k_values = range(1, 11)
wcss = [
    KMeans(n_clusters=k, n_init=10, random_state=42).fit(scaled_data).inertia_
    for k in k_values
]

# Use a dedicated figure so nothing left over from an earlier plot ends up in
# the saved image, and close it once it has been shown.
fig, ax = plt.subplots()
ax.plot(k_values, wcss, marker='o')
ax.set_xlabel('Number of clusters')
ax.set_ylabel('WCSS')
ax.set_title('Elbow Method')
fig.savefig("elbow_method.png")
plt.show()
plt.close(fig)


In [None]:

# Apply KMeans with 4 clusters (from Elbow)
# n_init is set explicitly so the segmentation does not depend on which
# scikit-learn release's default is in effect.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)
clusters = kmeans.fit_predict(scaled_data)
data['Cluster'] = clusters

# Plot clusters in the original (unscaled) feature space on a dedicated
# figure, then close it so later plots start from a clean canvas.
fig, ax = plt.subplots()
sns.scatterplot(
    x='Annual Income (k$)',
    y='Spending Score (1-100)',
    hue='Cluster',
    palette='tab10',
    data=data,
    ax=ax,
)
ax.set_title("Customer Segments")
fig.savefig("customer_segments.png")
plt.show()
plt.close(fig)


In [None]:

# Project the scaled features onto their first two principal components.
# NOTE: when scaled_data holds only the two original features, this is a
# rotation onto the principal axes rather than a dimensionality reduction.
pca = PCA(n_components=2)
reduced_data = pca.fit_transform(scaled_data)
data['PCA1'] = reduced_data[:, 0]
data['PCA2'] = reduced_data[:, 1]

# Plot on a dedicated figure and close it afterwards so the saved image
# contains only this chart.
fig, ax = plt.subplots()
sns.scatterplot(x='PCA1', y='PCA2', hue='Cluster', palette='tab10', data=data, ax=ax)
ax.set_title("Clusters Visualized with PCA")
fig.savefig("pca_clusters.png")
plt.show()
plt.close(fig)
