# Retail Customer Segmentation Using Unsupervised Learning

## Objective
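Segment retail customers using unsupervised learning models (K-Means, agglomerative clustering, DBSCAN, and Gaussian mixture models, plus K-Means on PCA-reduced features) and compare the resulting segmentations by silhouette score.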

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
from sklearn.mixture import GaussianMixture
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score


In [None]:

# Read the customer dataset (path is relative to the working directory)
# and preview the first rows.
DATA_PATH = "Retail_Customer_Behavior_9000.csv"
df = pd.read_csv(DATA_PATH)
df.head()


In [None]:

# Print each column's dtype and non-null count so missing values or
# unexpected types are visible before the columns are used for clustering.
df.info()


In [None]:

# Columns used as clustering inputs; `features` keeps them in this order.
FEATURE_COLUMNS = [
    "Age",
    "Annual_Income",
    "Total_Purchases",
    "Avg_Order_Value",
    "Purchase_Frequency",
    "Online_Visits",
    "Customer_Tenure",
    "Last_Purchase_Days",
]
features = df[FEATURE_COLUMNS]


In [None]:

# Standardize the selected features so that distance-based clustering is not
# dominated by columns with larger numeric ranges.
scaler = StandardScaler()
X_scaled = scaler.fit(features).transform(features)


In [None]:

# Project the scaled features onto their first two principal components to
# get a 2-D view of the data before clustering.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Share of total variance captured by each plotted component; shown on the
# axes so the reader knows how much structure the 2-D view retains.
pc1_var, pc2_var = pca.explained_variance_ratio_

fig, ax = plt.subplots()
ax.scatter(X_pca[:, 0], X_pca[:, 1], s=10)
ax.set_title("PCA Distribution")
ax.set_xlabel(f"PC1 ({pc1_var:.1%} of variance)")
ax.set_ylabel(f"PC2 ({pc2_var:.1%} of variance)")
plt.show()


In [None]:

# K-Means with four clusters on the scaled features.
# n_init is pinned explicitly: the library default changed between
# scikit-learn releases, so leaving it implicit makes the clustering (and the
# score below) depend on the installed version.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)
kmeans_labels = kmeans.fit_predict(X_scaled)
df["KMeans_Cluster"] = kmeans_labels

# Silhouette score of the K-Means labels in the scaled feature space.
silhouette_score(X_scaled, kmeans_labels)


In [None]:

# Agglomerative (hierarchical) clustering into four groups on the scaled
# features, using the estimator's default linkage settings.
hc = AgglomerativeClustering(n_clusters=4)
hierarchical_labels = hc.fit_predict(X_scaled)
df["Hierarchical_Cluster"] = hierarchical_labels

# Silhouette score of the hierarchical labels in the scaled feature space.
silhouette_score(X_scaled, hierarchical_labels)


In [None]:

# Density-based clustering on the scaled features.
dbscan = DBSCAN(eps=1.3, min_samples=6)
df["DBSCAN_Cluster"] = dbscan.fit_predict(X_scaled)

# DBSCAN assigns the label -1 to noise points; they are excluded from the
# silhouette computation so that "noise" is not scored as a cluster.
mask = df["DBSCAN_Cluster"] != -1
dbscan_labels = df.loc[mask, "DBSCAN_Cluster"]
n_dbscan_clusters = dbscan_labels.nunique()

# The score below ignores noise points, so report how many were dropped.
print(
    f"DBSCAN: {n_dbscan_clusters} clusters, "
    f"{(~mask).sum()} noise points ({(~mask).mean():.1%} of rows)"
)

# silhouette_score raises ValueError unless 2 <= n_labels <= n_samples - 1,
# which can happen when eps/min_samples yield a single cluster or only noise.
# Fall back to NaN instead of failing the notebook run.
dbscan_silhouette = (
    silhouette_score(X_scaled[mask.to_numpy()], dbscan_labels)
    if 2 <= n_dbscan_clusters < len(dbscan_labels)
    else np.nan
)
dbscan_silhouette


In [None]:

# Gaussian mixture model with four components; each row is assigned to the
# component returned by fit_predict.
gmm = GaussianMixture(n_components=4, random_state=42)
gmm_labels = gmm.fit_predict(X_scaled)
df["GMM_Cluster"] = gmm_labels

# Silhouette score of the GMM labels in the scaled feature space.
silhouette_score(X_scaled, gmm_labels)


In [None]:

# Reduce the scaled features to three principal components, then run K-Means
# on the reduced representation.
pca3 = PCA(n_components=3)
X_pca3 = pca3.fit_transform(X_scaled)

# n_init is pinned explicitly because the library default changed between
# scikit-learn releases.
kmeans_pca = KMeans(n_clusters=4, n_init=10, random_state=42)
pca_kmeans_labels = kmeans_pca.fit_predict(X_pca3)
df["PCA_KMeans_Cluster"] = pca_kmeans_labels

# Report the silhouette score in both spaces. The PCA-space score measures
# separation in the space the model was fitted on; the scaled-feature-space
# score is the one measured on the same data as the other models' scores.
pd.Series(
    {
        "pca_space": silhouette_score(X_pca3, pca_kmeans_labels),
        "scaled_feature_space": silhouette_score(X_scaled, pca_kmeans_labels),
    },
    name="silhouette_score",
)


## Conclusion
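PCA + KMeans performed best for customer segmentation, judged by silhouette score (the only metric computed above). Note that a silhouette score computed in the reduced PCA space is not directly comparable with scores computed in the full scaled feature space, and that the DBSCAN score excludes noise points, so this ranking should be treated as indicative rather than definitive.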