In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN


Matplotlib is building the font cache; this may take a moment.


In [None]:
# Read the customer table; the path is relative to the notebook's working directory.
data = pd.read_csv(
    filepath_or_buffer="Mall_Customers.csv",
)

In [None]:
# Pick the two columns used as clustering features and expose them as a NumPy array.
feature_frame = data.loc[:, ['Annual Income (k$)', 'Spending Score (1-100)']]
X = feature_frame.to_numpy()

In [None]:
# Standardise the feature matrix: fit the scaler on X, then apply the
# fitted transform to that same array.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Cluster the scaled features with DBSCAN and record each row's label
# in a new 'Cluster' column on the customer table.
# NOTE(review): DBSCAN is documented to label noise points as -1 — confirm
# whether the downstream plots should treat that label as a cluster.
dbscan = DBSCAN(min_samples=5, eps=0.6)
dbscan.fit(X_scaled)
clusters = dbscan.labels_
data['Cluster'] = clusters

In [None]:
# Annual income against spending score, coloured by cluster label.
fig, ax = plt.subplots(figsize=(7, 5))
sns.scatterplot(
    data=data,
    x="Annual Income (k$)",
    y="Spending Score (1-100)",
    hue="Cluster",
    palette="tab10",
    ax=ax,
)
ax.set_title("Scatter Plot of Clusters")
plt.show()

In [None]:
# Pairwise relationships between the three numeric columns, coloured by cluster label.
# 'Cluster' is included in the selection so it is available as the hue variable.
pairplot_columns = ['Age', 'Annual Income (k$)', 'Spending Score (1-100)', 'Cluster']
sns.pairplot(
    data[pairplot_columns],
    hue="Cluster",
    palette="tab10",
)
plt.show()

In [None]:
plt.figure(figsize=(7, 5))
sns.boxplot(data=data, x="Cluster", y="Spending Score (1-100)", palette="Set2")
plt.title("Boxplot of Spending Score by Cluster")
plt.show()

In [None]:
plt.figure(figsize=(7, 5))
sns.boxplot(data=data, x="Cluster", y="Annual Income (k$)", palette="Set3")
plt.title("Boxplot of Annual Income by Cluster")
plt.show()

In [None]:
plt.figure(figsize=(7, 5))
sns.countplot(data=data, x="Cluster", palette="Paired")
plt.title("Cluster Counts")
plt.show()

In [None]:
plt.figure(figsize=(7, 5))
sns.histplot(data=data, x="Age", hue="Cluster", multiple="stack", palette="husl")
plt.title("Age Distribution by Cluster")
plt.show()

In [None]:
plt.figure(figsize=(7, 5))
sns.kdeplot(data=data, x="Annual Income (k$)", hue="Cluster", fill=True)
plt.title("KDE of Annual Income by Cluster")
plt.show()

In [None]:
plt.figure(figsize=(7, 5))
sns.kdeplot(data=data, x="Spending Score (1-100)", hue="Cluster", fill=True)
plt.title("KDE of Spending Score by Cluster")
plt.show()

In [None]:
plt.figure(figsize=(7, 5))
sns.violinplot(data=data, x="Cluster", y="Age", palette="Accent")
plt.title("Violin Plot of Age by Cluster")
plt.show()

In [None]:
plt.figure(figsize=(7, 5))
sns.swarmplot(data=data, x="Cluster", y="Annual Income (k$)", hue="Cluster", palette="tab10", dodge=True)
plt.title("Swarm Plot of Annual Income by Cluster")
plt.show()

In [None]:
plt.figure(figsize=(7, 5))
sns.stripplot(data=data, x="Cluster", y="Spending Score (1-100)", palette="Set1")
plt.title("Strip Plot of Spending Score by Cluster")
plt.show()

In [None]:
plt.figure(figsize=(7, 5))
sns.heatmap(data[['Age', 'Annual Income (k$)', 'Spending Score (1-100)']].corr(), annot=True, cmap="coolwarm")
plt.title("Correlation Heatmap")
plt.show()

In [None]:
plt.figure(figsize=(7, 5))
sns.scatterplot(data=data, x="Age", y="Annual Income (k$)", hue="Cluster", palette="tab20")
plt.title("Clusters on Age vs Income")
plt.show()

In [None]:
plt.figure(figsize=(7, 5))
sns.scatterplot(data=data, x="Age", y="Spending Score (1-100)", hue="Cluster", palette="tab20b")
plt.title("Clusters on Age vs Spending Score")
plt.show()

In [None]:
# Share of rows carrying each cluster label, shown as a pie chart.
cluster_sizes = data['Cluster'].value_counts()
fig, ax = plt.subplots(figsize=(7, 7))
cluster_sizes.plot.pie(
    ax=ax,
    autopct='%1.1f%%',
    colors=sns.color_palette("Set2"),
)
ax.set_title("Cluster Size Distribution")
# Blank out the y-axis label on the pie's axes.
ax.set_ylabel("")
plt.show()