# In [None]:
# Step 1: Import Necessary Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler

# Step 2: Load and Explore the Dataset
# The data is read from a file named "Mall Customers.csv"; the relative path
# is resolved against the current working directory.
csv_path = "Mall Customers.csv"
mall_data = pd.read_csv(csv_path)

# Peek at the leading rows to confirm the columns loaded as expected
preview = mall_data.head()
print(preview)

# Step 3: Preprocess the Data
# Keep only the two columns used for clustering, then standardize them so
# both features contribute on a comparable scale to the distance metric.
feature_columns = ['Annual Income (k$)', 'Spending Score (1-100)']
X = mall_data[feature_columns]

# Learn the scaling parameters from X, then apply them to the same data
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Step 4: Apply DBSCAN
# Hyperparameters: neighbourhood radius (eps) and the minimum number of
# samples (min_samples) passed to DBSCAN. eps is applied to the
# standardized features, not to the raw units.
eps = 0.5
min_samples = 5

# Fit on the standardized features and read the per-sample cluster labels
dbscan = DBSCAN(eps=eps, min_samples=min_samples).fit(X_scaled)
mall_labels = dbscan.labels_

# Step 5: Visualize the Clusters
# Scatter the customers in their original (unscaled) units, colouring each
# point by the label DBSCAN assigned to it.
income = X['Annual Income (k$)']
spending = X['Spending Score (1-100)']
plt.scatter(
    income,
    spending,
    c=mall_labels,
    cmap='viridis',
    marker='o',
    edgecolors='k',
)

# Axis labels and title, then render the figure
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.title('DBSCAN Clustering Mall Customers')
plt.show()

# Step 6: Explore the Result
# Count the distinct cluster labels, leaving out -1, which is the label
# DBSCAN assigns to noise points rather than to a cluster.
cluster_ids = set(mall_labels)
cluster_ids.discard(-1)
num_clusters = len(cluster_ids)
print(f"Number of clusters: {num_clusters}")
