In [4]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import datasets
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
# Apply seaborn's default theme so the matplotlib figures below share one style.
sns.set_theme()

In [None]:
# Load the wine dataset as a DataFrame (feature columns plus a 'target' column).
data = datasets.load_wine(as_frame=True)
df = data.frame

# Separate the class labels from the feature matrix.
y = df['target'].to_numpy()
X = df.drop(columns=['target']).to_numpy()

# DBSCAN is distance-based, so bring every feature onto a comparable scale first.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Cluster the standardized features; eps and min_samples are the knobs to tune.
dbscan = DBSCAN(eps=1.5, min_samples=5)
labels = dbscan.fit(X_scaled).labels_

In [None]:
# Plotting first two principal components for visualization.
# PCA is imported in the notebook's top imports cell alongside the other
# dependencies, so this cell only projects and draws.
X_pca = PCA(n_components=2).fit_transform(X_scaled)

# Use an explicit figure/axes pair rather than the implicit pyplot state.
fig, ax = plt.subplots(figsize=(8, 6))

# Colour every sample by the label DBSCAN assigned to it.
points = ax.scatter(X_pca[:, 0], X_pca[:, 1], c=labels, cmap='plasma', s=50)
ax.set_title("DBSCAN on Wine Dataset (2D PCA Projection)")
ax.set_xlabel("PCA Component 1")
ax.set_ylabel("PCA Component 2")
fig.colorbar(points, ax=ax, label="Cluster Label")
plt.show()

In [None]:
# Summarise the DBSCAN result: the label -1 marks noise, every other
# distinct label is one cluster.
n_clusters = len(set(labels) - {-1})
n_noise = sum(1 for label in labels if label == -1)

print(f"Number of clusters: {n_clusters}")
print(f"Number of noise points: {n_noise}")

Interestingly, DBSCAN did not find any region where the data was dense enough to warrant being
clustered: with `eps=1.5` and `min_samples=5`, no point had enough neighbours within the `eps` radius.
For this reason, the algorithm did not cluster any of the data points together and labelled them as noise (`-1`).
This likely happens with this dataset because the classes are so similar: without prior guidance that there are
3 distinct classes, the data appears to be either all in one cluster or in no clusters at all.