# DBSCAN - getting familiar with the algorithm

In [14]:
from sklearn.datasets import make_swiss_roll
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA
import numpy as np

In [15]:
# Swiss-roll generation settings: number of points and noise level.
n_samples, noise = 1000, 0.05

In [16]:
# Sample the swiss-roll point cloud; the second return value is discarded.
# A fixed random_state pins the generated points, so re-running the notebook
# works on the same data instead of a fresh random draw each time.
X, _ = make_swiss_roll(n_samples=n_samples,
                       noise=noise,
                       random_state=42)

In [None]:
# Draw the raw data as a 3-D scatter plot: one small blue marker per row of X,
# with columns 0, 1 and 2 used as the three plotted coordinates.
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(1, 1, 1, projection="3d")
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c="blue", s=5)
# `ax` is the current axes here, so this is the same call plt.title would make.
ax.set_title("Swiss Roll Dataset")
plt.show()

In [18]:
# Fit a two-component PCA on X and keep the projected coordinates in
# X_reduced; the fitted `pca` object remains bound for later inspection.
pca = PCA(n_components=2)
X_reduced = pca.fit_transform(X)

In [30]:
# DBSCAN hyperparameters, kept as named variables so they are easy to tweak
# before the estimator is rebuilt.
eps, min_samples = 1.0, 4
model = DBSCAN(eps=eps, min_samples=min_samples)

In [31]:
# Cluster the 2-D projection, then read the per-sample cluster assignments
# from the fitted estimator.
model.fit(X_reduced)
labels = model.labels_

In [None]:
# Show the 2-D projection, colouring every point by its DBSCAN label.
plt.figure(figsize=(8, 6))
plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=labels, cmap="viridis", s=10)
# Set the title and both axis labels on the current axes in one call.
plt.gca().set(
    title="DBScan Clusters -  Swiss Roll Dataset",
    xlabel="PCA Component 1",
    ylabel="PCA Component 2",
)
plt.show()
