DBSCAN without scikit-learn

In [1]:
import numpy as np

def dbscan(data, eps, min_samples):
    """Cluster points with DBSCAN (density-based spatial clustering).

    A point is a *core* point when at least ``min_samples`` points (itself
    included) lie within Euclidean distance ``eps`` of it. Clusters are grown
    outward from core points; points reachable from a core point but not core
    themselves become border members; everything else is noise.

    Parameters
    ----------
    data : array-like
        Points to cluster, expected as a 2-D array of shape
        (n_samples, n_features); distances are taken along axis 1.
    eps : float
        Neighbourhood radius (inclusive: ``distance <= eps``).
    min_samples : int
        Minimum neighbourhood size, counting the point itself, for a point
        to be treated as a core point.

    Returns
    -------
    numpy.ndarray
        Integer label per sample. Clusters are numbered from 1 in order of
        discovery; noise points are labelled -1.
    """
    data = np.asarray(data)
    n_samples = len(data)

    NOISE = -1
    # "Unvisited" must be distinct from NOISE; otherwise unvisited neighbours
    # are mistaken for noise and the cluster is never expanded through them.
    UNVISITED = 0

    labels = np.full(n_samples, UNVISITED)
    cluster_id = 0

    def region_query(point_idx):
        """Return indices of all points within eps of the given point."""
        distances = np.linalg.norm(data - data[point_idx], axis=1)
        return np.where(distances <= eps)[0]

    def expand_cluster(point_idx, neighbors, cluster_id):
        """Grow cluster ``cluster_id`` outward from a core point."""
        labels[point_idx] = cluster_id
        queue = list(neighbors)  # work-list; grows as new core points are found
        i = 0
        while i < len(queue):
            neighbor_idx = queue[i]
            i += 1
            if labels[neighbor_idx] == NOISE:
                # Previously judged non-core: it joins as a border point and
                # is not expanded further.
                labels[neighbor_idx] = cluster_id
            elif labels[neighbor_idx] == UNVISITED:
                labels[neighbor_idx] = cluster_id
                new_neighbors = region_query(neighbor_idx)
                if len(new_neighbors) >= min_samples:
                    # Core point: enqueue only neighbours that can still
                    # change label, which keeps the queue from ballooning.
                    neighbor_labels = labels[new_neighbors]
                    still_open = (neighbor_labels == UNVISITED) | (neighbor_labels == NOISE)
                    queue.extend(new_neighbors[still_open])
            # Any other label: already assigned to a cluster, nothing to do.

    for point_idx in range(n_samples):
        if labels[point_idx] != UNVISITED:  # Already processed
            continue
        neighbors = region_query(point_idx)
        if len(neighbors) < min_samples:
            # Provisional: may be re-labelled as a border point later.
            labels[point_idx] = NOISE
        else:
            cluster_id += 1
            expand_cluster(point_idx, neighbors, cluster_id)

    return labels

# Example usage: six 2-D points forming two groups (x = 1 and x = 10)
data = np.array([
    [1, 2], [1, 4], [1, 0],
    [10, 2], [10, 4], [10, 0],
])
eps = 2.0        # neighbourhood radius
min_samples = 2  # minimum neighbourhood size (point itself included)

labels = dbscan(data, eps=eps, min_samples=min_samples)
print("Cluster labels:", labels)

Cluster labels: [1 1 1 2 2 2]


DBSCAN with scikit-learn

In [2]:
from sklearn.cluster import DBSCAN
import numpy as np

# Same six points and parameters as the hand-written example, so the two
# results can be compared directly.
data = np.array([
    [1, 2], [1, 4], [1, 0],
    [10, 2], [10, 4], [10, 0],
])

eps = 2.0
min_samples = 2

# Fit the estimator, then read the per-sample labels it stored.
db = DBSCAN(eps=eps, min_samples=min_samples)
labels = db.fit(data).labels_

print("Cluster labels:", labels)

Cluster labels: [0 0 0 1 1 1]
