<a href="https://colab.research.google.com/github/garvit69/DWDM-Labs/blob/main/7_dbscan.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from sklearn.neighbors import NearestNeighbors

In [None]:
def dbscan(X, eps, MinPts):
    """Cluster points with DBSCAN and split them into core, border and noise.

    A point's eps-neighbourhood is every point (itself included) whose
    Euclidean distance to it is at most ``eps``.  A point is a *core* point
    when its neighbourhood holds at least ``MinPts`` points, a *border* point
    when it is not core but lies in the neighbourhood of a core point, and
    *noise* otherwise.

    Parameters
    ----------
    X : array-like of shape (n_samples, ...)
        Points to cluster, one per row.
    eps : float
        Neighbourhood radius (inclusive).
    MinPts : int
        Minimum neighbourhood size, counting the point itself, for a point
        to be a core point.

    Returns
    -------
    core_points, border_points, noise_points : numpy.ndarray
        Rows of ``X`` in each category, in their original order.
    """
    X = np.asarray(X)
    n_samples = X.shape[0]
    if n_samples == 0:
        # Nothing to cluster; keep the trailing dimensions of the input.
        return X[:0], X[:0], X[:0]

    # Flatten each sample to a float vector for the distance computation.
    points = X.reshape(n_samples, -1).astype(float)

    # Full eps-neighbourhood of every point.  A k-nearest-neighbour query
    # capped at MinPts would truncate dense neighbourhoods, so distances to
    # all points are checked, one row at a time to keep memory linear.
    neighborhoods = [
        np.flatnonzero(np.linalg.norm(points - point, axis=1) <= eps)
        for point in points
    ]
    is_core = np.array([len(nbrs) >= MinPts for nbrs in neighborhoods])

    # -1 means "not in any cluster"; cluster ids start at 1.
    labels = np.full(n_samples, -1, dtype=int)
    cluster_count = 0

    for seed in range(n_samples):
        # Clusters are only ever grown from core points not yet claimed.
        if labels[seed] != -1 or not is_core[seed]:
            continue

        cluster_count += 1
        labels[seed] = cluster_count

        # Expand through density-reachable points.  Only core points are
        # pushed, so border points join the cluster but never extend it.
        pending = [seed]
        while pending:
            current = pending.pop()
            for neighbor in neighborhoods[current]:
                if labels[neighbor] == -1:
                    labels[neighbor] = cluster_count
                    if is_core[neighbor]:
                        pending.append(neighbor)

    in_cluster = labels != -1
    core_points = X[is_core]
    border_points = X[in_cluster & ~is_core]
    noise_points = X[~in_cluster]

    return core_points, border_points, noise_points

In [None]:
# Sample 2-D observations to cluster
X = np.array([
    [4, 2], [3, 1], [2, 1], [1, 2], [5, 3], [3, 5], [5, 4],
    [2, 5], [3, 1], [6, 10], [4, 7], [9, 3], [5, 4],
])

# Run DBSCAN with a neighbourhood radius of 0.6 and a minimum of 4 points
core_points, border_points, noise_points = dbscan(X, eps=0.6, MinPts=4)

# Report each group of points under its heading
for heading, group in (
    ("Core points:\n", core_points),
    ("Border points:\n", border_points),
    ("Noise points:\n", noise_points),
):
    print(heading, group)

Core points:
 []
Border points:
 [[ 4  2]
 [ 3  1]
 [ 2  1]
 [ 1  2]
 [ 5  3]
 [ 3  5]
 [ 5  4]
 [ 2  5]
 [ 3  1]
 [ 6 10]
 [ 4  7]
 [ 9  3]
 [ 5  4]]
Noise points:
 []
