In [1]:
def find_neighbours(points, point, eps):
    neighbors = []
    for Pn in range(0, len(points)):
        given_point = points[point][0:-1]
        neighbouring_point = points[Pn]
        distance = numpy.linalg.norm(points[point][0:-1] - points[Pn][0:-1])
        if distance < eps:
            neighbors.append(Pn)

    return neighbors

def grow_cluster(data, labels, point, current_cluster_label, eps, min_points):
    search_queue = [point]
    # For each point in the queue:
    #   - Determine whether it is a branch or a leaf
    #   - For branch points, add their unclaimed neighbors to the search queue
    i = 0
    while i < len(search_queue):
        point = search_queue[i]
        neighbours = find_neighbours(data, point, eps)
        
        # leaf point, move on
        if len(neighbours) < min_points:
            i += 1
            continue
                   
        for Pn in neighbours:
            if labels[Pn] == -1:
               # Noise, can't be branch
               labels[Pn] = current_cluster_label

            elif labels[Pn] == 0:
                labels[Pn] = current_cluster_label
                search_queue.append(Pn)
        i += 1

def DBSCAN(points, eps, MinPts):
    labels = [0]*len(points)
    current_cluster_label = 0

    # Pick a random point, check if it is valid seed point and grow a new cluster
    # out of it if it is.  
     
    for point in range(0, len(points)):    
        if not (labels[point] == 0):
           continue
        
        neighbours = find_neighbours(points, point, eps)

        if len(neighbours) < MinPts:
            labels[point] = -1 # noise
            continue

        current_cluster_label += 1         
        labels[point] = current_cluster_label
        grow_cluster(points, labels, point, current_cluster_label, eps, MinPts)
    
    return labels

In [6]:
import pandas as pd
import numpy as np

columns = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth', 'Name']
dataset = pd.read_csv("../datasets/iris.csv", delimiter=',', names=columns)
df = pd.DataFrame(dataset[1:-1], columns = columns)
df = df.astype({'SepalLength':'float64', 'SepalWidth':'float64', 'PetalLength':'float64', 'PetalWidth':'float64'})
data = np.array(df)
#print(dataset[1:-1])
cluster = DBSCAN(data, 0.3, 1)
print(cluster)

[1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 3, 4, 5, 1, 6, 1, 7, 1, 8, 1, 1, 1, 1, 1, 1, 1, 1, 7, 9, 10, 1, 1, 11, 1, 1, 1, 1, 12, 1, 1, 13, 1, 1, 1, 1, 1, 14, 15, 14, 16, 17, 18, 15, 19, 17, 20, 21, 22, 23, 24, 25, 17, 26, 16, 27, 16, 28, 29, 30, 24, 17, 17, 31, 32, 24, 33, 16, 16, 16, 34, 26, 35, 14, 27, 16, 16, 16, 24, 16, 19, 16, 16, 16, 17, 36, 16, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 38, 50, 51, 40, 52, 53, 54, 55, 56, 42, 28, 55, 57, 28, 28, 58, 59, 44, 60, 58, 61, 62, 63, 64, 40, 28, 49, 55, 65, 38, 55, 55, 65, 28, 47, 64]
