# Hello DBSCAN and HDBSCAN Clustering

In [1]:
# Prerequisites & Dependencies
import time
import numpy as np
import matplotlib.pyplot as plt
import open3d as o3d
import hdbscan as hdb

# Needed libraries
# pip install open3d
# pip install hdbscan

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


### Load Point Cloud

In [2]:
pc_inliers = o3d.io.read_point_cloud("point_clouds_public/lidar_ground_inliers.ply", format='ply')
pc_outliers = o3d.io.read_point_cloud("point_clouds_public/lidar_ground_outliers.ply", format='ply')

### DBSCAN Clustering using Open3d

In [3]:
epsilon = 0.5
min_points = 7

t_start = time.time()
labels = np.array(pc_outliers.cluster_dbscan(eps=epsilon, min_points=min_points))
t_end = time.time()

nr_clusters = labels.max()
print(f"Number of clusters: {nr_clusters}")
print(f"Clustering took {t_end-t_start:.4f} seconds")

Number of clusters: 295
Clustering took 0.3425 seconds


### Visualize

In [4]:
colors = plt.get_cmap("tab20")(labels / (nr_clusters if nr_clusters > 0 else 1))
colors[labels < 0] = 0
pc_outliers.colors = o3d.utility.Vector3dVector(colors[:, :3])

o3d.visualization.draw_geometries([pc_inliers, pc_outliers], window_name="DBSCAN Clustering" )

### Export

In [5]:
# Export as xyz file from Numpy
np.savetxt("outputs/lidar_dbscan_clusters.xyz", 
           np.hstack((np.asarray(pc_outliers.points), (labels+1).reshape(-1, 1))),
           delimiter=';', fmt='%1.9f')

### Parameter Tuning

In [6]:
perfs = {}

for i in range(1, 10, 1):
    t_start = time.time()
    labels = np.array(pc_outliers.cluster_dbscan(eps=1/10, min_points=3))
    t_end = time.time()
    nr_clusters = labels.max()
    perfs[i/10,3] = len(pc_outliers.points), t_end-t_start, nr_clusters + 1

for i in range(1, 10, 1):
    t_start = time.time()
    labels = np.array(pc_outliers.cluster_dbscan(eps=0.4, min_points=i))
    t_end = time.time()
    nr_clusters = labels.max()
    perfs[0.4,i] = len(pc_outliers.points), t_end-t_start, nr_clusters + 1

for key, value in perfs.items():
    print(f"{key}: {value}")

(0.1, 3): (47894, 0.05539989471435547, np.int32(1491))
(0.2, 3): (47894, 0.04614901542663574, np.int32(1491))
(0.3, 3): (47894, 0.0601048469543457, np.int32(1491))
(0.4, 3): (47894, 0.26137256622314453, np.int32(623))
(0.5, 3): (47894, 0.05063819885253906, np.int32(1491))
(0.6, 3): (47894, 0.04944729804992676, np.int32(1491))
(0.7, 3): (47894, 0.049839019775390625, np.int32(1491))
(0.8, 3): (47894, 0.03815865516662598, np.int32(1491))
(0.9, 3): (47894, 0.06108212471008301, np.int32(1491))
(0.4, 1): (47894, 0.23796844482421875, np.int32(1549))
(0.4, 2): (47894, 0.2680623531341553, np.int32(885))
(0.4, 4): (47894, 0.25456690788269043, np.int32(481))
(0.4, 5): (47894, 0.24903202056884766, np.int32(417))
(0.4, 6): (47894, 0.2651710510253906, np.int32(354))
(0.4, 7): (47894, 0.2515683174133301, np.int32(306))
(0.4, 8): (47894, 0.2501220703125, np.int32(274))
(0.4, 9): (47894, 0.2497878074645996, np.int32(241))


### Using HDBSCAN

HDBSCAN does not need epsilon to be set by user

In [7]:
clusters = hdb.HDBSCAN(min_cluster_size = 7, gen_min_span_tree=True)
t_start = time.time()
clusters.fit(pc_outliers.points)
t_end = time.time()
print(f"Clustering took {t_end-t_start:.4f} seconds")

labels = clusters.labels_
nr_clusters = labels.max()
print(f"Number of clusters: {nr_clusters}")




Clustering took 3.3231 seconds
Number of clusters: 389


### Visualize HDBSCAN

In [8]:
colors = plt.get_cmap("tab20")(labels / (nr_clusters if nr_clusters > 0 else 1))
colors[labels < 0] = 0
pc_outliers.colors = o3d.utility.Vector3dVector(colors[:, :3])

o3d.visualization.draw_geometries([pc_inliers, pc_outliers], window_name="DBSCAN Clustering" )