# Experimente mit DBSCAN
Clustert die Nachbarschaft innerhalb eines Radius um einen Saatpunkt mit DBSCAN. Dabei kann mit den Parametern und Schwellenwerten experimentiert werden.

Zwei Visualisierungen:
- Anzahl der Cluster für jeden Saatpunkt
- für einen gewählten Saatpunkt: Geclusterte Nachbarschaft

In [1]:
import pdal 
import numpy as np
import matplotlib.pyplot as plt
import open3d as o3d
from scipy.spatial import KDTree
import os
import json
import pyvista as pv
import time

from interessant import * # Bei Änderungen Kernel neu starten

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


Datei wählen

In [2]:
# Select the run; `run24` / `run14` are presumably provided by the star import of `interessant` (verify).
run = run24
#run = run14

# Indices of notable tiles in `interessant`:
# Platform: 29; track with high intensity: 11; switch B: 16; underground station: 20; track far off to the side: 23; concrete cover: 28; train in run 14 A (axle counter in run24): 6; 
# Many tracks: 33; start of switch: 34; switch C: 38; OLA same H: 35; Y: 37
key = list(interessant.keys())[34] 
filename = interessant[key]
print(key, filename)

# Prefix the file name with the run and fail early if the file does not exist.
filename = os.path.join(run, filename)
if not os.path.exists(filename):
    raise FileNotFoundError(filename)

Anfang Weiche 4481275_5357000.copc.laz


## Parameter

In [3]:
# Voxel filter: maximum number of points tolerated between 0.5 m and 4.5 m
# above the voxel's ground level (e.g. 5 or 8).
thresh = 8
# Voxel filter: rail candidates must be fewer than this fraction of the voxel's points.
# First pass used 0.3; for "track with high intensity" 0.5 gives a much better result.
majority_tresh = 0.5

# Edge length of the square x/y voxels.
# voxel_size = 1.0  # alternative value, superseded by the assignment below
voxel_size = 25 / 30
print("Voxel size:", voxel_size)

# Minimum number of points per voxel. First attempts used 100, but that left a lot
# of black at tracks lying off to the side; 50 works better.
minimum_points = 50
# Minimum number of points in a seed point's neighbourhood.
minimum_in_hood = 10
# Minimum linearity for a cluster to count as a track.
linearity_tresh = 0.98

# Seed points are taken from candidates with an intensity below this value.
intensity_threshold = 14500
# Radius passed to the PDAL sample filter when thinning the seed points.
downsample_radius = 0.3
# Radius of the neighbourhood queried around each seed point.
neighborhood_radius = 0.5

Voxel size: 0.8333333333333334


## Voxelfilter

In [4]:
# Read the selected file with PDAL and take the first result array as the point table.
pipeline = pdal.Pipeline([pdal.Reader(filename)])
pipeline.execute()
points = pipeline.arrays[0]

In [5]:
# Stack the X, Y and Z fields into one coordinate array with one point per row.
xyz = np.vstack((points['X'], points['Y'], points['Z'])).transpose()

In [6]:
# Remove the offset (rounded, so that tile borders stay whole numbers)
offset = xyz.mean(axis=0).round() 
# xyz -= offset   # Only needed for visualisation

In [7]:
# Reset every point's class; RAIL is the class code the voxel filter writes for rail candidates.
points['Classification'] = 0 # Unclassified
RAIL = 20

In [8]:
# Per-axis maximum and minimum of all coordinates (bounding box of the loaded points).
maxp = xyz.max(axis=0)
minp = xyz.min(axis=0)
maxp, minp

(array([4.48130000e+06, 5.35702500e+06, 4.65140288e+02]),
 array([4.48127500e+06, 5.35700000e+06, 4.49770988e+02]))

In [9]:
# Replace x/y in a copy of xyz by the voxel index along each axis (floor division by
# the voxel size, counted from the minimum corner); the z column is left unchanged.
voxels = xyz.copy()
voxels[:, :2] = ((xyz[:, :2] - minp[:2]) // voxel_size).astype(int)

In [10]:
# Check the number of voxels per axis
np.ceil((maxp[:2] - minp[:2]) / voxel_size).astype(int)

array([30, 30])

In [11]:
from collections import defaultdict

# Both dictionaries are keyed by the (x, y) voxel index pair:
# voxel_dict collects the z values, index_dict the row indices of the points.
voxel_dict = defaultdict(list)
index_dict = defaultdict(list)

# Fill the dictionaries point by point
for idx, voxel in enumerate(voxels):
    point = xyz[idx]
    voxel_key = (voxel[0], voxel[1])
    voxel_dict[voxel_key].append(point[2])
    index_dict[voxel_key].append(idx)

In [12]:
# Voxel filter: mark rail candidates voxel by voxel.
# The loop variables are named voxel_key / voxel_indices so that the notebook
# globals `key` (selected tile) and `indices` (neighbourhood lists) are not overwritten.
for voxel_key, z_values in voxel_dict.items():

    # Threshold on number of points in voxel
    if len(z_values) < minimum_points:
        continue

    voxel_indices = np.array(index_dict[voxel_key])
    z_values = np.array(z_values)
    ground_level = np.percentile(z_values, 10) # 10% percentile
    # Check that there are almost no points 0.5 to 4.5 m above the ground,
    # but allow for some noise (`thresh` is set in the parameter cell)
    count = ((z_values > ground_level + 0.5) & (z_values < ground_level + 4.5)).sum()
    if count > thresh:
        continue

    # Look at the points within 0.5 m above ground and take their 99.5% percentile
    mask = (z_values > ground_level) & (z_values < ground_level + 0.5)
    if not mask.any():
        # No points in that band: skip explicitly instead of relying on the
        # exception np.percentile may or may not raise for empty input
        continue
    candidates_top = np.percentile(z_values[mask], 99.5)

    # Oude Elberink require the height difference > 0.1 m
    # and mark only the points 10 cm below the top as rail point candidates
    if candidates_top - ground_level > 0.1:
        mask = (z_values > candidates_top - 0.1) & (z_values < candidates_top + 0.05)

        # Also make sure these are only a minority of the points (otherwise it's a slope)
        if mask.sum() < majority_tresh * len(z_values):  # e.g. 0.3
            points['Classification'][voxel_indices[mask]] = RAIL


In [13]:
# Keep only the points the voxel filter classified as RAIL.
candidates = points[points["Classification"] == RAIL]
candidates.shape

(154816,)

## View Settings

In [14]:
# Copy the view settings with Ctrl+C and paste them into the string below

viewsettings = json.loads('''
{
	"class_name" : "ViewTrajectory",
	"interval" : 29,
	"is_loop" : false,
	"trajectory" : 
	[
		{
			"boundingbox_max" : [ 11.999975427985191, 11.99998692702502, 13.124079998226534 ],
			"boundingbox_min" : [ -13.000024572014809, -13.00001307297498, -3.9965200017734333 ],
			"field_of_view" : 60.0,
			"front" : [ -0.20468464372193082, -0.82045900926496551, 0.53380821531742795 ],
			"lookat" : [ -2.1145501200370735, -2.6052610037108783, 1.4494799802055294 ],
			"up" : [ 0.19010212482081987, 0.50164960558000959, 0.84392467398461002 ],
			"zoom" : 0.55999999999999983
		}
	],
	"version_major" : 1,
	"version_minor" : 0
}

''')

# Camera parameters of the first (and only) trajectory entry
front, lookat, up, zoom = (
    viewsettings["trajectory"][0][name] for name in ("front", "lookat", "up", "zoom")
)

## Kandidaten- und Saatpunkte

Noise Filter

In [15]:
# filters.outlier sets Classification to 7, filters.range removes the points with Classification 7
# The result replaces `candidates`, so later cells work on the noise-filtered candidates.

noise_filter = pdal.Filter("filters.outlier", method="statistical", mean_k=10, multiplier=2.0).pipeline(candidates) | pdal.Filter("filters.range", limits="Classification![7:7]")
print(noise_filter.toJSON())
noise_filter.execute()
candidates = noise_filter.arrays[0]
candidates.shape 

[{"type": "filters.outlier", "method": "statistical", "mean_k": 10, "multiplier": 2.0, "tag": "filters_outlier1"}, {"type": "filters.range", "limits": "Classification![7:7]", "tag": "filters_range1"}]


(151900,)

In [16]:
# From here on `xyz` holds the coordinates of the filtered candidates (no longer all points),
# shifted by the rounded offset.
xyz = np.vstack((candidates['X'], candidates['Y'], candidates['Z'])).transpose()
xyz -= offset

Saatpunkte

In [17]:
# Only candidates with an intensity below the threshold are considered as seed points.
low_intensity = candidates[candidates["Intensity"] < intensity_threshold]
low_intensity.shape

(96434,)

In [18]:
# Downsample with poisson sampling (PDAL filters.sample with radius `downsample_radius`);
# the thinned low-intensity candidates become the seed points.

downsampling_pipeline = pdal.Filter("filters.sample", radius=downsample_radius).pipeline(low_intensity)
downsampling_pipeline.execute()
seed_points = downsampling_pipeline.arrays[0]
seed_points.shape 

(614,)

In [19]:
# Seed point coordinates, shifted by the same offset as the candidate coordinates in `xyz`.
xyz_seed = np.vstack((seed_points['X'], seed_points['Y'], seed_points['Z'])).transpose()
xyz_seed -= offset


k-d-Baum

In [20]:
# k-D tree with all candidate points (offset-reduced coordinates in `xyz`)
tree = KDTree(xyz)  

In [21]:
# indices: ndarray (dtype object) with a list of indices for each seed point,
# i.e. the candidate points within `neighborhood_radius` of that seed point
indices = tree.query_ball_point(xyz_seed, r=neighborhood_radius)

In [22]:
# Number of seed points (rows of xyz_seed)
seed_point_count = xyz_seed.shape[0]

## Funktionen

In [23]:
def pca(cloud):
    """Use PCA to get the eigenvalues and eigenvectors of a point cloud.

    Parameters
    ----------
    cloud : array_like, shape (n_points, n_dims)
        One point per row.

    Returns
    -------
    eigenvals : ndarray, shape (n_dims,)
        Eigenvalues of the covariance matrix, sorted in descending order.
    eigenvecs : ndarray, shape (n_dims, n_dims)
        Matching eigenvectors in columns; the first principal axis is
        ``eigenvecs[:, 0] == eigenvecs.T[0]``. The sign of each vector is arbitrary.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the cloud is not 2-D, has fewer than two points, or its covariance
        matrix contains non-finite values.
    """
    cloud = np.asarray(cloud, dtype=float)
    if cloud.ndim != 2 or cloud.shape[0] < 2:
        raise np.linalg.LinAlgError("PCA needs a 2-D array with at least two points")

    # np.cov subtracts the mean itself; rowvar=False: columns are the variables
    cov_matrix = np.cov(cloud, rowvar=False)
    if not np.all(np.isfinite(cov_matrix)):
        raise np.linalg.LinAlgError("Covariance matrix must not contain infs or NaNs")

    # The covariance matrix is symmetric, so use eigh: it always returns real
    # eigenvalues (in ascending order) and orthonormal eigenvectors.
    eigenvals, eigenvecs = np.linalg.eigh(cov_matrix)

    # Reverse to descending order; copy so callers get plain contiguous arrays
    return eigenvals[::-1].copy(), eigenvecs[:, ::-1].copy()

def linearity(eigenvals):
    """Return the linearity (l1 - l2) / l1 from the first two eigenvalues.

    `eigenvals` is indexed at 0 and 1, so it is expected to be sorted in
    descending order, as returned by `pca`.
    """
    largest = eigenvals[0]
    second = eigenvals[1]
    return (largest - second) / largest



Varianten von dbscan (alle mit Open3D implementiert), mit und ohne Multiplikation der z-Werte mit einem Faktor

In [24]:
def dbscan(xyz, eps=0.05, min_points=10):
    """Cluster points with Open3D's DBSCAN.

    `eps` and `min_points` are passed straight to `cluster_dbscan`.
    Returns a numpy array with one cluster label per input point; callers in
    this notebook treat negative labels as "not part of any cluster".
    """
    cloud = o3d.geometry.PointCloud()
    cloud.points = o3d.utility.Vector3dVector(xyz)
    cluster_ids = cloud.cluster_dbscan(eps=eps, min_points=min_points, print_progress=False)
    return np.array(cluster_ids)

In [25]:
def dbscan_stretchz(xyz, eps=0.05, min_points=10, stretch=1.5):
    """Open3D DBSCAN on a copy of the points with z multiplied by `stretch`.

    The input array is not modified. `eps` and `min_points` are passed straight
    to `cluster_dbscan`. Returns a numpy array with one label per point.
    """
    scaled = xyz.copy()
    scaled[:, 2] *= stretch

    cloud = o3d.geometry.PointCloud()
    cloud.points = o3d.utility.Vector3dVector(scaled)
    cluster_ids = cloud.cluster_dbscan(eps=eps, min_points=min_points, print_progress=False)
    return np.array(cluster_ids)

In [26]:
def dbscan_stretchz_auto(xyz, min_points=10, stretch=1.5):
    """Open3D DBSCAN with stretched z and an eps derived from the point count.

    With `min_points` points or fewer, every point is labelled -1 without
    running DBSCAN. Otherwise eps is 50 / number of points, but at least 0.06,
    and clustering runs on a copy of the points with z multiplied by `stretch`.
    Returns a numpy array with one label per point.
    """
    pointcount = xyz.shape[0]
    if pointcount <= min_points:
        # Too few points to form a cluster: label everything -1
        return np.full(pointcount, -1, dtype=np.int8)

    eps = max(50 / pointcount, 0.06)

    scaled = xyz.copy()
    scaled[:, 2] *= stretch

    cloud = o3d.geometry.PointCloud()
    cloud.points = o3d.utility.Vector3dVector(scaled)
    cluster_ids = cloud.cluster_dbscan(eps=eps, min_points=min_points, print_progress=False)
    return np.array(cluster_ids)

In [27]:
def plot_cluster_line(cluster, eigenvects, stretch=0.2):
    """Build a PyVista line along the first eigenvector of a cluster.

    The line starts at the cluster mean and ends at the mean plus the first
    eigenvector (first column of `eigenvects`) scaled by `stretch`.
    """
    origin = cluster.mean(axis=0)
    direction = eigenvects[:, 0] * stretch
    return pv.Line(origin, origin + direction)


In [28]:
def plot_first_eigenvec(point, eigenvects, color=[1, 0, 0], stretch=1):
    """Build an Open3D LineSet showing the first eigenvector at a point.

    The single segment runs from `point` to `point` plus the first eigenvector
    (first column of `eigenvects`) scaled by `stretch`, drawn in `color`.
    """
    tip = point + eigenvects[:, 0] * stretch

    segment = o3d.geometry.LineSet()
    segment.points = o3d.utility.Vector3dVector([point, tip])
    segment.lines = o3d.utility.Vector2iVector([[0, 1]])
    segment.colors = o3d.utility.Vector3dVector([color])
    return segment

## Visualisierung: Anzahl der Cluster pro Saatpunkt
Hier können die Parameter und Schwellenwerte experimentell angepasst werden.

In [29]:
# Parameters for the cluster count experiment. minimum_in_hood and linearity_tresh
# are assigned again here (they are also set in the parameter cell) so they can be tuned in place.
min_points = 10
minimum_in_hood = 10
linearity_tresh = 0.98 # Kononen et al. 2024 use threshold 0.98 (but additionally keep points in neighbourhoods with high point density)

# If True, a PyVista line is collected for every cluster counted as a track.
with_lines = False

In [30]:
# Per-seed result arrays, each of shape (seed_point_count, 1)
linearity_at_seed = np.full((seed_point_count, 1), np.nan, dtype=float)
trackcount = np.zeros((seed_point_count, 1), dtype=int)
clustercount = np.zeros((seed_point_count, 1), dtype=int)

lines = []

for i in range(seed_point_count):
    hood = xyz[indices[i]]
    if hood.shape[0] >= minimum_in_hood:

        # DBSCAN with stretched z
        # labels = dbscan_stretchz(hood, eps=2, min_points=min_points, stretch=4) # Switch C works with eps=0.07, min_points=20, stretch=4
        labels = dbscan_stretchz_auto(hood, min_points=min_points, stretch=4)
        max_label = labels.max()
        clustercount[i] = max_label + 1

        for label in range(max_label + 1):
            cluster = hood[labels == label]
            try:
                eigenvals, eigenvects = pca(cluster)
            except np.linalg.LinAlgError: # With SVD: ValueError
                # Rare cases of 0 points in the cluster
                continue
            cluster_linearity = linearity(eigenvals)

            # eigenvals[0] is the variance along the 1st PC, so it depends on the length
            if not ((cluster_linearity > linearity_tresh) and (eigenvals[0] > 0.02)):
                continue

            trackcount[i] += 1
            if with_lines:
                line = plot_cluster_line(cluster, eigenvects)
                lines.append(line)

print(trackcount.max())
print(clustercount.max())

3
12


Ggf. Schieberegler nach links schieben, um alle Punkte sichtbar zu machen

In [None]:
# Use pyvista to get scalar colors with color bar
pcd_hood = pv.PolyData(xyz_seed)
# Store the counts as flat 32-bit integers. The recorded output of this cell shows the
# JS error "Cannot mix BigInt and other types"; the 64-bit integer scalars are the
# likely cause (NOTE(review): confirm in the browser after re-running).
pcd_hood["trackcount"] = np.asarray(trackcount, dtype=np.int32).ravel()
pcd_hood["clustercount"] = np.asarray(clustercount, dtype=np.int32).ravel()

p = pv.Plotter()

# Optional lines of the clusters counted as tracks (empty unless with_lines was set)
for line in lines:
    p.add_mesh(line, color='red')

p.add_mesh_threshold(pcd_hood, 'clustercount', all_scalars=True, render_points_as_spheres=True, point_size=10)
p.show()

Widget(value='<iframe src="http://localhost:35059/index.html?ui=P_0x7fe33428df70_0&reconnect=auto" class="pyvi…

 JS Error => error: Uncaught TypeError: Cannot mix BigInt and other types, use explicit conversions


In [32]:
i = 120 # Select the seed point

min_points = 10 

# Results for this one seed point. The names differ from `lines`, `trackcount` and
# `clustercount` of the overview loop, so that cell's arrays are not overwritten
# and its plot can be re-run afterwards.
seed_lines = []
seed_trackcount = 0

hood = xyz[indices[i]]
if hood.shape[0] < minimum_in_hood:   
    raise ValueError("Not enough points in neighborhood") # continue
print(f"Seed point {i} has {hood.shape[0]} points in its neighborhood")

# PCA of the whole neighbourhood (disabled)
# eigenvals, eigenvects = pca(hood)
# linearity_at_seed = linearity(eigenvals)

# print(f"Linearity of {i}: {linearity_at_seed}")
# print(f"Eigenvalues: {eigenvals}")
# print(f"Eigenvectors: {eigenvects}")


# Different experimental variants
# NOTE(review): dbscan_stretchz_noise is not defined in the visible part of the notebook.

# labels = dbscan(hood, eps=0.05, min_points=min_points)
# labels = dbscan_stretchz(hood, eps=0.05, min_points=min_points, stretch=2)
# labels = dbscan_stretchz_noise(hood, eps=0.055, min_points=min_points, stretch=2.5, noise_neighbors=10, noise_std=2.0)
# labels = dbscan_stretchz_noise(hood, eps=0.07, min_points=min_points, stretch=3, noise_neighbors=10, noise_std=2.0)
# labels = dbscan_stretchz(hood, eps=0.09, min_points=min_points, stretch=5)
labels = dbscan_stretchz_auto(hood, min_points=min_points, stretch=4)

max_label = labels.max()
seed_clustercount = max_label + 1
print()
print(f"Found {seed_clustercount} clusters")
for label in range(max_label + 1):
    cluster = hood[labels == label]
    eigenvals, eigenvects = pca(cluster)
    cluster_linearity = linearity(eigenvals)
    # Projection on the first eigenvector (intended for the length along the first
    # principal component). NOTE(review): this projects the whole neighbourhood,
    # not just the cluster, and the result is not used yet.
    projected = hood @ eigenvects[:,0]
    print(f"Cluster {label}: {labels[labels == label].shape[0]} points")
    print(f"Linearity: {cluster_linearity} {cluster_linearity > linearity_tresh}")
    print(f"Eigenvalues: {eigenvals}")
    print(f"Eigenvectors: {eigenvects}")

    # Line colour encodes the verdict: red = counted as track, blue = linear but
    # too short (small variance along the 1st PC), black = not linear enough
    if cluster_linearity > linearity_tresh:
        if eigenvals[0] > 0.02:
            seed_trackcount += 1
            line_color = [1, 0, 0]
        else:
            line_color = [0, 0, 0.7]
    else:
        line_color = [0, 0, 0]
    seed_lines.append(plot_first_eigenvec(cluster.mean(axis=0), eigenvects, color=line_color, stretch=eigenvals[0]*5))
print(f"Track count: {seed_trackcount}")

colors = plt.get_cmap("tab20")(labels / (max_label if max_label > 0 else 1))
colors[labels < 0] = [0.5, 0.5, 0.5, 1] # Colour for points that belong to no cluster
hood_pcd = o3d.geometry.PointCloud()
hood_pcd.points = o3d.utility.Vector3dVector(hood)
hood_pcd.colors = o3d.utility.Vector3dVector(colors[:,:3])
o3d.visualization.draw_geometries([hood_pcd] + seed_lines)

Seed point 120 has 360 points in its neighborhood

Found 1 clusters
Cluster 0: 360 points
Linearity: 0.9855591185033515 True
Eigenvalues: [0.08124895 0.00117331 0.00015258]
Eigenvectors: [[ 0.9989192  -0.00115508  0.04646612]
 [ 0.04519998  0.25717566 -0.96530702]
 [-0.01083495  0.96636398  0.25694992]]
Track count: 1
