# Experimente mit DBSCAN

In [42]:
import pdal 
import numpy as np
import matplotlib.pyplot as plt
import open3d as o3d
from scipy.spatial import KDTree
from scipy.sparse.csgraph import connected_components
import scipy.sparse as sp
import os
import json
import pyvista as pv
import time

from interessant import * # Bei Änderungen Kernel neu starten

In [43]:
run = run24  # alternative: run14
# filename = interessant['OLA gleiche Höhe wie Gleis']

# Positions of noteworthy tiles in `interessant` (the names are its keys):
# Bahnsteig: 29; Gleis hohe Intensität: 11; Weiche B: 16; Unterirdischer Bhf: 20; Gleis weit abseits: 23; Betondeckel: 28; Zug run 14 A (in run24 Achszähler): 6;
# Viele Gleise: 33; Anfang Weiche: 34; Weiche C: 38 OLA gleiche H: 35; Y: 37
key = list(interessant)[6]
filename = interessant[key]
print(key, filename)

# From here on `filename` is the path joined below `run`; fail early if it is missing
filename = os.path.join(run, filename)
if not os.path.exists(filename):
    raise FileNotFoundError(filename)

Zug run 14 A 4480675_5356875.copc.laz


In [44]:
# --- Voxel filter parameters ---
# A voxel is only examined further if at most `thresh` points lie 0.5-4.5 m above its ground level
thresh = 8  # e.g. 5 or 8
# Rail candidates must be less than this fraction of the voxel's points.
# First pass used 0.3; for "Gleis hohe Intensität" 0.5 gives a much better result.
majority_tresh = 0.5

# Previously tried: voxel_size = 1.0
# 25 / 30 gives 30 voxels per axis for the tile extent shown further below
voxel_size = 25 / 30
print("Voxel size:", voxel_size)

# Voxels with fewer points are skipped. First attempts used 100, but that left
# large unclassified areas on tracks far off to the side; 50 works better.
minimum_points = 50

# --- Neighbourhood / clustering parameters ---
minimum_in_hood = 10      # minimum number of candidate points around a seed point
linearity_tresh = 0.98    # clusters above this linearity count as track-like

intensity_threshold = 14500  # seed points are drawn from candidates below this intensity
downsample_radius = 0.3      # radius passed to the sampling filter for the seed points
neighborhood_radius = 0.5    # radius of the ball query around each seed point

Voxel size: 0.8333333333333334


In [45]:
import subprocess
# subprocess.Popen(["pyvistaviewer", filename])

In [46]:
import logging
# Send all DEBUG (and higher) records to dbscan.log, then create this notebook's logger
logging.basicConfig(level=logging.DEBUG, filename='dbscan.log', encoding='utf-8')
logger = logging.getLogger(__name__)

## Voxelfilter

In [47]:
# Read the selected tile with PDAL and keep the first result array;
# it is indexed by field name below (e.g. points['X'], points['Classification']).
pipeline = pdal.Pipeline([pdal.Reader(filename)])
pipeline.execute()
points = pipeline.arrays[0]

In [48]:
# One row per point, columns X, Y, Z
xyz = np.column_stack((points['X'], points['Y'], points['Z']))

In [49]:
# Offset to remove later (rounded, so that tile borders stay whole numbers)
offset = np.round(xyz.mean(axis=0))
# xyz -= offset   # only needed for visualisation

In [50]:
RAIL = 20  # class code written to rail candidate points below
points['Classification'] = 0  # reset every point to 0 (Unclassified)

In [51]:
# Axis-aligned bounding box of the tile
minp = xyz.min(axis=0)
maxp = xyz.max(axis=0)
maxp, minp

(array([4.48070000e+06, 5.35690000e+06, 4.70156201e+02]),
 array([4.48067500e+06, 5.35687500e+06, 4.50852901e+02]))

In [52]:
# Copy of the points whose X/Y columns are replaced by the 2-D voxel index
# (stored back into the float array, Z stays untouched)
voxels = xyz.copy()
voxels[:, :2] = np.floor_divide(xyz[:, :2] - minp[:2], voxel_size).astype(int)

In [53]:
# Sanity check: number of voxels along X and Y
extent_xy = maxp[:2] - minp[:2]
np.ceil(extent_xy / voxel_size).astype(int)

array([30, 30])

In [54]:
from collections import defaultdict

# Per (x, y) voxel: the Z values of its points and their indices into `points`
voxel_dict = defaultdict(list)
index_dict = defaultdict(list)

# Fill both dictionaries in a single pass over all points
for idx, (voxel_xy, z) in enumerate(zip(voxels[:, :2], xyz[:, 2])):
    voxel_key = tuple(voxel_xy)
    voxel_dict[voxel_key].append(z)
    index_dict[voxel_key].append(idx)

In [55]:
# Mark rail candidate points voxel by voxel.
# The loop uses its own variable names (voxel_key, voxel_indices) so that the
# notebook-level `key` (selected dataset) and `indices` are not overwritten.
for voxel_key, voxel_z in voxel_dict.items():

    # Threshold on number of points in voxel
    if len(voxel_z) < minimum_points:
        continue

    voxel_indices = np.array(index_dict[voxel_key])
    voxel_z = np.array(voxel_z)
    ground_level = np.percentile(voxel_z, 10)  # 10th percentile

    # Check that there are almost no points 0.5 to 4.5 m above the ground,
    # but allow for some noise (`thresh` is set with the other parameters)
    count = ((voxel_z > ground_level + 0.5) & (voxel_z < ground_level + 4.5)).sum()
    if count > thresh:
        continue

    # Points within 0.5 m above ground; their 99.5th percentile is the top of
    # the candidate layer (98 % was an earlier choice)
    near_ground = (voxel_z > ground_level) & (voxel_z < ground_level + 0.5)
    if not near_ground.any():
        # Nothing strictly above ground level in this voxel; checked explicitly
        # instead of relying on np.percentile raising IndexError on empty input
        continue
    candidates_top = np.percentile(voxel_z[near_ground], 99.5)

    # Oude Elberink require the height difference > 0.1 m
    # and mark only the points 10 cm below the top as rail point candidates
    if candidates_top - ground_level > 0.1:
        rail_mask = (voxel_z > candidates_top - 0.1) & (voxel_z < candidates_top + 0.05)

        # Also make sure these are only a minority of the points (otherwise it's a slope)
        if rail_mask.sum() < majority_tresh * len(voxel_z):  # e.g. 0.3
            points['Classification'][voxel_indices[rail_mask]] = RAIL


In [56]:
# Keep only the points that the voxel filter marked as rail candidates
rail_mask = points["Classification"] == RAIL
candidates = points[rail_mask]
candidates.shape

(153519,)

## Noise Filter

In [57]:
# filters.outlier sets Classification to 7, filters.range removes the points with Classification 7
outlier_stage = pdal.Filter("filters.outlier", method="statistical", mean_k=10, multiplier=2.0)
range_stage = pdal.Filter("filters.range", limits="Classification![7:7]")

noise_filter = outlier_stage.pipeline(candidates) | range_stage
print(noise_filter.toJSON())
noise_filter.execute()
candidates = noise_filter.arrays[0]
candidates.shape

[{"type": "filters.outlier", "method": "statistical", "mean_k": 10, "multiplier": 2.0, "tag": "filters_outlier1"}, {"type": "filters.range", "limits": "Classification![7:7]", "tag": "filters_range1"}]


(149283,)

## View Settings

In [58]:
# Copy the view settings with Ctrl+C (presumably in the Open3D viewer window) and paste them here
viewsettings = json.loads('''
{
	"class_name" : "ViewTrajectory",
	"interval" : 29,
	"is_loop" : false,
	"trajectory" : 
	[
		{
			"boundingbox_max" : [ 11.999975427985191, 11.99998692702502, 13.124079998226534 ],
			"boundingbox_min" : [ -13.000024572014809, -13.00001307297498, -3.9965200017734333 ],
			"field_of_view" : 60.0,
			"front" : [ -0.20468464372193082, -0.82045900926496551, 0.53380821531742795 ],
			"lookat" : [ -2.1145501200370735, -2.6052610037108783, 1.4494799802055294 ],
			"up" : [ 0.19010212482081987, 0.50164960558000959, 0.84392467398461002 ],
			"zoom" : 0.55999999999999983
		}
	],
	"version_major" : 1,
	"version_minor" : 0
}

''')

# Camera parameters of the first (and only) trajectory entry
camera = viewsettings["trajectory"][0]
front, lookat, up, zoom = (camera[name] for name in ("front", "lookat", "up", "zoom"))

## Candidate and Seed Points

In [59]:
# Coordinates of the remaining candidates, shifted by the rounded offset
xyz = np.column_stack((candidates['X'], candidates['Y'], candidates['Z']))
xyz -= offset

In [60]:
# Candidates whose intensity is below the configured threshold
is_low_intensity = candidates["Intensity"] < intensity_threshold
low_intensity = candidates[is_low_intensity]
low_intensity.shape

(58714,)

In [61]:
# Downsample the low-intensity candidates with Poisson sampling to get the seed points
sample_filter = pdal.Filter("filters.sample", radius=downsample_radius)
downsampling_pipeline = sample_filter.pipeline(low_intensity)
downsampling_pipeline.execute()
seed_points = downsampling_pipeline.arrays[0]
seed_points.shape

(444,)

In [62]:
# Seed point coordinates in the same shifted frame as `xyz`
xyz_seed = np.column_stack((seed_points['X'], seed_points['Y'], seed_points['Z']))
xyz_seed -= offset


In [63]:
# k-D tree with all candidate points (not only the seeds); queried around each seed point below
tree = KDTree(xyz)  

In [64]:
# indices: ndarray (dtype object) with a list of indices for each seed point;
# each list holds the rows of `xyz` within neighborhood_radius of that seed
indices = tree.query_ball_point(xyz_seed, r=neighborhood_radius)

In [65]:
# Number of seed points (rows of xyz_seed)
seed_point_count = len(xyz_seed)

NumPy liefert die Eigenvektoren bereits normiert zurück (Länge 1).

In [66]:
def pca(cloud):
    """Principal component analysis of a point cloud.

    Parameters
    ----------
    cloud : array-like of shape (n_points, n_dims)
        One point per row.

    Returns
    -------
    sorted_eigenvals : ndarray
        Eigenvalues of the covariance matrix, largest first.
    sorted_eigenvecs : ndarray
        Matching unit eigenvectors stored in columns; the first principal
        axis is ``sorted_eigenvecs[:, 0] == sorted_eigenvecs.T[0]``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the covariance matrix contains NaN or inf (e.g. a cloud with fewer
        than two points) or the decomposition fails.
    """
    # np.cov subtracts the mean itself, so no separate centering is needed.
    # rowvar=False: columns are the variables, rows the observations.
    cov_matrix = np.cov(np.asarray(cloud, dtype=float), rowvar=False)

    # eigh does not validate its input; raise the error callers already handle.
    if not np.all(np.isfinite(cov_matrix)):
        raise np.linalg.LinAlgError("Covariance matrix contains NaN or inf")

    # A covariance matrix is symmetric: eigh guarantees real eigenvalues
    # (returned in ascending order) and orthonormal eigenvectors.
    eigenvals, eigenvecs = np.linalg.eigh(cov_matrix)

    # Flip to descending order
    sorted_eigenvals = eigenvals[::-1]
    sorted_eigenvecs = eigenvecs[:, ::-1]
    return sorted_eigenvals, sorted_eigenvecs

def linearity(eigenvals):
    """Linearity of a point cloud from its eigenvalues (largest first).

    Relative gap between the two largest eigenvalues: 0 when they are equal,
    1 when the second one is zero.
    """
    largest, second = eigenvals[0], eigenvals[1]
    return (largest - second) / largest

def pca_spread(points, eigenvals, eigenvects):
    """Length of a point set along its first principal component.

    Parameters
    ----------
    points : array-like of shape (n_points, n_dims)
    eigenvals : sequence
        Unused; kept so that existing calls keep working.
    eigenvects : ndarray
        Unit eigenvectors in columns, first principal axis in column 0.

    Returns
    -------
    float
        Extent (max - min) of the points projected on the first eigenvector,
        in the same unit as ``points``.
    """
    # Projecting on a unit vector already yields coordinates in the original
    # unit. The former extra factor sqrt(eigenvals[0]) scaled the result by
    # the standard deviation, so it was no longer a length.
    projected = np.asarray(points) @ eigenvects[:, 0]
    return np.max(projected) - np.min(projected)

In [67]:
def theta(eigenvects):
    """Angle in degrees between the first eigenvector and the z-axis.

    Parameters
    ----------
    eigenvects : ndarray of shape (3, 3)
        Eigenvectors in columns; column 0 is the first principal axis.

    Returns
    -------
    float
        Angle between 0 and 180 degrees.
    """
    first = np.asarray(eigenvects, dtype=float)[:, 0]
    # Normalise with the norm of the same vector that is projected (column 0).
    # The old code divided by the norm of row 0, which is only correct for
    # orthonormal matrices.
    cos_theta = first @ np.array([0, 0, 1]) / np.linalg.norm(first)
    # Guard against rounding slightly outside [-1, 1], which would give NaN
    cos_theta = np.clip(cos_theta, -1.0, 1.0)
    return np.arccos(cos_theta) * 180 / np.pi

In [68]:
def dbscan(xyz, eps=0.05, min_points=10):
    """Cluster points with Open3D's DBSCAN.

    Returns one label per row of ``xyz`` as a numpy array; negative labels
    mark points that belong to no cluster.
    """
    cloud = o3d.geometry.PointCloud()
    cloud.points = o3d.utility.Vector3dVector(xyz)
    # eps and min_points are handed to Open3D unchanged
    cluster_ids = cloud.cluster_dbscan(eps=eps, min_points=min_points, print_progress=False)
    return np.array(cluster_ids)

In [69]:
def dbscan_stretchz(xyz, eps=0.05, min_points=10, stretch=1.5):
    """DBSCAN on a copy of the points whose z coordinate is scaled by ``stretch``.

    The input array is left untouched. Returns one label per row as a numpy
    array; negative labels mark points that belong to no cluster.
    """
    stretched = xyz.copy()
    stretched[:, 2] *= stretch  # exaggerate height differences before clustering

    cloud = o3d.geometry.PointCloud()
    cloud.points = o3d.utility.Vector3dVector(stretched)
    cluster_ids = cloud.cluster_dbscan(eps=eps, min_points=min_points, print_progress=False)
    return np.array(cluster_ids)

In [70]:
def dbscan_stretchz_auto(xyz, min_points=10, stretch=1.5):
    """DBSCAN with stretched z and an eps derived from the number of points.

    eps is ``50 / n_points`` but never smaller than 0.06. Inputs with at most
    ``min_points`` rows are not clustered: every point gets label -1.
    Returns one label per row as a numpy array.
    """
    n_points = xyz.shape[0]
    if n_points <= min_points:
        # Too few points to form a cluster
        return np.full(n_points, -1, dtype=np.int8)

    eps = max(50 / n_points, 0.06)

    stretched = xyz.copy()
    stretched[:, 2] *= stretch  # exaggerate height differences before clustering

    cloud = o3d.geometry.PointCloud()
    cloud.points = o3d.utility.Vector3dVector(stretched)
    cluster_ids = cloud.cluster_dbscan(eps=eps, min_points=min_points, print_progress=False)
    return np.array(cluster_ids)

In [71]:
# def graph_based_segmentation(points, max_distance=0.1):
#     kdtree = KDTree(points)
#     graph = kdtree.sparse_distance_matrix(kdtree, max_distance=max_distance)

#     n_components, labels = connected_components(graph, directed=False)

#     return labels

In [72]:
# Funktioniert nicht, 0 Cluster gefunden

# def dbscan_stretchz_pdal(xyz, eps=0.07, min_points=20, stretch=1.5):
#     xyz = xyz.copy()
#     xyz[:, 2] *= stretch

#     dt = {'names': ['X', 'Y', 'Z'], 'formats': [np.float32, np.float32, np.float32]}
#     pipeline = pdal.Filter(type="filters.dbscan", min_points=20, eps=0.07).pipeline(xyz.view(dt))
    
#     pipeline.execute()
#     return pipeline.arrays[0]['ClusterID']

In [73]:
# Settings for the per-seed clustering loop below
with_lines = False        # collect a pyvista line per track-like cluster (slow to render)

min_points = 10           # min_points passed to DBSCAN
minimum_in_hood = 10      # seeds with fewer neighbours are skipped
linearity_tresh = 0.98    # 0.98

In [74]:
def plot_cluster_line(cluster, eigenvects, stretch=0.2):
    """Build a pyvista line from the cluster centroid along its first eigenvector.

    The line starts at the mean of ``cluster`` and ends ``stretch`` times the
    first eigenvector (column 0 of ``eigenvects``) away from it.
    """
    centroid = cluster.mean(axis=0)
    direction = eigenvects[:, 0] * stretch
    return pv.Line(centroid, centroid + direction)



In [75]:
# Per-seed results, one row per seed point
linearity_at_seed = np.full((seed_point_count, 1), np.nan)  # allocated as NaN; not written in this cell
trackcount = np.zeros((seed_point_count, 1), dtype=int)
clustercount = np.zeros((seed_point_count, 1), dtype=int)

lines = []

for i in range(seed_point_count):
    hood = xyz[indices[i]]
    if hood.shape[0] < minimum_in_hood:
        continue
    logger.debug("--------------------------")
    logger.debug(f"Seed point {i} has {hood.shape[0]} points in its neighborhood")

    # DBSCAN with stretched z ("Weiche C" worked with eps=0.07, min_points=20, stretch=4)
    labels = dbscan_stretchz_auto(hood, min_points=min_points, stretch=4)
    max_label = labels.max()
    clustercount[i] = max_label + 1
    logger.debug(f"DBSCAN {max_label + 1} clusters")

    for label in range(max_label + 1):
        cluster = hood[labels == label]
        try:
            eigenvals, eigenvects = pca(cluster)
        except np.linalg.LinAlgError:  # with SVD: ValueError
            # Rare cases of 0 points in cluster
            continue
        cluster_linearity = linearity(eigenvals)

        logger.debug(f"Cluster {label} linearity: {cluster_linearity}")
        logger.debug(f"Eigenvalues: {eigenvals}")
        logger.debug(f"Eigenvectors: {eigenvects}")

        # eigenvals[0] is the variance along the first PC, so it depends on the cluster length
        is_track = (cluster_linearity > linearity_tresh) and (eigenvals[0] > 0.02)
        if not is_track:
            continue

        trackcount[i] += 1
        if with_lines:
            lines.append(plot_cluster_line(cluster, eigenvects))

    logger.debug(f"Track count: {trackcount[i]}")

print(trackcount.max())
print(clustercount.max())

2
31


kononenFullyAutomatedExtraction2024 verwenden threshold 0.98 (behalten aber zusätzlich auch Punkte in Nachbarschaften mit hoher Punktdichte)

In [76]:
# NOTE: the per-seed loop above only appends to `lines` while with_lines is True,
# so that cell has to be re-run after flipping this flag for lines to show up.
with_lines = True

In [77]:
# Use pyvista to get scalar colors with color bar
pcd_hood = pv.PolyData(xyz_seed)
for scalar_name, scalar_values in (("trackcount", trackcount), ("clustercount", clustercount)):
    pcd_hood[scalar_name] = scalar_values

# Simple alternative without threshold widget:
# pv.plot(pcd_hood, scalars='trackcount',
#         render_points_as_spheres=True, point_size=10,
#         show_scalar_bar=True,
#         )

p = pv.Plotter()

# Add the direction lines of track-like clusters (SLOW); skipped unless with_lines is set
for line in (lines if with_lines else []):
    p.add_mesh(line, color='red')

p.add_mesh_threshold(pcd_hood, 'trackcount', all_scalars=True, render_points_as_spheres=True, point_size=10)
p.show()

Widget(value='<iframe src="http://localhost:46249/index.html?ui=P_0x7fe6b22a13d0_1&reconnect=auto" class="pyvi…

In [78]:
def plot_first_eigenvec(point, eigenvects, color=[1, 0, 0], stretch=1):
    """Build an Open3D line set from ``point`` along the first eigenvector.

    The single segment runs from ``point`` to ``point`` plus ``stretch`` times
    column 0 of ``eigenvects`` and is drawn in ``color`` (RGB).
    """
    tip = point + eigenvects[:, 0] * stretch

    segment = o3d.geometry.LineSet()
    segment.points = o3d.utility.Vector3dVector([point, tip])
    segment.lines = o3d.utility.Vector2iVector([[0, 1]])  # connect vertex 0 with vertex 1
    segment.colors = o3d.utility.Vector3dVector([color])
    return segment

In [79]:
# Inspect the clustering of a single seed point in detail and show it in Open3D
i = 120

lines = []

hood = xyz[indices[i]]
if hood.shape[0] < minimum_in_hood:
    raise ValueError("Not enough points in neighborhood")
print(f"Seed point {i} has {hood.shape[0]} points in its neighborhood")

# Scalar results of this one seed. Kept under their own names so that the
# per-seed arrays `trackcount` / `clustercount` used for plotting stay intact.
seed_trackcount = 0
min_points = 10  # alternative: hood.shape[0] // 10

# Settings tried before: plain dbscan(eps=0.05); dbscan_stretchz with
# (eps=0.05, stretch=2) and (eps=0.09, stretch=5); noise-filtered variants with
# (eps=0.055, stretch=2.5) and (eps=0.07, stretch=3)
labels = dbscan_stretchz_auto(hood, min_points=min_points, stretch=4)

max_label = labels.max()
seed_clustercount = max_label + 1
print()
print(f"Found {max_label + 1} clusters")
for label in range(max_label + 1):
    cluster = hood[labels == label]
    eigenvals, eigenvects = pca(cluster)
    cluster_linearity = linearity(eigenvals)
    print(f"Cluster {label}: {labels[labels == label].shape[0]} points")
    print(f"Linearity: {cluster_linearity} {cluster_linearity > linearity_tresh}")
    print(f"Theta: {theta(eigenvects)}")
    print(f"Eigenvalues: {eigenvals}")
    print(f"Eigenvectors: {eigenvects}")

    # Line colour encodes the verdict: red = counted as track,
    # blue = linear but too short, black = not linear
    if cluster_linearity > linearity_tresh:
        if eigenvals[0] > 0.02:
            seed_trackcount += 1
            line_color = [1, 0, 0]
        else:
            line_color = [0, 0, 0.7]
    else:
        line_color = [0, 0, 0]
    lines.append(plot_first_eigenvec(cluster.mean(axis=0), eigenvects, color=line_color, stretch=eigenvals[0]*5))
print(f"Track count: {seed_trackcount}")

colors = plt.get_cmap("tab20")(labels / (max_label if max_label > 0 else 1))
colors[labels < 0] = [0.5, 0.5, 0.5, 1]  # colour for points that belong to no cluster
hood_pcd = o3d.geometry.PointCloud()
hood_pcd.points = o3d.utility.Vector3dVector(hood)
hood_pcd.colors = o3d.utility.Vector3dVector(colors[:,:3])
o3d.visualization.draw_geometries([hood_pcd] + lines)

Seed point 120 has 522 points in its neighborhood

Found 2 clusters
Cluster 0: 500 points
Linearity: 0.7755760911376942 False
Theta: 102.65214994763942
Eigenvalues: [0.04719179 0.01059097 0.00062908]
Eigenvectors: [[ 0.73064302 -0.64913477  0.21162427]
 [-0.64667303 -0.75737859 -0.09050775]
 [-0.21903142  0.07072285  0.97315133]]
Cluster 1: 11 points
Linearity: 0.9012396784881556 False
Theta: 120.6896006624882
Eigenvalues: [7.58865530e-04 7.49458037e-05 1.76557307e-06]
Eigenvectors: [[ 0.31758317  0.31968176 -0.89271748]
 [-0.79915343 -0.41650509 -0.43344816]
 [-0.51038684  0.85107408  0.12319977]]
Track count: 0


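Der Spread entspricht nicht den ursprünglichen Metern: Die Projektion auf den (normierten) ersten Eigenvektor liegt bereits in Metern, die zusätzliche Multiplikation mit `np.sqrt(eigenvals[0])` (Standardabweichung, ebenfalls in Metern) verfälscht daher die Einheit.
- Typische Werte bei sinnvollen Clustern: 0.28, 0.09
- Bei Miniclustern: 0.05, 0.02, 0.01
- Sinnvoller ist es, direkt den ersten Eigenwert zu verwenden.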

```
def pca_spread(points, eigenvals, eigenvects):
    """Length along first principal component"""
    projected = points @ eigenvects[:,0] # Project on first eigenvector
    return (np.max(projected) - np.min(projected)) * np.sqrt(eigenvals[0])
```

In [80]:
# Presumably a deliberate stop so that "Run All" does not execute the cells below
raise ValueError

ValueError: 

In [None]:
# Inspect the clustering of a single seed point in detail and show it in Open3D
i = 11

lines = []

hood = xyz[indices[i]]
# No minimum_in_hood check here: small neighbourhoods are inspected as well
print(f"Seed point {i} has {hood.shape[0]} points in its neighborhood")

# Scalar results of this one seed. Kept under their own names so that the
# per-seed arrays `trackcount` / `clustercount` used for plotting stay intact.
seed_trackcount = 0
min_points = 10  # alternative: hood.shape[0] // 10

# Settings tried before: plain dbscan(eps=0.05); dbscan_stretchz with
# (eps=0.05, stretch=2) and (eps=0.09, stretch=5); noise-filtered variants with
# (eps=0.055, stretch=2.5) and (eps=0.07, stretch=3)
labels = dbscan_stretchz_auto(hood, min_points=min_points, stretch=4)

max_label = labels.max()
seed_clustercount = max_label + 1
print()
print(f"Found {max_label + 1} clusters")
for label in range(max_label + 1):
    cluster = hood[labels == label]
    eigenvals, eigenvects = pca(cluster)
    cluster_linearity = linearity(eigenvals)
    print(f"Cluster {label}: {labels[labels == label].shape[0]} points")
    print(f"Linearity: {cluster_linearity} {cluster_linearity > linearity_tresh}")
    print(f"Theta: {theta(eigenvects)}")
    print(f"Eigenvalues: {eigenvals}")
    print(f"Eigenvectors: {eigenvects}")

    # Line colour encodes the verdict: red = counted as track,
    # blue = linear but too short, black = not linear
    if cluster_linearity > linearity_tresh:
        if eigenvals[0] > 0.02:
            seed_trackcount += 1
            line_color = [1, 0, 0]
        else:
            line_color = [0, 0, 0.7]
    else:
        line_color = [0, 0, 0]
    lines.append(plot_first_eigenvec(cluster.mean(axis=0), eigenvects, color=line_color, stretch=eigenvals[0]*5))
print(f"Track count: {seed_trackcount}")

colors = plt.get_cmap("tab20")(labels / (max_label if max_label > 0 else 1))
colors[labels < 0] = [0.5, 0.5, 0.5, 1]  # colour for points that belong to no cluster
hood_pcd = o3d.geometry.PointCloud()
hood_pcd.points = o3d.utility.Vector3dVector(hood)
hood_pcd.colors = o3d.utility.Vector3dVector(colors[:,:3])
o3d.visualization.draw_geometries([hood_pcd] + lines)


Seed point 11 has 1174 points in its neighborhood

Found 7 clusters
Cluster 0: 682 points
Linearity: 0.6521589830146466 False
Theta: 91.23391648844144
Eigenvalues: [0.02505452 0.00871499 0.00032574]
Eigenvectors: [[ 0.05805241  0.99657525 -0.058887  ]
 [ 0.99808126 -0.05666547  0.02495657]
 [-0.02153424  0.0602228   0.99795265]]
Cluster 1: 351 points
Linearity: 0.9960838918179614 True
Theta: 90.2799757488955
Eigenvalues: [8.65745384e-02 3.39035258e-04 3.19083419e-05]
Eigenvectors: [[ 0.11040048 -0.98416159 -0.13870004]
 [-0.99387517 -0.10863273 -0.02027497]
 [-0.00488648 -0.14008889  0.99012687]]
Cluster 2: 18 points
Linearity: 0.6817607568089634 False
Theta: 113.64737133347377
Eigenvalues: [2.02383528e-04 6.44063809e-05 4.84404096e-06]
Eigenvectors: [[-0.79785207  0.50274998 -0.33267782]
 [ 0.45005069  0.86388531  0.22617813]
 [-0.40110653 -0.03073481  0.91551566]]
Cluster 3: 11 points
Linearity: 0.8859459234377717 False
Theta: 87.07199372422147
Eigenvalues: [4.34248427e-04 4.95278033

In [None]:
# Check: length of the third eigenvector (column 2) of the last inspected cluster
np.linalg.norm(eigenvects[:, 2])

np.float64(1.0)