# Experimente mit DBSCAN

In [126]:
import pdal 
import numpy as np
import matplotlib.pyplot as plt
import open3d as o3d
from scipy.spatial import KDTree
from scipy.sparse.csgraph import connected_components
import scipy.sparse as sp
import os
import json
import pyvista as pv


from interessant import * # Bei Änderungen Kernel neu starten

In [127]:
# Measurement run to read from (swap the two lines to use run14 instead).
run = run24
#run = run14
# filename = interessant['OLA gleiche Höhe wie Gleis']

# Position of each interesting tile in `interessant`:
# platform: 29; track with high intensity: 11; switch B: 16; underground station: 20;
# track far off to the side: 23; concrete cover: 28; train in run 14 A (axle counter in run24): 6;
# many tracks: 33; start of switch: 34; switch C: 38; OLA at same height: 35; Y: 37
key = list(interessant)[11]
filename = interessant[key]
print(key, filename)

# Resolve the tile inside the run directory and fail early if it is missing.
filename = os.path.join(run, filename)
if not os.path.exists(filename):
    raise FileNotFoundError(filename)

Gleis hohe Intensität 4482100_5357075.copc.laz


In [128]:
# --- Voxel filter ---
# Maximum number of points allowed between 0.5 m and 4.5 m above the voxel's
# ground level (e.g. 5 or 8).
thresh = 8
# Maximum share of a voxel's points that may be rail candidates. The first pass
# used 0.3; for "Gleis hohe Intensität" 0.5 gives a much better result.
majority_tresh = 0.5

# Edge length of a voxel in x/y. (A previous `voxel_size = 1.0` was a dead
# store, overwritten on the very next statement, and has been removed.)
voxel_size = 25 / 30
print("Voxel size:", voxel_size)

# Minimum number of points per voxel. First attempts used 100, which left a lot
# of black on tracks lying off to the side; 50 works better.
minimum_points = 50

# --- Seed point analysis ---
# NOTE: minimum_in_hood and linearity_tresh are assigned again in a later cell
# before they are used.
minimum_in_hood = 10
linearity_tresh = 0.98

intensity_threshold = 14500
downsample_radius = 0.3
neighborhood_radius = 0.5

Voxel size: 0.8333333333333334


In [129]:
import subprocess
#subprocess.Popen(["pyvistaviewer", filename])

In [130]:
import logging
# Logger used by the per-seed analysis further down.
logger = logging.getLogger(__name__)
# Send DEBUG output to dbscan.log. NOTE: basicConfig does nothing if the root
# logger already has handlers (e.g. when this cell is run a second time).
logging.basicConfig(filename='dbscan.log', encoding='utf-8', level=logging.DEBUG)

## Voxelfilter

In [131]:
# Read the selected tile with PDAL; `points` is the first (structured) array
# produced by the pipeline and is indexed by field name below ('X', 'Y', ...).
pipeline = pdal.Pipeline([pdal.Reader(filename)])
pipeline.execute()
points = pipeline.arrays[0]

In [132]:
# Coordinates of all points as an (n_points, 3) array with columns X, Y, Z.
xyz = np.vstack((points['X'], points['Y'], points['Z'])).transpose()

In [133]:
# Offset to remove later (rounded, so that tile borders remain whole numbers)
offset = xyz.mean(axis=0).round() 
# xyz -= offset   # Only needed for visualization

In [134]:
# Reset the classification of every point before the voxel filter marks candidates.
points['Classification'] = 0 # Unclassified
# Classification code this notebook assigns to rail candidate points.
RAIL = 20

In [135]:
# Per-axis maximum and minimum of the tile; the bare last expression displays both.
maxp = xyz.max(axis=0)
minp = xyz.min(axis=0)
maxp, minp

(array([4.482125e+06, 5.357100e+06, 4.571790e+02]),
 array([4.48210000e+06, 5.35707599e+06, 4.47844200e+02]))

In [136]:
# Copy of xyz whose x/y columns are replaced by the index of the voxel_size
# grid cell, counted from the minimum corner of the tile; z is kept unchanged.
voxels = xyz.copy()
voxels[:, :2] = np.floor_divide(xyz[:, :2] - minp[:2], voxel_size).astype(int)

In [137]:
# Check the number of voxels along x and y
np.ceil((maxp[:2] - minp[:2]) / voxel_size).astype(int)

array([30, 29])

In [138]:
from collections import defaultdict

# voxel (x index, y index) -> z values of the points in that voxel
voxel_dict = defaultdict(list)
# voxel (x index, y index) -> row indices of those points in xyz / points
index_dict = defaultdict(list)

# Fill both dictionaries in a single pass over all points
for idx, (voxel_xy, z) in enumerate(zip(voxels[:, :2], xyz[:, 2])):
    voxel_key = tuple(voxel_xy)
    voxel_dict[voxel_key].append(z)
    index_dict[voxel_key].append(idx)

In [139]:
# Voxel filter: mark the points near the top of a low, narrow elevation above
# the voxel's ground level as rail candidates.
# The loop variable is called voxel_key (not key) so that the tile name stored
# in `key` by the tile selection cell is not overwritten.
for voxel_key, z_values in voxel_dict.items():

    # Threshold on number of points in voxel
    if len(z_values) < minimum_points:
        continue

    point_indices = np.array(index_dict[voxel_key])
    z_values = np.array(z_values)
    ground_level = np.percentile(z_values, 10)  # 10th percentile

    # Check that there are almost no points 0.5 to 4.5 m above the ground,
    # but allow for some noise (thresh is set in the parameter cell).
    count = ((z_values > ground_level + 0.5) & (z_values < ground_level + 4.5)).sum()
    if count > thresh:
        continue

    # Look at the points within 0.5 m above ground and take their 99.5th
    # percentile as the top of the candidate structure.
    mask = (z_values > ground_level) & (z_values < ground_level + 0.5)
    if not mask.any():
        # No points in that band. Checked explicitly instead of relying on the
        # exception np.percentile raises for empty input.
        continue
    candidates_top = np.percentile(z_values[mask], 99.5)

    # Oude Elberink require the height difference > 0.1 m
    if candidates_top - ground_level <= 0.1:
        continue

    # Only the points up to 10 cm below the top are rail point candidates
    mask = (z_values > candidates_top - 0.1) & (z_values < candidates_top + 0.05)

    # Also make sure these are only a minority of the points (otherwise it's a slope)
    if mask.sum() < majority_tresh * len(z_values):  # e.g. 0.3
        points['Classification'][point_indices[mask]] = RAIL


In [140]:
# Keep only the points the voxel filter marked as rail candidates.
candidates = points[points["Classification"] == RAIL]
candidates.shape

(42550,)

## Noise Filter

In [141]:
# filters.outlier sets Classification to 7, filters.range then removes the
# points with Classification 7.
noise_filter = (
    pdal.Filter("filters.outlier", method="statistical", mean_k=10, multiplier=2.0).pipeline(candidates)
    | pdal.Filter("filters.range", limits="Classification![7:7]")
)
print(noise_filter.toJSON())
noise_filter.execute()
candidates = noise_filter.arrays[0]
candidates.shape 

[{"type": "filters.outlier", "method": "statistical", "mean_k": 10, "multiplier": 2.0, "tag": "filters_outlier1"}, {"type": "filters.range", "limits": "Classification![7:7]", "tag": "filters_range1"}]


(41279,)

## View Settings

In [142]:
# Copy the view settings from the viewer with Ctrl+C and paste them here

viewsettings = '''
{
	"class_name" : "ViewTrajectory",
	"interval" : 29,
	"is_loop" : false,
	"trajectory" : 
	[
		{
			"boundingbox_max" : [ 11.999975427985191, 11.99998692702502, 13.124079998226534 ],
			"boundingbox_min" : [ -13.000024572014809, -13.00001307297498, -3.9965200017734333 ],
			"field_of_view" : 60.0,
			"front" : [ -0.20468464372193082, -0.82045900926496551, 0.53380821531742795 ],
			"lookat" : [ -2.1145501200370735, -2.6052610037108783, 1.4494799802055294 ],
			"up" : [ 0.19010212482081987, 0.50164960558000959, 0.84392467398461002 ],
			"zoom" : 0.55999999999999983
		}
	],
	"version_major" : 1,
	"version_minor" : 0
}

'''

# Parse the pasted JSON and pull the camera parameters out of the first
# (and only) trajectory entry.
viewsettings = json.loads(viewsettings)

camera = viewsettings["trajectory"][0]
front, lookat, up, zoom = (camera[name] for name in ("front", "lookat", "up", "zoom"))

## Candidate and Seed Points

In [143]:
# From here on xyz holds only the candidate points (it held all points before),
# shifted by the rounded offset.
xyz = np.vstack((candidates['X'], candidates['Y'], candidates['Z'])).transpose()
xyz -= offset

In [144]:
# Candidates with an intensity below intensity_threshold; the seed points are
# sampled from these further down.
low_intensity = candidates[candidates["Intensity"] < intensity_threshold]
low_intensity.shape

(616,)

In [145]:
# xyz_low = np.vstack((low_intensity['X'], low_intensity['Y'], low_intensity['Z'])).transpose()
# xyz_low -= offset

# pcd_low_intensity = o3d.geometry.PointCloud()
# pcd_low_intensity.points = o3d.utility.Vector3dVector(xyz_low)
# pcd_low_intensity.paint_uniform_color([0, 0, 0.7])

In [146]:
# Downsample the low-intensity candidates with PDAL's filters.sample (Poisson
# sampling) using downsample_radius; the remaining points are the seed points.

downsampling_pipeline = pdal.Filter("filters.sample", radius=downsample_radius).pipeline(low_intensity)
downsampling_pipeline.execute()
seed_points = downsampling_pipeline.arrays[0]
seed_points.shape 

(145,)

In [147]:
# Seed point coordinates as an (n_seeds, 3) array, shifted by the same offset as xyz.
xyz_seed = np.vstack((seed_points['X'], seed_points['Y'], seed_points['Z'])).transpose()
xyz_seed -= offset

# pcd_seed_points = o3d.geometry.PointCloud()
# pcd_seed_points.points = o3d.utility.Vector3dVector(xyz_seed)
# pcd_seed_points.paint_uniform_color([1, 0, 0])

In [148]:
# o3d.visualization.draw_geometries([
#     pcd_candidates, 
#     pcd_low_intensity, 
#     pcd_seed_points
#     ], front=front, lookat=lookat, up=up, zoom=zoom)

In [149]:
# o3d.visualization.draw_geometries([pcd_seed_points], front=front, lookat=lookat, up=up, zoom=zoom)

In [150]:
# k-D tree over all candidate points (offset-shifted xyz), used for the
# neighborhood queries around the seed points.
tree = KDTree(xyz)  

In [151]:
# indices: ndarray (dtype object) with a list of indices for each seed point;
# each list holds the rows of xyz within neighborhood_radius of that seed.
indices = tree.query_ball_point(xyz_seed, r=neighborhood_radius)

In [152]:
# Number of seed points; sizes the per-seed result arrays below.
seed_point_count = xyz_seed.shape[0]

In [153]:
def pca(cloud):
    """Use PCA to get the eigenvalues and eigenvectors of a point cloud.

    Parameters
    ----------
    cloud : ndarray, shape (n_points, n_dims)
        One point per row.

    Returns
    -------
    eigenvals : ndarray
        Eigenvalues of the covariance matrix, largest first.
    eigenvecs : ndarray
        Matching eigenvectors as columns: ``eigenvecs[:, k]`` belongs to
        ``eigenvals[k]``. The sign of each eigenvector is arbitrary.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the covariance matrix contains NaN or inf (e.g. fewer than two
        points) or the decomposition fails.
    """
    mean = np.mean(cloud, axis=0)
    centered = cloud - mean
    # rowvar=False: every column is a variable (x, y, z), every row an observation.
    cov_matrix = np.cov(centered, rowvar=False)
    # eigh does not reject non-finite input itself, so keep the LinAlgError
    # contract that callers rely on.
    if not np.all(np.isfinite(cov_matrix)):
        raise np.linalg.LinAlgError("Array must not contain infs or NaNs")
    # A covariance matrix is symmetric: eigh guarantees real eigenvalues,
    # whereas eig may return complex values caused by rounding.
    eigenvals, eigenvecs = np.linalg.eigh(cov_matrix)
    sorted_indices = np.argsort(eigenvals)[::-1]
    return eigenvals[sorted_indices], eigenvecs[:, sorted_indices]

def linearity(eigenvals):
    """Linearity of a point cloud from its eigenvalues.

    Expects the eigenvalues sorted largest first and returns the gap between
    the two largest ones, relative to the largest.
    """
    largest, second = eigenvals[0], eigenvals[1]
    return (largest - second) / largest

def pca_spread(points, eigenvals, eigenvects):
    """Length of the point cloud along its first principal component.

    Parameters
    ----------
    points : ndarray, shape (n_points, n_dims)
        One point per row.
    eigenvals : ndarray
        Unused; kept so that existing calls keep working.
    eigenvects : ndarray
        Eigenvectors as columns; the first column is expected to be the
        unit-length first principal axis, as returned by ``pca``.

    Returns
    -------
    float
        Extent of the projection onto the first eigenvector, in the units of
        ``points``.
    """
    projected = points @ eigenvects[:, 0]  # Project on first eigenvector
    # The projection onto a unit vector is already a length. The previous
    # extra factor sqrt(eigenvals[0]) produced values that were not lengths.
    return np.max(projected) - np.min(projected)

In [154]:
def theta(eigenvects):
    """Angle in degrees between the first eigenvector and the z-axis.

    Parameters
    ----------
    eigenvects : ndarray, shape (3, 3)
        Eigenvectors as columns; the first column is used.

    Returns
    -------
    float
        Angle in the range [0, 180]. The sign of an eigenvector is arbitrary,
        so an angle a and 180 - a describe the same axis.
    """
    first = eigenvects.T[0]
    # Normalise by the length of the eigenvector itself (first column); the
    # previous code used the first row of the matrix.
    cos_theta = first @ np.array([0, 0, 1]) / np.linalg.norm(first)
    # Rounding can push the cosine slightly outside [-1, 1], where arccos
    # returns NaN.
    cos_theta = np.clip(cos_theta, -1.0, 1.0)
    return np.arccos(cos_theta) * 180 / np.pi

In [155]:
def dbscan(xyz, eps=0.05, min_points=10):
    """Cluster points with Open3D's DBSCAN.

    Returns one label per row of ``xyz`` as an ndarray; the calling code treats
    negative labels as points that belong to no cluster.
    """
    cloud = o3d.geometry.PointCloud()
    cloud.points = o3d.utility.Vector3dVector(xyz)
    # eps is the neighbor distance used by DBSCAN
    cluster_ids = cloud.cluster_dbscan(eps=eps, min_points=min_points, print_progress=False)
    return np.array(cluster_ids)

In [156]:
def dbscan_stretchz(xyz, eps=0.05, min_points=10, stretch=1.5):
    """Open3D DBSCAN on a copy of the points whose z column is multiplied by ``stretch``.

    The input array is left untouched. Returns one label per row of ``xyz``.
    """
    stretched = xyz.copy()
    stretched[:, 2] *= stretch
    cloud = o3d.geometry.PointCloud()
    cloud.points = o3d.utility.Vector3dVector(stretched)
    # eps is the neighbor distance used by DBSCAN (measured in the stretched space)
    cluster_ids = cloud.cluster_dbscan(eps=eps, min_points=min_points, print_progress=False)
    return np.array(cluster_ids)

In [157]:
def dbscan_stretchz_auto(xyz, min_points=10, stretch=1.5, eps_scale=50, min_eps=0.05):
    """DBSCAN with stretched z and an eps derived from the number of points.

    Parameters
    ----------
    xyz : ndarray, shape (n_points, 3)
        Points to cluster; not modified.
    min_points : int
        DBSCAN minimum number of points. If there are no more than this many
        points, no clustering is attempted.
    stretch : float
        Factor applied to the z column before clustering.
    eps_scale : float
        eps is ``eps_scale / n_points`` (previously hard-coded as 50).
    min_eps : float
        Lower bound for eps (previously hard-coded as 0.05).

    Returns
    -------
    ndarray
        One label per point; -1 for every point if there are too few points.
    """
    pointcount = xyz.shape[0]
    if pointcount <= min_points:
        # Platform int, like dbscan_stretchz_noise, instead of int8.
        # Also covers an empty input, avoiding a division by zero below.
        return np.full(pointcount, -1, dtype=int)

    eps = max(eps_scale / pointcount, min_eps)

    xyz = xyz.copy()
    xyz[:, 2] *= stretch
    hood_pcd = o3d.geometry.PointCloud()
    hood_pcd.points = o3d.utility.Vector3dVector(xyz)
    # eps is the neighbor distance used by DBSCAN (measured in the stretched space)
    labels = np.array(hood_pcd.cluster_dbscan(eps=eps, min_points=min_points, print_progress=False))
    return labels

In [158]:
# Scratch calculation: the eps dbscan_stretchz_auto derives for a neighborhood of 600 points.
50/600

0.08333333333333333

In [159]:
def dbscan_stretchz_noise(xyz, eps=0.05, min_points=10, stretch=1.5, noise_neighbors=10, noise_std=2.0):
    """DBSCAN with stretched z, preceded by a statistical outlier removal.

    The z column of a copy of ``xyz`` is multiplied by ``stretch``, outliers are
    removed with Open3D's ``remove_statistical_outlier`` and the remaining
    points are clustered. Returns one label per row of ``xyz``; points removed
    as outliers get the label -1.
    """
    stretched = xyz.copy()
    stretched[:, 2] *= stretch
    cloud = o3d.geometry.PointCloud()
    cloud.points = o3d.utility.Vector3dVector(stretched)
    inlier_cloud, inlier_idx = cloud.remove_statistical_outlier(nb_neighbors=noise_neighbors, std_ratio=noise_std)
    # eps is the neighbor distance used by DBSCAN (measured in the stretched space)
    inlier_labels = np.array(inlier_cloud.cluster_dbscan(eps=eps, min_points=min_points, print_progress=False))
    # Map back to original indices
    labels_orig = np.ones(len(stretched), dtype=int) * -1
    labels_orig[inlier_idx] = inlier_labels
    return labels_orig

In [160]:
def graph_based_segmentation(points, max_distance=0.1):
    """Segment points into connected components of a distance graph.

    A k-D tree is built over ``points`` and its sparse distance matrix (pairs
    no further apart than ``max_distance``) is used as an undirected graph.
    Returns the component label of every point.
    """
    tree = KDTree(points)
    neighbor_graph = tree.sparse_distance_matrix(tree, max_distance=max_distance)
    _, component_labels = connected_components(neighbor_graph, directed=False)
    return component_labels

In [161]:
# Funktioniert nicht, 0 Cluster gefunden

# def dbscan_stretchz_pdal(xyz, eps=0.07, min_points=20, stretch=1.5):
#     xyz = xyz.copy()
#     xyz[:, 2] *= stretch

#     dt = {'names': ['X', 'Y', 'Z'], 'formats': [np.float32, np.float32, np.float32]}
#     pipeline = pdal.Filter(type="filters.dbscan", min_points=20, eps=0.07).pipeline(xyz.view(dt))
    
#     pipeline.execute()
#     return pipeline.arrays[0]['ClusterID']

In [162]:
# Parameters for the per-seed analysis below.
# NOTE: minimum_in_hood and linearity_tresh are also set in the parameter cell
# near the top; the values assigned here are the ones the loop below uses.
min_points = 10
minimum_in_hood = 10
linearity_tresh = 0.97 # 0.98

In [163]:
# Per-seed result arrays, one row per seed point.
# linearity_at_seed is only allocated here; the loop below does not fill it.
linearity_at_seed = np.full((seed_point_count, 1), np.nan, dtype=float)
trackcount = np.zeros((seed_point_count, 1), dtype=int)
clustercount = np.zeros((seed_point_count, 1), dtype=int)

for i in range(seed_point_count):
    hood = xyz[indices[i]]
    if hood.shape[0] < minimum_in_hood:
        # Too few neighbors around this seed point
        continue
    logger.debug("--------------------------")
    logger.debug(f"Seed point {i} has {hood.shape[0]} points in its neighborhood")

    # DBSCAN with stretched z
    # labels = dbscan_stretchz(hood, eps=2, min_points=min_points, stretch=4) # switch C works with eps=0.07, min_points=20, stretch=4
    labels = dbscan_stretchz_auto(hood, min_points=min_points, stretch=4)
    max_label = labels.max()
    clustercount[i] = max_label + 1
    logger.debug(f"DBSCAN {max_label + 1} clusters")

    # Count the clusters that look like a piece of track
    for label in range(max_label + 1):
        cluster = hood[labels == label]
        try:
            eigenvals, eigenvects = pca(cluster)
        except np.linalg.LinAlgError:
            continue
        cluster_linearity = linearity(eigenvals)

        logger.debug(f"Cluster {label} linearity: {cluster_linearity}")
        logger.debug(f"Theta: {theta(eigenvects)}")
        logger.debug(f"Eigenvalues: {eigenvals}")
        logger.debug(f"Eigenvectors: {eigenvects}")

        # eigenvals[0] is the variance along the 1st PC, so it depends on the length
        if (eigenvals[0] > 0.02) and (cluster_linearity > linearity_tresh):
            trackcount[i] += 1
    logger.debug(f"Track count: {trackcount[i]}")

print(trackcount.max())
print(clustercount.max())

1
2


Die Autoren von kononenFullyAutomatedExtraction2024 verwenden einen Linearity-Threshold von 0.98 (behalten aber zusätzlich auch Punkte in Nachbarschaften mit hoher Punktdichte).

In [164]:
# Use pyvista to get scalar colors with color bar.
# The seed points carry the per-seed track and cluster counts as point data.
pcd_hood = pv.PolyData(xyz_seed)
pcd_hood["trackcount"] = trackcount
pcd_hood["clustercount"] = clustercount

# pv.plot(pcd_hood, scalars='trackcount', 
#         render_points_as_spheres=True, point_size=10,
#         show_scalar_bar=True,
#         )


p = pv.Plotter()
# Add thresholding tool on the 'trackcount' scalars
# (it's not a mesh, but anyway)

p.add_mesh_threshold(pcd_hood, 'trackcount', all_scalars=True, render_points_as_spheres=True, point_size=10)
p.show()

Widget(value='<iframe src="http://localhost:37637/index.html?ui=P_0x7f977e53e7f0_3&reconnect=auto" class="pyvi…

In [165]:
def plot_first_eigenvec(point, eigenvects, color=[1, 0, 0], stretch=1):
    """Build an Open3D LineSet showing the first eigenvector as a line.

    The line starts at ``point`` and ends at ``point`` plus the first
    eigenvector (first column of ``eigenvects``) multiplied by ``stretch``.
    The LineSet is returned, not drawn.
    """
    direction = eigenvects[:, 0] * stretch
    segment = o3d.geometry.LineSet()
    segment.points = o3d.utility.Vector3dVector([point, point + direction])
    segment.lines = o3d.utility.Vector2iVector([[0, 1]])
    segment.colors = o3d.utility.Vector3dVector([color])
    return segment

In [167]:
# Inspect the neighborhood of a single seed point in detail.
# The per-seed arrays `trackcount` and `clustercount` from the loop above are
# deliberately not overwritten here (this cell used to replace them with
# scalars, which broke re-running the plotting cell).
i = 16

lines = []

hood = xyz[indices[i]]
if hood.shape[0] < minimum_in_hood:   
    raise ValueError("Not enough points in neighborhood") # continue
print(f"Seed point {i} has {hood.shape[0]} points in its neighborhood")

# eigenvals, eigenvects = pca(hood)
# linearity_at_seed = linearity(eigenvals)
# theta_at_seed = theta(eigenvects)

# print(f"Linearity of {i}: {linearity_at_seed}")
# print(f"Theta: {theta_at_seed}")
# print(f"Eigenvalues: {eigenvals}")
# print(f"Eigenvectors: {eigenvects}")


seed_trackcount = 0
# labels = dbscan(hood, eps=0.05, min_points=10)
min_points = 10 # hood.shape[0] // 10

# Parameter combinations tried so far:
# labels = dbscan_stretchz(hood, eps=0.05, min_points=min_points, stretch=2)
# labels = dbscan_stretchz_noise(hood, eps=0.055, min_points=min_points, stretch=2.5, noise_neighbors=10, noise_std=2.0)
# labels = dbscan_stretchz_noise(hood, eps=0.07, min_points=min_points, stretch=3, noise_neighbors=10, noise_std=2.0)
labels = dbscan_stretchz(hood, eps=0.09, min_points=min_points, stretch=5)

# labels = dbscan_stretchz_pdal(hood, eps=0.1, min_points=20, stretch=4)

max_label = labels.max()
seed_clustercount = max_label + 1
print()
print(f"Found {max_label + 1} clusters")
for label in range(max_label + 1):
    cluster = hood[labels == label]
    eigenvals, eigenvects = pca(cluster)
    cluster_linearity = linearity(eigenvals)
    print(f"Cluster {label}: {cluster.shape[0]} points")
    print(f"Linearity: {cluster_linearity} {cluster_linearity > linearity_tresh}")
    print(f"Theta: {theta(eigenvects)}")
    print(f"Eigenvalues: {eigenvals}")
    print(f"Eigenvectors: {eigenvects}")

    # Line color encodes the verdict for the cluster:
    # red = counted as track, blue = linear but too short, black = not linear.
    if cluster_linearity > linearity_tresh:
        if eigenvals[0] > 0.02:
            seed_trackcount += 1
            color = [1, 0, 0]
        else:
            color = [0, 0, 0.7]
    else:
        color = [0, 0, 0]
    lines.append(plot_first_eigenvec(cluster.mean(axis=0), eigenvects, color=color, stretch=eigenvals[0]*5))
print(f"Track count: {seed_trackcount}")

# One color per cluster label, scaled to [0, 1] for the colormap
colors = plt.get_cmap("tab20")(labels / (max_label if max_label > 0 else 1))
colors[labels < 0] = [0.5, 0.5, 0.5, 1] # Color for points that belong to no cluster
hood_pcd = o3d.geometry.PointCloud()
hood_pcd.points = o3d.utility.Vector3dVector(hood)
hood_pcd.colors = o3d.utility.Vector3dVector(colors[:,:3])
o3d.visualization.draw_geometries([hood_pcd] + lines)

Seed point 16 has 32 points in its neighborhood

Found 0 clusters
Track count: 0


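Der Spread aus `pca_spread` entspricht nicht der Länge in den ursprünglichen Metern, weil die projizierte Ausdehnung zusätzlich mit `sqrt(eigenvals[0])` multipliziert wird.
- Typische Werte bei sinnvollen Clustern: 0.28, 0.09
- Bei Miniclustern: 0.05, 0.02, 0.01
- Sinnvoller ist es, direkt den ersten Eigenwert zu verwenden.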

```
def pca_spread(points, eigenvals, eigenvects):
    """Length along first principal component"""
    projected = points @ eigenvects[:,0] # Project on first eigenvector
    return (np.max(projected) - np.min(projected)) * np.sqrt(eigenvals[0])
```