# DBSCAN Refinement
- Der Bug, dass Cluster neben Radlenkern verloren gegangen sind, lag an `cluster_is_on_top()`:
  Der Radius war zu groß (jetzt 0.05 statt 0.1).
- Auch in den vorausgehenden Notebooks korrigiert.


In [97]:
import pdal 
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial import KDTree
from scipy.linalg import svd 
from osgeo import ogr
import pyvista as pv
import os
import json
import time 

from interessant import * # Bei Änderungen Kernel neu starten

ogr.UseExceptions()

In [98]:
import open3d as o3d

In [None]:
# Working directory of the run; candidate tiles are expected in its
# "candidates" subfolder (see the existence check below).
tmpdir = "/media/riannek/minimax/gleis/temp_run24"

# Index cheat sheet for the `interessant` mapping (author's notes, translated):
# Platform: 29; track with high intensity: 11; switch B: 16; underground station: 20; track far off to the side: 23; concrete cover: 28; train run 14 A (axle counter in run24): 6;
# Many tracks: 33; start of switch: 34; OLA same H (presumably overhead line at same height -- TODO confirm): 35; Y: 37

# Pick one entry of `interessant` by position; relies on the mapping's key order.
key = list(interessant.keys())[33] 

# Swap the file extension for ".ply": everything after the FIRST dot is dropped.
filename = interessant[key]
filename = filename.split(".")[0] + ".ply"
print(key, filename)


# Fail early if the candidate tile does not exist on disk.
if not os.path.exists(os.path.join(tmpdir,"candidates", filename)):
    raise FileNotFoundError(filename)

Viele Gleise 4474750_5332150.ply


In [100]:
# Maximum xy distance between a side cluster's center and the expected
# position of the partner rail (used in check_cluster_for_railpair).
thresh_side_distance = 0.15   # 0.1 is usually fine, but too small in some cases (e.g. start of a switch)

# Only points with Intensity below this value are used for seed points.
intensity_threshold = 14500
# Radius passed to PDAL's filters.sample when thinning the seed points.
downsample_radius = 0.45 # 0.4
# Radius of the KD-tree ball query around seed points and expected rail positions.
neighborhood_radius = 0.5 # 0.5

# Minimum point count passed to DBSCAN and used as the minimum cluster size.
min_points = 10
# Minimum neighborhood size required in the single-seed-point debug cell.
minimum_in_hood = 10
# A cluster counts as linear when its linearity exceeds this value.
# NOTE(review): "tresh" is a typo for "thresh", but the name is used in other cells.
linearity_tresh = 0.98

# Track gauge; presumably metres (1.435 is standard gauge) -- TODO confirm units.
gauge = 1.435

In [101]:
# Folder of the run; only referenced by the commented-out viewer call below.
runfolder = "/media/riannek/minimax/gleis/run24-2024-08-13"

import subprocess
# Optional: open the matching .copc.laz tile in an external viewer.
# subprocess.Popen(["pyvistaviewer", os.path.join(runfolder, filename.split(".")[0] + ".copc.laz")])

## Datei incl. margin aus Nachbarkacheln

In [102]:
# Open the GeoPackage in the working directory and fetch its "tiles" layer,
# which is queried below by "filename" attribute and by geometry.
gpkg = ogr.Open(os.path.join(tmpdir, "temp.gpkg"))
layer = gpkg.GetLayerByName("tiles")

In [103]:
def extend_bbox(bbox, margin=2):
    """Return ``bbox`` grown by ``margin`` on every side.

    ``bbox`` is indexed as (MinX, MaxX, MinY, MaxY); the result is a tuple
    in the same order.
    """
    min_x, max_x, min_y, max_y = bbox[0], bbox[1], bbox[2], bbox[3]
    return (min_x - margin, max_x + margin, min_y - margin, max_y + margin)

def get_bbox_polygon(bbox):
    """Build an OGR polygon covering ``bbox``.

    ``bbox`` is indexed as (MinX, MaxX, MinY, MaxY). The ring starts at
    (MinX, MinY), visits the other three corners and is closed explicitly
    by repeating the first corner.
    """
    min_x, max_x, min_y, max_y = bbox[0], bbox[1], bbox[2], bbox[3]
    corners = (
        (min_x, min_y),
        (max_x, min_y),
        (max_x, max_y),
        (min_x, max_y),
        (min_x, min_y),  # repeat the first corner to close the ring
    )

    outline = ogr.Geometry(ogr.wkbLinearRing)
    for x, y in corners:
        outline.AddPoint_2D(x, y)

    polygon = ogr.Geometry(ogr.wkbPolygon)
    polygon.AddGeometry(outline)
    return polygon

In [104]:
# Look up the feature of the selected tile by its "filename" attribute.
# NOTE(review): `filter` shadows the Python builtin of the same name.
filter = f"filename = '{filename}'"
layer.SetAttributeFilter(filter)
# NOTE(review): no check that a feature was found -- presumably None when nothing matches; verify.
feature = layer.GetNextFeature()
layer.SetAttributeFilter(None)
# Envelope of the tile geometry, ordered MinX, MaxX, MinY, MaxY.
bbox = feature.GetGeometryRef().GetEnvelope()
# Grow the tile bbox by the margin and collect the filenames of all tiles
# selected by a spatial filter on the grown box (includes the tile itself).
extended = extend_bbox(bbox, margin=2)
bbox_geom = get_bbox_polygon(extended)
layer.SetSpatialFilter(bbox_geom)
tiles = [f.GetField("filename") for f in layer]
# Reset the spatial filter so later queries on the layer are unaffected.
layer.SetSpatialFilter(None) 

print(tiles) 

['4474725_5332175.ply', '4474750_5332175.ply', '4474750_5332125.ply', '4474725_5332150.ply', '4474775_5332125.ply', '4474775_5332150.ply', '4474750_5332150.ply', '4474725_5332125.ply', '4474775_5332175.ply']


In [105]:
bbox # MinX, MaxX, MinY, MaxY

(4474750.0, 4474775.0, 5332150.0, 5332175.0)

In [106]:
del gpkg

In [107]:
# Bounds string "([xmin, xmax], [ymin, ymax])" for the extended bbox,
# passed to the PDAL crop filter when reading the tiles.
bounds = f"([{extended[0]}, {extended[1]}], [{extended[2]}, {extended[3]}])" 
bounds

'([4474748.0, 4474777.0], [5332148.0, 5332177.0])'

In [None]:
# One PDAL reader per tile found above, merged into a single point set and
# cropped to `bounds` (the extended bbox at this point of the notebook).
readers = [pdal.Reader(os.path.join(tmpdir,"candidates", tile)) for tile in tiles]
pipeline = pdal.Pipeline(readers) | pdal.Filter.merge() | pdal.Filter.crop(bounds=bounds)
pipeline.execute()
# First (and, after the merge, presumably only) output array of the pipeline.
points = pipeline.arrays[0]
points.shape 

(296313,)

## Seed Points

In [109]:
# Keep only points whose Intensity is below the threshold; seed points are
# sampled from this subset.
low_intensity = points[points["Intensity"] < intensity_threshold]
low_intensity.shape

(247472,)

In [110]:
# Rebind `bounds` to the ORIGINAL (non-extended) tile bbox; used to crop
# the seed points in the downsampling step.
bounds = f"([{bbox[0]}, {bbox[1]}], [{bbox[2]}, {bbox[3]}])" 
bounds

'([4474750.0, 4474775.0], [5332150.0, 5332175.0])'

In [111]:
# Downsample with poisson sampling (only original bbox)
# The low-intensity points are cropped to `bounds` and then thinned with
# PDAL's filters.sample using `downsample_radius`.

downsampling_pipeline = pdal.Filter.crop(bounds=bounds).pipeline(low_intensity) | pdal.Filter("filters.sample", radius=downsample_radius)
downsampling_pipeline.execute()
seed_points = downsampling_pipeline.arrays[0]
# Number of seed points; the main loop below iterates over this range.
seed_point_count = seed_points.shape[0]
seed_point_count

526

## KD Tree

In [112]:
# Plain coordinate arrays with one row per point and columns X, Y, Z,
# for all points and for the seed points.
xyz = np.vstack((points['X'], points['Y'], points['Z'])).transpose()
xyz_seed = np.vstack((seed_points['X'], seed_points['Y'], seed_points['Z'])).transpose()

In [113]:
# Shift both arrays in place by the rounded mean of all points so the
# coordinates are small numbers around the origin (presumably for numeric
# precision and viewing -- TODO confirm).
offset = xyz.mean(axis=0).round() 
xyz -= offset
xyz_seed -= offset

In [114]:
# KD-tree over ALL points (not only the low-intensity subset).
tree = KDTree(xyz)  

# Neighborhood of every seed point within `neighborhood_radius`.
# indices: ndarray (dtype object) with a list of indices for each seed point
indices = tree.query_ball_point(xyz_seed, r=neighborhood_radius)

## Funktionen

In [115]:
def pca(cloud):
    """Principal component analysis of a point cloud via SVD.

    Parameters
    ----------
    cloud : array with one point per row.

    Returns
    -------
    (eigenvalues, eigenvectors)
        The eigenvalues are the squared singular values of the centered
        cloud divided by ``n - 1``. The eigenvectors are stored in columns,
        so the first one is ``eigenvectors[:, 0]``.

    Raises
    ------
    ValueError
        If the cloud has fewer than 3 points.
    """
    point_count = cloud.shape[0]
    if point_count < 3:
        raise ValueError("Point cloud must have at least 3 points")

    centered = cloud - np.mean(cloud, axis=0)
    _, singular_values, vt = svd(centered, full_matrices=False)
    variances = singular_values**2 / (point_count - 1)
    # Rows of vt are the principal axes; transpose to get them as columns.
    return variances, vt.T

def linearity(eigenvals):
    """Calculate the linearity of a point cloud from its eigenvalues.

    Linearity is ``(e0 - e1) / e0`` where ``e0`` and ``e1`` are the first
    two entries of ``eigenvals`` (expected in descending order, as returned
    by ``pca``). Values close to 1 mean the variance is concentrated along
    a single axis.

    A cloud without any variance (``e0 == 0``) has no linear structure, so
    0.0 is returned instead of dividing by zero.
    """
    largest = eigenvals[0]
    if largest == 0:
        return 0.0
    return (largest - eigenvals[1]) / largest



In [116]:
def dbscan_stretchz(xyz, min_points=10, stretch=1.5):
    """Cluster points with Open3D's DBSCAN after scaling the z axis.

    The z coordinates of a copy of ``xyz`` are multiplied by ``stretch``
    before clustering; the input array itself is not modified.

    Returns an array with one label per point. If there are no more than
    ``min_points`` points, DBSCAN is skipped and every point gets -1.
    """
    n_points = xyz.shape[0]
    if n_points <= min_points:
        return np.full(n_points, -1, dtype=np.int8)

    # eps (min distance between clusters) shrinks with the number of
    # points but never drops below 0.06.
    eps = max(50 / n_points, 0.06)

    stretched = xyz.copy()
    stretched[:, 2] *= stretch

    cloud = o3d.geometry.PointCloud()
    cloud.points = o3d.utility.Vector3dVector(stretched)
    return np.array(cloud.cluster_dbscan(eps=eps, min_points=min_points, print_progress=False))


In [117]:
def plot_cluster_line(cluster, eigenvects, stretch=0.2):
    """Return a PyVista line along the first eigenvector of a cluster.

    The line starts at the mean of ``cluster`` and ends at that point plus
    the first column of ``eigenvects`` scaled by ``stretch``.
    """
    origin = cluster.mean(axis=0)
    direction = eigenvects[:, 0]
    return pv.Line(origin, origin + direction * stretch)

In [118]:
def plot_first_eigenvec(point, eigenvects, color=[1, 0, 0], stretch=1):
    """Build an Open3D LineSet showing the first eigenvector at ``point``.

    The segment runs from ``point`` to ``point`` plus the first column of
    ``eigenvects`` scaled by ``stretch`` and is drawn in ``color``.
    The list default for ``color`` is only read, never modified.
    """
    tip = point + eigenvects[:, 0] * stretch

    segment = o3d.geometry.LineSet()
    segment.points = o3d.utility.Vector3dVector([point, tip])
    # A single edge connecting vertex 0 (point) with vertex 1 (tip).
    segment.lines = o3d.utility.Vector2iVector([[0, 1]])
    segment.colors = o3d.utility.Vector3dVector([color])
    return segment

In [119]:
def get_central_point(a, b):
    """Return the midpoint between ``a`` and ``b``."""
    half_offset = (b - a) / 2
    return a + half_offset

In [120]:
def to_color(skalar, cmap=plt.cm.plasma):
    """Map ``skalar`` through ``cmap`` and return the first three components."""
    mapped = cmap(skalar)
    return mapped[:3]

In [121]:
def cluster_is_on_top(cluster, clustercenter, points, radius=0.05):
    """Check that no point lies above the cluster near its center.

    A square xy window with half-width ``radius`` is centered on
    ``clustercenter``. The cluster is "on top" if no row of ``points``
    falls strictly inside that window with a z value greater than the
    highest z of ``cluster``.

    Parameters
    ----------
    cluster : (n, 3) array with the points of the cluster.
    clustercenter : sequence whose first two entries are the x and y of
        the window center.
    points : (m, 3) array of points to test against (may include the
        cluster itself).
    radius : half-width of the window, 0.05 by default. A window that is
        too large also catches points that merely lie next to the cluster.

    Returns
    -------
    bool
        True if there is no point above the cluster inside the window.
    """
    x_min, x_max = clustercenter[0] - radius, clustercenter[0] + radius
    y_min, y_max = clustercenter[1] - radius, clustercenter[1] + radius
    z_top = cluster[:, 2].max()

    above_in_window = (
        (points[:, 0] > x_min)
        & (points[:, 0] < x_max)
        & (points[:, 1] > y_min)
        & (points[:, 1] < y_max)
        & (points[:, 2] > z_top)
    )
    # Only the existence of such a point matters, so avoid copying rows.
    return not above_in_window.any()


In [122]:
def cluster_generator(points, min_points):
    """Yield the linear, top-lying clusters found in ``points``.

    ``points`` is clustered with ``dbscan_stretchz`` (z stretch 4). A
    cluster is yielded only if it

    * has at least ``min_points`` points,
    * has a linearity above the global ``linearity_tresh`` and a first
      eigenvalue above 0.04 (i.e. it is linear and not too short), and
    * has no points of ``points`` above it (``cluster_is_on_top``), which
      excludes clusters at the base of the track profile.

    Yields tuples ``(cluster, clustercenter, eigenvals, eigenvects)``.
    Nothing is yielded if ``points`` has fewer than ``min_points`` rows.
    """
    if points.shape[0] < min_points:
        return

    labels = dbscan_stretchz(points, min_points=min_points, stretch=4)
    cluster_count = labels.max() + 1

    for label in range(cluster_count):
        members = points[labels == label]
        if members.shape[0] < min_points:
            # Undersized clusters have been observed here (possibly an
            # Open3D bug); skip them.
            continue

        eigenvals, eigenvects = pca(members)
        is_linear = linearity(eigenvals) > linearity_tresh
        is_long_enough = eigenvals[0] > 0.04
        if not (is_linear and is_long_enough):
            continue

        center = members.mean(axis=0)
        if not cluster_is_on_top(members, center, points):
            continue

        yield members, center, eigenvals, eigenvects


def check_cluster_for_railpair(clustercenter, eigenvects, pointcloud, tree, gauge, min_points):
    """Search for a parallel cluster one gauge to either side.

    The expected partner positions are ``clustercenter`` shifted by
    ``gauge + 0.07`` (gauge plus rail top width) along the horizontal
    vector perpendicular to the first eigenvector. Around both positions
    the points within the global ``neighborhood_radius`` are clustered via
    ``cluster_generator``.

    A candidate is accepted if its center is closer (in xy) than the global
    ``thresh_side_distance`` to the expected position, closer than any
    candidate accepted so far, and almost parallel to the given cluster
    (absolute cosine of the angle above 0.9).

    Returns ``(cluster, clustercenter)`` of the best candidate, or
    ``(None, None)`` if there is none.
    """
    rail_direction = eigenvects[:, 0]
    lateral = np.cross(rail_direction, np.array([0, 0, 1])) * (gauge + 0.07)
    expected_positions = np.array([clustercenter + lateral, clustercenter - lateral])
    neighborhoods = tree.query_ball_point(expected_positions, r=neighborhood_radius)

    best_distance = np.inf
    best_cluster = None
    best_center = None

    for expected, neighbor_idx in zip(expected_positions, neighborhoods):
        candidates = cluster_generator(pointcloud[neighbor_idx], min_points)
        for candidate, candidate_center, _, candidate_vects in candidates:
            # Distance to the expected position, ignoring z.
            distance = np.linalg.norm(candidate_center[:2] - expected[:2])
            if not (distance < thresh_side_distance and distance < best_distance):
                continue

            # The sign of an eigenvector is arbitrary, hence the absolute value.
            alignment = np.abs(np.dot(rail_direction, candidate_vects[:, 0]))
            if alignment > 0.9:
                best_cluster = candidate
                best_distance = distance
                best_center = candidate_center

    return best_cluster, best_center

## Iteration über Seed Points

In [123]:
with_lines = True

In [124]:
# Main loop: for every seed point, find linear clusters in its neighborhood
# and try to match each of them with a partner rail one gauge away.
start = time.time()

lines = []           # one pv.Line per detected rail pair (cluster center to partner center)
side_distances = []  # NOTE(review): never filled in this cell
center_points = []   # midpoint between the two cluster centers of each pair
delta_z = []         # absolute height difference between the two cluster centers

for i in range(seed_point_count):
    # indices[i] holds the indices of all points within neighborhood_radius of seed point i.
    for cluster, clustercenter, eigenvals, eigenvects in cluster_generator(xyz[indices[i]], min_points=min_points):
        # The partner search runs on the full point cloud via the KD-tree.
        pair_cluster, pair_center = check_cluster_for_railpair(clustercenter, eigenvects, xyz, tree, gauge, min_points)
        if pair_cluster is not None:
            lines.append(pv.Line(clustercenter, pair_center))
            center_points.append(get_central_point(clustercenter, pair_center))
            delta_z.append(np.abs(clustercenter[2] - pair_center[2]))

print(f"Time: {time.time() - start:.2f} s")
print(len(lines), "lines found")

Time: 3.34 s
338 lines found


In [125]:
# PyVista wrappers around all points and the seed points (offset
# coordinates) for plotting.
pcd_all = pv.PolyData(xyz)
pcd_seed = pv.PolyData(xyz_seed) 

In [126]:
# Turn the collected pair midpoints into a point set and attach their
# height and the height difference of each pair as scalar arrays.
center_points = np.array(center_points)
pcd_centerpoints = pv.PolyData(center_points)
pcd_centerpoints["z"] = center_points[:,2]
pcd_centerpoints["diff_z"] = np.abs(np.array(delta_z))

p = pv.Plotter()

if with_lines:
    # Add lines (SLOW)
    # for color, line in zip(side_distances, lines):
    #     p.add_mesh(line, scalars=color, cmap='plasma')
    for line in lines:
        p.add_mesh(line, color="red")


# p.add_mesh_threshold(pcd_seed, 'trackcount', all_scalars=True, render_points_as_spheres=True, point_size=10)

p.add_mesh(pcd_all, color='gray', point_size=1)
p.add_mesh(pcd_seed, color='green', point_size=3)
p.add_mesh(pcd_centerpoints, scalars="diff_z", point_size=5, cmap="cividis")

# Label every seed point with its index so it can be inspected in the
# single-seed-point cell. NOTE: rebinds the global name `labels`.
labels = [str(i) for i in range(len(xyz_seed))]
p.add_point_labels(pcd_seed, labels, point_color="black", font_size=12)

p.show()

# Color: points by z value or diff z
# Lines: difference between cluster center and gauge

Widget(value='<iframe src="http://localhost:40811/index.html?ui=P_0x7f73aa5600d0_3&reconnect=auto" class="pyvi…

## Single Seedpoints

In [127]:
# Debug cell: run the clustering for ONE seed point, print the per-cluster
# diagnostics and show the neighborhood in an Open3D window.
i = 240


# Rebinds the global `lines`; here it collects Open3D LineSets.
lines = []

hood = xyz[indices[i]]
if hood.shape[0] < minimum_in_hood:   
    raise ValueError("Not enough points in neighborhood") # continue


trackcount = 0
# labels = dbscan(hood, eps=0.05, min_points=10)
# NOTE: rebinds the global `min_points` that the other cells read.
min_points = 10 # hood.shape[0] // 10

# Earlier parameter experiments, kept for reference:
# labels = dbscan_stretchz(hood, eps=0.05, min_points=min_points, stretch=2)
# labels = dbscan_stretchz_noise(hood, eps=0.055, min_points=min_points, stretch=2.5, noise_neighbors=10, noise_std=2.0)
# labels = dbscan_stretchz_noise(hood, eps=0.07, min_points=min_points, stretch=3, noise_neighbors=10, noise_std=2.0)
# labels = dbscan_stretchz(hood, eps=0.09, min_points=min_points, stretch=5)


# Same call as in cluster_generator (z stretch 4).
labels = dbscan_stretchz(hood, min_points=min_points, stretch=4)

max_label = labels.max()
clustercount = max_label + 1  # NOTE(review): not used below
print()
print(f"Found {max_label + 1} clusters")
# print(max_label + 1, "Clusters")
for label in range(max_label + 1):
    cluster = hood[labels == label]
    eigenvals, eigenvects = pca(cluster)
    cluster_linearity = linearity(eigenvals)
    # NOTE(review): `projected` is computed but not used below.
    projected = hood @ eigenvects[:,0] # Project on first eigenvector
    # Length along first principal component
    # lines.append(plot_first_eigenvec(cluster.mean(axis=0), eigenvects))
    # print(cluster_linearity, end=" ")
    print(f"Cluster {label}: {labels[labels == label].shape[0]} points")
    print(f"  Linearity: {cluster_linearity} {cluster_linearity > linearity_tresh}")
    print(f"  Eigenvalues: {eigenvals} {eigenvals[0] > 0.04}")
    clustercenter = cluster.mean(axis=0)
    # Tested against the neighborhood only, as in cluster_generator.
    print(f"  On top {cluster_is_on_top(cluster, clustercenter, hood)}")

    
    # Line color encodes the verdict: default color = linear and long enough
    # (counted as track), blue = linear but too short, black = not linear.
    if cluster_linearity > linearity_tresh:
        if eigenvals[0] > 0.04:
            trackcount += 1
            lines.append(plot_first_eigenvec(cluster.mean(axis=0), eigenvects, stretch=eigenvals[0]*5))
            print(f"  --- Track ---")
        else:
            lines.append(plot_first_eigenvec(cluster.mean(axis=0), eigenvects, color=[0, 0, 0.7], stretch=eigenvals[0]*5))
    else:
        lines.append(plot_first_eigenvec(cluster.mean(axis=0), eigenvects, color=[0, 0, 0], stretch=eigenvals[0]*5))
# print("\nTrack count", trackcount[i])
print(f"Track count: {trackcount}")

# print()

# Color the points by cluster label; the divisor guard avoids a division
# by zero when max_label is 0 or negative.
colors = plt.get_cmap("tab20")(labels / (max_label if max_label > 0 else 1))
colors[labels < 0] = [0.5, 0.5, 0.5, 1] # Color for points that do not belong to any cluster
hood_pcd = o3d.geometry.PointCloud()
hood_pcd.points = o3d.utility.Vector3dVector(hood)
hood_pcd.colors = o3d.utility.Vector3dVector(colors[:,:3])
o3d.visualization.draw_geometries([hood_pcd] + lines)


Found 1 clusters
Cluster 0: 505 points
  Linearity: 0.9874208226565139 True
  Eigenvalues: [8.13843757e-02 1.02374850e-03 7.13068860e-05] True
  On top True
  --- Track ---
Track count: 1


In [128]:
# Show the colored neighborhood from the previous cell together with the
# full point cloud (uniform light grey) and a coordinate frame at the
# origin of the offset coordinates.
pcd_all_o3d = o3d.geometry.PointCloud()
pcd_all_o3d.points = o3d.utility.Vector3dVector(xyz)
pcd_all_o3d.paint_uniform_color([0.7, 0.7, 0.7])
mesh_frame = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.5, origin=[0, 0, 0])
o3d.visualization.draw_geometries([pcd_all_o3d, mesh_frame, hood_pcd])