# DBSCAN Beide Seiten
- In 15 sind die Linien / Center Points in der Weiche auf zwei verschiedenen Niveaus und die unteren leicht versetzt in x,y
- Ausprobieren: Was passiert, wenn die Cluster länger sein müssen?
- Evtl. kleinerer Suchradius, 0.4? Dann muss aber auch `linearity_tresh` verringert werden
- `thresh_side_distance = 0.15`: 0.1 ist meist gut, aber in manchen Fällen zu wenig (insbes. Anfang Weiche), daher 0.15 
- Bei "Anfang Weiche" liegt auf einer Seite eine der Zungen an der Schiene, dadurch ist der Cluster deutlich breiter und dadurch das Clusterzentrum nicht an der richtigen Stelle

In [2]:
import pdal 
import numpy as np
import matplotlib.pyplot as plt
import open3d as o3d
from scipy.spatial import KDTree
from scipy.linalg import svd 
from osgeo import ogr
import pyvista as pv
import os
import json
import time 

from interessant import * # Bei Änderungen Kernel neu starten

ogr.UseExceptions()

In [None]:
# Working directory of the run; candidate point clouds are read from its
# "candidates" subfolder and the tile index from "temp.gpkg".
tmpdir = "/media/riannek/minimax/gleis/temp_run24"

# Notable tiles (presumably positions in the key list of `interessant` — confirm):
# Platform: 29; track with high intensity: 11; switch B: 16; underground station: 20; track far off to the side: 23; concrete cover: 28; train run 14 A (axle counter in run24): 6;
# Many tracks: 33; start of switch: 34; OLA same H: 35; Y: 37

# Select one entry of `interessant` by its position in the key list.
key = list(interessant.keys())[42] 

filename = interessant[key]
# Keep only the part before the first dot and append the .ply extension.
filename = filename.split(".")[0] + ".ply"
print(key, filename)


# Fail early when there is no candidate file for the selected tile.
if not os.path.exists(os.path.join(tmpdir, "candidates",filename)):
    raise FileNotFoundError(filename)

Extrem viele Punkte run24 4474800_5332150.ply


In [4]:
# Maximum horizontal distance between the expected and the detected centre of
# the opposite rail. 0.1 is usually fine, but too small in some cases
# (especially at the start of a switch), hence 0.15.
thresh_side_distance = 0.15

# Only points with an Intensity below this value are used to derive seed points.
intensity_threshold = 14500
# Radius of the Poisson sampling that produces the seed points (0.4 was tried before).
downsample_radius = 0.45 # 0.4
# Search radius of the KD-tree ball queries around seed points and expected rail positions.
neighborhood_radius = 0.5 # 0.5

# DBSCAN min_points; also the minimum size of a cluster that is analysed.
min_points = 10
# Seed neighbourhoods with fewer points are skipped.
minimum_in_hood = 10
# A cluster counts as linear when its linearity exceeds this value.
linearity_tresh = 0.98

# Track gauge; the main loop adds 0.07 for the rail head to get the expected offset of the opposite rail.
gauge = 1.435

In [5]:
# Folder of the run; only used by the optional viewer call below.
runfolder = "/media/riannek/minimax/gleis/run24-2024-08-13"

import subprocess
# Optional: open the .copc.laz file of the selected tile in an external viewer.
# subprocess.Popen(["pyvistaviewer", os.path.join(runfolder, filename.split(".")[0] + ".copc.laz")])

## Datei incl. margin aus Nachbarkacheln

In [6]:
# Open the GeoPackage in the working directory and fetch its "tiles" layer,
# which is queried below by file name and by bounding box.
gpkg = ogr.Open(os.path.join(tmpdir, "temp.gpkg"))
layer = gpkg.GetLayerByName("tiles")

In [7]:
def extend_bbox(bbox, margin=2):
    """Grow a bounding box by `margin` on every side.

    `bbox` is indexed as (MinX, MaxX, MinY, MaxY); the result is a tuple in
    the same order.
    """
    min_x, max_x, min_y, max_y = bbox[0], bbox[1], bbox[2], bbox[3]
    return (min_x - margin, max_x + margin, min_y - margin, max_y + margin)

def get_bbox_polygon(bbox):
    """Build an OGR polygon from a bounding box given as (MinX, MaxX, MinY, MaxY).

    The ring starts at (MinX, MinY), visits (MaxX, MinY), (MaxX, MaxY) and
    (MinX, MaxY), and is closed by repeating the first corner.
    """
    min_x, max_x, min_y, max_y = bbox[0], bbox[1], bbox[2], bbox[3]
    corners = (
        (min_x, min_y),
        (max_x, min_y),
        (max_x, max_y),
        (min_x, max_y),
        (min_x, min_y),  # close the ring
    )
    outline = ogr.Geometry(ogr.wkbLinearRing)
    for x, y in corners:
        outline.AddPoint_2D(x, y)
    polygon = ogr.Geometry(ogr.wkbPolygon)
    polygon.AddGeometry(outline)
    return polygon

In [8]:
# Look up the selected tile in the tile index to get its bounding box.
# (Named `attribute_filter` so the builtin `filter` is not shadowed.)
attribute_filter = f"filename = '{filename}'"
layer.SetAttributeFilter(attribute_filter)
feature = layer.GetNextFeature()
layer.SetAttributeFilter(None)
if feature is None:
    # Without this check the next line fails with an opaque AttributeError.
    raise ValueError(f"No tile with filename {filename!r} in layer 'tiles'")
bbox = feature.GetGeometryRef().GetEnvelope()  # MinX, MaxX, MinY, MaxY
extended = extend_bbox(bbox, margin=2)
bbox_geom = get_bbox_polygon(extended)
# Collect the file names of all tiles that pass the spatial filter of the
# extended box (the tile itself plus its neighbours).
layer.SetSpatialFilter(bbox_geom)
tiles = [f.GetField("filename") for f in layer]
layer.SetSpatialFilter(None)

print(tiles)

['4474800_5332175.ply', '4474825_5332175.ply', '4474775_5332125.ply', '4474775_5332150.ply', '4474800_5332150.ply', '4474800_5332125.ply', '4474825_5332125.ply', '4474825_5332150.ply', '4474775_5332175.ply']


In [9]:
bbox # MinX, MaxX, MinY, MaxY

(4474800.0, 4474825.0, 5332150.0, 5332174.999)

In [10]:
# Drop the reference to the GeoPackage dataset.
# NOTE(review): `layer` was obtained from this dataset and should presumably not be used afterwards — confirm.
del gpkg

In [11]:
# Bounds string "([xmin, xmax], [ymin, ymax])" of the extended bbox; passed to the crop filter in the next cell.
bounds = f"([{extended[0]}, {extended[1]}], [{extended[2]}, {extended[3]}])" 
bounds

'([4474798.0, 4474827.0], [5332148.0, 5332176.999])'

In [None]:
# Read the candidate files of the tile and its neighbours, merge them and crop
# to the extended bounding box.
# The crop bounds are built from `extended` right here: the notebook-level name
# `bounds` is rebound to the un-extended tile bbox further down, so re-running
# this cell afterwards would otherwise silently drop the margin.
crop_bounds = f"([{extended[0]}, {extended[1]}], [{extended[2]}, {extended[3]}])"
readers = [pdal.Reader(os.path.join(tmpdir, "candidates", tile)) for tile in tiles]
pipeline = pdal.Pipeline(readers) | pdal.Filter.merge() | pdal.Filter.crop(bounds=crop_bounds)
pipeline.execute()
points = pipeline.arrays[0]
points.shape

(530827,)

## Seed Points

In [13]:
# Keep only the points whose Intensity is below the threshold; seed points are sampled from these.
low_intensity = points[points["Intensity"] < intensity_threshold]
low_intensity.shape

(395892,)

In [14]:
# Bounds string of the original (non-extended) tile bbox.
# NOTE: this rebinds `bounds`, which previously held the extended bbox.
bounds = f"([{bbox[0]}, {bbox[1]}], [{bbox[2]}, {bbox[3]}])" 
bounds

'([4474800.0, 4474825.0], [5332150.0, 5332174.999])'

In [15]:
# Downsample with poisson sampling (only original bbox)
# The low-intensity points are cropped to `bounds` and thinned with
# filters.sample; the points that remain are the seed points.

downsampling_pipeline = pdal.Filter.crop(bounds=bounds).pipeline(low_intensity) | pdal.Filter("filters.sample", radius=downsample_radius)
downsampling_pipeline.execute()
seed_points = downsampling_pipeline.arrays[0]
seed_point_count = seed_points.shape[0]
seed_point_count

378

## KD Tree

In [16]:
# Stack the X, Y and Z fields into plain coordinate arrays with one row per point:
# `xyz` for all points (incl. margin), `xyz_seed` for the seed points.
xyz = np.vstack((points['X'], points['Y'], points['Z'])).transpose()
xyz_seed = np.vstack((seed_points['X'], seed_points['Y'], seed_points['Z'])).transpose()

In [17]:
# Shift both arrays by the rounded mean of all points (presumably to keep the coordinate values small).
# NOTE: the subtraction is done in place, and re-running this cell recomputes `offset` from the already shifted data.
offset = xyz.mean(axis=0).round() 
xyz -= offset
xyz_seed -= offset

In [18]:
# KD-tree over all points (incl. margin); reused in the main loop for the opposite-rail search.
tree = KDTree(xyz)  

# indices: ndarray (dtype object) with a list of indices for each seed point
# (all points within neighborhood_radius of that seed point)
indices = tree.query_ball_point(xyz_seed, r=neighborhood_radius)

## Funktionen

In [19]:
def pca(cloud):
    """Principal component analysis of a point cloud via SVD.

    The cloud is centred on its mean and decomposed with a thin SVD. The
    squared singular values divided by (number of points - 1) are returned
    as eigenvalues, in the order delivered by ``svd``.

    Returns (eigenvalues, eigenvectors); the eigenvectors are stored
    column-wise, i.e. the first one is ``eigenvectors[:, 0]``.
    Raises ValueError when the cloud has fewer than 3 points.
    """
    n_points = cloud.shape[0]
    if n_points < 3:
        raise ValueError("Point cloud must have at least 3 points")
    deviations = cloud - np.mean(cloud, axis=0)
    _, singular_values, v_transposed = svd(deviations, full_matrices=False)
    variances = singular_values ** 2 / (n_points - 1)
    # Rows of V^T are the principal axes; transpose so that axis k is [:, k].
    return variances, v_transposed.T

def linearity(eigenvals):
    """Linearity of a point cloud from its eigenvalues.

    Computed as (first - second) / first, using the first two entries of
    `eigenvals`.
    """
    first = eigenvals[0]
    second = eigenvals[1]
    return (first - second) / first

def verticality(eigenvects):
    """Calculate the verticality of a point cloud.

    1 minus the absolute z component of the third eigenvector (third column
    of `eigenvects`).

    The absolute value is required because the sign of an eigenvector
    returned by an SVD is arbitrary: the same cloud may yield a third
    eigenvector pointing up or down, and both must give the same result.
    """
    third_z = eigenvects[:, 2][2]
    return 1 - abs(third_z)

In [20]:
def dbscan_stretchz_auto(xyz, min_points=10, stretch=1.5):
    """Cluster points with Open3D DBSCAN after scaling the z coordinate.

    eps is derived from the number of points as 50 / count, but never
    smaller than 0.06. The input array is not modified; z is scaled by
    `stretch` on a copy.

    Returns one label per point. When there are `min_points` points or
    fewer, every point is labelled -1 without running DBSCAN.
    """
    n_points = xyz.shape[0]
    if n_points <= min_points:
        return np.full(n_points, -1, dtype=np.int8)

    eps = max(50 / n_points, 0.06)

    stretched = xyz.copy()
    stretched[:, 2] *= stretch
    cloud = o3d.geometry.PointCloud()
    cloud.points = o3d.utility.Vector3dVector(stretched)
    # eps is min distance between clusters
    return np.array(cloud.cluster_dbscan(eps=eps, min_points=min_points, print_progress=False))

In [21]:
def plot_cluster_line(cluster, eigenvects, stretch=0.2):
    """Return a PyVista line from the cluster mean along the first eigenvector.

    The line starts at the mean of `cluster` and ends `stretch` times the
    first eigenvector (first column of `eigenvects`) away from it.
    """
    centroid = cluster.mean(axis=0)
    direction = eigenvects[:, 0]
    return pv.Line(centroid, centroid + direction * stretch)

In [22]:
def plot_first_eigenvec(point, eigenvects, color=[1, 0, 0], stretch=1):
    """Return an Open3D LineSet showing the first eigenvector at `point`.

    The single segment runs from `point` to `point` plus `stretch` times the
    first eigenvector (first column of `eigenvects`) and is drawn in `color`.
    """
    tip = point + eigenvects[:, 0] * stretch
    segment = o3d.geometry.LineSet()
    segment.points = o3d.utility.Vector3dVector([point, tip])
    segment.lines = o3d.utility.Vector2iVector([[0, 1]])
    segment.colors = o3d.utility.Vector3dVector([color])
    return segment

In [23]:
def get_central_point(a, b):
    """Return the point halfway between `a` and `b`."""
    half_offset = (b - a) / 2
    return a + half_offset

In [24]:
def to_color(skalar, cmap=plt.cm.plasma):
    """Look up `skalar` in the colormap and return the first three components of the result."""
    mapped = cmap(skalar)
    return mapped[:3]

## Iteration über Seed Points

In [25]:
# When True, the plotting cell also draws one connecting line per detected rail pair (slow).
with_lines = True

In [26]:
start = time.perf_counter()  # monotonic clock, intended for measuring intervals

# Thresholds used for both the seed-side and the opposite-side clusters
MIN_FIRST_EIGENVALUE = 0.04   # clusters with a smaller first eigenvalue are too short
MIN_PARALLEL_COS = 0.9        # |cos| of the angle between both rail directions must exceed this
RAILHEAD_WIDTH = 0.07         # added to the gauge (rail top width)
DBSCAN_Z_STRETCH = 4          # z scaling passed to dbscan_stretchz_auto
UP = np.array([0.0, 0.0, 1.0])


def _linear_clusters(neighborhood):
    """Yield (cluster, eigenvectors) for every long enough, linear DBSCAN cluster of `neighborhood`."""
    labels = dbscan_stretchz_auto(neighborhood, min_points=min_points, stretch=DBSCAN_Z_STRETCH)
    if labels.size == 0:
        return
    for label in range(labels.max() + 1):
        cluster = neighborhood[labels == label]
        if cluster.shape[0] < min_points:
            # DBSCAN can return clusters smaller than min_points
            # (e.g. when border points are claimed by a neighbouring cluster).
            continue
        eigenvals, eigenvects = pca(cluster)
        if linearity(eigenvals) > linearity_tresh and eigenvals[0] > MIN_FIRST_EIGENVALUE:
            yield cluster, eigenvects


# One entry per detected rail pair
lines = []            # PyVista line between the two rail cluster centres
side_distances = []   # horizontal distance between expected and found opposite rail centre
center_points = []    # midpoint between both rail cluster centres
delta_z = []          # absolute height difference of both rail cluster centres

for hood_indices in indices:
    hood = xyz[hood_indices]
    if hood.shape[0] < minimum_in_hood:
        continue

    # DBSCAN with stretched z, keeping only linear clusters
    for cluster, eigenvects in _linear_clusters(hood):
        clustercenter = cluster.mean(axis=0)
        direction = eigenvects[:, 0]

        # Check for rail pair: the opposite rail is expected at a horizontal
        # offset of gauge + rail top width, perpendicular to the rail direction.
        # The perpendicular is normalised because the cross product of a
        # non-horizontal unit vector with the z axis is shorter than 1; for a
        # (near-)vertical cluster the offset would collapse towards zero and
        # the cluster would be paired with itself.
        perpendicular = np.cross(direction, UP)
        perpendicular_length = np.linalg.norm(perpendicular)
        if perpendicular_length < 1e-9:
            # Vertical direction: no horizontal perpendicular exists
            continue
        side_vector = perpendicular / perpendicular_length * (gauge + RAILHEAD_WIDTH)

        bothsides = np.vstack((clustercenter + side_vector, clustercenter - side_vector))
        indices_bothsides = tree.query_ball_point(bothsides, r=neighborhood_radius)

        # Linear cluster that is closest to one of the estimated side points (ignoring z),
        # but it must be within thresh_side_distance and almost parallel
        closest_cluster_distance = np.inf
        closest_cluster = None

        for expected_position, side in zip(bothsides, indices_bothsides):
            if len(side) <= min_points:
                continue
            side_hood = xyz[side]  # index once instead of once per cluster label

            for sidecluster, eigenvects_side in _linear_clusters(side_hood):
                sideclustercenter = sidecluster.mean(axis=0)

                # Check distance (ignore z)
                sidecluster_distance = np.linalg.norm(sideclustercenter[:2] - expected_position[:2])
                if not (sidecluster_distance < thresh_side_distance
                        and sidecluster_distance < closest_cluster_distance):
                    continue

                # Check if parallel
                cos_angle = np.abs(np.dot(direction, eigenvects_side[:, 0]))
                if cos_angle > MIN_PARALLEL_COS:
                    closest_cluster = sidecluster
                    closest_cluster_distance = sidecluster_distance

        if closest_cluster is None:
            continue

        closest_clustercenter = closest_cluster.mean(axis=0)
        lines.append(pv.Line(clustercenter, closest_clustercenter))
        side_distances.append(closest_cluster_distance)
        center_points.append(get_central_point(clustercenter, closest_clustercenter))
        delta_z.append(np.abs(clustercenter[2] - closest_clustercenter[2]))


print(f"Elapsed time: {time.perf_counter() - start:.2f} s")

Elapsed time: 7.08 s


In [27]:
# All (offset-shifted) points as a PyVista point cloud; drawn in gray in the plotting cell.
pcd_all = pv.PolyData(xyz)

In [28]:
# Turn the per-pair results of the main loop into arrays and attach them as
# scalar fields to a point cloud of the pair midpoints.
# NOTE: `center_points` is rebound from a list to an array here.
center_points = np.array(center_points)
diff_z = np.abs(np.array(delta_z))
pcd_centerpoints = pv.PolyData(center_points)
pcd_centerpoints["z"] = center_points[:,2]
pcd_centerpoints["diff_z"] = diff_z
pcd_centerpoints["side_distance"] = np.array(side_distances)

p = pv.Plotter()

if with_lines:
    # Add lines (SLOW) 
    # Each connecting line is added as its own mesh, coloured by the height difference of the pair.
    for color, line in zip(diff_z, lines):  #zip(side_distances, lines):
        p.add_mesh(line, scalars=color, cmap='copper')


#p.add_mesh_threshold(pcd_seed, 'trackcount', all_scalars=True, render_points_as_spheres=True, point_size=10)

# All points as gray background
p.add_mesh(pcd_all, color='gray', point_size=1)

# Pair midpoints with a threshold control on the side distance
p.add_mesh_threshold(pcd_centerpoints, scalars="side_distance", point_size=5, cmap="magma_r")

p.show()


Widget(value='<iframe src="http://localhost:33795/index.html?ui=P_0x7fca10285e80_0&reconnect=auto" class="pyvi…

# Experimente

In [29]:
# Presumably a deliberate stop so that "Run All" does not continue into the experimental cells below.
raise ValueError("Ende")

ValueError: Ende

In [None]:
# Inspect the DBSCAN clusters of a single seed point and show them in Open3D.
i = 120

# Open3D line sets for the visualisation below. Kept under their own name so
# that the PyVista `lines` collected by the main loop are not overwritten.
debug_lines = []

hood = xyz[indices[i]]
if hood.shape[0] < minimum_in_hood:
    raise ValueError("Not enough points in neighborhood") # continue
print(f"Seed point {i} has {hood.shape[0]} points in its neighborhood")

trackcount = 0
# Local setting for this experiment; the notebook-level `min_points` is left untouched.
experiment_min_points = 10

labels = dbscan_stretchz_auto(hood, min_points=experiment_min_points, stretch=4)

max_label = labels.max()
print()
print(f"Found {max_label + 1} clusters")
for label in range(max_label + 1):
    cluster = hood[labels == label]
    eigenvals, eigenvects = pca(cluster)
    cluster_linearity = linearity(eigenvals)
    print(f"Cluster {label}: {cluster.shape[0]} points")
    print(f"Linearity: {cluster_linearity} {cluster_linearity > linearity_tresh}")
    print(f"Eigenvalues: {eigenvals}")
    print(f"Eigenvectors: {eigenvects}")

    # Colour of the direction line: red = linear and long enough (counted as
    # track), blue = linear but too short, black = not linear.
    # Note that this cell uses 0.02 as length threshold, the main loop 0.04.
    if cluster_linearity > linearity_tresh:
        if eigenvals[0] > 0.02:
            trackcount += 1
            line_color = [1, 0, 0]
        else:
            line_color = [0, 0, 0.7]
    else:
        line_color = [0, 0, 0]
    debug_lines.append(
        plot_first_eigenvec(cluster.mean(axis=0), eigenvects, color=line_color, stretch=eigenvals[0]*5)
    )
print(f"Track count: {trackcount}")

colors = plt.get_cmap("tab20")(labels / (max_label if max_label > 0 else 1))
colors[labels < 0] = [0.5, 0.5, 0.5, 1] # colour for points that belong to no cluster
hood_pcd = o3d.geometry.PointCloud()
hood_pcd.points = o3d.utility.Vector3dVector(hood)
hood_pcd.colors = o3d.utility.Vector3dVector(colors[:,:3])
o3d.visualization.draw_geometries([hood_pcd] + debug_lines)

Seed point 120 has 40 points in its neighborhood

Found 1 clusters
Cluster 0: 40 points
Linearity: 0.9795277671630143 False
Eigenvalues: [0.03033579 0.00062104 0.00012452]
Eigenvectors: [[-0.02697505 -0.88703503 -0.46091345]
 [-0.99437144 -0.02344853  0.10332283]
 [ 0.10245871 -0.46110631  0.88140977]]
Track count: 0


In [None]:
# Offset perpendicular (in the horizontal plane) to the first eigenvector of the last inspected cluster,
# scaled by the gauge. Unlike the main loop, no rail top width (0.07) is added here.
side = np.cross(eigenvects[:,0], np.array([0, 0, 1])) * gauge
side  

array([ 1.3470673 ,  0.49403146, -0.        ])

In [None]:
np.linalg.norm(side)

np.float64(1.4348022125784612)

In [None]:
cluster[:,2].mean()    

np.float64(-0.05587485034732538)

In [None]:
# Same offset in the opposite direction
otherside = -side
otherside 

array([-1.3470673 , -0.49403146,  0.        ])

In [None]:
# Mean of the last cluster inspected in the experiment cell above
clustermean = cluster.mean(axis=0)
clustermean

array([-8.59903397,  7.47082874,  0.14914627])

In [None]:
clustermean + side

array([-7.25196667,  7.9648602 ,  0.14914627])

In [None]:
# Expected positions on both sides of the cluster mean.
# Needs `clustermean`, `side` and `otherside` from the cells above; the recorded NameError
# shows the cell was run when `clustermean` was not defined.
bothsides = np.vstack((clustermean + side, clustermean + otherside))
bothsides

NameError: name 'clustermean' is not defined

In [None]:
# Indices of all points within 0.1 of each of the two expected positions
indices_sides = tree.query_ball_point(bothsides, r=0.1)

In [None]:
indices_sides

array([list([]), list([1641, 2260, 2396])], dtype=object)