# DBSCAN Refactoring mit Klassen
Die Klassen-Variante ist langsamer: 1.33 s statt 1.27 s (Mittelwert aus 10 Läufen).

In [1]:
import pdal 
import numpy as np
import matplotlib.pyplot as plt
import open3d as o3d
from scipy.spatial import KDTree
from scipy.linalg import svd 
from osgeo import ogr
import pyvista as pv
import os
import json
import time 

from interessant import * # Bei Änderungen Kernel neu starten

ogr.UseExceptions()

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
# Folder with the .ply tiles and temp.gpkg that the cells below read from
tmpdir = "/media/riannek/minimax/gleis/temp_fertig"

# Indices of noteworthy tiles in `interessant`:
# Platform: 29; Track with high intensity: 11; Switch B: 16; Underground station: 20; Track far off to the side: 23; Concrete cover: 28; Train in run 14 A (axle counter in run24): 6; 
# Many tracks: 33; Start of switch: 34; OLA same H: 35; Y: 37

# Pick one entry of `interessant` by position (presumably a dict name -> tile file; it comes from the star import)
key = list(interessant.keys())[43] 

# Keep only the part before the first dot and use the .ply extension
filename = interessant[key]
filename = filename.split(".")[0] + ".ply"
print(key, filename)


# Fail early if the tile does not exist in tmpdir
if not os.path.exists(os.path.join(tmpdir, filename)):
    raise FileNotFoundError(filename)

Auch viele Gleise 4480875_5356950.ply


In [3]:
# Maximum distance between the predicted position of the second rail and the
# centre of a candidate cluster (0.1 is usually fine, but too small in some cases)
thresh_side_distance = 0.15

# Points with an Intensity below this value are kept as candidates for the seed points
intensity_threshold = 14500
downsample_radius = 0.45 # radius passed to PDAL filters.sample for the seed points (0.4 noted as alternative)
neighborhood_radius = 0.5 # radius of the KD-tree ball queries (0.5 noted as alternative)

# min_points handed to DBSCAN; smaller clusters are skipped afterwards
min_points = 10
# Neighbourhoods with fewer points than this are not clustered at all
minimum_in_hood = 10
# A cluster must exceed this linearity to count as a rail candidate
linearity_tresh = 0.98

# Track gauge — presumably in metres (standard gauge); TODO confirm units
gauge = 1.435

In [4]:
# Folder of the source run; only referenced by the commented-out viewer call below
runfolder = "/media/riannek/minimax/gleis/run24-2024-08-13"

import subprocess
# Optional: open the matching .copc.laz tile in the external viewer
# subprocess.Popen(["pyvistaviewer", os.path.join(runfolder, filename.split(".")[0] + ".copc.laz")])

## Datei incl. margin aus Nachbarkacheln

In [5]:
# Open the GeoPackage in tmpdir and fetch its "tiles" layer
# (queried below by the "filename" field and by geometry)
gpkg = ogr.Open(os.path.join(tmpdir, "temp.gpkg"))
layer = gpkg.GetLayerByName("tiles")

In [6]:
def extend_bbox(bbox, margin=2):
    """Grow a bounding box by ``margin`` on every side.

    ``bbox`` is indexed as (MinX, MaxX, MinY, MaxY). The result is a new
    4-tuple in the same order; the input is not modified.
    """
    min_x, max_x = bbox[0] - margin, bbox[1] + margin
    min_y, max_y = bbox[2] - margin, bbox[3] + margin
    return (min_x, max_x, min_y, max_y)

def get_bbox_polygon(bbox):
    """Build an OGR polygon from a bounding box given as (MinX, MaxX, MinY, MaxY).

    Returns an ``ogr.Geometry`` of type wkbPolygon with a single closed ring.
    """
    min_x, max_x, min_y, max_y = bbox[0], bbox[1], bbox[2], bbox[3]
    corners = (
        (min_x, min_y),
        (max_x, min_y),
        (max_x, max_y),
        (min_x, max_y),
        (min_x, min_y),  # repeat the first corner to close the ring
    )
    outline = ogr.Geometry(ogr.wkbLinearRing)
    for x, y in corners:
        outline.AddPoint_2D(x, y)
    polygon = ogr.Geometry(ogr.wkbPolygon)
    polygon.AddGeometry(outline)
    return polygon

In [7]:
# Look up the feature of the selected tile by its file name.
# (The variable is not called `filter`, which would shadow the builtin.)
attribute_filter = f"filename = '{filename}'"
layer.SetAttributeFilter(attribute_filter)
feature = layer.GetNextFeature()
layer.SetAttributeFilter(None)
if feature is None:
    # GetNextFeature() returns None when nothing matches; fail with a clear message
    raise LookupError(f"No feature with filename {filename!r} in layer 'tiles'")

# Envelope of the tile as (MinX, MaxX, MinY, MaxY), then enlarged by the margin
bbox = feature.GetGeometryRef().GetEnvelope()
extended = extend_bbox(bbox, margin=2)
bbox_geom = get_bbox_polygon(extended)

# All tiles selected by the spatial filter on the enlarged box (the tile itself and its neighbours)
layer.SetSpatialFilter(bbox_geom)
tiles = [f.GetField("filename") for f in layer]
layer.SetSpatialFilter(None) 

print(tiles) 

['4480850_5356975.ply', '4480875_5356925.ply', '4480900_5356975.ply', '4480875_5356950.ply', '4480850_5356925.ply', '4480875_5356975.ply', '4480850_5356950.ply', '4480900_5356925.ply', '4480900_5356950.ply']


In [8]:
bbox # Envelope of the selected tile without margin: (MinX, MaxX, MinY, MaxY)

(4480875.0, 4480900.0, 5356950.0, 5356975.0)

In [9]:
# Drop the reference to the GeoPackage dataset.
# NOTE(review): `layer` still refers into this dataset — presumably it must not be used afterwards.
del gpkg

In [10]:
# Bounds string "([xmin, xmax], [ymin, ymax])" of the tile including the margin; used by the crop filter of the reading pipeline
bounds = f"([{extended[0]}, {extended[1]}], [{extended[2]}, {extended[3]}])" 
bounds

'([4480873.0, 4480902.0], [5356948.0, 5356977.0])'

In [11]:
# Read the selected tile and its neighbours, merge them and crop to the enlarged bounding box
readers = [pdal.Reader(os.path.join(tmpdir, tile)) for tile in tiles]
pipeline = pdal.Pipeline(readers) | pdal.Filter.merge() | pdal.Filter.crop(bounds=bounds)
pipeline.execute()
# First result array of the pipeline; its fields are accessed by name below ("X", "Y", "Z", "Intensity")
points = pipeline.arrays[0]
points.shape 

(215710,)

## Seed Points

In [12]:
# Keep only the points whose Intensity is below the threshold
low_intensity = points[points["Intensity"] < intensity_threshold]
low_intensity.shape

(93697,)

In [13]:
# NOTE: `bounds` is re-assigned here — from now on it describes the original tile WITHOUT margin
bounds = f"([{bbox[0]}, {bbox[1]}], [{bbox[2]}, {bbox[3]}])" 
bounds

'([4480875.0, 4480900.0], [5356950.0, 5356975.0])'

In [14]:
# Downsample with poisson sampling (only original bbox)
# The low-intensity points are cropped to the tile itself (no margin) and then
# thinned with filters.sample using downsample_radius; the result are the seed points.

downsampling_pipeline = pdal.Filter.crop(bounds=bounds).pipeline(low_intensity) | pdal.Filter("filters.sample", radius=downsample_radius)
downsampling_pipeline.execute()
seed_points = downsampling_pipeline.arrays[0]
seed_point_count = seed_points.shape[0]
seed_point_count

477

## KD Tree

In [15]:
# Plain (n, 3) coordinate arrays: one for all points (incl. margin), one for the seed points
xyz = np.vstack((points['X'], points['Y'], points['Z'])).transpose()
xyz_seed = np.vstack((seed_points['X'], seed_points['Y'], seed_points['Z'])).transpose()

In [16]:
# Shift both arrays by the rounded mean of all points so that coordinates are small numbers
# (presumably for numerical precision and for the viewers — TODO confirm).
# Both subtractions modify the arrays in place.
offset = xyz.mean(axis=0).round() 
xyz -= offset
xyz_seed -= offset

In [17]:
# KD-tree over all shifted points (including the margin)
tree = KDTree(xyz)  

# indices: ndarray (dtype object) with a list of indices for each seed point
# (all points of `xyz` within neighborhood_radius of that seed point)
indices = tree.query_ball_point(xyz_seed, r=neighborhood_radius)

## Funktionen

In [18]:
def pca(cloud):
    """Principal component analysis of a point cloud via SVD.

    The cloud is centred on its mean and decomposed with a thin SVD.

    Returns (eigenvalues, eigenvectors): the eigenvalues are the squared
    singular values divided by (number of points - 1); the eigenvectors are
    the columns of the second return value, i.e. the first one is
    ``eigenvectors[:, 0]``.

    Raises ValueError if the cloud has fewer than 3 points.
    """
    n_points = cloud.shape[0]
    if n_points < 3:
        raise ValueError("Point cloud must have at least 3 points")
    deviations = cloud - np.mean(cloud, axis=0)
    _, singular_values, vt = svd(deviations, full_matrices=False)
    variances = singular_values**2 / (n_points - 1)
    # Rows of vt are the principal axes; transpose so they become columns
    return variances, vt.T

def linearity(eigenvals):
    """Linearity of a point cloud from its eigenvalues.

    Computed as (first - second) / first, using the first two entries of
    ``eigenvals``.
    """
    largest, second = eigenvals[0], eigenvals[1]
    return (largest - second) / largest

def verticality(eigenvects):
    """Calculate the verticality of a point cloud.

    Defined as 1 minus the absolute z component of the third eigenvector
    (third column of ``eigenvects``).

    The absolute value matters: the sign of an eigenvector coming out of an
    SVD is arbitrary, so without it the same cloud could yield either ``v``
    or ``2 - v``. With unit eigenvectors the result lies in [0, 1]: 0 when
    the third eigenvector is parallel to the z axis, 1 when it is horizontal.
    """
    third_axis_z = eigenvects.T[2][2]
    return 1 - abs(third_axis_z)

In [19]:
def dbscan_stretchz_auto(xyz, min_points=10, stretch=1.5):
    """Cluster points with DBSCAN after scaling the z axis, with an automatic eps.

    eps is ``50 / number_of_points`` but never smaller than 0.06. The z
    column of a copy of ``xyz`` is multiplied by ``stretch`` before clustering;
    the input array is left untouched.

    Returns an array with one label per point. If there are at most
    ``min_points`` points, no clustering is run and every label is -1.
    """
    n_points = xyz.shape[0]
    if n_points <= min_points:
        return np.full(n_points, -1, dtype=np.int8)

    eps = max(50 / n_points, 0.06)

    stretched = xyz.copy()
    stretched[:, 2] *= stretch
    cloud = o3d.geometry.PointCloud()
    cloud.points = o3d.utility.Vector3dVector(stretched)
    # eps is min distance between clusters
    return np.array(cloud.cluster_dbscan(eps=eps, min_points=min_points, print_progress=False))

In [20]:
def plot_cluster_line(cluster, eigenvects, stretch=0.2):
    """Create a PyVista line that shows the main direction of a cluster.

    The line starts at the mean of ``cluster`` and runs along the first
    eigenvector (``eigenvects.T[0]``) scaled by ``stretch``.
    """
    origin = cluster.mean(axis=0)
    tip = origin + eigenvects.T[0] * stretch
    return pv.Line(origin, tip)

In [21]:
def plot_first_eigenvec(point, eigenvects, color=[1, 0, 0], stretch=1):
    """Build an Open3D LineSet showing the first eigenvector as a line.

    The segment runs from ``point`` to ``point`` plus the first eigenvector
    (``eigenvects.T[0]``) scaled by ``stretch`` and is drawn in ``color``.
    The default colour list is only read, never modified.
    """
    tip = point + eigenvects.T[0] * stretch
    segment = o3d.geometry.LineSet()
    segment.points = o3d.utility.Vector3dVector([point, tip])
    segment.lines = o3d.utility.Vector2iVector([[0, 1]])  # one edge: vertex 0 -> vertex 1
    segment.colors = o3d.utility.Vector3dVector([color])
    return segment

In [22]:
def get_central_point(a, b):
    """Return the midpoint between ``a`` and ``b``.

    Computed as ``a`` plus half of the vector from ``a`` to ``b``.
    """
    half_offset = (b - a) / 2
    return a + half_offset

In [23]:
def to_color(skalar, cmap=plt.cm.plasma):
    """Map a scalar to a colour: the first three components of ``cmap(skalar)``."""
    mapped = cmap(skalar)
    return mapped[:3]

## Iteration über Seed Points

In [24]:
class Cluster:
    """A point cluster together with its PCA result.

    Stores the points, eigenvalues and eigenvectors (eigenvectors in columns)
    and derives the cluster centre from the points. ``pair`` starts as None
    and is filled in by ``HoodClusters.check_for_railpairs``.
    """

    def __init__(self, points, eigenvals, eigenvects):
        self.points = points
        self.eigenvals = eigenvals
        self.eigenvects = eigenvects
        # Mean of all points of the cluster
        self.clustercenter = points.mean(axis=0)
        self.pair = None

    def direction(self):
        """Direction of the linear cluster (first eigenvector)"""
        return self.eigenvects[:, 0]

    def sidevector(self):
        """Vector pointing to the other rail of the rail pair (or opposite direction)"""
        z_axis = np.array([0, 0, 1])
        rail_spacing = gauge + 0.07  # Add railtop width to gauge
        # Perpendicular to both the cluster direction and the z axis
        return np.cross(self.direction(), z_axis) * rail_spacing

    def bothsides(self):
        """Return array with two rows containing the possible positions of the other rail"""
        offset = self.sidevector()
        center = self.clustercenter
        return np.array([center + offset, center - offset])
    


In [25]:
class HoodClusters:
    """Linear clusters found in one neighbourhood ("hood") of points.

    On construction the points are clustered with ``dbscan_stretchz_auto``;
    every cluster that is linear enough and long enough is stored as a
    ``Cluster`` in ``self.clusters``. ``check_for_railpairs`` then keeps only
    the clusters for which a parallel partner cluster is found at the
    expected rail distance.
    """

    # Maximum horizontal (x/y) distance between the expected position of the
    # other rail and the centre of a candidate cluster (15 cm according to the
    # original comment). This class attribute is what check_cluster_for_railpair
    # uses; override it on the class or on an instance to tune the search.
    thresh_side_distance = 0.15 

    def __init__(self, points): 
        """Cluster ``points`` and keep the linear clusters.

        ``self.clusters`` stays empty if there are fewer than
        ``minimum_in_hood`` points or if no cluster passes the checks.
        """
        self.points = points
        self.clusters = []
        if points.shape[0] < minimum_in_hood:
            return
        
        labels = dbscan_stretchz_auto(points, min_points=min_points)
        max_label = labels.max()
        
        # Labels run from 0 to max_label; -1 entries belong to no cluster
        for label in range(max_label + 1):
            cluster = self.points[labels == label]
            if cluster.shape[0] < min_points:
                # Somehow this happens, must be a bug in open3d
                continue

            eigenvals, eigenvects = pca(cluster)
            cluster_linearity = linearity(eigenvals)

            if not ((cluster_linearity > linearity_tresh) and (eigenvals[0] > 0.04)):
                # Cluster not linear or too short
                continue

            self.clusters.append(Cluster(cluster, eigenvals, eigenvects))

    def check_cluster_for_railpair(self, cluster, pointcloud, tree):
        """Search for the partner rail of ``cluster``.

        The two possible positions of the other rail (``cluster.bothsides()``)
        are queried in ``tree`` with ``neighborhood_radius``; the points found
        there (rows of ``pointcloud``) are clustered like any other hood.

        Returns the linear cluster whose centre is closest (in x/y) to one of
        the two expected positions, provided it is nearer than
        ``self.thresh_side_distance`` and almost parallel to ``cluster``.
        Returns None if there is no such cluster.
        """
        bothsides = cluster.bothsides()
        indices_bothsides = tree.query_ball_point(bothsides, r=neighborhood_radius)

        closest_cluster_distance = np.inf
        closest_cluster = None
        
        for j, side in enumerate(indices_bothsides):
            side_hood = HoodClusters(pointcloud[side])
            for side_cluster in side_hood.clusters:

                # Horizontal distance between cluster centre and expected position
                sidecluster_distance = np.linalg.norm(side_cluster.clustercenter[:2] - bothsides[j][:2])
                # Nearest and near enough?
                if sidecluster_distance < self.thresh_side_distance and sidecluster_distance < closest_cluster_distance:
                    # Check if parallel (absolute value: eigenvector sign is arbitrary)
                    cos_angle = np.abs(np.dot(cluster.eigenvects[:,0], side_cluster.eigenvects[:,0]))
                    if cos_angle > 0.9:  
                        closest_cluster = side_cluster
                        closest_cluster_distance = sidecluster_distance

        return closest_cluster
    
    def check_for_railpairs(self, pointcloud, tree):
        """Keep only the clusters that have a partner rail.

        For every kept cluster, ``cluster.pair`` is set to the partner found
        by ``check_cluster_for_railpair``. ``self.clusters`` is replaced by
        the kept clusters. Returns the number of kept clusters.
        """
        clusters_ok = []
        for cluster in self.clusters:
            pair = self.check_cluster_for_railpair(cluster, pointcloud, tree)
            if pair is not None:
                cluster.pair = pair
                clusters_ok.append(cluster) 

        self.clusters = clusters_ok
        return len(clusters_ok)
    
    @property
    def clustercount(self):
        """Number of clusters currently stored in ``self.clusters``"""
        return len(self.clusters)

In [26]:
with_lines = True  # also build a pv.Line per accepted cluster in the loop below and draw them in the plot cell

In [27]:
# Run the rail-pair search for the neighbourhood of every seed point.
# perf_counter() is the clock meant for measuring durations (monotonic, high resolution).
start = time.perf_counter()

lines = []            # pv.Line per accepted cluster (only if with_lines)
side_distances = []   # side vector of each accepted cluster
center_points = []    # centre of each accepted cluster
delta_z = []          # z of the cluster centre minus mean z of its neighbourhood

for i in range(seed_point_count):
    # Index the neighbourhood once; it is needed for clustering and for the mean z
    hood_points = xyz[indices[i]]
    hood = HoodClusters(hood_points)
    if hood.clustercount > 0:
        # Keeps only clusters that have a partner rail
        hood.check_for_railpairs(xyz, tree)
        hood_mean_z = hood_points.mean(axis=0)[2]
        for cluster in hood.clusters:
            if with_lines:
                lines.append(plot_cluster_line(cluster.points, cluster.eigenvects))
            side_distances.append(cluster.sidevector())
            center_points.append(cluster.clustercenter)
            delta_z.append(cluster.clustercenter[2] - hood_mean_z)

print(f"Time: {time.perf_counter() - start:.2f} s")

Time: 2.31 s


In [28]:
pcd_all = pv.PolyData(xyz)  # all points (shifted coordinates, incl. margin) for the plot

In [29]:
center_points = np.array(center_points)  # list of cluster centres -> array (the plot cell indexes it as [:, 2])

In [30]:
# Point set of the accepted cluster centres with two scalar arrays for colouring
pcd_centerpoints = pv.PolyData(center_points)
pcd_centerpoints["z"] = center_points[:,2]
pcd_centerpoints["diff_z"] = np.abs(np.array(delta_z))

p = pv.Plotter()

if with_lines:
    # Add lines (SLOW)
    # for color, line in zip(side_distances, lines):
    #     p.add_mesh(line, scalars=color, cmap='plasma')
    # Every line is added as its own mesh, all in red
    for line in lines:
        p.add_mesh(line, color="red")

#p.add_mesh_threshold(pcd_seed, 'trackcount', all_scalars=True, render_points_as_spheres=True, point_size=10)

# Whole point cloud as grey background
p.add_mesh(pcd_all, color='gray', point_size=1)

# Cluster centres coloured by |delta z|
p.add_mesh(pcd_centerpoints, scalars="diff_z", point_size=5, cmap="cividis")

p.show()

# Colour: points by z value or diff z
# Lines: difference between cluster centre and gauge

Widget(value='<iframe src="http://localhost:36399/index.html?ui=P_0x7fdac6cf90d0_0&reconnect=auto" class="pyvi…

# Experimente

In [31]:
# Deliberate stop — presumably keeps "Run All" from executing the experiment cells below
raise ValueError("Ende")

ValueError: Ende

In [None]:


# Fresh module-level result lists; test() below appends to them on every call
lines = []
side_distances = []
center_points = []
delta_z = []

def test():
    """Function-based (pre-refactoring) rail-pair search used as timing baseline.

    For the neighbourhood of every seed point: cluster with DBSCAN
    (z stretched by 4), keep linear clusters, look for a parallel linear
    cluster near one of the two expected positions of the other rail and, if
    one is found, append to the module-level lists ``lines``,
    ``side_distances``, ``center_points`` and ``delta_z``.

    Reads the module-level names ``xyz``, ``indices``, ``tree``,
    ``seed_point_count`` and the threshold constants. Returns None.
    """
    for i in range(seed_point_count):
        hood = xyz[indices[i]]
        if hood.shape[0] < minimum_in_hood:   
            continue

        # DBSCAN with stretched z
        labels = dbscan_stretchz_auto(hood, min_points=min_points, stretch=4)
        max_label = labels.max()

        for label in range(max_label + 1):
            cluster = hood[labels == label]
            if cluster.shape[0] < min_points:
                # Somehow this happens, must be a bug in open3d
                continue

            eigenvals, eigenvects = pca(cluster)
            cluster_linearity = linearity(eigenvals)


            if not ((cluster_linearity > linearity_tresh) and (eigenvals[0] > 0.04)):
                # Cluster not linear or too short
                continue
            clustercenter = cluster.mean(axis=0)

            # Check for rail pair
            side_vector = np.cross(eigenvects[:,0], np.array([0, 0, 1])) * (gauge + 0.07) # Add railtop width to gauge

            # Two candidate positions of the other rail, one on each side
            bothsides = np.vstack((clustercenter + side_vector, clustercenter - side_vector))
            indices_bothsides = tree.query_ball_point(bothsides, r=neighborhood_radius)

            # Linear cluster that is closest to one of the estimated side points
            # but must be within r = 15 cm and almost parallel
            closest_cluster_distance = np.inf
            closest_cluster = None
            
            for j, side in enumerate(indices_bothsides):
                if len(side) > min_points: 
                    labels_side = dbscan_stretchz_auto(xyz[side], min_points=min_points, stretch=4)
                    max_label_side = labels_side.max()

                    for label_side in range(max_label_side + 1):
                        sidecluster = xyz[side][labels_side == label_side]
                        if sidecluster.shape[0] < min_points:
                            continue
                        eigenvals_side, eigenvects_side = pca(sidecluster)
                        sidecluster_linearity = linearity(eigenvals_side)

                        # Same linearity / length criteria as for the first cluster
                        if not ((sidecluster_linearity > linearity_tresh) and (eigenvals_side[0] > 0.04)): 
                            continue

                        sideclustercenter = sidecluster.mean(axis=0)

                        # Distance in 3D (x, y and z) to the expected position
                        sidecluster_distance = np.linalg.norm(sideclustercenter - bothsides[j])
                        # Check distance
                        if sidecluster_distance < thresh_side_distance and sidecluster_distance < closest_cluster_distance:
                            # Check if parallel
                            cos_angle = np.abs(np.dot(eigenvects[:,0], eigenvects_side[:,0]))
                            if cos_angle > 0.9:  
                                closest_cluster = sidecluster
                                closest_cluster_distance = sidecluster_distance
                            
            if closest_cluster is None:
                continue
                    
            # Record the pair: connecting line, distance to the expected position,
            # midpoint between both clusters and their height difference
            closest_clustercenter = closest_cluster.mean(axis=0)
            lines.append(pv.Line(clustercenter, closest_clustercenter))
            side_distances.append(closest_cluster_distance)
            center_points.append(get_central_point(clustercenter, closest_clustercenter))
            delta_z.append(np.abs(clustercenter[2] - closest_clustercenter[2]))


# Benchmark: mean duration of test() over 10 runs.
# perf_counter() is the clock meant for measuring durations (monotonic, high
# resolution); time.time() can jump when the system clock is adjusted.
# Note: test() appends to the module-level result lists, so they grow with every run.
times = []
for _ in range(10):
    start = time.perf_counter()
    test()
    times.append(time.perf_counter() - start)

np.mean(times)

np.float64(1.268465781211853)

In [None]:
# Benchmark: mean duration of the class-based variant over 10 runs.
# The run counter is `_` so that the seed-point index `i` no longer shadows it;
# perf_counter() is the clock meant for measuring durations.
# Note: the results are appended to the existing module-level lists, which grow with every run.
times = []
for _ in range(10):
    start = time.perf_counter()

    for i in range(seed_point_count):
        # Index the neighbourhood once; it is needed for clustering and for the mean z
        hood_points = xyz[indices[i]]
        hood = HoodClusters(hood_points)
        if hood.clustercount > 0:
            hood.check_for_railpairs(xyz, tree)
            hood_mean_z = hood_points.mean(axis=0)[2]
            for cluster in hood.clusters:
                if with_lines:
                    lines.append(plot_cluster_line(cluster.points, cluster.eigenvects))
                side_distances.append(cluster.sidevector())
                center_points.append(cluster.clustercenter)
                delta_z.append(cluster.clustercenter[2] - hood_mean_z)

    times.append(time.perf_counter() - start)

np.mean(times)

np.float64(1.3262649536132813)