In [1]:
import pandas as pd
import numpy as np
import csv
import re

import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
import cv2
import json

import skimage.measure

import rasterio
from rasterio.features import shapes

import matplotlib.patches as mpatches
from shapely.geometry import Point, Polygon, shape, mapping
import shapely
import geopandas as gpd

from matplotlib.path import Path
import laspy
import open3d as o3d
import os

from skopt import BayesSearchCV
from sklearn.neighbors import LocalOutlierFactor
import glob


Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
# Location of the results file that the parsing cell reads:
# the folder prefix (kept with its trailing slash) and the file name inside it.
path_to_folder = "o3d/"
results_name = 'results_outliers_o3d.txt'

In [3]:
# Column names for one record of the results file. The last column holds the
# parameter dictionary exactly as it appears in the file (as text).
cols = ["file_id",
        "filepath",
        "pct_kept_powerline",
        "pct_lost_datapoints",
        "total_amount_points",
        "total_amount_wire",
        "new_total_amount_points",
        "lost_wire",
        "params"]

with open(path_to_folder + results_name, 'r') as f:
    # Keep every non-blank line. Unconditionally dropping the last element
    # would lose the final record of a file that has no trailing newline.
    lines = [line for line in f.read().split('\n') if line.strip()]

# Raw string: "\{" and "\}" are not valid escape sequences in a plain literal.
param_pattern = re.compile(r"\{.*\}")

# Split every record into its comma-separated fields plus the dictionary text.
results = []
for record_number, line in enumerate(lines, start=1):
    match = param_pattern.search(line)
    if match is None:
        raise ValueError(
            "No parameter dictionary found in record {}: {!r}".format(record_number, line))
    start, end = match.span()
    params = line[start:end]
    # start-1 drops the single separator character directly before the "{".
    data = line[:max(start - 1, 0)].split(',')
    if len(data) + 1 != len(cols):
        raise ValueError(
            "Record {} has {} fields before the parameter dictionary, expected {}: {!r}".format(
                record_number, len(data), len(cols) - 1, line))
    results.append(data + [params])

df = pd.DataFrame(results, columns=cols)

# All fields are parsed as text; convert the numeric columns in one step.
df = df.astype({
    'file_id': int,
    'pct_kept_powerline': float,
    'pct_lost_datapoints': float,
    'total_amount_points': int,
    'total_amount_wire': int,
    'new_total_amount_points': int,
    'lost_wire': int,
})

In [4]:
# One DataFrame per distinct parameter string, in order of first appearance.
runs = [df[df['params'] == param_str] for param_str in df['params'].unique()]

# Table header. Raw string so the LaTeX backslashes are taken literally.
LatexCodePre = r"""
\begin{table}[H]
    {\tiny\tabcolsep=2pt
    \begin{adjustbox}{width=1.2\linewidth,center}
    \begin{tabular}{cllllllll}
    \multicolumn{1}{l}{\textbf{}} &
      \multicolumn{1}{c}{\textbf{Score1}} &
      \multicolumn{1}{c}{\textbf{Score2}} &
      \multicolumn{1}{c}{\textbf{Pct PL Rem}} &
      \multicolumn{1}{c}{\textbf{Max PL Rem}} &
      \multicolumn{1}{c}{\textbf{Pct DP Rem}} &
      \multicolumn{1}{c}{\textbf{NB Neighbors}} &
      \multicolumn{1}{c}{\textbf{Standard Ratio}} &
      \multicolumn{1}{c}{\textbf{Voxel Size}}\\
      """

# Table footer. It used to be a plain (non-raw) string in which "\e", "\c" and
# "\l" are invalid escape sequences; a raw string yields the same text without
# relying on that. The leading four spaces are kept as an explicit prefix so
# the emitted text is unchanged.
LatexCodePost = "    " + r"""
    \end{tabular}
    \end{adjustbox}}
    \caption{Caption}
    \label{tab:my_label}
\end{table}
"""

# Score 1 only counts runs whose mean removed-powerline fraction is <= epsilon.
epsilon = 0.0001
# Score 2 weights mean kept powerline by alpha and mean lost datapoints by 1-alpha.
alpha = 0.999

table_rows = []
scores1 = []
scores2 = []
for iteration, run in enumerate(runs):
    # The params text uses single quotes; swap them so it parses as JSON.
    params = json.loads(run.iloc[0].params.replace("'", '"'))

    # Compute each aggregate once and reuse it below.
    mean_kept_powerline = np.mean(run['pct_kept_powerline'])
    mean_lost_datapoints = np.mean(run['pct_lost_datapoints'])

    score1 = 0
    if 1 - mean_kept_powerline <= epsilon:
        score1 = mean_lost_datapoints
    scores1.append(score1)

    score2 = alpha * mean_kept_powerline + (1 - alpha) * mean_lost_datapoints
    scores2.append(score2)

    pctplrem = 1 - mean_kept_powerline
    maxplrem = 1 - np.min(run['pct_kept_powerline'])
    pctdprem = mean_lost_datapoints

    # One table row: bold run number, five 8-decimal metrics, three parameters.
    row_cells = [r"\textbf{" + str(iteration + 1) + "}"]
    row_cells += ["{:.8f}".format(value)
                  for value in (score1, score2, pctplrem, maxplrem, pctdprem)]
    row_cells += [str(params['nb_neighbors']),
                  str(params['std_ratio']),
                  str(params['voxel_size'])]
    table_rows.append(" & ".join(row_cells) + "\\\\\n")

# Body of the table, consumed by the cell that prints the full LaTeX code.
middle = "".join(table_rows)

print(np.max(scores1))
print(np.max(scores2))

6.869793025182277e-05
0.9989990014358742


In [5]:
# Emit the complete LaTeX table: header, generated rows, footer.
print("".join([LatexCodePre, middle, LatexCodePost]))


\begin{table}[H]
    {\tiny\tabcolsep=2pt
    \begin{adjustbox}{width=1.2\linewidth,center}
    \begin{tabular}{cllllllll}
    \multicolumn{1}{l}{\textbf{}} &
      \multicolumn{1}{c}{\textbf{Score1}} &
      \multicolumn{1}{c}{\textbf{Score2}} &
      \multicolumn{1}{c}{\textbf{Pct PL Rem}} &
      \multicolumn{1}{c}{\textbf{Max PL Rem}} &
      \multicolumn{1}{c}{\textbf{Pct DP Rem}} &
      \multicolumn{1}{c}{\textbf{NB Neighbors}} &
      \multicolumn{1}{c}{\textbf{Standard Ratio}} &
      \multicolumn{1}{c}{\textbf{Voxel Size}}\\
      \textbf{1} & 0.00004302 & 0.99899796 & 0.00000209 & 0.00003129 & 0.00004302 & 13 & 37 & 0.6\\
\textbf{2} & 0.00004221 & 0.99899796 & 0.00000209 & 0.00003129 & 0.00004221 & 9 & 47 & 0.6\\
\textbf{3} & 0.00004504 & 0.99898979 & 0.00001027 & 0.00005914 & 0.00004504 & 13 & 27 & 0.3\\
\textbf{4} & 0.00004368 & 0.99899509 & 0.00000496 & 0.00004254 & 0.00004368 & 7 & 31 & 0.5\\
\textbf{5} & 0.00004262 & 0.99898979 & 0.00001027 & 0.00005914 & 0.00004262 & 

In [20]:
# One DataFrame per distinct parameter string, in order of first appearance.
runs = [df[df['params'] == param_str] for param_str in df['params'].unique()]

# Score 1: among runs whose mean removed-powerline fraction is <= epsilon,
# take the one with the largest mean fraction of lost datapoints.
epsilon = 0.0001
best_score_1, best_run_1 = 0, None

# Score 2: weighted sum of mean kept powerline (alpha) and mean lost
# datapoints (1 - alpha); take the run with the largest value.
alpha = 0.999
best_score_2, best_run_2 = 0, None

for run in runs:
    mean_kept_powerline = np.mean(run['pct_kept_powerline'])
    mean_lost_datapoints = np.mean(run['pct_lost_datapoints'])

    # A candidate only replaces the current best on a strictly larger score.
    if 1 - mean_kept_powerline <= epsilon and mean_lost_datapoints > best_score_1:
        best_run_1, best_score_1 = run, mean_lost_datapoints

    weighted_score = alpha * mean_kept_powerline + (1 - alpha) * mean_lost_datapoints
    if weighted_score > best_score_2:
        best_run_2, best_score_2 = run, weighted_score

In [41]:
# Print the same three summary statistics for the winner of each score,
# separated by one blank line.
for position, (title, winning_run) in enumerate([("score 1:", best_run_1),
                                                 ("score 2:", best_run_2)]):
    if position > 0:
        print()
    print(title)
    print("Minimum Kept Powerline: ", np.min(winning_run['pct_kept_powerline']))
    print("Avg Kept Powerline: ", np.mean(winning_run['pct_kept_powerline']))
    print("Avg Data reduction: ", np.mean(winning_run['pct_lost_datapoints']))

# Cell result: the raw parameter string of the score-2 winner.
best_run_2.params.iloc[0]

score 1:
Minimum Kept Powerline:  0.999885753455958
Avg Kept Powerline:  0.9999736301894081
Avg Data reduction:  6.869793025182277e-05

score 2:
Minimum Kept Powerline:  0.9999843544652356
Avg Kept Powerline:  0.999998956964349
Avg Data reduction:  4.342848942103424e-05


"{'nb_neighbors': 20, 'path': '/home/nxw500/data/', 'std_ratio': 34, 'voxel_size': 0.7}"

In [8]:
# The params column holds dictionary text with single quotes; replace them
# with double quotes so json.loads can decode it.
best_params1 = json.loads(best_run_1.iloc[0].params.replace("'", '"'))
print(best_params1)

best_params2 = json.loads(best_run_2.iloc[0].params.replace("'", '"'))
print(best_params2)

{'nb_neighbors': 5, 'path': '/home/nxw500/data/', 'std_ratio': 15, 'voxel_size': 0.1}
{'nb_neighbors': 20, 'path': '/home/nxw500/data/', 'std_ratio': 34, 'voxel_size': 0.7}


In [9]:
def GetPathRelations(full_path_to_data):
    """Collect the ground-removed image paths and the laz point cloud paths.

    Both lists are sorted, and entry ``i`` of one list is meant to be paired
    with entry ``i`` of the other. Only the list lengths are compared; the
    file names themselves are not checked against each other.

    Parameters
    ----------
    full_path_to_data : str
        Directory that contains ``data/ImagesGroundRemovedSmall`` and
        ``data/LazFilesWithHeightParam``. A trailing path separator is
        optional.

    Returns
    -------
    tuple of (list of str, list of str)
        Sorted image paths and sorted laz paths.

    Raises
    ------
    AssertionError
        If the two directories contain a different number of entries.
    """
    # os.path.join instead of string concatenation: a root given without a
    # trailing slash used to produce ".../TestDatadata/..." and match nothing.
    data_dir = os.path.join(full_path_to_data, 'data')

    # Find full path to all images
    ground_removed_image_paths = sorted(
        glob.glob(os.path.join(data_dir, 'ImagesGroundRemovedSmall', '*')))

    # Find full path to all laz files
    laz_point_cloud_paths = sorted(
        glob.glob(os.path.join(data_dir, 'LazFilesWithHeightParam', '*')))

    assert len(ground_removed_image_paths) == len(laz_point_cloud_paths), (
        "Found {} images but {} laz files under {!r}".format(
            len(ground_removed_image_paths), len(laz_point_cloud_paths), data_dir))
    return ground_removed_image_paths, laz_point_cloud_paths

# Build (image path, laz path) tuples from the two sorted path lists.
all_path_relations = GetPathRelations("/home/frederik/data/TestData/")
path_tuples = list(zip(*all_path_relations))

# Run the filter with the parameters of the score-1 winner.
best_params = best_params1
for path_img, path_laz in path_tuples[10:11]:
    tmp_las = laspy.read(path_laz, laz_backend=laspy.compression.LazBackend.LazrsParallel)

    # Rows are points, columns are the X, Y and Z dimensions of the file.
    # NOTE(review): this reads the upper-case .X/.Y/.Z attributes rather than
    # the lower-case .x/.y/.z ones - confirm that matches the units in which
    # voxel_size was tuned.
    point_data = np.vstack((tmp_las.X, tmp_las.Y, tmp_las.Z)).T

    # Create o3d Point Cloud
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(point_data)

    # Downsample first, then apply the statistical outlier filter to the result.
    voxel_down_pcd = pcd.voxel_down_sample(voxel_size=best_params['voxel_size'])
    new_pcd, ind = voxel_down_pcd.remove_statistical_outlier(
        nb_neighbors=best_params['nb_neighbors'],
        std_ratio=best_params['std_ratio'],
    )

4095


In [18]:
# Visualize `pcd`, the point cloud built from the laz file's X/Y/Z arrays before any filtering.
o3d.visualization.draw_geometries([pcd])

In [19]:
# Visualize `new_pcd`, the cloud returned by remove_statistical_outlier on the voxel-downsampled points.
o3d.visualization.draw_geometries([new_pcd])

In [42]:
# Show the (image path, laz path) tuple selected by the same 10:11 slice used in the processing loop.
path_tuples[10:11]

[('/home/frederik/data/TestData/data/ImagesGroundRemovedSmall/PUNKTSKY_00005_1km_6205_513_max.tif',
  '/home/frederik/data/TestData/data/LazFilesWithHeightParam/PUNKTSKY_00005_1km_6205_513_hag_nn.laz')]