In [14]:
import ast
import csv
import glob
import json
import os
import re

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

import cv2

import skimage.measure

import rasterio
from rasterio.features import shapes

import matplotlib.patches as mpatches
from shapely.geometry import Point, Polygon, shape, mapping
import shapely
import geopandas as gpd

from matplotlib.path import Path
import laspy
import open3d as o3d

from skopt import BayesSearchCV
from sklearn.neighbors import LocalOutlierFactor


In [15]:
# Folder that holds the experiment output (kept with its trailing slash because
# the path is built by plain string concatenation further down).
path_to_folder = "lof/"

# Name of the results log inside that folder.
results_name = "results_outliers_lof.txt"

In [16]:
# Layout of one line of the results log: eight comma-separated fields followed
# by the repr of the parameter dict used for that run.
cols = ["file_id",
        "filepath",
        "pct_kept_powerline",
        "pct_lost_datapoints",
        "total_amount_points",
        "total_amount_wire",
        "new_total_amount_points",
        "lost_wire",
        "params"]

# Target dtype for every numeric column; "filepath" and "params" stay strings.
col_dtypes = {
    "file_id": int,
    "pct_kept_powerline": float,
    "pct_lost_datapoints": float,
    "total_amount_points": int,
    "total_amount_wire": int,
    "new_total_amount_points": int,
    "lost_wire": int,
}

with open(path_to_folder+results_name, 'r') as f:
    # Keep only non-blank lines. This handles files with or without a trailing
    # newline, so the last record is never dropped by accident.
    lines = [line for line in f.read().split('\n') if line.strip()]

# Find the dictionary
# Greedy match from the first "{" to the last "}" on the line. This relies on
# the leading fields (notably the file path) containing neither braces nor commas.
param_pattern = re.compile(r"\{.*\}")

results = []
for line in lines:
    match = param_pattern.search(line)
    if match is None:
        raise ValueError(
            "No parameter dict found in results line: {!r}".format(line))
    start, end = match.span()
    params = line[start:end]
    # start-1 skips the comma that separates the last field from the dict.
    data = line[:start-1].split(',')
    results.append(data+[params])

# Build the frame once and convert all numeric columns in a single pass.
df = pd.DataFrame(results, columns=cols).astype(col_dtypes)

In [17]:
# Show the raw parameter string stored on the row labelled 0.
df.loc[0, "params"]

"{'contamination': 0.05309688553104979, 'n_neighbors': 68, 'path': '/home/nxw500/data/'}"

In [22]:
# One DataFrame per distinct parameter string, in order of first appearance
# (groupby with sort=False keeps that order and the original row order).
runs = [tmpDF for _, tmpDF in df.groupby('params', sort=False)]

# Raw strings so that LaTeX backslashes are never read as Python escapes.
LatexCodePre = r"""
\begin{table}[H]
    {\tiny\tabcolsep=2pt
    \begin{adjustbox}{width=1.2\linewidth,center}
    \begin{tabular}{clllllll}
    \multicolumn{1}{l}{\textbf{}} &
      \multicolumn{1}{c}{\textbf{Score1}} &
      \multicolumn{1}{c}{\textbf{Score2}} &
      \multicolumn{1}{c}{\textbf{Pct PL Rem}} &
      \multicolumn{1}{c}{\textbf{Max PL Rem}} &
      \multicolumn{1}{c}{\textbf{Pct DP Rem}} &
      \multicolumn{1}{c}{\textbf{N Neighbors}} &
      \multicolumn{1}{c}{\textbf{Contamination}}\\
      """

LatexCodePost = r"""    
    \end{tabular}
    \end{adjustbox}}
    \caption{Caption}
    \label{tab:my_label}
\end{table}
"""

# Score1 only counts a run whose mean removed-powerline fraction is <= epsilon.
epsilon = 0.0001
# Score2 weights mean kept powerline by alpha and mean lost datapoints by 1-alpha.
alpha = 0.999

table_rows = []
scores1 = []
scores2 = []
for iteration, run in enumerate(runs):
    # The params column holds the repr of a Python dict, so parse it as a
    # Python literal rather than rewriting quotes to make it look like JSON.
    params = ast.literal_eval(run.iloc[0].params)

    # Per-run aggregates, computed once and reused below.
    mean_kept = np.mean(run['pct_kept_powerline'])
    mean_lost = np.mean(run['pct_lost_datapoints'])

    pctplrem = 1-mean_kept
    maxplrem = 1-np.min(run['pct_kept_powerline'])
    pctdprem = mean_lost

    # Score1: mean lost datapoints if the powerline constraint holds, else 0.
    score1 = 0
    if pctplrem <= epsilon:
        score1 = pctdprem
    scores1.append(score1)

    # Score2: weighted sum of mean kept powerline and mean lost datapoints.
    score2 = alpha * mean_kept + (1-alpha)*mean_lost
    scores2.append(score2)

    n_neighbors = params['n_neighbors']
    contamination = params['contamination']

    cells = [
        r"\textbf{"+str(iteration+1)+"}",
        "{:.8f}".format(score1),
        "{:.8f}".format(score2),
        "{:.8f}".format(pctplrem),
        "{:.8f}".format(maxplrem),
        "{:.8f}".format(pctdprem),
        str(n_neighbors),
        str(contamination),
    ]
    # Each table row ends with the LaTeX row terminator "\\" and a newline.
    table_rows.append(" & ".join(cells)+"\\\\\n")

# Body of the table, consumed together with LatexCodePre / LatexCodePost.
middle = "".join(table_rows)

if runs:
    print(np.max(scores1))
    print(np.max(scores2))
else:
    # np.max raises on an empty sequence; report the situation instead.
    print("No runs found in df; nothing to score.")

0
0.9950226086705853


In [23]:
# Emit the complete LaTeX table: header, generated rows, footer.
print(LatexCodePre, middle, LatexCodePost, sep="")


\begin{table}[H]
    {\tiny\tabcolsep=2pt
    \begin{adjustbox}{width=1.2\linewidth,center}
    \begin{tabular}{clllllll}
    \multicolumn{1}{l}{\textbf{}} &
      \multicolumn{1}{c}{\textbf{Score1}} &
      \multicolumn{1}{c}{\textbf{Score2}} &
      \multicolumn{1}{c}{\textbf{Pct PL Rem}} &
      \multicolumn{1}{c}{\textbf{Max PL Rem}} &
      \multicolumn{1}{c}{\textbf{Pct DP Rem}} &
      \multicolumn{1}{c}{\textbf{N Neighbors}} &
      \multicolumn{1}{c}{\textbf{Contamination}}\\
      \textbf{1} & 0.00000000 & 0.99502261 & 0.00398147 & 0.02829452 & 0.00010025 & 68 & 0.05309688553104979\\
\textbf{2} & 0.00000000 & 0.99502261 & 0.00398147 & 0.02829452 & 0.00010025 & 98 & 0.025837907825612842\\
\textbf{3} & 0.00000000 & 0.99502261 & 0.00398147 & 0.02829452 & 0.00010025 & 7 & 0.05286964551580302\\
\textbf{4} & 0.00000000 & 0.99502261 & 0.00398147 & 0.02829452 & 0.00010025 & 79 & 0.014742833775305223\\
\textbf{5} & 0.00000000 & 0.99502261 & 0.00398147 & 0.02829452 & 0.00010025 & 25 &

In [24]:
# One DataFrame per distinct parameter string, in order of first appearance
# (groupby with sort=False keeps that order and the original row order).
runs = [tmpDF for _, tmpDF in df.groupby('params', sort=False)]

# Criterion 1: among runs whose mean removed-powerline fraction is <= epsilon,
# pick the one with the highest mean pct_lost_datapoints.
epsilon = 0.01
best_score_1 = 0
best_run_1 = None

# Criterion 2: pick the run with the highest weighted sum
# alpha * mean(pct_kept_powerline) + (1-alpha) * mean(pct_lost_datapoints).
alpha = 0.95
best_score_2 = 0
best_run_2 = None

for run in runs:
    # Per-run aggregates, computed once and reused by both criteria.
    mean_kept = np.mean(run['pct_kept_powerline'])
    mean_lost = np.mean(run['pct_lost_datapoints'])

    if 1-mean_kept <= epsilon:
        tmp_score_1 = mean_lost
        # Accept the first qualifying run even when its score equals the
        # initial best score, so best_run_1 is never left as None while a
        # qualifying run exists. NaN scores are never selected.
        if not np.isnan(tmp_score_1) and (
                best_run_1 is None or tmp_score_1 > best_score_1):
            best_run_1 = run
            best_score_1 = tmp_score_1

    tmp_score_2 = alpha * mean_kept + (1-alpha)*mean_lost
    # Same rule as above: first valid run is accepted, later ones must beat it.
    if not np.isnan(tmp_score_2) and (
            best_run_2 is None or tmp_score_2 > best_score_2):
        best_run_2 = run
        best_score_2 = tmp_score_2

In [25]:
# Summary statistics of the run stored in best_run_1, followed by its score.
kept_powerline_1 = best_run_1['pct_kept_powerline']
lost_datapoints_1 = best_run_1['pct_lost_datapoints']

for label, value in (
    ("Minimum Kept Powerline: ", np.min(kept_powerline_1)),
    ("Avg Kept Powerline: ", np.mean(kept_powerline_1)),
    ("Avg Data reduction: ", np.mean(lost_datapoints_1)),
):
    print(label, value)

# Last expression of the cell, so the notebook displays it.
best_score_1

Minimum Kept Powerline:  0.9717054767789775
Avg Kept Powerline:  0.9960185269450784
Avg Data reduction:  0.00010025245202628744


0.00010025245202628744

In [26]:
# Summary statistics of the run stored in best_run_2, followed by its score.
kept_powerline_2 = best_run_2['pct_kept_powerline']
lost_datapoints_2 = best_run_2['pct_lost_datapoints']

for label, value in (
    ("Minimum Kept Powerline: ", np.min(kept_powerline_2)),
    ("Avg Kept Powerline: ", np.mean(kept_powerline_2)),
    ("Avg Data reduction: ", np.mean(lost_datapoints_2)),
):
    print(label, value)

# Last expression of the cell, so the notebook displays it.
best_score_2

Minimum Kept Powerline:  0.9717054767789775
Avg Kept Powerline:  0.9960185269450784
Avg Data reduction:  0.00010025245202628744


0.9462226132204258