## Import modules

In [1]:
import numpy as np
import open3d as o3d
from sklearn.ensemble import RandomForestClassifier
import pdal
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from plyfile import PlyData, PlyElement
import gc
from itertools import chain
from sklearn.metrics import confusion_matrix, classification_report
import os
import joblib # Use to save model
from datetime import datetime, timezone
# from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from glob import glob
# from sklearn.model_selection import GridSearchCV
sns.set()

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
# ROOT_PLY = """/home/sspiegel/CapstoneData/Paris/Toronto_3D/L001.ply"""

## Load in features

In [3]:
# Feature archives for the test split, one per neighbourhood scale.
# The r_* / grid_* parts of each file name presumably encode the search radius
# and the grid size used when the features were computed -- TODO confirm.
_FEATURE_DIR = "/home/sspiegel/CapstoneData/Paris/training_10_classes/pickleFiles/cylinder/testing"

ROOT = f"{_FEATURE_DIR}/r_0_1_grid_0_02_features.npz"
ROOT2 = f"{_FEATURE_DIR}/r_0_2_grid_0_04_features.npz"
ROOT3 = f"{_FEATURE_DIR}/r_0_4_grid_0_08_features.npz"
ROOT4 = f"{_FEATURE_DIR}/r_0_8_grid_0_16_features.npz"
ROOT5 = f"{_FEATURE_DIR}/r_1_6_grid_0_32_features.npz"
ROOT6 = f"{_FEATURE_DIR}/r_3_2_grid_0_64_features.npz"


In [4]:
# Order matters: the position of a path in this list decides both where its
# features land in the stacked matrix and which `_radius{i}` suffix they get.
fileList = [
    ROOT,
    ROOT2,
    ROOT3,
    ROOT4,
    ROOT5,
    ROOT6,
]

## Get computed features

In [5]:
# Read every feature archive exactly once and close it as soon as its arrays
# have been copied into memory. `np.load` on an .npz file returns an NpzFile
# that keeps the underlying file open until it is closed.
feature_blocks = []
for position, path in enumerate(fileList):
    with np.load(path) as archive:
        # "array2" holds the features computed at this scale.
        feature_blocks.append(archive["array2"])
        if position == 0:
            # Coordinates and labels are taken from the first archive only.
            # Assumes every archive describes the same points in the same
            # order -- TODO confirm.
            xyz = archive["array1"]
            cls = archive["array3"]

# Place the per-scale feature blocks side by side (one group of columns per file).
das = np.hstack(feature_blocks)

# Release the per-file arrays; only the stacked copy is needed from here on.
del feature_blocks

## Load in points and labels

## Combine road markings with ground points (currently disabled: the cells below are commented out)

In [6]:
# cls[cls==2] = 1

In [7]:
# cls[cls > 1] -= 1

## Get columns

In [8]:
# Base names of the features stored for each scale, in column order.
# They are suffixed with `_radius{i}` further down to label every scale's block.
cols = [
    "EigenSum",
    "omnivariance",
    "entropy",
    "linearity",
    "planarity",
    "sphericity",
    "curvature",
    "verticality1",
    "verticality2",
    "count",
]

In [9]:
# cols1 = [f"""{a}_radius1""" for a in cols]
# cols2 = [f"""{a}_radius2""" for a in cols]
# Feature column names for all scales: every base name in `cols` is repeated
# once per feature file, suffixed with the 1-based position of that file.
ff = []
for i in range(1, len(fileList) + 1):
    ff.extend(f"{a}_radius{i}" for a in cols)
    

# allCols = cols1 + cols2
    
    
    

In [10]:
# Full column layout of the stacked matrix: coordinates, features, then label.
allCols = ["X", "Y", "Z", *ff, "label"]

In [11]:
# Join coordinates, features and labels column-wise into one matrix.
# The labels are turned into a single column first so they can sit next to
# the 2-D blocks; they are cast back to int once the DataFrame is built.
allAtrs = np.concatenate((xyz, das, cls.reshape(-1, 1)), axis=1)

In [12]:
# Everything lives in `allAtrs` now; drop the separate arrays to free memory.
del xyz
del das
del cls

## Create dataframe

In [13]:
# Wrap the stacked matrix in a labelled frame.
total_dataframe = pd.DataFrame(data=allAtrs, columns=allCols)

# Restore integer dtype for the label and for each scale's `count` column.
integer_columns = ["label"] + [f"count_radius{i}" for i in range(1, len(fileList) + 1)]
for name in integer_columns:
    total_dataframe[name] = total_dataframe[name].astype(int)
# total_dataframe["count_radius1"] = total_dataframe["count_radius1"].astype(int)
# total_dataframe["count_radius2"] = total_dataframe["count_radius2"].astype(int)

# total_dataframe["labelName"] = total_dataframe["label"].apply(labelPoints)

In [14]:
# total_dataframe = total_dataframe.query("label != 0")
# total_dataframe = total_dataframe.copy()

In [15]:
# Force a garbage-collection pass after the large arrays were deleted above.
# As the last expression of the cell, the return value of collect() is displayed.
gc.collect()

88

In [16]:
# ss = ss[ss["count"] > 10]

In [17]:
# Sanity check: the largest class id present in the ground-truth labels.
total_dataframe["label"].max()

np.int64(9)

## Create PLY files with features (Only do if they don't already exist)

In [18]:
# for i in range(1, 7):
#     ls = [col for col in list(total_dataframe) if col.endswith(f"""radius{i}""")]
#     ls = ['X','Y','Z'] + ls + ['label']
#     partial_df = total_dataframe[ls]

#     tpsOut = []
#     for idx, tpe in partial_df.dtypes.to_dict().items():
#         if tpe == 'int64':
#             tpsOut.append((idx, 'i4'))
#         elif tpe == 'float64':
#             tpsOut.append((idx, 'f8'))
            
#     vertex_data = np.empty(allAtrs.shape[0], dtype=tpsOut)
    
#     for t in tpsOut:
#         vertex_data[t[0]] = partial_df[t[0]].values
    
        
#     el = PlyElement.describe(vertex_data, 'vertex')
    
#     # Create a PlyData object and write to a PLY file
#     # Set text=True for ASCII PLY, or text=False for binary PLY
#     PlyData([el], text=False).write(f"""/home/sspiegel/CapstoneData/Paris/Toronto_3D/PC_with_features/L002_features_radius{i}.ply""")

In [19]:
# Load the previously trained classifier from disk.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model files from a trusted source.
# NOTE(review): the path is absolute and machine-specific; consider a
# configurable model directory.
rf = joblib.load("""/home/sspiegel/CapstoneData/Paris/RF_models/2025_11_15_Trained_cylinder_RF_Lille.joblib""")

In [20]:
# Materialise the row index as an `index` column, exactly once.
# The guard makes this cell safe to re-run: an unconditional reset_index would
# add a `level_0` column on the second run and raise ValueError on the third.
# Rebinding instead of `inplace=True` keeps the step explicit.
if "index" not in total_dataframe.columns:
    total_dataframe = total_dataframe.reset_index()

In [21]:
# (rows, columns) of the evaluation frame; columns = X/Y/Z + features + label
# + the `index` column added by reset_index.
total_dataframe.shape

(21222580, 65)

In [22]:
# Model inputs: only the multi-scale feature columns, in the order listed in `ff`.
X = total_dataframe.loc[:, ff]

In [23]:
# Ground-truth class ids used as the evaluation target.
y = total_dataframe.loc[:, "label"]

# Smallest class id present in the labels.
print(y.min())

# yPred = rf.predict(X)

1


In [24]:
# Predict a class id for every point with the loaded model.
# Assumes the columns of X are in the same order as the features the model
# was trained on -- TODO confirm against the training notebook.
yPred = rf.predict(X)

[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:   24.7s
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:  1.1min finished


In [25]:
# Overall accuracy: fraction of points whose predicted class equals the label.
(yPred == y).mean()

np.float64(0.8999185772889065)

In [26]:
# Row-normalised confusion matrix: each row is one true class and sums to 1,
# so the diagonal is the per-class recall.
# No `labels` argument is given, so rows and columns follow the sorted class
# ids that occur in `y` or `yPred`.
cmNorm = confusion_matrix(y_true=y, y_pred=yPred, normalize="true")

In [27]:
# Human-readable class names, used as row/column labels for the result tables.
# Assumes they are listed in ascending class-id order -- TODO confirm.
indx = [
    "ground",
    "building",
    "signage",
    "bollard",
    "trash can",
    "barrier",
    "pedestrian",
    "car",
    "vegetation",
]

In [28]:
# Label the confusion matrix: rows are true classes, columns are predicted classes.
cmDF = pd.DataFrame(data=cmNorm, index=indx, columns=indx)

In [36]:
# Display the labelled, row-normalised confusion matrix.
cmDF

Unnamed: 0,ground,building,signage,bollard,trash can,barrier,pedestrian,car,vegetation
ground,0.971419,0.001115,0.000304,6.5e-05,0.002395,0.000736,1.6e-05,0.017175,0.006776
building,0.007795,0.804939,0.006142,2.4e-05,0.005692,0.124273,0.000213,0.003704,0.047218
signage,0.0003,0.19584,0.707068,0.0001,0.001738,0.027314,0.000173,0.005214,0.062253
bollard,0.107427,0.0,0.806385,0.052754,0.009592,0.002878,0.0,0.004796,0.016169
trash can,0.067023,0.009242,0.021504,0.005333,0.430064,0.140139,0.002106,0.138905,0.185684
barrier,0.000292,0.068919,0.094111,5.5e-05,0.045514,0.450801,0.001131,0.033511,0.305666
pedestrian,0.002582,0.000979,0.125167,0.0,0.009526,0.08475,0.091961,0.110211,0.574824
car,0.006451,0.000648,0.000474,0.0,0.009179,0.03689,0.000337,0.900798,0.045223
vegetation,0.062854,0.005395,0.011702,0.00085,0.018066,0.018927,0.002815,0.048085,0.831306


In [37]:
# Markdown-table text of the confusion matrix, rounded to two decimals.
# As a bare last expression this shows the string's repr (with literal "\n");
# wrap it in print() to get a paste-ready table.
cmDF.round(2).to_markdown()

'|            |   ground |   building |   signage |   bollard |   trash can |   barrier |   pedestrian |   car |   vegetation |\n|:-----------|---------:|-----------:|----------:|----------:|------------:|----------:|-------------:|------:|-------------:|\n| ground     |     0.97 |       0    |      0    |      0    |        0    |      0    |         0    |  0.02 |         0.01 |\n| building   |     0.01 |       0.8  |      0.01 |      0    |        0.01 |      0.12 |         0    |  0    |         0.05 |\n| signage    |     0    |       0.2  |      0.71 |      0    |        0    |      0.03 |         0    |  0.01 |         0.06 |\n| bollard    |     0.11 |       0    |      0.81 |      0.05 |        0.01 |      0    |         0    |  0    |         0.02 |\n| trash can  |     0.07 |       0.01 |      0.02 |      0.01 |        0.43 |      0.14 |         0    |  0.14 |         0.19 |\n| barrier    |     0    |       0.07 |      0.09 |      0    |        0.05 |      0.45 |         0    |

In [38]:
# Plain-text classification report.
# NOTE(review): `rep` is not referenced again in the visible cells, and the
# report is recomputed as a dict right below -- this call may be redundant.
rep = classification_report(y_true=y, y_pred=yPred)

In [42]:
# Per-class precision / recall / F1 / support as a nested dict keyed by the
# names in `indx`, plus the aggregate entries the report adds.
report_dict = classification_report(
    y_true=y,
    y_pred=yPred,
    target_names=indx,
    output_dict=True,
)

# One row per report entry, one column per metric.
df_report = pd.DataFrame(report_dict).T

# Show the table rounded to two decimals; the stored frame keeps full precision.
df_report.round(2)

Unnamed: 0,precision,recall,f1-score,support
ground,0.99,0.97,0.98,12042099.0
building,0.99,0.8,0.89,7193259.0
signage,0.51,0.71,0.59,109906.0
bollard,0.14,0.05,0.08,7298.0
trash can,0.34,0.43,0.38,115885.0
barrier,0.02,0.45,0.05,54818.0
pedestrian,0.17,0.09,0.12,11233.0
car,0.7,0.9,0.79,770451.0
vegetation,0.6,0.83,0.7,917631.0
accuracy,0.9,0.9,0.9,0.9


In [44]:
# Persist the evaluation tables. `to_csv` does not create missing parent
# directories, so make sure the output folder exists before writing.
os.makedirs("./results", exist_ok=True)

# NOTE(review): another cell rebinds `df_report` to a rounded, three-column
# view. Whichever version is bound when this cell runs is what gets written,
# so run this cell first if the CSV should keep `support` and full precision.
cmDF.to_csv("./results/results_ConfusionMatrix.csv")
df_report.to_csv("./results/results_PrecisionReport.csv")

In [43]:
# Keep only the three score columns and round them for presentation.
# Note that this rebinds `df_report`, so the `support` column and the
# unrounded values are no longer available after this cell.
metric_columns = ["precision", "recall", "f1-score"]
df_report = df_report.loc[:, metric_columns].round(2)


In [41]:
# Markdown-table text of the report; shown as a string repr when left as the
# last expression (use print() for a paste-ready table).
df_report.to_markdown()

'|              |   precision |   recall |   f1-score |\n|:-------------|------------:|---------:|-----------:|\n| ground       |        0.99 |     0.97 |       0.98 |\n| building     |        0.99 |     0.8  |       0.89 |\n| signage      |        0.51 |     0.71 |       0.59 |\n| bollard      |        0.14 |     0.05 |       0.08 |\n| trash can    |        0.34 |     0.43 |       0.38 |\n| barrier      |        0.02 |     0.45 |       0.05 |\n| pedestrian   |        0.17 |     0.09 |       0.12 |\n| car          |        0.7  |     0.9  |       0.79 |\n| vegetation   |        0.6  |     0.83 |       0.7  |\n| accuracy     |        0.9  |     0.9  |       0.9  |\n| macro avg    |        0.5  |     0.58 |       0.51 |\n| weighted avg |        0.95 |     0.9  |       0.92 |'

In [35]:
# Display the classification report table.
df_report

Unnamed: 0,precision,recall,f1-score
ground,0.99,0.97,0.98
building,0.99,0.8,0.89
signage,0.51,0.71,0.59
bollard,0.14,0.05,0.08
trash can,0.34,0.43,0.38
barrier,0.02,0.45,0.05
pedestrian,0.17,0.09,0.12
car,0.7,0.9,0.79
vegetation,0.6,0.83,0.7
accuracy,0.9,0.9,0.9
