## Import modules

In [46]:
import numpy as np
import open3d as o3d
from sklearn.ensemble import RandomForestClassifier
import pdal
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from plyfile import PlyData, PlyElement
import gc
from itertools import chain
from sklearn.metrics import confusion_matrix, classification_report
import os
import joblib # Use to save model
from datetime import datetime, timezone
# from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from glob import glob
import sys
from plyfile import PlyElement, PlyData
# from sklearn.model_selection import GridSearchCV
sns.set()

In [2]:
# ROOT_PLY = """/home/sspiegel/CapstoneData/Paris/Toronto_3D/L001.ply"""

## Load in features

In [3]:
# All six feature archives live in the same directory; only the file name
# (timestamp plus the r_* / grid_* setting) differs between them.
FEATURE_DIR = """/home/sspiegel/CapstoneData/Paris/training_10_classes/pickleFiles/radial/testing"""

ROOT = f"""{FEATURE_DIR}/2025_12_02T03_59_Lille2_r_0_1_grid_0_02_features.npz"""
ROOT2 = f"""{FEATURE_DIR}/2025_12_02T05_06_Lille2_r_0_2_grid_0_04_features.npz"""
ROOT3 = f"""{FEATURE_DIR}/2025_12_02T05_44_Lille2_r_0_4_grid_0_08_features.npz"""
ROOT4 = f"""{FEATURE_DIR}/2025_12_02T11_23_Lille2_r_0_8_grid_0_16_features.npz"""
ROOT5 = f"""{FEATURE_DIR}/2025_12_02T12_41_Lille2_r_1_6_grid_0_32_features.npz"""
ROOT6 = f"""{FEATURE_DIR}/2025_12_02T13_01_Lille2_r_3_2_grid_0_64_features.npz"""


In [4]:
# Feature archives in processing order. The order matters: the feature blocks
# are stacked in this order and the column suffix `_radius{i}` is the 1-based
# position of a file in this list.
fileList = [ROOT,ROOT2, ROOT3,ROOT4,ROOT5,ROOT6]

## Get computed features

In [5]:
# Read the "array2" matrix from every archive and place the matrices side by
# side, so the i-th column block comes from fileList[i].
# np.load on an .npz returns an NpzFile that keeps the archive open, so each
# archive is opened in a `with` block and closed as soon as its array is read.
feature_blocks = []
for path in fileList:
    with np.load(path) as archive:
        feature_blocks.append(archive["array2"])
das = np.hstack(feature_blocks)
del feature_blocks  # the blocks were copied into `das`

# "array1" and "array3" are taken from the first archive only (they become the
# X/Y/Z and label columns further down); the archive is opened once for both.
with np.load(fileList[0]) as archive:
    xyz = archive["array1"]
    cls = archive["array3"]

## Load in points and labels

## Define feature column names

In [6]:
# Base names of the features stored per file, in the column order of "array2"
# (assumed to match the order in which the features were written — TODO confirm).
cols = [
    "EigenSum",
    "omnivariance",
    "entropy",
    "linearity",
    "planarity",
    "sphericity",
    "curvature",
    "verticality1",
    "verticality2",
    "HeightVariance",
    "HeightRange",
    "count",
]

In [7]:
# cols1 = [f"""{a}_radius1""" for a in cols]
# cols2 = [f"""{a}_radius2""" for a in cols]
# Full list of feature column names: every base name in `cols` is repeated once
# per file and suffixed with that file's 1-based position in `fileList`.
ff = [
    f"""{feature}_radius{file_no}"""
    for file_no in range(1, len(fileList) + 1)
    for feature in cols
]
    

# allCols = cols1 + cols2
    
    
    

In [8]:
# Column layout of the combined table: coordinates, all features, then the label
allCols = ["X", "Y", "Z", *ff, "label"]

In [9]:
# Join coordinates, features and labels column-wise; the labels are turned into
# a single column first so they can be appended to the 2-D arrays.
allAtrs = np.concatenate((xyz, das, cls.reshape(-1, 1)), axis=1)

In [10]:
# The separate arrays are no longer needed once they are stacked into allAtrs;
# dropping the names lets their memory be reclaimed.
del xyz, das, cls

## Create dataframe

In [11]:
# Build the table as float32 throughout, then narrow the label column and every
# count_radius* column to int32 in a single cast.
int_columns = ["label"] + [f"""count_radius{n}""" for n in range(1, len(fileList) + 1)]
total_dataframe = (
    pd.DataFrame(allAtrs, columns=allCols)
    .astype('float32')
    .astype({name: 'int32' for name in int_columns})
)
# total_dataframe["count_radius1"] = total_dataframe["count_radius1"].astype(int)
# total_dataframe["count_radius2"] = total_dataframe["count_radius2"].astype(int)

# total_dataframe["labelName"] = total_dataframe["label"].apply(labelPoints)

In [12]:
# Check the column dtypes after the casts above
total_dataframe.dtypes

X                         float32
Y                         float32
Z                         float32
EigenSum_radius1          float32
omnivariance_radius1      float32
                           ...   
verticality2_radius6      float32
HeightVariance_radius6    float32
HeightRange_radius6       float32
count_radius6               int32
label                       int32
Length: 76, dtype: object

In [13]:
# total_dataframe = total_dataframe.query("label != 0")
# total_dataframe = total_dataframe.copy()

In [14]:
# Run a garbage-collection pass now; the displayed value is gc.collect()'s return value
gc.collect()

0

In [15]:
# ss = ss[ss["count"] > 10]

In [16]:
# Largest label id present in the table
total_dataframe["label"].max()

np.int32(9)

## Create PLY files with features (Only do if they don't already exist)

In [17]:
# for i in range(1, 7):
#     ls = [col for col in list(total_dataframe) if col.endswith(f"""radius{i}""")]
#     ls = ['X','Y','Z'] + ls + ['label']
#     partial_df = total_dataframe[ls]

#     tpsOut = []
#     for idx, tpe in partial_df.dtypes.to_dict().items():
#         if tpe == 'int64':
#             tpsOut.append((idx, 'i4'))
#         elif tpe == 'float64':
#             tpsOut.append((idx, 'f8'))
            
#     vertex_data = np.empty(allAtrs.shape[0], dtype=tpsOut)
    
#     for t in tpsOut:
#         vertex_data[t[0]] = partial_df[t[0]].values
    
        
#     el = PlyElement.describe(vertex_data, 'vertex')
    
#     # Create a PlyData object and write to a PLY file
#     # Set text=True for ASCII PLY, or text=False for binary PLY
#     PlyData([el], text=False).write(f"""/home/sspiegel/CapstoneData/Paris/Toronto_3D/PC_with_features/L002_features_radius{i}.ply""")

In [18]:
# Load the previously trained model from disk.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load model files from a trusted location.
rf = joblib.load("""/home/sspiegel/CapstoneData/Paris/RF_models/2025_12_02_Trained_radial_RF_Lille_Height.joblib""")

In [19]:
# Move the current row index into a regular column before any rows are
# filtered out, and give the frame a fresh 0..n-1 index.
total_dataframe = total_dataframe.reset_index(drop=False)

In [20]:
# Keep only the rows whose label is not 0
has_class = total_dataframe["label"] != 0
total_dataframe = total_dataframe.loc[has_class]

In [21]:
# Model input: only the feature columns listed in `ff`
X = total_dataframe.loc[:, ff]

In [22]:
# Reference labels; the printed minimum shows the smallest label id that remains
y = total_dataframe.loc[:, "label"]
print(y.min())

# yPred = rf.predict(X)

1


In [23]:
# Predict a label for every row of X with the loaded model
yPred = rf.predict(X)

[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:   13.1s
[Parallel(n_jobs=16)]: Done 100 out of 100 | elapsed:   45.8s finished


In [24]:
# Overall accuracy: share of rows whose predicted label equals the reference label
(yPred == y).mean()

np.float64(0.9173142944919986)

In [25]:
# Confusion matrix normalised over the true labels, i.e. every row sums to 1
cmNorm = confusion_matrix(y_true=y, y_pred=yPred, normalize="true")

In [26]:
# Display names used for the rows/columns of the confusion matrix and report.
# Presumably ordered by ascending label id (1..9) — verify against the label scheme.
indx = [
    "ground",
    "building",
    "signage",
    "bollard",
    "trash can",
    "barrier",
    "pedestrian",
    "car",
    "vegetation",
]

In [27]:
# Label the matrix: rows are the true classes, columns the predicted classes
cmDF = pd.DataFrame(data=cmNorm, index=indx, columns=indx)

In [28]:
# Show the labelled, row-normalised confusion matrix
cmDF

Unnamed: 0,ground,building,signage,bollard,trash can,barrier,pedestrian,car,vegetation
ground,0.982115,0.00106,0.000417,0.0001140997,0.001693,0.001063,5.3e-05,0.007562,0.005921
building,0.005243,0.837275,0.00419,2.780381e-07,0.003188,0.102781,0.000218,0.004569,0.042535
signage,0.000164,0.241952,0.691773,0.0,0.00263,0.011273,0.000164,0.000136,0.051908
bollard,0.157303,0.002055,0.660455,0.09290216,0.002466,0.036722,0.001096,0.01288,0.034119
trash can,0.047651,0.002934,0.032627,0.02524054,0.481909,0.152919,0.018889,0.090624,0.147206
barrier,0.000821,0.038418,0.135193,0.0,0.014977,0.495786,5.5e-05,0.017914,0.296837
pedestrian,0.001157,0.001424,0.113683,0.0,0.013888,0.115107,0.136473,0.061604,0.556663
car,0.004135,0.000561,0.000524,9.085588e-06,0.007119,0.035946,0.000314,0.905358,0.046033
vegetation,0.069461,0.006654,0.019684,0.001106109,0.020365,0.026983,0.000446,0.027609,0.827691


In [29]:
# Markdown rendering of the matrix rounded to two decimals; as a bare
# expression this is displayed as the string's repr (with literal \n).
cmDF.round(2).to_markdown()

'|            |   ground |   building |   signage |   bollard |   trash can |   barrier |   pedestrian |   car |   vegetation |\n|:-----------|---------:|-----------:|----------:|----------:|------------:|----------:|-------------:|------:|-------------:|\n| ground     |     0.98 |       0    |      0    |      0    |        0    |      0    |         0    |  0.01 |         0.01 |\n| building   |     0.01 |       0.84 |      0    |      0    |        0    |      0.1  |         0    |  0    |         0.04 |\n| signage    |     0    |       0.24 |      0.69 |      0    |        0    |      0.01 |         0    |  0    |         0.05 |\n| bollard    |     0.16 |       0    |      0.66 |      0.09 |        0    |      0.04 |         0    |  0.01 |         0.03 |\n| trash can  |     0.05 |       0    |      0.03 |      0.03 |        0.48 |      0.15 |         0.02 |  0.09 |         0.15 |\n| barrier    |     0    |       0.04 |      0.14 |      0    |        0.01 |      0.5  |         0    |

In [30]:
# Classification report without target_names or output_dict.
# NOTE(review): `rep` is not used in the cells shown here — confirm it is still needed.
rep = classification_report(y, yPred)

In [31]:
# Per-class precision / recall / F1 / support as a dictionary keyed by class name
report_dict = classification_report(
    y_true=y,
    y_pred=yPred,
    target_names=indx,
    output_dict=True,
)

# One row per class (plus the summary rows), one column per metric
df_report = pd.DataFrame(report_dict).T

# Show the report rounded to two decimals
df_report.round(2)

Unnamed: 0,precision,recall,f1-score,support
ground,0.99,0.98,0.99,12042099.0
building,0.99,0.84,0.91,7193259.0
signage,0.52,0.69,0.59,109906.0
bollard,0.11,0.09,0.1,7298.0
trash can,0.45,0.48,0.46,115885.0
barrier,0.03,0.5,0.06,54818.0
pedestrian,0.23,0.14,0.17,11233.0
car,0.81,0.91,0.86,770451.0
vegetation,0.62,0.83,0.71,917631.0
accuracy,0.92,0.92,0.92,0.92


In [32]:
# Save the confusion matrix, rounded to two decimals, as CSV.
# to_csv does not create missing folders, so make sure ./results exists first.
os.makedirs("./results", exist_ok=True)
cmDF = cmDF.round(2)
cmDF.to_csv("./results/results_ConfusionMatrix.csv")


In [33]:
# Keep only the three metric columns (support is dropped), round to two
# decimals and save as CSV.
# to_csv does not create missing folders, so make sure ./results exists first.
os.makedirs("./results", exist_ok=True)
df_report = df_report[["precision","recall","f1-score"]].round(2)
df_report.to_csv("./results/results_PrecisionReport.csv")

In [34]:
# Markdown rendering of the metric table, displayed as the string's repr
df_report.to_markdown()

'|              |   precision |   recall |   f1-score |\n|:-------------|------------:|---------:|-----------:|\n| ground       |        0.99 |     0.98 |       0.99 |\n| building     |        0.99 |     0.84 |       0.91 |\n| signage      |        0.52 |     0.69 |       0.59 |\n| bollard      |        0.11 |     0.09 |       0.1  |\n| trash can    |        0.45 |     0.48 |       0.46 |\n| barrier      |        0.03 |     0.5  |       0.06 |\n| pedestrian   |        0.23 |     0.14 |       0.17 |\n| car          |        0.81 |     0.91 |       0.86 |\n| vegetation   |        0.62 |     0.83 |       0.71 |\n| accuracy     |        0.92 |     0.92 |       0.92 |\n| macro avg    |        0.53 |     0.61 |       0.54 |\n| weighted avg |        0.96 |     0.92 |       0.94 |'

In [35]:
# Show the metric table (precision, recall, f1-score)
df_report

Unnamed: 0,precision,recall,f1-score
ground,0.99,0.98,0.99
building,0.99,0.84,0.91
signage,0.52,0.69,0.59
bollard,0.11,0.09,0.1
trash can,0.45,0.48,0.46
barrier,0.03,0.5,0.06
pedestrian,0.23,0.14,0.17
car,0.81,0.91,0.86
vegetation,0.62,0.83,0.71
accuracy,0.92,0.92,0.92


## Write the predicted labels to a PLY point cloud

In [39]:
# Number of rows and columns of the table at this point
total_dataframe.shape

(21222580, 77)

In [47]:
# Export frame: the point coordinates plus the label predicted for each point.
# `assign` returns a new frame, so no column is written into a slice of
# total_dataframe (the pattern behind pandas' SettingWithCopyWarning).
# The explicit int32 cast guarantees that the label column has a dtype the
# mapping below knows, whatever integer width rf.predict returned.
outs = total_dataframe[["X", "Y", "Z"]].assign(
    predicted_label=np.asarray(yPred).astype("int32")
)
outPly = """/home/sspiegel/CapstoneData/Paris/training_10_classes/pickleFiles/radial/testing/Point_Features/Lille2_pred.ply"""

# pandas dtype name -> numpy type code used for the structured vertex array
plyCodes = {"int32": "i4", "float32": "f4"}

tpsOut = []
for idx, tpe in outs.dtypes.items():
    code = plyCodes.get(str(tpe))
    if code is None:
        # A column with an unmapped dtype used to be skipped without notice and
        # was then simply missing from the written file; fail loudly instead.
        raise TypeError(f"column {idx!r} has dtype {tpe}, which has no PLY type code here")
    tpsOut.append((idx, code))

# One structured record per point, one field per exported column
vertex_data = np.empty(outs.shape[0], dtype=tpsOut)
for column_name, _type_code in tpsOut:
    vertex_data[column_name] = outs[column_name].to_numpy()

el = PlyElement.describe(vertex_data, 'vertex')

# text=False writes a binary PLY (text=True would write ASCII)
PlyData([el], text=False).write(outPly)