## Import modules

In [1]:
import numpy as np
import open3d as o3d
from sklearn.ensemble import RandomForestClassifier
import pdal
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from plyfile import PlyData, PlyElement
import gc
from itertools import chain
from sklearn.metrics import confusion_matrix
import gc
import joblib # Use to load model
from datetime import datetime, timezone
sns.set()

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
# ROOT_PLY = """/home/sspiegel/CapstoneData/Paris/Toronto_3D/L001.ply"""

## Load in features

In [3]:
ROOT  = """/home/sspiegel/CapstoneData/Paris/Toronto_3D/pickleFiles/2025_10_29T20_05_L004_r_0_1_grid_0_02_features.npz"""
ROOT_2  = """/home/sspiegel/CapstoneData/Paris/Toronto_3D/pickleFiles/2025_10_29T21_33_L004_r_0_2_grid_0_04_features.npz"""
ROOT_3  = """/home/sspiegel/CapstoneData/Paris/Toronto_3D/pickleFiles/2025_10_29T23_36_L004_r_0_4_grid_0_08_features.npz"""
ROOT_4  = """/home/sspiegel/CapstoneData/Paris/Toronto_3D/pickleFiles/2025_10_30T00_42_L004_r_0_8_grid_0_16_features.npz"""
ROOT_5  = """/home/sspiegel/CapstoneData/Paris/Toronto_3D/pickleFiles/2025_10_30T01_07_L004_r_1_6_grid_0_32_features.npz"""
ROOT_6 = """/home/sspiegel/CapstoneData/Paris/Toronto_3D/pickleFiles/2025_10_30T01_17_L004_r_3_2_grid_0_64_features.npz"""
# ROOT_7 = """/home/sspiegel/CapstoneData/Paris/Toronto_3D/pickleFiles/2025_10_28T00_29_L001_r_6_4_grid_1_28_features.npz"""
# ROOT_8 = """/home/sspiegel/CapstoneData/Paris/Toronto_3D/pickleFiles/2025_10_28T01_37_L001_r_12_8_grid_2_56_features.npz"""


In [4]:
fileList = [ROOT,ROOT_2, ROOT_3,ROOT_4,ROOT_5, ROOT_6]

## Get computed features

In [5]:
das = [np.load(r)["array2"] for r in fileList]
das = np.hstack(das)

## Load in points and labels

In [6]:
xyz = np.load(fileList[0])["array1"]

cls = np.load(fileList[0])["array3"]

## Combine road markings with Ground points

In [7]:
cls[cls==2] = 1

In [8]:
cls[cls > 1] -= 1

## Get columns

In [9]:
cols = ["EigenSum","omnivariance","entropy","linearity","planarity","sphericity","curvature","verticality1","verticality2","count"]

In [10]:
# cols1 = [f"""{a}_radius1""" for a in cols]
# cols2 = [f"""{a}_radius2""" for a in cols]
ff = []

for i in range(1, len(fileList) + 1):
    col = [f"""{a}_radius{i}""" for a in cols]
    ff += col
    

# allCols = cols1 + cols2
    
    
    

In [11]:
allCols = ['X', 'Y','Z'] + ff + ['label']

In [12]:
allAtrs = np.hstack((xyz,das, cls.reshape(-1, 1)))

## Create dataframe

In [13]:
total_dataframe = pd.DataFrame(allAtrs, columns=allCols)
total_dataframe["label"] = total_dataframe["label"].astype(int)
for i in range(1, len(fileList) + 1):
    total_dataframe[f"""count_radius{i}"""] = total_dataframe[f"""count_radius{i}"""].astype(int)
# total_dataframe["count_radius1"] = total_dataframe["count_radius1"].astype(int)
# total_dataframe["count_radius2"] = total_dataframe["count_radius2"].astype(int)

# total_dataframe["labelName"] = total_dataframe["label"].apply(labelPoints)

In [14]:
total_dataframe = total_dataframe.query("label != 0")
total_dataframe = total_dataframe.copy()

In [15]:
gc.collect()

434

In [15]:
# ss = ss[ss["count"] > 10]

## Create PLY files with features (Only do if they don't already exist)

In [16]:
# for i in range(1, 7):
#     ls = [col for col in list(total_dataframe) if col.endswith(f"""radius{i}""")]
#     ls = ['X','Y','Z'] + ls + ['label']
#     partial_df = total_dataframe[ls]

#     tpsOut = []
#     for idx, tpe in partial_df.dtypes.to_dict().items():
#         if tpe == 'int64':
#             tpsOut.append((idx, 'i4'))
#         elif tpe == 'float64':
#             tpsOut.append((idx, 'f8'))
            
#     vertex_data = np.empty(allAtrs.shape[0], dtype=tpsOut)
    
#     for t in tpsOut:
#         vertex_data[t[0]] = partial_df[t[0]].values
    
        
#     el = PlyElement.describe(vertex_data, 'vertex')
    
#     # Create a PlyData object and write to a PLY file
#     # Set text=True for ASCII PLY, or text=False for binary PLY
#     PlyData([el], text=False).write(f"""/home/sspiegel/CapstoneData/Paris/Toronto_3D/PC_with_features/L004_features_radius{i}.ply""")

## Instantiate Random Forest classifier

In [17]:
rf = joblib.load("""/home/sspiegel/CapstoneData/Paris/RF_models/2025_10_30T03_32_L001Trained_RF.joblib""")

In [18]:
rf

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [19]:
total_dataframe.reset_index(inplace = True)

In [20]:
total_dataframe

Unnamed: 0,index,X,Y,Z,EigenSum_radius1,omnivariance_radius1,entropy_radius1,linearity_radius1,planarity_radius1,sphericity_radius1,...,omnivariance_radius6,entropy_radius6,linearity_radius6,planarity_radius6,sphericity_radius6,curvature_radius6,verticality1_radius6,verticality2_radius6,count_radius6,label
0,0,61.237991,284.179016,6.422012,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.501201,0.721130,0.589691,0.108861,0.301448,0.176105,0.005630,0.047531,31,5
1,1,61.361008,284.220032,5.016006,0.004200,0.000844,0.026157,0.585046,0.351801,0.063152,...,0.778723,-0.934221,0.655846,0.193242,0.150912,0.100940,0.118923,1.349131,44,5
2,2,61.305008,284.348999,4.883011,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.778723,-0.934221,0.655846,0.193242,0.150912,0.100940,0.118923,1.349131,44,5
3,3,61.289001,284.049011,6.192001,0.003170,0.000782,0.021008,0.435503,0.435122,0.129374,...,0.585145,0.442596,0.662678,0.039110,0.298211,0.182333,0.117264,1.075547,35,5
4,4,61.270996,284.201050,6.057999,0.002164,0.000309,0.014463,0.797558,0.175734,0.026708,...,0.614214,0.208984,0.692377,0.053309,0.254314,0.162820,0.103009,1.020634,36,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6155481,6747643,128.925003,123.722046,5.803009,0.004301,0.000172,0.026349,0.314211,0.685339,0.000450,...,1.580064,-3.911565,0.252219,0.484340,0.263441,0.130985,0.004526,1.284384,129,2
6155482,6747644,128.906998,123.777039,5.751999,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,1.550553,-3.839633,0.262820,0.484277,0.252904,0.127082,0.027700,1.302276,129,2
6155483,6747645,128.869995,123.834045,5.700012,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,1.527157,-3.587861,0.248869,0.486176,0.264954,0.131420,0.021931,1.314530,127,2
6155484,6747646,128.817993,123.947021,5.597000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,1.487280,-3.612377,0.294273,0.467795,0.237932,0.122414,0.029047,1.301116,128,2


In [21]:
X = total_dataframe[ff].to_numpy()

In [22]:
y = total_dataframe["label"].to_numpy()

In [23]:
yPred = rf.predict(X)

[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    5.0s
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:   13.2s finished


In [24]:
(yPred == y).sum() / y.shape[0]

np.float64(0.928924539833248)

In [25]:
cm = confusion_matrix(y, yPred)

In [37]:
cmNorm = confusion_matrix(y, yPred)
cmNorm.sum(axis = 0)

array([3973785, 1200449,  477934,   37872,   98343,  231824,  135279])

In [30]:
indx = ["Ground","Natural","Building","Utility_Line","Pole","Car","Fence"]

In [31]:
cmDF = pd.DataFrame(cm, columns = indx, index=indx)

In [32]:
cmDF

Unnamed: 0,Ground,Natural,Building,Utility_Line,Pole,Car,Fence
Ground,3941977,2843,6560,1694,678,29844,12042
Natural,9411,1131956,75860,6595,8779,30922,28914
Building,7628,56644,386419,6726,14241,612,66804
Utility_Line,0,2107,1330,20382,9212,6,4164
Pole,124,2262,1256,1584,65012,189,1182
Car,14644,4210,6304,800,281,169391,19328
Fence,1,427,205,91,140,860,2845


In [33]:
cmNormF = pd.DataFrame(cmNorm, columns = indx, index=indx)

np.float64(1.0115744378130964)