In [1]:
import pickle
import numpy as np
import open3d as o3d
from sklearn.ensemble import RandomForestClassifier
import pdal
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from plyfile import PlyData, PlyElement

sns.set()

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
ROOT_PLY = """/home/sspiegel/CapstoneData/Paris/Toronto_3D/L001.ply"""

In [3]:
pipeline_json = [
    {
        "type": "readers.ply",
        "filename": ROOT_PLY
    }
        
]


pipeline = pdal.Pipeline(json.dumps(pipeline_json))

# Execute the pipeline
# This will process the data according to the stages defined in the pipeline
pipeline.execute()

xy = pipeline.arrays[0]

mta = pipeline.metadata



In [4]:
xyzT = np.array((xy["X"], xy["Y"], xy["Z"])).T
xyz = xyzT - np.min(xyzT, axis = 0)

pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(xyz)

In [5]:
ROOT  = """/home/sspiegel/CapstoneData/Paris/Toronto_3D/pickleFiles/2025_10_23T22_14_L001_r_0_1_grid_0_02_features.npz"""
ROOT_2  = """/home/sspiegel/CapstoneData/Paris/Toronto_3D/pickleFiles/2025_10_23T23_39_L001_r_0_2_grid_0_04_features.npz"""

In [6]:
def labelPoints(row):
    if row == 0:
        return "Unclassified"
    elif row == 1:
        return "Ground"
    elif row == 2:
        return "Road_markings"
    elif row == 3:
        return "Natural"
    elif row == 4:
        return "Building"
    elif row == 5:
        return "Utility_line"
    elif row == 6:
        return "Pole"
    elif row == 7:
        return "Car"
    elif row == 8:
        return "Fence"
    else:
        return -1

In [7]:
da = np.load(ROOT)

In [8]:
da2 = np.load(ROOT_2)

In [9]:
xyz = da['array1']
features = da['array2']
features2 = da2['array2']
cls = da['array3']

In [10]:
"""
Unclassified 0
Ground 1
Road_markings 2
Natural 3
Building 4
Utility_line 5
Pole 6
Car 7
Fence 8
"""

'\nUnclassified 0\nGround 1\nRoad_markings 2\nNatural 3\nBuilding 4\nUtility_line 5\nPole 6\nCar 7\nFence 8\n'

In [11]:
cols = ["EigenSum","omnivariance","entropy","linearity","planarity","sphericity","curvature","verticality1","verticality2","count"]

In [12]:
cols1 = [f"""{a}_radius1""" for a in cols]
cols2 = [f"""{a}_radius2""" for a in cols]

allCols = cols1 + cols2
    
    
    

In [13]:
allCols = ['X', 'Y','Z'] + allCols + ['label']

In [14]:
allAtrs = np.hstack((xyz,features,features2, cls.reshape(-1, 1)))

In [15]:
total_dataframe = pd.DataFrame(allAtrs, columns=allCols)
total_dataframe["label"] = total_dataframe["label"].astype(int)
total_dataframe["count_radius1"] = total_dataframe["count_radius1"].astype(int)
total_dataframe["count_radius2"] = total_dataframe["count_radius2"].astype(int)

# total_dataframe["labelName"] = total_dataframe["label"].apply(labelPoints)

In [16]:
# ss = ss[ss["count"] > 10]

In [17]:
"""
Unclassified 0
Ground 1
Road_markings 2
Natural 3
Building 4
Utility_line 5
Pole 6
Car 7
Fence 8
"""

'\nUnclassified 0\nGround 1\nRoad_markings 2\nNatural 3\nBuilding 4\nUtility_line 5\nPole 6\nCar 7\nFence 8\n'

In [18]:
cls[cls == 0]

array([0, 0, 0, ..., 0, 0, 0], shape=(347037,))

In [19]:
# for i in range(9):
#     print("""percent for class %d: %.4f""" % (i, 100*(cls[cls == i].shape[0]/cls.shape[0])))

In [20]:
# total_dataframe.reset_index(inplace = True)

In [21]:
# tpsOut = []
# for idx, tpe in total_dataframe.dtypes.to_dict().items():
#     if tpe == 'int64':
#         tpsOut.append((idx, 'i4'))
#     elif tpe == 'float64':
#         tpsOut.append((idx, 'f8'))
        
# vertex_data = np.empty(allAtrs.shape[0], dtype=tpsOut)

# for t in tpsOut:
#     vertex_data[t[0]] = total_dataframe[t[0]].values

    
# el = PlyElement.describe(vertex_data, 'vertex')

# # Create a PlyData object and write to a PLY file
# # Set text=True for ASCII PLY, or text=False for binary PLY
# PlyData([el], text=False).write("""/home/sspiegel/CapstoneData/Paris/Toronto_3D/PC_with_features/L001_features.ply""")

In [22]:

# tpsOut

In [23]:
rf = RandomForestClassifier(verbose=1, n_jobs=-1)

In [24]:
rf

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [25]:
total_dataframe.reset_index(inplace = True)

In [26]:
Xy = total_dataframe.to_numpy()

X = Xy[:,4:-1]
y = total_dataframe["label"].to_numpy()

In [102]:
idx = np.random.randint(low=0, high=X.shape[0],size = 100000)    

In [103]:
idx

array([8136783, 8465593, 1048984, ..., 3698086, 2116583, 2637670],
      shape=(100000,))

In [104]:
X_beta = X[idx]
y_beta = y[idx]

In [105]:
rf.fit(X_beta, y_beta)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    2.9s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    9.9s finished


0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [40]:

# fig, axes = plt.subplots(nrows = 1,ncols = 1,figsize = (4,4), dpi=800)
# tree.plot_tree(rf.estimators_[0],
#                feature_names = X_beta, 
#                class_names=y_beta,
#                filled = True);
# # fig.savefig('rf_individualtree.png')

In [106]:
rf.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'sqrt',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'monotonic_cst': None,
 'n_estimators': 100,
 'n_jobs': -1,
 'oob_score': False,
 'random_state': None,
 'verbose': 1,
 'warm_start': False}

In [107]:
idx2 = np.random.randint(0, X.shape[0], size = 10000)

In [108]:
Xtest = X[idx2]

In [109]:
ytest = y[idx2]

In [110]:
yp = rf.predict(Xtest)

[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.0s
[Parallel(n_jobs=16)]: Done 100 out of 100 | elapsed:    0.0s finished


In [111]:
(yp == ytest).sum()

np.int64(8838)