## Import Modules

In [1]:
import pdal
import sys
import os
import json
sys.path.append("../../../PythonScripts")
from glob import glob
import open3d as o3d
import os
import numpy as np
import pandas as pd
from multiprocessing import Pool
from pipeline_functions import downscale_las, get_scales, downscale_ply, getFeatures,getFeaturesParallel
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
from time import time
# from scipy.spatial import cKDTree
from sklearn.neighbors import KDTree, BallTree
# from scipy.spatial import KDTree
import pickle
from datetime import datetime, timezone
from itertools import repeat # Used for starmap function
import gc # Used to free up memory to prevent kernel restarting

sns.set()

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


# Get number of processors to run loop

In [2]:
# Worker count for the multiprocessing pool: all logical CPUs minus 8, but never
# fewer than one. os.cpu_count() may return None, and on machines with 8 or
# fewer CPUs the plain subtraction would be <= 0, which Pool() rejects.
processors = max(1, (os.cpu_count() or 1) - 8)

## Get File to run

In [3]:
# Absolute path of the LAS tile processed by this notebook.
ROOT = (
    "/home/sspiegel/CapstoneData/Ohio/dales_semantic_segmentation_las"
    "/dales_las/train/5110_54320.las"
)

In [4]:
# File name without its directory and without anything from the first "." onward.
fiNm = ROOT.rsplit("/", 1)[-1].split(".", 1)[0]

## Load in base point cloud

In [5]:
# Single-stage PDAL pipeline: read the LAS file at ROOT.
pipeline_json = [{"type": "readers.las", "filename": ROOT}]
pipeline = pdal.Pipeline(json.dumps(pipeline_json))

# Run the stages defined above so the pipeline's outputs are populated.
pipeline.execute()

# First output array (indexed by field name in later cells) and the pipeline metadata.
xy = pipeline.arrays[0]
mta = pipeline.metadata



In [6]:
# Stack the X, Y and Z fields row-wise, then transpose so each row is one point.
xyzT = np.vstack([xy[axis] for axis in ("X", "Y", "Z")]).T

In [7]:
# Shift the coordinates so the minimum along every axis becomes 0.
xyz = xyzT - xyzT.min(axis=0)

In [8]:
# xyz

## Get Classification

In [9]:
# Per-point values of the "Classification" field, cast to an integer dtype.
cls = xy["Classification"].astype(int)

In [10]:
# scls = get_scales() # Use base parameters from function
# (radius, grid size) pairs, one per scale, computed with explicit parameters
# rather than the helper's defaults.
scls = get_scales(r_0=0.4, S = 6,gamma=1.5, rho = 8)

## Instantiate point cloud

In [11]:
# Wrap the shifted coordinates in an Open3D point cloud so it can be
# voxel-downsampled in the next cell.
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(xyz)

In [12]:
# One voxel-downsampled copy of the full cloud per (radius, grid size) pair.
# Despite its name, pc_dict is a list of dicts, one entry per scale.
pc_dict = [
    {
        "r": radius,
        "grid_size": grid,
        "point_cloud": pcd.voxel_down_sample(grid),
    }
    for radius, grid in scls
]


## Get scaling factors

### Downscale point clouds: Methodology

* Downscale point clouds based on multiscaling factor
* The scaling factors are taken from [Hugues Thomas et al.](https://ieeexplore.ieee.org/document/8490990/)
* Parameters
  * Initial radius ($r_0$): 0.1 (this notebook passes 0.4 to `get_scales`)
  * Number of scales ($S$): 6
  * Base exponent for expanding radius ($\gamma$): 2 (this notebook passes 1.5)
  * Scaling factor of grid ($\rho$): 5 (this notebook passes 8)
  * Radius of sphere for scale $s$: $r_s = r_0 \cdot \gamma^{s}$, for $s = 0, \dots, S-1$
  * Downsampled voxel size: $\frac{r_s}{\rho}$

* Use multiprocessing to speed up process

### Sample point cloud (first radius)

In [13]:
# Work with the first scale only (the smallest radius in scls).
test = pc_dict[0]

test_pc = test["point_cloud"]

# Spatial index over the downsampled points; queried by radius in a later cell.
# tree = BallTree(test_pc.points)
tree = KDTree(test_pc.points)

In [14]:
# Show the selected scale: radius, grid size and the downsampled cloud.
print(test)

{'r': 0.4, 'grid_size': 0.05, 'point_cloud': PointCloud with 17549351 points.}


In [15]:
# Downsampled points as a NumPy array; this is what gets passed to the workers later.
test_pc_arr = np.asarray(test_pc.points)

## Get Distance matrix

In [16]:
# For every point of the full-resolution cloud, find the downsampled points
# within the scale's radius.
# NOTE(review): per the scikit-learn docs, query_radius returns neighbour
# *indices* by default (one index array per query point), so `distMat` does not
# hold distances despite its name -- confirm before relying on it as a distance matrix.
distMat = tree.query_radius(pcd.points, r=test["r"])
# distMat = tree.query_ball_point(pcd.points, r=test["r"], workers = 1)


In [17]:
# Full-resolution (min-shifted) coordinates as a NumPy array, kept for the final save.
pcOut = np.asarray(pcd.points)


In [18]:
# Drop references to large intermediates that are no longer needed, then ask the
# garbage collector to reclaim them before the parallel feature step.
# NOTE(review): pcOut was built from pcd.points; if it shares that buffer, deleting
# the name `pcd` may not actually release the point data -- confirm.
del pc_dict, xyzT, xyz, xy, pcd

gc.collect()

20

## Split and apply features algorithm (KDTree)

In [19]:
# DIV = 100000
# groups = distMat.shape[0] // DIV

In [20]:
# Wall-clock start of the feature extraction (s and e are reused for the stats file).
s = time()

# One argument pair per query point: the shared downsampled array plus that
# point's entry from the radius query.
args = zip(repeat(test_pc_arr), distMat)
with Pool(processes=processors) as pool:
    result = pool.starmap(getFeaturesParallel, args)

# Stack the per-point results into a single array.
bigList = np.array(result)

e = time()

print("""\n\n\n\nDone!!!  Total time: %.4f seconds""" % (e - s))





Done!!!  Total time: 372.0664 seconds


In [21]:
# bigList = np.concatenate(bigList)

## Save features as a NumPy `.npz` archive

In [22]:
# The neighbour lookup results and the tree are not needed once the features
# exist; release them before saving.
del distMat, tree

gc.collect()

0

In [23]:
# Preview the computed feature array.
bigList

array([[6.52489796e-02, 1.04756993e-02, 2.13712878e-01, ...,
        6.62622608e-01, 9.03062902e-01, 7.00000000e+00],
       [2.76979592e-02, 7.67072351e-03, 1.25417013e-01, ...,
        7.83819018e-01, 7.33585388e-01, 7.00000000e+00],
       [4.77418368e-02, 7.51999260e-03, 1.76536541e-01, ...,
        5.76776126e-01, 9.25258878e-01, 7.00000000e+00],
       ...,
       [4.69950000e-02, 5.26948994e-03, 1.70681519e-01, ...,
        5.31693189e-02, 1.45458915e+00, 1.00000000e+01],
       [4.39851852e-02, 3.36419044e-03, 1.65027622e-01, ...,
        6.77203169e-02, 1.49058425e+00, 9.00000000e+00],
       [4.84842975e-02, 1.03242873e-02, 1.81432541e-01, ...,
        1.98165834e-01, 1.24497053e+00, 1.10000000e+01]],
      shape=(17747769, 10))

In [24]:
# pickleOut = f"""/home/sspiegel/CapstoneData/Paris/Toronto_3D/pickleFiles/{datetime.now(timezone.utc).strftime("%Y_%m_%dT%H_%M")}_L001_r_{str(test["r"]).split('.')[0]}_{str(test["r"]).split('.')[1]}_grid_{str(test["grid_size"]).split('.')[0]}_{str(test["grid_size"]).split('.')[1]}.pkl"""

In [25]:
# Preview the per-point class labels saved alongside the features.
cls

array([2, 2, 2, ..., 1, 1, 2], shape=(17747769,))

In [26]:
# UTC timestamp (minute resolution) that prefixes the output file name.
getTime = datetime.now(timezone.utc).strftime("%Y_%m_%dT%H_%M")

# Encode radius and grid size for the file name with "." replaced by "_"
# (0.4 -> "0_4", 0.05 -> "0_05"). Unlike indexing str(x).split('.')[1], this
# does not raise when the value prints without a decimal point (an int, or a
# float shown in scientific notation).
rTag = str(test["r"]).replace(".", "_")
gridTag = str(test["grid_size"]).replace(".", "_")

featureOut = f"""/home/sspiegel/CapstoneData/Ohio/dales_semantic_segmentation_las/dales_las/train/pickleFiles/radial/{getTime}_{fiNm}_r_{rTag}_grid_{gridTag}_features.npz"""

# Create the target directory first: np.savez fails if it is missing, which
# would discard the result of the long-running feature computation.
os.makedirs(os.path.dirname(featureOut), exist_ok=True)

# array1 = point coordinates, array2 = per-point features, array3 = class labels.
# The key names are kept unchanged so existing readers of these archives still work.
np.savez(featureOut, array1=pcOut, array2=bigList, array3=cls)

## Save stats

In [27]:
# Path of the JSON stats file: UTC date plus the radius / grid size of the run.
# NOTE(review): this points at a Paris/... directory while the input tile lives
# under Ohio/... -- confirm this is the intended location.
# NOTE(review): the name carries neither the tile name nor the time of day, so two
# runs on the same day with the same radius/grid size write to the same file.
stsDate = datetime.now(timezone.utc).strftime("%Y_%m_%d")
rParts = str(test["r"]).split('.')
gridParts = str(test["grid_size"]).split('.')
sts = f"""/home/sspiegel/CapstoneData/Paris/training_10_classes/stats/{stsDate}_r_{rParts[0]}_{rParts[1]}_grid_{gridParts[0]}_{gridParts[1]}_features.json"""


In [28]:
# Run summary: input file, scale parameters and the elapsed time of the
# feature-extraction step (e - s).
jsStats = dict(
    filename=ROOT,
    radius=test["r"],
    grid_size=test["grid_size"],
    total_time=f"{e - s:.4f} seconds",
)

In [29]:
# Serialise the run summary straight into the stats file as one JSON document.
with open(sts, "w") as jsOut:
    json.dump(jsStats, jsOut)


In [30]:
# Show all (radius, grid size) pairs produced by get_scales.
scls

[(0.4, 0.05),
 (0.6000000000000001, 0.07500000000000001),
 (0.9, 0.1125),
 (1.35, 0.16875),
 (2.025, 0.253125),
 (3.0375, 0.3796875)]

In [31]:
# Show the scale entry that was used for this run.
test

{'r': 0.4, 'grid_size': 0.05, 'point_cloud': PointCloud with 17549351 points.}