In [1]:
import numpy as np
import lightgbm as lgb
import pypcs
import os
import gc

from sklearn.metrics import accuracy_score
from utils import download_pix4d_dataset, load_point_cloud, generate_features, estimate_ious


In [2]:
# Toggle for color features: if False only geometric features will be used
USE_COLORS = True
# Name of folder for datasets (relative to the root of the repository)
DATASET_PATH = "datasets"

# Keyword arguments for the FeatureEstimator class (values set according to the paper)
feature_estimator_kwargs = dict(
    voxel_size=0.05,    # size of voxel for first level of scale pyramid
    num_neighbors=10,   # number of neighbors used for point features estimation
    num_scales=9,       # number of downsampling levels
    batch_size=10000,   # number of points to process in parallel in the case of batched iteration
)

In [3]:
# Download the dataset and unpack it into DATASET_PATH (internet connection required).
# Alternatively, download it manually from the Pix4D site and unpack it into the DATASET_PATH folder.
# NOTE(review): whether an already downloaded dataset is detected and skipped depends on
# utils.download_pix4d_dataset, which is not visible here - confirm before re-running this cell.
download_pix4d_dataset(DATASET_PATH)

Downloading dataset. It will take a while
Unpacking dataset


In [4]:
# Load the training point cloud together with its labels and prepare per-point features
train_filename = os.path.join(DATASET_PATH, "cadastre.xyz")
# Empty cloud handed to the loader; the same object is passed to the loader again for the test set
point_cloud = pypcs.PointCloud()
# USE_COLORS is forwarded to the loader (False means only geometric features will be used)
point_cloud, labels = load_point_cloud(train_filename, point_cloud, USE_COLORS)
# Feature estimator is configured with the parameters from feature_estimator_kwargs
feature_estimator = pypcs.FeatureEstimator(point_cloud, **feature_estimator_kwargs)
# Slow step: takes several minutes on this dataset (see the progress output of this cell)
features = generate_features(feature_estimator)

Loading dataset cadastre.xyz: 100%|██████████| 5771358/5771358 [01:24<00:00, 68454.52it/s]
Calculate point features: 100%|██████████| 578/578 [05:43<00:00,  1.68it/s]


In [5]:
# Gradient boosting hyper-parameters (set according to the paper)
lgbm_kwargs = dict(num_leaves=16, learning_rate=0.2)

# Build the gradient boosting classifier first, then train it on the
# training features; the value returned by fit() is kept as the trained model.
estimator = lgb.LGBMClassifier(**lgbm_kwargs)
estimator = estimator.fit(features, labels)


In [6]:
# The training features and labels are not used again after fitting:
# drop both references and run a garbage collection pass to free some RAM.
del features
del labels
gc.collect()

140

In [7]:
# Load the test dataset and use the trained classifier to obtain
# per-point semantic segmentation results.
test_filename = os.path.join(DATASET_PATH, "ankeny.xyz")
point_cloud, labels = load_point_cloud(test_filename, point_cloud, USE_COLORS)
feature_estimator = pypcs.FeatureEstimator(point_cloud, **feature_estimator_kwargs)
features = generate_features(feature_estimator)
# Class probabilities are kept in their own variable because the soft voting
# smoothing later in the notebook works on probabilities, not on labels.
predicted_probs = estimator.predict_proba(features)
# Column index of the most probable class for every point
predicted_indices = np.argmax(predicted_probs, axis=1)
# Translate column indices into class labels with a single vectorized lookup
# instead of a Python-level loop over millions of points; .tolist() keeps
# `predicted_labels` a plain list, as it was before.
predicted_labels = estimator.classes_[predicted_indices].tolist()

# Calculate metrics
print('Accuracy = {:.4%}'.format(accuracy_score(labels, predicted_labels)))
mean_iou, ious = estimate_ious(labels, predicted_labels, estimator.n_classes_)
print('Mean IoU = {:.4%}'.format(mean_iou))

Loading dataset ankeny.xyz: 100%|██████████| 8924118/8924118 [02:01<00:00, 73335.22it/s]
Calculate point features: 100%|██████████| 893/893 [08:08<00:00,  1.83it/s]


Accuracy = 83.4713%


TypeError: unsupported format string passed to list.__format__

In [8]:
# Since every point is classified independently, the results contain some amount of "salt and pepper" noise.
# We can suppress this noise by averaging class labels with respect to the classes of neighboring points,
# either with a majority voting scheme (this method can be used directly with class labels) ...
# The smoothed labels get their own name: overwriting `predicted_labels` here would make
# the cell non-idempotent (re-running it would smooth already smoothed labels).
hard_voted_labels = feature_estimator.hard_voting_smoothing(predicted_labels, num_neighbors=20)
print('Accuracy = {:.4%}'.format(accuracy_score(labels, hard_voted_labels)))
mean_iou, ious = estimate_ious(labels, hard_voted_labels, estimator.n_classes_)
print('Mean IoU = {:.4%}'.format(mean_iou))

Accuracy = 84.4069%
Mean IoU = 48.7825%


In [9]:
# ... or with a soft voting scheme
# (note that this method requires class probabilities instead of class labels,
# but can give slightly better results)
predicted_indices = feature_estimator.soft_voting_smoothing(predicted_probs, num_neighbors=20)
# Map the smoothed class indices to class labels with one vectorized lookup instead of
# a Python-level loop over every point; .tolist() keeps `predicted_labels` a plain list.
predicted_labels = estimator.classes_[np.asarray(predicted_indices, dtype=np.intp)].tolist()
print('Accuracy = {:.4%}'.format(accuracy_score(labels, predicted_labels)))
mean_iou, ious = estimate_ious(labels, predicted_labels, estimator.n_classes_)
print('Mean IoU = {:.4%}'.format(mean_iou))

After smoothing
Accuracy = 84.6466%
Mean IoU = 49.0871%


In [10]:
# Now we can write the semantic segmentation results to a PLY file (can be opened with MeshLab)...
from utils import write_ply

# RGB color (red, green, blue) used for each class label
color_mapping = {
    2: [245, 14, 14],
    5: [14, 245, 29],
    6: [6, 57, 223],
    11: [6, 6, 6],
    66: [245, 252, 60],
    67: [9, 243, 243],
}
points = point_cloud.get_points()
# Shift the cloud so that its minimum corner sits at the origin. Large absolute
# coordinates lose precision in float32 (this is precision loss, not underflow);
# NOTE(review): presumably write_ply stores coordinates as float32 - confirm in utils.
points = points - points.min(axis=0)
# One RGB triple per point, looked up by the predicted class label
colors = np.array([color_mapping[label] for label in predicted_labels], dtype=np.uint8)
write_ply('predicted.ply', [points, colors], ["x", "y", "z", "red", "green", "blue"])

In [11]:
# ... or visualize directly in the browser
import pyvista as pv

visualize_every = 10 # keep every 10th point: browser visualization of the full cloud can be difficult, so subsample
points = point_cloud.get_points()[::visualize_every, :]
points = points - points.min(axis=0) # shift the minimum corner to the origin so coordinates stay small before the float32 cast below
viz_cloud = pv.PolyData(points.astype(np.float32))
# Points are colored by class index (not class label); the same stride keeps the indices aligned with the subsampled points
viz_cloud['labels'] = np.array(predicted_indices, dtype=np.uint32)[::visualize_every]
viz_cloud.plot(cmap='gist_rainbow')