## Model Training, Inference and Evaluation Module

In this notebook we first train the segmentation model, then plot the training logs, and finally evaluate the trained model on the apple segmentation task.

#### 1. Open3D's RandLA-Net Model Setup and Training

In this section we declare a custom dataset loader for training Open3D's segmentation model, and then plot the TensorBoard logs to better understand how the model learns.

#### 2. Model Performance Inference and Evaluation

In this section we evaluate performance of the trained segmentation model, and also plot the final segmentation outputs for better qualitative understanding.

In [None]:
# for loading the dataset into the runtime from google drive
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# installing open3d library for importing RandLa-Net model implementation
!pip install open3d

In [None]:
# cloning the Open3D-ML repository, which is needed for building the custom dataloaders
!git clone https://github.com/isl-org/Open3D-ML

In [None]:
# installing compatible pytorch version
!pip install -r Open3D-ML/requirements-torch-cuda.txt
# restart the runtime before execution of further code

In [1]:
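# checking the installed torch version
# NOTE: torch version 1.13.0+cu116 was found to be incompatible with this setup
# (the original note is truncated; presumably it refers to the installed Open3D build - TODO confirm)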
import torch
print(torch.__version__)

1.13.1+cu116


In [2]:
# setting up for the Open3D-ML package after all the environment restarts
import sys
sys.path.insert(0,'Open3D-ML')

In [3]:
# checking the path variables for the Open3D-ML package source code
from ml3d.datasets.base_dataset import BaseDataset, BaseDatasetSplit
from ml3d.utils import make_dir, DATASET

In [None]:
# further open3d-ml's ml3d related import statements
import open3d.ml as _ml3d
import open3d.ml.torch as ml3d

In [9]:
# loading the already extracted fuji-sfm dataset for model training into current runtime
# TODO: update the corresponding path variables based on your project setup
!ls drive/MyDrive/point-cloud-prototyping/datasets/fuji-sfm-dataset

test  train  valid


### 1. Open3D's RandLA-Net Model Setup and Training

In [4]:
# custom data set loader code, loads the '.npy' files from train, val & test directories
import numpy as np
import os, sys, glob, pickle
from pathlib import Path
from os.path import join, exists, dirname, abspath
from sklearn.neighbors import KDTree
from tqdm import tqdm
import logging

# from .base_dataset import BaseDataset, BaseDatasetSplit
# from ..utils import make_dir, DATASET
# updated paths for the ml3d github source code usage
from ml3d.datasets.base_dataset import BaseDataset, BaseDatasetSplit
from ml3d.utils import make_dir, DATASET

log = logging.getLogger(__name__)
# Expect point clouds to be in npy format with train, val and test files in separate folders.
# Expected column layout of the npy files, as read by Custom3DSplit.get_data below:
# ['x', 'y', 'z', 'feat_1', 'feat_2', 'feat_3', 'class'] (the three features are the rgb values).
# Test files use the same layout, because get_data reads the label column for every split.

class Custom3DSplit(BaseDatasetSplit):
    """One split (training / validation / test / all) of the custom dataset.

    Each item of the split is a point cloud stored as a '.npy' file. The list
    of files is requested from the parent dataset via ``get_split_list``.

    Args:
        dataset: The dataset object this split belongs to.
        split: A string identifying the dataset split that is usually one of
            'training', 'test', 'validation', or 'all'.
    """

    def __init__(self, dataset, split='training'):
        super().__init__(dataset, split=split)
        files = dataset.get_split_list(split)
        log.info("Found {} pointclouds for {}".format(len(files), split))
        # keep references to the parent dataset, its config and the file list
        self.dataset = dataset
        self.cfg = dataset.cfg
        self.split = split
        self.path_list = files

    def __len__(self):
        """Number of point cloud files in this split."""
        return len(self.path_list)

    def get_data(self, idx):
        """Load the point cloud stored at position ``idx`` of this split.

        Returns:
            A dict with 'point' (columns 0-2 as float32), 'feat' (columns 3-5
            as float32) and 'label' (column 6 as a flat int32 array).
        """
        raw = np.load(self.path_list[idx])
        # no normal estimates: only xyz points, rgb features and labels are used
        return {
            'point': np.array(raw[:, :3], dtype=np.float32),
            'feat': np.array(raw[:, 3:6], dtype=np.float32),
            'label': np.array(raw[:, 6], dtype=np.int32).reshape((-1,)),
        }

    def get_attr(self, idx):
        """Describe the file at position ``idx``: its name, path and split."""
        file_path = Path(self.path_list[idx])
        return {
            'name': file_path.name.replace('.npy', ''),
            'path': str(file_path),
            'split': self.split,
        }


class Custom3D(BaseDataset):
    """Custom point cloud dataset for background / apple segmentation.

    Point clouds are '.npy' files collected from the train, validation and
    test sub-directories of ``dataset_path``. The sub-directory names are read
    from ``self.cfg`` (``train_dir``, ``val_dir``, ``test_dir``); presumably
    the base class fills the config from the keyword arguments, so they have
    to be supplied by the caller - TODO confirm against BaseDataset.

    Args:
        dataset_path: The path to the dataset to use.
        name: The name of the dataset.
        cache_dir: The directory where the cache is stored.
        use_cache: Indicates if the dataset should be cached.
        num_points: The maximum number of points to use when splitting the dataset.
        ignored_label_inds: A list of labels that should be ignored in the
            dataset. ``None`` (the default) means that no label is ignored.
        test_result_folder: The folder where the test results should be stored.
        **kwargs: Further configuration entries forwarded to the base class.
    """

    def __init__(self,
                 dataset_path,
                 name='Custom3D',
                 cache_dir='./logs/cache',
                 use_cache=False,
                 num_points=65536,
                 ignored_label_inds=None,
                 test_result_folder='./test',
                 **kwargs):

        # a fresh list per instance; a mutable default would be shared
        # between all instances created without this argument
        if ignored_label_inds is None:
            ignored_label_inds = []

        super().__init__(dataset_path=dataset_path,
                         name=name,
                         cache_dir=cache_dir,
                         use_cache=use_cache,
                         num_points=num_points,
                         ignored_label_inds=ignored_label_inds,
                         test_result_folder=test_result_folder,
                         **kwargs)

        cfg = self.cfg

        self.dataset_path = cfg.dataset_path

        self.label_to_names = self.get_label_to_names()

        self.num_classes = len(self.label_to_names)
        self.label_values = np.sort([k for k, v in self.label_to_names.items()])
        self.label_to_idx = {l: i for i, l in enumerate(self.label_values)}
        self.ignored_labels = np.array(cfg.ignored_label_inds)

        self.train_dir = str(Path(cfg.dataset_path) / cfg.train_dir)
        self.val_dir = str(Path(cfg.dataset_path) / cfg.val_dir)
        self.test_dir = str(Path(cfg.dataset_path) / cfg.test_dir)
        print(self.train_dir, self.val_dir, self.test_dir)
        # glob returns files in arbitrary order; sorting gives a stable
        # starting order on every machine before any shuffling happens
        self.train_files = sorted(glob.glob(self.train_dir + "/*.npy"))
        self.val_files = sorted(glob.glob(self.val_dir + "/*.npy"))
        self.test_files = sorted(glob.glob(self.test_dir + "/*.npy"))

        print("Training Data List: " ,self.train_files, "\nValidation Data List: ", self.val_files, "\nTesting Data List: ", self.test_files)

    @staticmethod
    def get_label_to_names():
        """Returns a label to names dictionary object.
        Returns:
            A dict where keys are label numbers and
            values are the corresponding names.
        """
        label_to_names = {0: 'background', 1: 'apple'}
        return label_to_names

    def get_split(self, split):
        """Returns a dataset split.
        Args:
            split: A string identifying the dataset split that is usually one of
            'training', 'test', 'validation', or 'all'.
        Returns:
            A dataset split object providing the requested subset of the data.
        """
        return Custom3DSplit(self, split=split)

    def get_split_list(self, split):
        """Returns the list of point cloud files that belong to a split.

        The train, validation and test lists are returned as shuffled copies,
        so the lists stored on the dataset (and the lists handed out by
        earlier calls) are never reordered afterwards. The 'all' split is the
        unshuffled concatenation of the validation, train and test files.

        Args:
            split: A string identifying the dataset split that is usually one of
            'training', 'test', 'validation', or 'all'.
        Returns:
            A list with the paths of the '.npy' files of the requested split.
        Raises:
             ValueError: Indicates that the split name passed is incorrect. The
             split name should be one of 'training', 'test', 'validation', or
             'all'.
        """
        if split in ['test', 'testing']:
            files = list(self.test_files)
        elif split in ['val', 'validation']:
            files = list(self.val_files)
        elif split in ['train', 'training']:
            files = list(self.train_files)
        elif split in ['all']:
            return self.val_files + self.train_files + self.test_files
        else:
            raise ValueError("Invalid split {}".format(split))
        # shuffle the copy only, see the docstring
        self.rng.shuffle(files)
        return files

    def _result_path(self, attr):
        """Path of the stored test result for the datum described by ``attr``."""
        return join(self.cfg.test_result_folder, attr['name'] + '.npy')

    def is_tested(self, attr):
        """Checks if a datum in the dataset has been tested.

        Looks for the file that ``save_test_result`` writes for this datum.

        Args:
            attr: The attributes of the datum; only ``attr['name']`` is used.
        Returns:
            True if a stored test result exists for the datum, else False.
        """
        store_path = self._result_path(attr)
        if exists(store_path):
            print("{} already exists.".format(store_path))
            return True
        else:
            return False

    def save_test_result(self, results, attr):
        """Saves the predicted labels of a model as a '.npy' file.

        The predicted class indices are converted to label values before
        saving. Indices are taken to count only the labels that are not
        ignored (this matches how the loss code in this notebook shifts labels
        down past ignored ones - TODO confirm for datasets with ignored
        labels). With no ignored labels the stored values equal the indices.

        Args:
            results: The output of a model for the datum associated with the
                attribute passed; ``results['predict_labels']`` is used.
            attr: The attributes that correspond to the outputs passed in results.
        """
        make_dir(self.cfg.test_result_folder)
        pred = np.asarray(results['predict_labels']).astype(np.int64)
        # label values that the model can actually predict
        kept_values = self.label_values[~np.isin(self.label_values,
                                                 self.ignored_labels)]
        np.save(self._result_path(attr), kept_values[pred])

DATASET._register_module(Custom3D)

In [10]:
# torch related import statements
import os
import random
import open3d.ml as _ml3d
import open3d.ml.torch as ml3d
# general import statements
import logging
import numpy as np
from tqdm import tqdm
from pathlib import Path
import os, sys, glob, pickle
from sklearn.neighbors import KDTree
from os.path import join, exists, dirname, abspath

In [18]:
# loading the previous checkpoint for further training after runtime disconnect
!ls drive/MyDrive/point-cloud-prototyping/fuji_sfm_logs/RandLANet_fuji-apple-segmentation_torch/checkpoint/ckpt_00018.pth

drive/MyDrive/point-cloud-prototyping/fuji_sfm_logs/RandLANet_fuji-apple-segmentation_torch/checkpoint/ckpt_00018.pth


In [None]:
# RandLA-Net model configuration file
!cat Open3D-ML/ml3d/configs/randlanet_s3dis.yml
# modifying this file for the custom fuji-sfm dataset with the %%writefile command below
# for the custom dataset we modify settings such as the data subset paths, classes, class weights etc.

In [5]:
%%writefile Open3D-ML/ml3d/configs/randlanet_s3dis.yml
dataset:
  name: fuji-apple-segmentation
  dataset_path: drive/MyDrive/point-cloud-prototyping/datasets/fuji-sfm-dataset
  train_dir: train
  val_dir: valid
  test_dir: test
  cache_dir: drive/MyDrive/point-cloud-prototyping/fuji_sfm_logs/cache
  class_weights: []
  ignored_label_inds: []
  num_points: 40960
  test_area_idx: 1
  test_result_folder: ./test
  use_cache: False
model:
  name: RandLANet
  batcher: DefaultBatcher
  ckpt_path: drive/MyDrive/point-cloud-prototyping/fuji_sfm_logs/RandLANet_fuji-apple-segmentation_torch/checkpoint/ckpt_00018.pth
  num_neighbors: 16
  num_layers: 5
  num_points: 40960
  num_classes: 2
  ignored_label_inds: []
  sub_sampling_ratio: [4, 4, 4, 4, 2]
  in_channels: 6
  dim_features: 8
  dim_output: [16, 64, 128, 256, 512]
  grid_size: 0.04
  augment:
    recenter:
      dim: [0, 1]
    rotate:
      method: vertical
    scale:
      min_s: 0.9
      max_s: 1.1
    noise:
      noise_std: 0.001
pipeline:
  name: SemanticSegmentation
  optimizer:
    lr: 0.00025
  batch_size: 2
  main_log_dir: drive/MyDrive/point-cloud-prototyping/fuji_sfm_logs
  max_epoch: 45
  save_ckpt_freq: 9
  scheduler_gamma: 0.99
  test_batch_size: 3
  train_sum_dir: drive/MyDrive/point-cloud-prototyping/fuji_sfm_logs/train_log
  val_batch_size: 5
  summary:
    record_for: []
    max_pts:
    use_reference: false
    max_outputs: 1

Overwriting Open3D-ML/ml3d/configs/randlanet_s3dis.yml


In [9]:
# 'class_weights' is left empty in the config for model training because of a known, logged bug:
# otherwise an incompatible shape error, [1,3] (or [1, num_classes]), is produced
cfg_file = "Open3D-ML/ml3d/configs/randlanet_s3dis.yml"
cfg = _ml3d.utils.Config.load_from_file(cfg_file)
model = ml3d.models.RandLANet(**cfg.model)

In [11]:
# verifying the updated config file for the custom data loader and RandLa-net model
print(cfg.dataset)
print(cfg.pipeline)

{'name': 'fuji-apple-segmentation', 'dataset_path': 'drive/MyDrive/point-cloud-prototyping/datasets/fuji-sfm-dataset', 'train_dir': 'train', 'val_dir': 'valid', 'test_dir': 'test', 'cache_dir': 'drive/MyDrive/point-cloud-prototyping/fuji_sfm_logs/cache', 'class_weights': [], 'ignored_label_inds': [], 'num_points': 40960, 'test_area_idx': 1, 'test_result_folder': './test', 'use_cache': False}
{'name': 'SemanticSegmentation', 'optimizer': {'lr': 0.00025}, 'batch_size': 2, 'main_log_dir': 'drive/MyDrive/point-cloud-prototyping/fuji_sfm_logs', 'max_epoch': 45, 'save_ckpt_freq': 9, 'scheduler_gamma': 0.99, 'test_batch_size': 3, 'train_sum_dir': 'drive/MyDrive/point-cloud-prototyping/fuji_sfm_logs/train_log', 'val_batch_size': 5, 'summary': {'record_for': [], 'max_pts': None, 'use_reference': False, 'max_outputs': 1}}


In [12]:
# 'dataset_path' is one of the keys of cfg.dataset, so the whole config can be
# passed as keyword arguments; nothing is popped, which keeps cfg unchanged and
# lets this cell be re-run safely
dataset = Custom3D(**cfg.dataset)

drive/MyDrive/point-cloud-prototyping/datasets/fuji-sfm-dataset/train drive/MyDrive/point-cloud-prototyping/datasets/fuji-sfm-dataset/valid drive/MyDrive/point-cloud-prototyping/datasets/fuji-sfm-dataset/test
Training Data List:  ['drive/MyDrive/point-cloud-prototyping/datasets/fuji-sfm-dataset/train/data_patch_0_0_0.npy', 'drive/MyDrive/point-cloud-prototyping/datasets/fuji-sfm-dataset/train/data_patch_0_0_1.npy', 'drive/MyDrive/point-cloud-prototyping/datasets/fuji-sfm-dataset/train/data_patch_0_0_2.npy', 'drive/MyDrive/point-cloud-prototyping/datasets/fuji-sfm-dataset/train/data_patch_0_0_3.npy', 'drive/MyDrive/point-cloud-prototyping/datasets/fuji-sfm-dataset/train/data_patch_0_1_0.npy', 'drive/MyDrive/point-cloud-prototyping/datasets/fuji-sfm-dataset/train/data_patch_0_1_1.npy', 'drive/MyDrive/point-cloud-prototyping/datasets/fuji-sfm-dataset/train/data_patch_0_1_2.npy', 'drive/MyDrive/point-cloud-prototyping/datasets/fuji-sfm-dataset/train/data_patch_0_1_3.npy', 'drive/MyDrive/po

In [13]:
# exploration: verifying whether loaded point cloud data is correct
# get the 'all' split that combines training, validation and test set
all_split = dataset.get_split('all')
print(all_split)
# print the attributes of the first datum
print(all_split.get_attr(0))
# print the shape of the first point cloud
print(all_split.get_data(0)['point'].shape)

<__main__.Custom3DSplit object at 0x7f0ef6658c70>
{'name': 'data_patch_2_3_1', 'path': 'drive/MyDrive/point-cloud-prototyping/datasets/fuji-sfm-dataset/valid/data_patch_2_3_1.npy', 'split': 'all'}
(24305, 3)


In [14]:
# creating the segmentation pipeline for RandLA-Net model training
pipeline = ml3d.pipelines.SemanticSegmentation(model, dataset=dataset, device="auto", **cfg.pipeline)
# resume from the checkpoint named in the config file ('model' -> 'ckpt_path'),
# so that the path is maintained in one place only
ckpt_path = cfg.model['ckpt_path']
pipeline.load_ckpt(ckpt_path=ckpt_path)

In [None]:
# updating the semseg loss file to add label smoothing and
# an additional penalty for misclassifying the apple class
!cat /usr/local/lib/python3.9/dist-packages/open3d/_ml3d/torch/modules/losses/semseg_loss.py

In [15]:
%%writefile /usr/local/lib/python3.9/dist-packages/open3d/_ml3d/torch/modules/losses/semseg_loss.py
import torch
import torch.nn as nn

from ....datasets.utils import DataProcessing


def filter_valid_label(scores, labels, num_classes, ignored_label_inds, device):
    """Drop points with ignored labels and renumber the remaining labels.

    Args:
        scores: Per-point class scores; reshaped to (-1, num_classes).
        labels: Per-point labels; flattened to one dimension.
        num_classes: Number of score columns per point.
        ignored_label_inds: Label values whose points are removed.
        device: Device the flattened tensors and the results are moved to.

    Returns:
        A tuple (valid_scores, valid_labels) holding the rows that do not
        carry an ignored label. The labels are shifted down by one for every
        non-negative ignored label below them, so that they fall into the
        range of the score columns.
    """
    flat_scores = scores.reshape(-1, num_classes).to(device)
    flat_labels = labels.reshape(-1).to(device)

    # start with "keep everything" and knock out each ignored label in turn
    keep_mask = torch.ones_like(flat_labels, dtype=torch.bool)
    for ign_label in ignored_label_inds:
        keep_mask = torch.logical_and(keep_mask,
                                      torch.ne(flat_labels, ign_label))
    keep_idx = torch.nonzero(keep_mask, as_tuple=True)[0].to(device)

    kept_scores = flat_scores[keep_idx]
    kept_labels = flat_labels[keep_idx]

    # lookup table from original label value to reduced label value; a
    # placeholder zero is inserted at the position of every ignored label
    remap = torch.arange(0, num_classes, dtype=torch.int64)
    placeholder = torch.zeros([1], dtype=torch.int64)
    for ign_label in ignored_label_inds:
        if ign_label < 0:
            continue
        remap = torch.cat(
            [remap[:ign_label], placeholder, remap[ign_label:]], 0)

    kept_labels = remap.to(device)[kept_labels.long()]

    return kept_scores, kept_labels


class SemSegLoss(object):
    """Weighted cross-entropy loss for semantic segmentation.

    If the dataset config has a non-empty 'class_weights' entry, the weights
    computed by DataProcessing.get_class_weights are used. Otherwise a fixed
    weighting of [1.0, 4.0] with label smoothing is used, which assumes
    exactly two classes and penalises errors on the second class more.

    Args:
        pipeline: Not used here; kept for the caller's call signature.
        model: Not used here; kept for the caller's call signature.
        dataset: Object whose ``cfg`` may provide 'class_weights'.
        device: Device on which the weight tensor is created.
    """

    def __init__(self, pipeline, model, dataset, device):
        super(SemSegLoss, self).__init__()
        # weighted_CrossEntropyLoss
        if 'class_weights' in dataset.cfg.keys() and len(
                dataset.cfg.class_weights) != 0:
            class_wt = DataProcessing.get_class_weights(
                dataset.cfg.class_weights)
            # CrossEntropyLoss needs a 1-D weight of length num_classes;
            # flattening removes any leading singleton dimension
            weights = torch.tensor(class_wt, dtype=torch.float,
                                   device=device).reshape(-1)

            self.weighted_CrossEntropyLoss = nn.CrossEntropyLoss(weight=weights)
        else:
            # create the weights on the target device so that the loss also
            # works when scores and labels live on the GPU
            weights = torch.tensor([1.0, 4.0], dtype=torch.float, device=device)
            self.weighted_CrossEntropyLoss = nn.CrossEntropyLoss(weight=weights, ignore_index=2, label_smoothing=0.05)

Overwriting /usr/local/lib/python3.9/dist-packages/open3d/_ml3d/torch/modules/losses/semseg_loss.py


In [None]:
# training the model from scratch with the custom fuji-sfm dataset
# uncomment during the model training stage
# pipeline.run_train()
# note: passing 'class_weights' from the configuration file gives a shape mismatch error

In [None]:
# for loading the training logs to observe the learning curve during training 
%load_ext tensorboard
%tensorboard --logdir drive/MyDrive/point-cloud-prototyping/fuji_sfm_logs/train_log

In [20]:
# generating sample test point cloud outputs from the trained model
# for the first five test data patches, so that the performance can be
# analysed systematically at the level of individual data patches
test_split = dataset.get_split("test")

# each patch is loaded and then immediately passed through the model
loaded_patches, patch_results = [], []
for patch_idx in range(5):
    patch = test_split.get_data(patch_idx)
    loaded_patches.append(patch)
    patch_results.append(pipeline.run_inference(patch))

# the names below are used by the evaluation cells further down
data_one, data_two, data_three, data_four, data_five = loaded_patches
result_one, result_two, result_three, result_four, result_five = patch_results


test 0/16: 100%|██████████| 605/605 [04:38<00:00,  2.17it/s]

test 0/1: 100%|██████████| 646/646 [00:13<00:00, 47.26it/s]

test 0/1: 100%|██████████| 157/157 [00:10<00:00, 15.11it/s]

test 0/1: 100%|██████████| 186/186 [00:13<00:00, 14.24it/s]
test 0/1: 100%|██████████| 525/525 [00:06<00:00, 82.70it/s]
test 0/1: 100%|██████████| 525/525 [00:12<00:00, 40.50it/s]

test 0/1: 100%|██████████| 371/371 [00:07<00:00, 46.42it/s][A

## 2.a Model Performance Inference and Evaluation

In [18]:
# data-patch-specific testing
# for obtaining basic accuracy-related metrics
# to measure the initial model performance quantitatively
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
# jaccard score being used for mIoU calculation
from sklearn.metrics import jaccard_score
from sklearn.metrics import accuracy_score, f1_score, \
precision_score, recall_score, classification_report, confusion_matrix
import numpy as np

In [36]:
# point cloud size for the data patches under consideration
print(result_one['predict_labels'].shape)
print(result_two['predict_labels'].shape)
print(result_three['predict_labels'].shape)
print(result_four['predict_labels'].shape)
print(result_five['predict_labels'].shape)

(44694,)
(8943,)
(9238,)
(30636,)
(20633,)


In [25]:
# data patch one metric result summary
print("Average Accuracy: " + str(accuracy_score(data_one['label'], result_one['predict_labels'])))
matrix = confusion_matrix(data_one['label'], result_one['predict_labels'])
print("Class Level Accuracy: " + str(matrix.diagonal()/matrix.sum(axis=1)))
print("Class Level mIoU: " + str(jaccard_score(data_one['label'], result_one['predict_labels'], average=None)))

Average Accuracy: 0.12218642323354366
Class Level Accuracy: [0.08935862 0.88979815]
Class Level mIoU: [0.08893946 0.03991288]


In [26]:
# data patch two metric result summary
print("Average Accuracy: " + str(accuracy_score(data_two['label'], result_two['predict_labels'])))
matrix = confusion_matrix(data_two['label'], result_two['predict_labels'])
print("Class Level Accuracy: " + str(matrix.diagonal()/matrix.sum(axis=1)))
print("Class Level mIoU: " + str(jaccard_score(data_two['label'], result_two['predict_labels'], average=None)))

Average Accuracy: 0.8397629430839763
Class Level Accuracy: [0.85693294 0.75154427]
Class Level mIoU: [0.81740571 0.43314873]


In [28]:
# data patch three metric result summary
print("Average Accuracy: " + str(accuracy_score(data_three['label'], result_three['predict_labels'])))
matrix = confusion_matrix(data_three['label'], result_three['predict_labels'])
print("Class Level Accuracy: " + str(matrix.diagonal()/matrix.sum(axis=1)))
print("Class Level mIoU: " + str(jaccard_score(data_three['label'], result_three['predict_labels'], average=None)))

Average Accuracy: 0.7444252002597965
Class Level Accuracy: [0.71920593 0.98515982]
Class Level mIoU: [0.71808955 0.2676799 ]


In [33]:
# data patch four metric result summary
print("Average Accuracy: " + str(accuracy_score(data_four['label'], result_four['predict_labels'])))
matrix = confusion_matrix(data_four['label'], result_four['predict_labels'])
print("Class Level Accuracy: " + str(matrix.diagonal()/matrix.sum(axis=1)))
print("Class Level mIoU: " + str(jaccard_score(data_four['label'], result_four['predict_labels'], average=None)))

Average Accuracy: 0.5123710667188928
Class Level Accuracy: [0.3957525  0.97658917]
Class Level mIoU: [0.39343863 0.28678507]


In [34]:
# data patch five metric result summary
print("Average Accuracy: " + str(accuracy_score(data_five['label'], result_five['predict_labels'])))
matrix = confusion_matrix(data_five['label'], result_five['predict_labels'])
print("Class Level Accuracy: " + str(matrix.diagonal()/matrix.sum(axis=1)))
print("Class Level mIoU: " + str(jaccard_score(data_five['label'], result_five['predict_labels'], average=None)))

Average Accuracy: 0.8296418358939562
Class Level Accuracy: [0.84440974 0.75328947]
Class Level mIoU: [0.8059512  0.41746768]


In [37]:
# utility function to convert the loaded test sample and result prediction
# into prediction point cloud '.txt' annotation ground truth and prediction files
# for further visualizations with plotly and open3d graphs
def gen_downsampled_pc(data_dict, pred_dict, ground_truth_file, prediction_file, ml3d_flg=True, seed=None):
    """Write a random half of a point cloud as ground-truth and prediction text files.

    Half of the points (rounded down) are drawn without replacement. Both
    files contain the same points in the same order, one point per line as
    "x y z value", where value is the ground-truth label in the first file and
    the predicted label in the second one. No trailing newline is written.

    Args:
        data_dict: Dict with 'point' (array indexed as [row, column], columns
            0-2 are written as x, y, z) and 'label' (one label per point).
        pred_dict: Dict with 'predict_labels' (one predicted label per point).
        ground_truth_file: Path of the ground-truth text file to write.
        prediction_file: Path of the prediction text file to write.
        ml3d_flg: If true, the number of written points is put on the first
            line of both files.
        seed: Optional seed for a private random generator, to make the
            sampling reproducible. With None the global `random` state is used.
    """
    points = data_dict['point']
    labels = data_dict['label']
    predictions = pred_dict['predict_labels']

    num_points = len(points)
    sampler = random if seed is None else random.Random(seed)
    # every index, including the last one, is a candidate for the sample
    sampled_ids = sampler.sample(range(num_points), num_points // 2)

    gt_lines = []
    pred_lines = []
    if ml3d_flg:
        gt_lines.append(str(len(sampled_ids)))
        pred_lines.append(str(len(sampled_ids)))
    for idx in sampled_ids:
        xyz = " ".join(str(coord) for coord in points[idx, :3])
        gt_lines.append(xyz + " " + str(labels[idx]))
        pred_lines.append(xyz + " " + str(predictions[idx]))

    with open(ground_truth_file, 'w') as f:
        f.write('\n'.join(gt_lines))
    with open(prediction_file, 'w') as f:
        f.write('\n'.join(pred_lines))

In [38]:
# obtaining the names of the test files for visualization
# this list will change during replication because of randomization in open3d loaders
print(test_split.get_attr(0))
print(test_split.get_attr(1))
print(test_split.get_attr(2))
print(test_split.get_attr(3))
print(test_split.get_attr(4))

{'name': 'data_patch_2_8_0', 'path': 'drive/MyDrive/point-cloud-prototyping/datasets/fuji-sfm-dataset/test/data_patch_2_8_0.npy', 'split': 'test'}
{'name': 'data_patch_2_7_2', 'path': 'drive/MyDrive/point-cloud-prototyping/datasets/fuji-sfm-dataset/test/data_patch_2_7_2.npy', 'split': 'test'}
{'name': 'data_patch_2_11_2', 'path': 'drive/MyDrive/point-cloud-prototyping/datasets/fuji-sfm-dataset/test/data_patch_2_11_2.npy', 'split': 'test'}
{'name': 'data_patch_2_11_1', 'path': 'drive/MyDrive/point-cloud-prototyping/datasets/fuji-sfm-dataset/test/data_patch_2_11_1.npy', 'split': 'test'}
{'name': 'data_patch_2_8_2', 'path': 'drive/MyDrive/point-cloud-prototyping/datasets/fuji-sfm-dataset/test/data_patch_2_8_2.npy', 'split': 'test'}


In [73]:
# saving test samples for visualization with open3d plotly plots
# argument ml3d_flg = False specifies that the point cloud will not be directly visualized by open3d ploty function
# Each entry is (index in test_split, loaded sample, inference result). The file
# names are taken from test_split.get_attr, so every file is named after the
# patch it really contains. With the listing recorded above, 'data_patch_2_8_2'
# is index 4 (data_five / result_five), not index 3.
patches_to_save = [
    (0, data_one, result_one),
    (1, data_two, result_two),
    (2, data_three, result_three),
    (4, data_five, result_five),
]
for patch_idx, patch_data, patch_result in patches_to_save:
    patch_name = test_split.get_attr(patch_idx)['name']
    gen_downsampled_pc(patch_data, patch_result, patch_name + '_gt.txt', patch_name + '_pred.txt', ml3d_flg=False)

## 2.b Prediction Data Patch Visualization

In [40]:
# visualizing the segmentation map of the PC
# example dict of seg colors and their corresponding hex-values
# (the first entry used to be a second "green" key, which was silently
# overwritten by the last entry; it is kept under its own name instead)
COLOR_TO_HEX = { "slate" : "#678286", "magenta" : "#E990C5",
                 "blue" : "#D8E9F7", "yellow" : "#F2D4A2",
                 "red" : "#CA5047", "green" : "#689F55"}


def hex_to_rgb(hex):
    """Convert a '#RRGGBB' string to a list of three floats in [0, 1].

    Each channel is divided by 255 and rounded to two decimals. The parameter
    name shadows the builtin `hex`; it is kept so that keyword calls still work.
    """
    return [round(int(hex[i:i + 2], 16) / 255, 2) for i in (1, 3, 5)]


# same colors as COLOR_TO_HEX, as rgb lists
COLOR_TO_RGB = {name: hex_to_rgb(code) for name, code in COLOR_TO_HEX.items()}

# PC max size for visualization for plotly
MAX_PC_SIZE = 40960 # RandLA-Net's input dimension size

In [55]:
from copy import deepcopy
# downsampling the PC size for standardization of visualized 'cls' & 'bg' PC size
def downsample_pcl(pcl_arr, downsampling_factor, max_points=None, seed=None):
    """Return a random subset of the rows of a 2-D point cloud array.

    Rows are drawn without replacement, so no point is duplicated, and every
    row (including the last one) can be selected.

    Args:
        pcl_arr: 2-D array with one point per row.
        downsampling_factor: Fraction of rows to keep; must be in [0.2, 1.0).
        max_points: Upper limit for the number of returned rows. Defaults to
            the module-level MAX_PC_SIZE.
        seed: Optional seed for a private random generator. With None the
            global NumPy random state is used.

    Returns:
        A new array holding the selected rows.

    Raises:
        AssertionError: If downsampling_factor is outside [0.2, 1.0).
    """
    assert downsampling_factor >= 0.2 and downsampling_factor < 1.0
    if max_points is None:
        max_points = MAX_PC_SIZE
    num_points = pcl_arr.shape[0]
    target_count = min(int(downsampling_factor * num_points), max_points)
    if seed is None:
        row_order = np.random.permutation(num_points)
    else:
        row_order = np.random.default_rng(seed).permutation(num_points)
    # the first target_count entries of a permutation are a sample without replacement
    return pcl_arr[row_order[:target_count], :]
     

# saving the downsampled segmented PC for visualization with open3d and plotly
# function to load the npy array and convert to '.txt' readable
# format of open3d's draw_plotly function
def segmap_color_generator(pc_arr, save_path):
    """Save a downsampled, color-coded copy of a labelled point cloud.

    The input is downsampled with downsample_pcl (factor 0.5). Every remaining
    point is written as "x y z r g b": red when column 3 equals 1, green
    otherwise. The result is stored at save_path with np.savetxt.

    Args:
        pc_arr: 2-D array with x, y, z in columns 0-2 and the class in column 3.
        save_path: Path of the space-delimited text file to write.
    """
    # downsample_pcl only reads its input, so no defensive copy is needed here
    sampled = downsample_pcl(pc_arr, downsampling_factor = 0.5)

    class_rgb = COLOR_TO_RGB['red']
    background_rgb = COLOR_TO_RGB['green']

    # column 3 holds the class (rgb features are not part of the base file)
    is_class_point = sampled[:, 3] == 1
    point_colors = np.where(is_class_point[:, None], class_rgb, background_rgb)
    seg_map = np.hstack([sampled[:, :3], point_colors])

    np.savetxt(save_path, seg_map, delimiter=' ')

# visualization of the stored PC files 
import open3d as o3d
def viz_pcl_o3d(pcl_txt_path):
    """Read a point cloud text file in 'xyzrgb' format and show it with Open3D's plotly viewer.

    Args:
        pcl_txt_path: Path of the text file to read.
    """
    pcd_viz = o3d.io.read_point_cloud(pcl_txt_path, format='xyzrgb')
    o3d.visualization.draw_plotly([pcd_viz])

In [75]:
# listing all files in the directory and making their segmentation map '.txt' file
# for the visualization task
# NOTE: each file is replaced in place by its color-coded version
data_patch_list = ['data_patch_2_11_2_gt.txt', 'data_patch_2_8_0_gt.txt',
                    'data_patch_2_11_2_pred.txt', 'data_patch_2_8_0_pred.txt',
                    'data_patch_2_7_2_gt.txt', 'data_patch_2_8_2_gt.txt',
                    'data_patch_2_7_2_pred.txt', 'data_patch_2_8_2_pred.txt']
for data_patch in data_patch_list:
    data_patch_arr = np.loadtxt(data_patch, ndmin=2)
    if data_patch_arr.shape[1] == 6:
        # six columns means the file already is an 'x y z r g b' segmentation map
        # from an earlier run of this cell; converting it again would read the
        # red channel as the class column and downsample the points a second time
        continue
    segmap_color_generator(data_patch_arr, data_patch)

In [None]:
viz_pcl_o3d('data_patch_2_11_2_gt.txt')

In [None]:
viz_pcl_o3d('data_patch_2_11_2_pred.txt')

In [None]:
viz_pcl_o3d('data_patch_2_8_2_gt.txt')

In [None]:
viz_pcl_o3d('data_patch_2_8_2_pred.txt')

In [None]:
viz_pcl_o3d('data_patch_2_7_2_gt.txt')

In [None]:
viz_pcl_o3d('data_patch_2_7_2_pred.txt')