## Model Training, Inference and Evaluation Module

In this notebook we first train the apple segmentation model, plot the training logs, and then evaluate the trained model on the apple segmentation task.

#### 1. Open3D's RandLA-Net Model Setup and Training

In this section we declare a custom dataloader object for training Open3D's segmentation model, and then plot the TensorBoard logs to better understand how the model is learning.

#### 2. Model Performance Inference and Evaluation

In this section we evaluate performance of the trained segmentation model, and also plot the final segmentation outputs for better qualitative understanding.

In [None]:
# Mount Google Drive at /content/drive so that the dataset archive and the
# log/checkpoint folders referenced later (drive/MyDrive/...) are reachable.
# NOTE(review): the later relative `drive/MyDrive/...` paths presumably rely on
# the working directory being /content -- confirm.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# installing open3d library for importing RandLa-Net model implementation
!pip install open3d

In [None]:
# cloning the Open3D-ML repository, whose source code is needed for building the custom dataloaders
!git clone https://github.com/isl-org/Open3D-ML

In [None]:
# installing compatible pytorch version
!pip install -r Open3D-ML/requirements-torch-cuda.txt

In [None]:
# checking the torch installed version
# NOTE(review): the original note here was cut off mid-sentence -- it read
# "latest version 1.13.0+cu116 is not compatible with the ..." (presumably the
# installed Open3D build; confirm and complete this note).
import torch
print(torch.__version__)

1.13.1+cu116


In [None]:
# Make the cloned Open3D-ML sources importable (needed again after every
# runtime restart). The check keeps the cell idempotent: re-running it does not
# pile up duplicate entries at the front of sys.path.
import sys
if 'Open3D-ML' not in sys.path:
    sys.path.insert(0,'Open3D-ML')

In [None]:
# checking the path variables for the Open3D-ML package source code
from ml3d.datasets.base_dataset import BaseDataset, BaseDatasetSplit
from ml3d.utils import make_dir, DATASET

In [None]:
# extracting the fuji data dataset for model training into current runtime
# TODO: update the archive path below to match your own Google Drive layout
!unzip drive/MyDrive/point-cloud-prototyping/datasets/fuji-norm-complete-dataset.zip -d .

In [None]:
# listing paths of all the splits of the norm pfuji size dataset
!ls fuji-norm-dataset/train

In [None]:
# Cleaning the fuji-sfm patches out of the complete dataset so that training
# uses only the pfuji size dataset.
import re, os, shutil

# Entries whose patch id is below this value are deleted; per the intent of
# this cell those are the fuji-sfm patches.
MIN_PATCH_ID = 160

folder_paths = ['fuji-norm-dataset/train',
                'fuji-norm-dataset/test',
                'fuji-norm-dataset/valid']


def remove_patches_below(folder, min_patch_id=MIN_PATCH_ID):
    """Delete every entry in `folder` whose patch id is below `min_patch_id`.

    The patch id is the first run of digits in the entry's *name* (not in its
    full path, so digits in a parent directory name cannot be mistaken for the
    id). Entries without any digits in their name are left untouched.

    Args:
        folder: Directory to clean.
        min_patch_id: Smallest patch id that is kept.

    Returns:
        The number of entries that were deleted.
    """
    removed = 0
    for filename in sorted(os.listdir(folder)):
        id_match = re.search(r'\d+', filename)
        if id_match is None or int(id_match.group()) >= min_patch_id:
            continue
        file_path = os.path.join(folder, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
            else:
                continue
            removed += 1
        except Exception as e:
            # Best effort: report the failure and keep cleaning the rest.
            print('Failed to delete %s. Reason: %s' % (file_path, e))
    return removed


for folder in folder_paths:
    if not os.path.isdir(folder):
        print('Skipping %s: folder not found' % folder)
        continue
    remove_patches_below(folder)

In [None]:
# utilizing only the normals information for training and removing the rgb during training

### 1. Open3D's RandLA-Net Model Setup and Training

In [None]:
# custom data set loader code, loads the '.npy' files from train, val & test directories
import numpy as np
import os, sys, glob, pickle
from pathlib import Path
from os.path import join, exists, dirname, abspath
from sklearn.neighbors import KDTree
from tqdm import tqdm
import logging

# from .base_dataset import BaseDataset, BaseDatasetSplit
# from ..utils import make_dir, DATASET
# updated paths for the ml3d github source code usage
from ml3d.datasets.base_dataset import BaseDataset, BaseDatasetSplit
from ml3d.utils import make_dir, DATASET

log = logging.getLogger(__name__)
# Expect point clouds to be in npy format with train, val and test files in separate folders.
# Column layout actually read by Custom3DSplit.get_data (same for train, val and test files):
#   columns 0-2 : x, y, z
#   columns 3-5 : not used (presumably r, g, b -- confirm against the dataset export)
#   columns 6-8 : point features (presumably the normals -- confirm)
#   column  9   : class label

class Custom3DSplit(BaseDatasetSplit):
    """A single split of a custom ``.npy`` point cloud dataset.

    The list of files belonging to the split is obtained from
    ``dataset.get_split_list(split)``; each item is loaded lazily from disk
    when ``get_data`` is called.

    Args:
        dataset: The dataset this split belongs to. It must provide ``cfg``
            and ``get_split_list``.
        split: A string identifying the dataset split that is usually one of
            'training', 'test', 'validation', or 'all'.
    """

    def __init__(self, dataset, split='training'):
        super().__init__(dataset, split=split)
        self.cfg = dataset.cfg
        self.path_list = dataset.get_split_list(split)
        log.info("Found {} pointclouds for {}".format(len(self.path_list), split))
        self.split = split
        self.dataset = dataset

    def __len__(self):
        """Number of point cloud files in this split."""
        return len(self.path_list)

    def get_data(self, idx):
        """Load the point cloud at position ``idx`` of the split.

        Returns:
            A dict with 'point' (columns 0-2, float32), 'feat' (columns 6-8,
            float32) and 'label' (column 9, flattened int32).
        """
        cloud = np.load(self.path_list[idx])

        return {
            'point': cloud[:, :3].astype(np.float32),
            'feat': cloud[:, 6:9].astype(np.float32),
            'label': cloud[:, 9].astype(np.int32).reshape(-1),
        }

    def get_attr(self, idx):
        """Describe the point cloud at position ``idx``.

        Returns:
            A dict with the file name without '.npy' ('name'), the file path
            as a string ('path') and the split name ('split').
        """
        cloud_path = Path(self.path_list[idx])
        return {
            'name': cloud_path.name.replace('.npy', ''),
            'path': str(cloud_path),
            'split': self.split,
        }


class Custom3D(BaseDataset):
    """Custom ``.npy`` point cloud dataset for two-class apple segmentation.

    The training, validation and test point clouds are read from the
    ``train_dir``, ``val_dir`` and ``test_dir`` sub-folders of ``dataset_path``
    (these three names are taken from the dataset configuration).

    Args:
        dataset_path: The path to the dataset to use.
        name: The name of the dataset.
        cache_dir: The directory where the cache is stored.
        use_cache: Indicates if the dataset should be cached.
        num_points: The maximum number of points to use when splitting the dataset.
        ignored_label_inds: A list of labels that should be ignored in the
            dataset. ``None`` (the default) means no label is ignored.
        test_result_folder: The folder where the test results should be stored.
        **kwargs: Further configuration entries (e.g. ``train_dir``,
            ``val_dir``, ``test_dir``) forwarded to the base dataset.
    """

    def __init__(self,
                 dataset_path,
                 name='Custom3D',
                 cache_dir='./logs/cache',
                 use_cache=False,
                 num_points=65536,
                 ignored_label_inds=None,
                 test_result_folder='./test',
                 **kwargs):

        # Avoid a shared mutable default argument: build a fresh list per call.
        if ignored_label_inds is None:
            ignored_label_inds = []

        super().__init__(dataset_path=dataset_path,
                         name=name,
                         cache_dir=cache_dir,
                         use_cache=use_cache,
                         num_points=num_points,
                         ignored_label_inds=ignored_label_inds,
                         test_result_folder=test_result_folder,
                         **kwargs)

        cfg = self.cfg

        self.dataset_path = cfg.dataset_path

        self.label_to_names = self.get_label_to_names()

        self.num_classes = len(self.label_to_names)
        self.label_values = np.sort(list(self.label_to_names))
        self.label_to_idx = {l: i for i, l in enumerate(self.label_values)}
        self.ignored_labels = np.array(cfg.ignored_label_inds)

        self.train_dir = str(Path(cfg.dataset_path) / cfg.train_dir)
        self.val_dir = str(Path(cfg.dataset_path) / cfg.val_dir)
        self.test_dir = str(Path(cfg.dataset_path) / cfg.test_dir)
        # verifying the list of the directories for info
        print(self.train_dir, self.val_dir, self.test_dir)
        # glob returns files in arbitrary, filesystem-dependent order; sort so
        # the file lists are the same on every machine and every run.
        self.train_files = sorted(glob.glob(self.train_dir + "/*.npy"))
        self.val_files = sorted(glob.glob(self.val_dir + "/*.npy"))
        self.test_files = sorted(glob.glob(self.test_dir + "/*.npy"))
        # verifying the list of the loaded files for info
        print("Training Data List: " ,self.train_files, "\nValidation Data List: ", self.val_files, "\nTesting Data List: ", self.test_files)

    @staticmethod
    def get_label_to_names():
        """Returns a label to names dictionary object.
        Returns:
            A dict where keys are label numbers and
            values are the corresponding names.
        """
        label_to_names = {0: 'background', 1: 'apple'}
        return label_to_names

    def get_split(self, split):
        """Returns a dataset split.
        Args:
            split: A string identifying the dataset split that is usually one of
            'training', 'test', 'validation', or 'all'.
        Returns:
            A dataset split object providing the requested subset of the data.
        """
        return Custom3DSplit(self, split=split)

    def get_split_list(self, split):
        """Returns the list of file paths belonging to a split.

        The 'test', 'validation' and 'training' lists are shuffled in place
        with the dataset's random generator on every call; the 'all' list is
        the concatenation validation + training + test and is not shuffled.

        Args:
            split: A string identifying the dataset split that is usually one of
            'training', 'test', 'validation', or 'all'.
        Returns:
            A list with the paths of the '.npy' files of the requested split.
        Raises:
             ValueError: Indicates that the split name passed is incorrect. The
             split name should be one of 'training', 'test', 'validation', or
             'all'.
        """
        if split in ['test', 'testing']:
            self.rng.shuffle(self.test_files)
            return self.test_files
        elif split in ['val', 'validation']:
            self.rng.shuffle(self.val_files)
            return self.val_files
        elif split in ['train', 'training']:
            self.rng.shuffle(self.train_files)
            return self.train_files
        elif split in ['all']:
            files = self.val_files + self.train_files + self.test_files

            return files
        else:
            raise ValueError("Invalid split {}".format(split))

    def _test_result_path(self, name):
        """Path of the stored prediction for the datum called `name`.

        Used by both `is_tested` and `save_test_result` so that the location
        that is checked is always the location that is written.
        """
        return join(self.cfg.test_result_folder, self.name, name + '.npy')

    def is_tested(self, attr):
        """Checks if a datum in the dataset has been tested.
        Args:
            attr: The attributes of the datum; only attr['name'] is used.
        Returns:
            True if a stored result for this datum already exists,
            False otherwise.
        """
        store_path = self._test_result_path(attr['name'])
        if exists(store_path):
            print("{} already exists.".format(store_path))
            return True
        else:
            return False

    def save_test_result(self, results, attr):
        """Saves the output of a model.

        The predicted labels are stored as an integer array in
        '<test_result_folder>/<dataset name>/<datum name>.npy', the same
        location that `is_tested` looks at.

        Args:
            results: The output of a model for the datum associated with the
                attribute passed; results['predict_labels'] is saved.
            attr: The attributes that correspond to the outputs passed in results.
        """
        # The previous implementation indexed the label_to_names dict with the
        # whole prediction array, which raises "unhashable type" for arrays.
        # NOTE(review): labels are stored exactly as produced by the model; if
        # ignored labels are configured they may need remapping -- confirm.
        pred = np.asarray(results['predict_labels'])
        store_path = self._test_result_path(attr['name'])
        make_dir(dirname(store_path))
        np.save(store_path, pred)

DATASET._register_module(Custom3D)

In [None]:
# torch related import statements
import os
import random
import open3d.ml as _ml3d
import open3d.ml.torch as ml3d
# general import statements
import logging
import numpy as np
from tqdm import tqdm
from pathlib import Path
import os, sys, glob, pickle
from sklearn.neighbors import KDTree
from os.path import join, exists, dirname, abspath

In [None]:
# RandLA-Net model configuration file for S3DIS dataset
!cat Open3D-ML/ml3d/configs/randlanet_s3dis.yml
# this file is overwritten below with the %%writefile command to fit our custom dataset:
# we modify settings such as the data subset paths, the classes, the class weights, etc.

In [None]:
# checking that the previous checkpoint exists, so that training can be resumed from it after a runtime disconnect
!ls drive/MyDrive/point-cloud-prototyping/fuji-seg-logs/RandLANet_fuji-apple-norm-segmentation_torch/checkpoint/ckpt_00010.pth

drive/MyDrive/point-cloud-prototyping/fuji-seg-logs/RandLANet_fuji-apple-norm-segmentation_torch/checkpoint/ckpt_00010.pth


In [None]:
%%writefile Open3D-ML/ml3d/configs/randlanet_s3dis.yml
# Configuration for training RandLA-Net on the custom fuji apple dataset.
dataset:
  name: fuji-apple-norm-segmentation
  dataset_path: fuji-norm-dataset
  # sub-folders of dataset_path that hold the .npy files of each split
  train_dir: train
  val_dir: valid
  test_dir: test
  cache_dir: drive/MyDrive/point-cloud-prototyping/fuji-seg-logs/cache
  # left empty on purpose: the notebook notes that filling this in produces an
  # incompatible-shape error during training
  class_weights: []
  ignored_label_inds: []
  num_points: 40960
  # NOTE(review): looks like a leftover from the S3DIS template; the Custom3D
  # loader in this notebook does not read it -- confirm it can be dropped
  test_area_idx: 1
  test_result_folder: ./test
  use_cache: False
model:
  name: RandLANet
  batcher: DefaultBatcher
  # empty value, parsed as null (no checkpoint path given)
  ckpt_path: 
  num_neighbors: 16
  num_layers: 5
  num_points: 40960
  # two classes: background and apple
  num_classes: 2
  ignored_label_inds: []
  sub_sampling_ratio: [4, 4, 4, 4, 2]
  # presumably 3 coordinates + the 3 feature columns loaded per point -- confirm
  in_channels: 6
  dim_features: 8
  dim_output: [16, 64, 128, 256, 512]
  grid_size: 0.04
  augment:
    recenter:
      dim: [0, 1]
    rotate:
      method: vertical
    scale:
      min_s: 0.9
      max_s: 1.1
    noise:
      noise_std: 0.001
pipeline:
  name: SemanticSegmentation
  optimizer:
    lr: 0.001
  batch_size: 1
  # logs and checkpoints are written to Google Drive
  main_log_dir: drive/MyDrive/point-cloud-prototyping/fuji-seg-logs
  max_epoch: 50
  save_ckpt_freq: 5
  scheduler_gamma: 0.99
  test_batch_size: 3
  # this is the folder the tensorboard cell at the end of the notebook reads
  train_sum_dir: drive/MyDrive/point-cloud-prototyping/fuji-seg-logs/train-logs
  val_batch_size: 5
  summary:
    record_for: []
    max_pts:
    use_reference: false
    max_outputs: 1

Overwriting Open3D-ML/ml3d/configs/randlanet_s3dis.yml


In [None]:
# Load the edited configuration file and build the RandLA-Net model from its
# `model` section.
# NOTE: the 'class_weights' vector is left empty in the config for model
# training (known, logged bug); otherwise a [1,3] (or [1, num_classes])
# incompatible shape error is produced.
cfg_file = "Open3D-ML/ml3d/configs/randlanet_s3dis.yml"
cfg = _ml3d.utils.Config.load_from_file(cfg_file)
model = ml3d.models.RandLANet(**cfg.model)

In [None]:
# Sanity check: print the dataset and pipeline sections parsed from the
# updated config file (used by the custom data loader and the training pipeline).
print(cfg.dataset)
print(cfg.pipeline)

{'name': 'fuji-apple-norm-segmentation', 'dataset_path': 'fuji-norm-dataset', 'train_dir': 'train', 'val_dir': 'valid', 'test_dir': 'test', 'cache_dir': 'drive/MyDrive/point-cloud-prototyping/fuji-seg-logs/cache', 'class_weights': [], 'ignored_label_inds': [], 'num_points': 40960, 'test_area_idx': 1, 'test_result_folder': './test', 'use_cache': False}
{'name': 'SemanticSegmentation', 'optimizer': {'lr': 0.001}, 'batch_size': 1, 'main_log_dir': 'drive/MyDrive/point-cloud-prototyping/fuji-seg-logs', 'max_epoch': 50, 'save_ckpt_freq': 5, 'scheduler_gamma': 0.99, 'test_batch_size': 3, 'train_sum_dir': 'drive/MyDrive/point-cloud-prototyping/fuji-seg-logs/train-logs', 'val_batch_size': 5, 'summary': {'record_for': [], 'max_pts': None, 'use_reference': False, 'max_outputs': 1}}


In [None]:
# Loading the custom dataset object for model training.
# Work on a copy of the dataset config: popping 'dataset_path' directly from
# cfg.dataset would mutate the shared config, so re-running this cell would
# silently pass dataset_path=None.
dataset_cfg = dict(cfg.dataset)
dataset_path = dataset_cfg.pop('dataset_path', None)
dataset = Custom3D(dataset_path, **dataset_cfg)

fuji-norm-dataset/train fuji-norm-dataset/valid fuji-norm-dataset/test
Training Data List:  ['fuji-norm-dataset/train/data_patch_238.npy', 'fuji-norm-dataset/train/data_patch_263.npy', 'fuji-norm-dataset/train/data_patch_367.npy', 'fuji-norm-dataset/train/data_patch_281.npy', 'fuji-norm-dataset/train/data_patch_364.npy', 'fuji-norm-dataset/train/data_patch_396.npy', 'fuji-norm-dataset/train/data_patch_383.npy', 'fuji-norm-dataset/train/data_patch_262.npy', 'fuji-norm-dataset/train/data_patch_174.npy', 'fuji-norm-dataset/train/data_patch_197.npy', 'fuji-norm-dataset/train/data_patch_357.npy', 'fuji-norm-dataset/train/data_patch_402.npy', 'fuji-norm-dataset/train/data_patch_286.npy', 'fuji-norm-dataset/train/data_patch_178.npy', 'fuji-norm-dataset/train/data_patch_189.npy', 'fuji-norm-dataset/train/data_patch_328.npy', 'fuji-norm-dataset/train/data_patch_316.npy', 'fuji-norm-dataset/train/data_patch_209.npy', 'fuji-norm-dataset/train/data_patch_267.npy', 'fuji-norm-dataset/train/data_pat

In [None]:
# Exploration: verify that the point cloud data is loaded correctly.
# Get the 'all' split, which concatenates the validation, training and test file lists.
all_split = dataset.get_split('all')
print(all_split)
# print the attributes (name, path, split) of the first datum
print(all_split.get_attr(0))
# print the shape of the first point cloud's coordinate array
print(all_split.get_data(0)['point'].shape)

<__main__.Custom3DSplit object at 0x7fb70284ca30>
{'name': 'data_patch_298', 'path': 'fuji-norm-dataset/valid/data_patch_298.npy', 'split': 'all'}
(50509, 3)


In [None]:
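# updating the semseg loss file to add label smoothing and
# an additional penalty for misclassifying the apple class (the original file is printed here for reference)
# NOTE(review): if open3d.ml.torch was already imported in this runtime, the overwritten file
# presumably only takes effect after a runtime restart -- confirm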
!cat /usr/local/lib/python3.9/dist-packages/open3d/_ml3d/torch/modules/losses/semseg_loss.py

In [None]:
%%writefile /usr/local/lib/python3.9/dist-packages/open3d/_ml3d/torch/modules/losses/semseg_loss.py
import torch
import torch.nn as nn

from ....datasets.utils import DataProcessing


def filter_valid_label(scores, labels, num_classes, ignored_label_inds, device):
    """Drop the points carrying an ignored label and remap the remaining labels.

    Args:
        scores: Per-point class scores; reshaped to (-1, num_classes).
        labels: Per-point labels; flattened to one dimension.
        num_classes: Size of the last dimension of the scores.
        ignored_label_inds: Label values whose points are removed.
        device: Device the intermediate and returned tensors are moved to.

    Returns:
        A tuple (valid_scores, valid_labels): the score rows of the points that
        are kept, and their labels as int64, shifted so that they index the
        score columns once the ignored labels are taken out.
    """
    flat_scores = scores.reshape(-1, num_classes).to(device)
    flat_labels = labels.reshape(-1).to(device)

    # Flag every point whose label is one of the ignored labels.
    is_ignored = torch.zeros_like(flat_labels, dtype=torch.bool)
    for ign_label in ignored_label_inds:
        is_ignored = is_ignored | (flat_labels == ign_label)
    keep = ~is_ignored

    kept_scores = flat_scores[keep]
    kept_labels = flat_labels[keep]

    # Lookup table from original label value to score column. A placeholder 0
    # is inserted at the position of each non-negative ignored label, which
    # shifts all later labels down by one slot.
    lookup = torch.arange(0, num_classes, dtype=torch.int64)
    placeholder = torch.zeros([1], dtype=torch.int64)
    for ign_label in ignored_label_inds:
        if ign_label < 0:
            continue
        lookup = torch.cat(
            [lookup[:ign_label], placeholder, lookup[ign_label:]], 0)

    remapped_labels = torch.gather(lookup.to(device), 0, kept_labels.long())

    return kept_scores, remapped_labels


class SemSegLoss(object):
    """Holds the cross-entropy loss used for semantic segmentation.

    After construction the loss is available as
    ``self.weighted_CrossEntropyLoss``.

    Args:
        pipeline: Not used by this class.
        model: Not used by this class.
        dataset: Dataset object; ``dataset.cfg.class_weights`` is used when it
            is present and non-empty.
        device: Device on which the class-weight tensor is created.
    """

    def __init__(self, pipeline, model, dataset, device):
        super(SemSegLoss, self).__init__()
        # weighted_CrossEntropyLoss
        if 'class_weights' in dataset.cfg.keys() and len(
                dataset.cfg.class_weights) != 0:
            class_wt = DataProcessing.get_class_weights(
                dataset.cfg.class_weights)
            weights = torch.tensor(class_wt, dtype=torch.float, device=device)

            self.weighted_CrossEntropyLoss = nn.CrossEntropyLoss(weight=weights)
        else:
            # Fallback for the two-class apple task when no class weights are
            # configured: class 1 (apple) is weighted higher than class 0, and
            # label smoothing is applied.
            # The weights must live on the same device as the scores, exactly
            # like in the branch above; otherwise the loss fails on GPU with a
            # device mismatch.
            weights = torch.tensor([1.0, 1.618], dtype=torch.float, device=device)
            self.weighted_CrossEntropyLoss = nn.CrossEntropyLoss(weight=weights, ignore_index=2, label_smoothing=0.0625)

Overwriting /usr/local/lib/python3.9/dist-packages/open3d/_ml3d/torch/modules/losses/semseg_loss.py


In [None]:
# Creating the segmentation pipeline for RandLA-Net model training, using the
# model and dataset built above and the `pipeline` section of the config.
pipeline = ml3d.pipelines.SemanticSegmentation(model, dataset=dataset, device="auto", **cfg.pipeline)
# To resume from a saved checkpoint (e.g. after a runtime disconnect),
# uncomment the two lines below:
# ckpt_path = "drive/MyDrive/point-cloud-prototyping/fuji-seg-logs/RandLANet_fuji-apple-norm-segmentation_torch/checkpoint/ckpt_00010.pth"
# pipeline.load_ckpt(ckpt_path=ckpt_path)

In [None]:
# Training the model on the custom dataset configured above (from scratch,
# unless a checkpoint was loaded in the previous cell); keep the connection
# active while this runs.
pipeline.run_train()
# Known bug: adding 'class_weights' to the configuration file gives a shape error.

In [None]:
# for loading the training logs to observe the learning curve during training 
%load_ext tensorboard
%tensorboard --logdir drive/MyDrive/point-cloud-prototyping/fuji-seg-logs/train-logs