# Introduction to Point Cloud and Deep Learning

### What does our data look like?

How the data looks to us is not what matters; the key factor is how the neural network sees the incoming data. Does it mean something to the network? Does it represent some structure, symmetry, or geometry?

Let us visualize what our network is going to see.


### 1. ModelNet Data

In [1]:
### import required modules
import torch
from dataloaders.ModelNetDataLoader import ModelNetDataLoader


In [2]:
# Keep these values unchanged if you want to visualize exactly what the
# training notebook (1-Classification.ipynb) is going to see.
class Args:
    """Settings container for the ModelNet section, exposed as class attributes."""

    # --- device ---
    gpu = '0'
    use_cpu = False

    # --- model / optimisation ---
    model = 'pointnet_cls'
    optimizer = 'Adam'
    learning_rate = 0.001
    decay_rate = 1e-4
    epoch = 200
    batch_size = 24
    log_dir = 'runs'

    # --- data ---
    num_category = 40
    num_point = 1024
    use_normals = False
    use_uniform_sample = False
    process_data = False


# A single shared instance is handed to the dataset and the data loader.
args = Args()


In [3]:
### ModelNet40 Dataset (we are going to use it in Classification)

data_path = '../../data/modelnet40_normal_resampled/'

# Training split of the dataset, configured from `args`.
train_dataset = ModelNetDataLoader(
    root=data_path,
    args=args,
    split='train',
    process_data=args.process_data,
)

# Shuffled mini-batches of args.batch_size samples; the incomplete last batch is dropped.
trainDataLoader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=10,
    drop_last=True,
)

The size of train data is 9843


In [4]:
### Classification TrainDataLoader

# Pull a single mini-batch for inspection.
# The former one-pass `for` loop and `except StopIteration` retry were dead code:
# a freshly created iterator only raises StopIteration when the loader is empty,
# in which case the retry would raise again. Dropping the loop also stops the
# loop index `i` from leaking into the notebook namespace.
dataloader_iterator = iter(trainDataLoader)
data, target = next(dataloader_iterator)

In [6]:
### Get the data
import pyvista as pv
pv.set_jupyter_backend('ipygany')
import numpy as np


# Keep the tensors for shape printing and take NumPy copies/views for plotting.
pointsss = data
points = data.numpy()
class_label = target
classes = target.numpy()

print(pointsss.shape)
print(target.shape)


### Visualize the data: Choose value of i < batch_size
i = 19
points = points[i]
class_label = class_label[i]

# One scalar per point, all equal to the class id of the selected sample.
# The previous `np.ones((N, 1)) * classes` broadcast against the labels of the
# whole batch and produced an (N, batch_size) array instead of a per-point scalar.
color = np.full(points.shape[0], classes[i], dtype=float)

data_plt = pv.PolyData(points)
data_plt['color'] = color
data_plt.plot()


print(class_label)


torch.Size([24, 1024, 3])
torch.Size([24])


AppLayout(children=(VBox(children=(HTML(value='<h3>color</h3>'), Dropdown(description='Colormap:', options={'B…

tensor(4, dtype=torch.int32)


### 2. S3DIS Data



In [7]:
import torch
from dataloaders.S3DISDataLoader import S3DISDataset
import time
import numpy as np
import os

In [8]:
# Note: this rebinds the names `Args` / `args` defined in the ModelNet section.
class Args:
    """Settings container for the S3DIS section, exposed as class attributes."""

    # --- device ---
    gpu = '0'

    # --- model / optimisation ---
    model = 'pointnet_sem_seg'
    optimizer = 'Adam'
    learning_rate = 0.001
    lr_decay = 0.7
    step_size = 10
    decay_rate = 1e-4
    epoch = 32
    batch_size = 16
    log_dir = 'runs'

    # --- data ---
    npoint = 4096
    num_point = 1024  # NOTE(review): appears unused in this section; npoint is the one read later - confirm
    test_area = 5


args = Args()

In [9]:
# Expose only the GPU id(s) listed in args.gpu to this process.
# NOTE(review): presumably this has to run before CUDA is first initialised to take effect - confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

In [10]:
# Dataset location and batch geometry for the S3DIS section.
root = '../../data/stanford_indoor3d/'
NUM_CLASSES = 13
NUM_POINT = args.npoint
BATCH_SIZE = args.batch_size

print("start loading training data ...")
TRAIN_DATASET = S3DISDataset(
    split='train',
    data_root=root,
    num_point=NUM_POINT,
    test_area=args.test_area,
    block_size=1.0,
    sample_rate=1.0,
    transform=None,
)

trainDataLoader = torch.utils.data.DataLoader(
    TRAIN_DATASET,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=10,
    pin_memory=True,
    drop_last=True,
    # Seed NumPy in every worker from its worker id plus the current wall-clock second.
    worker_init_fn=lambda worker_id: np.random.seed(worker_id + int(time.time())),
)
print("The number of training data is: %d" % len(TRAIN_DATASET))

start loading training data ...


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 204/204 [00:21<00:00,  9.63it/s]

[1.1233332 1.1800324 1.        2.238213  2.337216  2.3404622 1.7047739
 2.0308683 1.8827153 3.8201103 1.7911378 2.7820194 1.343442 ]
Totally 47576 samples in train set.
The number of training data is: 47576





In [11]:
# Pull a single mini-batch for inspection.
# The former one-pass `for` loop and `except StopIteration` retry were dead code:
# a freshly created iterator only raises StopIteration when the loader is empty,
# in which case the retry would raise again. Dropping the loop also stops the
# loop index `i` from leaking into the notebook namespace.
dataloader_iterator = iter(trainDataLoader)
data, target = next(dataloader_iterator)

S3DIS class labels:
0: Ceiling, 1: Floor, 2: Wall, 3: Beam, 4: Column, 5: Window, 6: Door, 7: Table, 8: Chair, 9: Sofa, 10: Bookcase, 11: Board, 12: Clutter

In [13]:
### Get the data
import numpy as np
import pyvista as pv

pv.set_jupyter_backend('ipygany')

pointsss = data
class_labels = target
classes = class_labels.numpy()
# Keep only the first three channels of every point
# (presumably the xyz coordinates, since they are passed to PolyData - confirm).
points = data.numpy()[:, :, :3]

print(pointsss.shape)
print(target.shape)

### Visualize the data: Choose value of i < batch_size
i = 1
points = points[i]

# Per-point class ids of the selected sample are used as the colour scalars.
color = classes[i]
class_color = color

data_plt = pv.PolyData(points)
data_plt.points *= 10  # scale the coordinates by 10 before rendering

plotter = pv.Plotter()
plotter.add_mesh(data_plt, scalars=class_color)
plotter.show()

torch.Size([16, 4096, 9])
torch.Size([16, 4096])


AppLayout(children=(VBox(children=(HTML(value='<h3></h3>'), Dropdown(description='Colormap:', options={'BrBG':…

tensor(4, dtype=torch.int32)


## PointNet at a glance

In [15]:
# get_loss is imported alongside get_model but is not used in this cell.
from models.pointnet_sem_seg import get_model, get_loss

# Build the network for NUM_CLASSES output classes and move it to the GPU (requires CUDA).
classifier = get_model(NUM_CLASSES).cuda()


In [16]:
# Print the nested module structure of the network.
print(classifier)

get_model(
  (feat): PointNetEncoder(
    (stn): STN3d(
      (conv1): Conv1d(9, 64, kernel_size=(1,), stride=(1,))
      (conv2): Conv1d(64, 128, kernel_size=(1,), stride=(1,))
      (conv3): Conv1d(128, 1024, kernel_size=(1,), stride=(1,))
      (fc1): Linear(in_features=1024, out_features=512, bias=True)
      (fc2): Linear(in_features=512, out_features=256, bias=True)
      (fc3): Linear(in_features=256, out_features=9, bias=True)
      (relu): ReLU()
      (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn3): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn4): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn5): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (conv1): Conv1d(9, 64, kernel_size=(1,), stride=(1,))
    (c

#### PointNet Total Parameters

In [17]:
from torchsummary import summary
# Module.cuda() moves the parameters in place and returns the module itself,
# so this re-assignment keeps `classifier` bound to the same object.
classifier = classifier.cuda()
# Per-layer output shapes and parameter counts for an input with 9 channels and 1024 points.
summary(classifier, (9, 1024))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1             [-1, 64, 1024]             640
       BatchNorm1d-2             [-1, 64, 1024]             128
            Conv1d-3            [-1, 128, 1024]           8,320
       BatchNorm1d-4            [-1, 128, 1024]             256
            Conv1d-5           [-1, 1024, 1024]         132,096
       BatchNorm1d-6           [-1, 1024, 1024]           2,048
            Linear-7                  [-1, 512]         524,800
       BatchNorm1d-8                  [-1, 512]           1,024
            Linear-9                  [-1, 256]         131,328
      BatchNorm1d-10                  [-1, 256]             512
           Linear-11                    [-1, 9]           2,313
            STN3d-12                 [-1, 3, 3]               0
           Conv1d-13             [-1, 64, 1024]             640
      BatchNorm1d-14             [-1, 6

### T-Networks at a glance

In [18]:
from models.pointnet_utils import STN3d

#### T-Net on Input

In [19]:
# Input transform network (T-Net) built for 3-channel input.
trans = STN3d(channel=3)

In [20]:
# Print the layers of the input T-Net.
print(trans)

STN3d(
  (conv1): Conv1d(3, 64, kernel_size=(1,), stride=(1,))
  (conv2): Conv1d(64, 128, kernel_size=(1,), stride=(1,))
  (conv3): Conv1d(128, 1024, kernel_size=(1,), stride=(1,))
  (fc1): Linear(in_features=1024, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=9, bias=True)
  (relu): ReLU()
  (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn3): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn4): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn5): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)


#### Input T-Net total params

In [21]:
from torchsummary import summary
# Module.cuda() returns the module itself, so `trans_sum` is the same object as
# `trans`, now on the GPU (requires CUDA).
trans_sum = trans.cuda()
# Per-layer output shapes and parameter counts for an input with 3 channels and 1024 points.
summary(trans_sum, (3,1024))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1             [-1, 64, 1024]             256
       BatchNorm1d-2             [-1, 64, 1024]             128
            Conv1d-3            [-1, 128, 1024]           8,320
       BatchNorm1d-4            [-1, 128, 1024]             256
            Conv1d-5           [-1, 1024, 1024]         132,096
       BatchNorm1d-6           [-1, 1024, 1024]           2,048
            Linear-7                  [-1, 512]         524,800
       BatchNorm1d-8                  [-1, 512]           1,024
            Linear-9                  [-1, 256]         131,328
      BatchNorm1d-10                  [-1, 256]             512
           Linear-11                    [-1, 9]           2,313
Total params: 803,081
Trainable params: 803,081
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/

#### T-Net on Features

In [22]:
from models.pointnet_utils import STNkd

In [23]:
# Feature transform network (T-Net) with k=64; its printed last layer has 4096 = 64 * 64 outputs.
trans1 = STNkd(k=64)

In [24]:
# Print the layers of the feature T-Net.
print(trans1)

STNkd(
  (conv1): Conv1d(64, 64, kernel_size=(1,), stride=(1,))
  (conv2): Conv1d(64, 128, kernel_size=(1,), stride=(1,))
  (conv3): Conv1d(128, 1024, kernel_size=(1,), stride=(1,))
  (fc1): Linear(in_features=1024, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=4096, bias=True)
  (relu): ReLU()
  (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn3): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn4): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn5): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)


#### Features T-Net total params

In [25]:
from torchsummary import summary
# Module.cuda() returns the module itself, so `trans1_sum` is the same object as
# `trans1`, now on the GPU (requires CUDA).
trans1_sum = trans1.cuda()
# Per-layer output shapes and parameter counts for an input with 64 channels and 1024 points.
summary(trans1_sum, (64,1024))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1             [-1, 64, 1024]           4,160
       BatchNorm1d-2             [-1, 64, 1024]             128
            Conv1d-3            [-1, 128, 1024]           8,320
       BatchNorm1d-4            [-1, 128, 1024]             256
            Conv1d-5           [-1, 1024, 1024]         132,096
       BatchNorm1d-6           [-1, 1024, 1024]           2,048
            Linear-7                  [-1, 512]         524,800
       BatchNorm1d-8                  [-1, 512]           1,024
            Linear-9                  [-1, 256]         131,328
      BatchNorm1d-10                  [-1, 256]             512
           Linear-11                 [-1, 4096]       1,052,672
Total params: 1,857,344
Trainable params: 1,857,344
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.25
Forw

### The PointNet Encoder (provides the Global Features)

The encoder extracts the global features from the point cloud. However, this encoder does not consider the local features. The point features can be obtained from the feature transform network.

In the classification network, local features are ignored while in the segmentation network, the local features are concatenated with global features.

The PointNet encoder can be used for feature extraction, and these features can then be used with traditional regressors or SVM classifiers.

In [26]:
from models.pointnet_utils import PointNetEncoder

In [27]:
# Encoder configured to return global features, without the feature transform, for 3-channel input.
encoder = PointNetEncoder(global_feat=True, feature_transform=False, channel=3)

In [28]:
# Print the nested module structure of the encoder.
print(encoder)

PointNetEncoder(
  (stn): STN3d(
    (conv1): Conv1d(3, 64, kernel_size=(1,), stride=(1,))
    (conv2): Conv1d(64, 128, kernel_size=(1,), stride=(1,))
    (conv3): Conv1d(128, 1024, kernel_size=(1,), stride=(1,))
    (fc1): Linear(in_features=1024, out_features=512, bias=True)
    (fc2): Linear(in_features=512, out_features=256, bias=True)
    (fc3): Linear(in_features=256, out_features=9, bias=True)
    (relu): ReLU()
    (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn3): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn4): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn5): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv1): Conv1d(3, 64, kernel_size=(1,), stride=(1,))
  (conv2): Conv1d(64, 128, kernel_size=(1,), stride=(1,))

In [29]:
from torchsummary import summary
# Module.cuda() returns the module itself, so `encoder_sum` is the same object as
# `encoder`, now on the GPU (requires CUDA).
encoder_sum = encoder.cuda()
# Per-layer output shapes and parameter counts for an input with 3 channels and 1024 points.
summary(encoder_sum, (3,1024))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1             [-1, 64, 1024]             256
       BatchNorm1d-2             [-1, 64, 1024]             128
            Conv1d-3            [-1, 128, 1024]           8,320
       BatchNorm1d-4            [-1, 128, 1024]             256
            Conv1d-5           [-1, 1024, 1024]         132,096
       BatchNorm1d-6           [-1, 1024, 1024]           2,048
            Linear-7                  [-1, 512]         524,800
       BatchNorm1d-8                  [-1, 512]           1,024
            Linear-9                  [-1, 256]         131,328
      BatchNorm1d-10                  [-1, 256]             512
           Linear-11                    [-1, 9]           2,313
            STN3d-12                 [-1, 3, 3]               0
           Conv1d-13             [-1, 64, 1024]             256
      BatchNorm1d-14             [-1, 6