## PointNet tests

In [4]:
import matplotlib.pyplot as plt
import torch
import open3d as o3d
import numpy as np
import plotly.graph_objs as go

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [5]:
from learning3d.data_utils import ClassificationData, ModelNet40Data
from torch.utils.data import DataLoader

testset = ClassificationData(ModelNet40Data(train=False))
test_loader = DataLoader(testset, batch_size=32, shuffle=False, drop_last=False, num_workers=4)

In [6]:
def vis(point_cloud_o3d):
    
    print("THE INPUT POINT CLOUD: ", point_cloud_o3d)

    # Convert to NumPy array
    point_cloud_np = np.asarray(point_cloud_o3d.points)

    # Create a Scatter3d trace
    trace = go.Scatter3d(
        x=point_cloud_np[:, 0], y=point_cloud_np[:, 1], z=point_cloud_np[:, 2],
        mode='markers',
        marker=dict(
            size=5,
            opacity=0.8,
        )
    )
    
    # Create the figure and visualize it
    fig = go.Figure(data=[trace])
    fig.show()

In [8]:
for i, data in enumerate(test_loader):
    points, target = data
    target = target[:,0]
    # Save the point cloud image to a file
    points_np = points.detach().cpu().numpy()
    print(points_np.shape)
    # Combine all point clouds in the batch into one for saving
    batch_point_cloud = o3d.geometry.PointCloud()
    i = 0
    for p in points_np[1:]:
        single_point_cloud = o3d.geometry.PointCloud()
        single_point_cloud.points = o3d.utility.Vector3dVector(p[:, :3])  # Assuming x, y, z coordinates are the first three columns
        batch_point_cloud += single_point_cloud
        print("Ground Truth Label: ", testset.get_shape(target[i].item()))
        i += 1
        vis(single_point_cloud)
        break
    break

(32, 1024, 3)
Ground Truth Label:  range_hood
THE INPUT POINT CLOUD:  PointCloud with 1024 points.


## Geometry Search Demo

In [5]:
# 1. Create embeddings for point clouds
# 2. Create joint embeddings for point clouds and texts
# 3. process the user query
# 4. find the nearest neighbours by embedding distance using faiss library
# 5. visualize the results

### PointCloud embeddings and retrieval

1. Test different embedding models.
2. PointGPT model
3. Apply it on on online dataset objects
4. visualize the results in a projector

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Pooling(torch.nn.Module):
	def __init__(self, pool_type='max'):
		self.pool_type = pool_type
		super(Pooling, self).__init__()

	def forward(self, input):
		if self.pool_type == 'max':
			return torch.max(input, 2)[0].contiguous()
		elif self.pool_type == 'avg' or self.pool_type == 'average':
			return torch.mean(input, 2).contiguous()


class PointNet(torch.nn.Module):
    def __init__(self, emb_dims=1024, input_shape="bnc", use_bn=False, global_feat=True):
        print("PointNet Init")
		# emb_dims:			Embedding Dimensions for PointNet.
		# input_shape:		Shape of Input Point Cloud (b: batch, n: no of points, c: channels)
        super(PointNet, self).__init__()
        if input_shape not in ["bcn", "bnc"]:
            raise ValueError("Allowed shapes are 'bcn' (batch * channels * num_in_points), 'bnc' ")
        self.input_shape = input_shape
        self.emb_dims = emb_dims
        self.use_bn = use_bn
        self.global_feat = global_feat
        self.pooling = Pooling('max')

        self.layers = self.create_structure()

    def create_structure(self):
        self.conv1 = torch.nn.Conv1d(3, 64, 1)
        self.conv2 = torch.nn.Conv1d(64, 64, 1)
        self.conv3 = torch.nn.Conv1d(64, 64, 1)
        self.conv4 = torch.nn.Conv1d(64, 128, 1)
        self.conv5 = torch.nn.Conv1d(128, self.emb_dims, 1)
        self.relu = torch.nn.ReLU()

        if self.use_bn:
            self.bn1 = torch.nn.BatchNorm1d(64)
            self.bn2 = torch.nn.BatchNorm1d(64)
            self.bn3 = torch.nn.BatchNorm1d(64)
            self.bn4 = torch.nn.BatchNorm1d(128)
            self.bn5 = torch.nn.BatchNorm1d(self.emb_dims)

        if self.use_bn:
            layers = [self.conv1, self.bn1, self.relu,
                        self.conv2, self.bn2, self.relu,
                        self.conv3, self.bn3, self.relu,
                        self.conv4, self.bn4, self.relu,
                        self.conv5, self.bn5, self.relu]
        else:
            layers = [self.conv1, self.relu,
                        self.conv2, self.relu, 
                        self.conv3, self.relu,
                        self.conv4, self.relu,
                        self.conv5, self.relu]
        return layers


    def forward(self, input_data):
        print("Input Shape: ", input_data.shape)
        # input_data: 		Point Cloud having shape input_shape.
        # output:			PointNet features (Batch x emb_dims)
        if self.input_shape == "bnc":
            num_points = input_data.shape[1]
            input_data = input_data.permute(0, 2, 1)
        else:
            num_points = input_data.shape[2]
        if input_data.shape[1] != 3:
            raise RuntimeError("shape of x must be of [Batch x 3 x NumInPoints]")

        output = input_data
        for idx, layer in enumerate(self.layers):
            output = layer(output)
            print("Layer: ", layer, " Output Shape: ", output.shape)
            if idx == 1 and not self.global_feat: point_feature = output

        output = self.pooling(output)
        return output


In [7]:
model = PointNet()

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()

print(model.emb_dims)
print(model)

PointNet Init
1024
PointNet(
  (pooling): Pooling()
  (conv1): Conv1d(3, 64, kernel_size=(1,), stride=(1,))
  (conv2): Conv1d(64, 64, kernel_size=(1,), stride=(1,))
  (conv3): Conv1d(64, 64, kernel_size=(1,), stride=(1,))
  (conv4): Conv1d(64, 128, kernel_size=(1,), stride=(1,))
  (conv5): Conv1d(128, 1024, kernel_size=(1,), stride=(1,))
  (relu): ReLU()
)


In [8]:
def get_embedding(point_cloud, model):
    # point_cloud to tensor
    point_cloud = torch.tensor(point_cloud, dtype=torch.float32)
    point_cloud = point_cloud.unsqueeze(0).to(device)
    print(point_cloud.shape)
    embedding = model(point_cloud)
    return embedding

In [None]:
for i, data in enumerate(test_loader):
    points, target = data
    target = target[:,0]
    # Save the point cloud image to a file
    points_np = points.detach().cpu().numpy()
    print(points_np.shape)
    # Combine all point clouds in the batch into one for saving
    batch_point_cloud = o3d.geometry.PointCloud()
    i = 0
    for p in points_np:
        # get the embedding of p
        embedding = get_embedding(p, model)
        print(embedding.shape)
        single_point_cloud = o3d.geometry.PointCloud()
        single_point_cloud.points = o3d.utility.Vector3dVector(p[:, :3])  # Assuming x, y, z coordinates are the first three columns
        batch_point_cloud += single_point_cloud
        print("Ground Truth Label: ", testset.get_shape(target[i].item()))
        i += 1
        vis(single_point_cloud)
        break
    break

### Original PointNet model

In [None]:
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F

class Tnet(nn.Module):
   def __init__(self, k=3):
      super().__init__()
      self.k=k
      self.conv1 = nn.Conv1d(k,64,1)
      self.conv2 = nn.Conv1d(64,128,1)
      self.conv3 = nn.Conv1d(128,1024,1)
      self.fc1 = nn.Linear(1024,512)
      self.fc2 = nn.Linear(512,256)
      self.fc3 = nn.Linear(256,k*k)

      self.bn1 = nn.BatchNorm1d(64)
      self.bn2 = nn.BatchNorm1d(128)
      self.bn3 = nn.BatchNorm1d(1024)
      self.bn4 = nn.BatchNorm1d(512)
      self.bn5 = nn.BatchNorm1d(256)
       

   def forward(self, input):
      # input.shape == (bs,n,3)
      bs = input.size(0)
      xb = F.relu(self.bn1(self.conv1(input)))
      xb = F.relu(self.bn2(self.conv2(xb)))
      xb = F.relu(self.bn3(self.conv3(xb)))
      pool = nn.MaxPool1d(xb.size(-1))(xb)
      flat = nn.Flatten(1)(pool)
      xb = F.relu(self.bn4(self.fc1(flat)))
      xb = F.relu(self.bn5(self.fc2(xb)))
      
      #initialize as identity
      init = torch.eye(self.k, requires_grad=True).repeat(bs,1,1)
      if xb.is_cuda:
        init=init.cuda()
      matrix = self.fc3(xb).view(-1,self.k,self.k) + init
      return matrix


class Transform(nn.Module):
   def __init__(self):
        super().__init__()
        self.input_transform = Tnet(k=3)
        self.feature_transform = Tnet(k=64)
        self.conv1 = nn.Conv1d(3,64,1)

        self.conv2 = nn.Conv1d(64,128,1)
        self.conv3 = nn.Conv1d(128,1024,1)
       

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
       
   def forward(self, input):
        matrix3x3 = self.input_transform(input)
        # batch matrix multiplication
        xb = torch.bmm(torch.transpose(input,1,2), matrix3x3).transpose(1,2)

        xb = F.relu(self.bn1(self.conv1(xb)))

        matrix64x64 = self.feature_transform(xb)
        xb = torch.bmm(torch.transpose(xb,1,2), matrix64x64).transpose(1,2)

        xb = F.relu(self.bn2(self.conv2(xb)))
        xb = self.bn3(self.conv3(xb))
        xb = nn.MaxPool1d(xb.size(-1))(xb)
        output = nn.Flatten(1)(xb)
        return output, matrix3x3, matrix64x64

class PointNet(nn.Module):
    def __init__(self, classes = 10, emb_dim=1024):
        super().__init__()
        self.transform = Transform()
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, classes)
        

        self.bn1 = nn.BatchNorm1d(512)
        self.bn2 = nn.BatchNorm1d(256)
        self.dropout = nn.Dropout(p=0.3)
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, input):
        input = input.permute(0, 2, 1)
        xb, matrix3x3, matrix64x64 = self.transform(input)
      #   xb = F.relu(self.bn1(self.fc1(xb)))
      #   xb = F.relu(self.bn2(self.dropout(self.fc2(xb))))
        output = xb
        return output
      #   output = self.fc3(xb)
      #   return self.logsoftmax(output), matrix3x3, matrix64x64

In [8]:
model = PointNet()

# Load the model state_dict from the file
model_path = '/home/hussein_younes_microsurgeonbot_c/asset-search/learning3d/pretrained/save.pth'
model.load_state_dict(torch.load(model_path))

# add the model to the device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Put the model in evaluation mode
model.eval()

PointNet(
  (transform): Transform(
    (input_transform): Tnet(
      (conv1): Conv1d(3, 64, kernel_size=(1,), stride=(1,))
      (conv2): Conv1d(64, 128, kernel_size=(1,), stride=(1,))
      (conv3): Conv1d(128, 1024, kernel_size=(1,), stride=(1,))
      (fc1): Linear(in_features=1024, out_features=512, bias=True)
      (fc2): Linear(in_features=512, out_features=256, bias=True)
      (fc3): Linear(in_features=256, out_features=9, bias=True)
      (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn3): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn4): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn5): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (feature_transform): Tnet(
      (conv1): Conv1d(64, 64, kernel_size=(1,

In [10]:
# apply the model on a point cloud from the testset
embeddings = []
i = 0

for idx, data in enumerate(test_loader):
    
    if i > 500:
        break
    
    points, target = data
    target = target[:,0]
    # Save the point cloud image to a file
    points_np = points.detach().cpu().numpy()
    print(points_np.shape)
    # Combine all point clouds in the batch into one for saving
    batch_point_cloud = o3d.geometry.PointCloud()
    
    for p in points_np:
        # get the embedding of p
        point_cloud = torch.tensor(p, dtype=torch.float32)
        point_cloud = point_cloud.unsqueeze(0).to(device)
        embedding = model(point_cloud)
        embeddings.append(embedding)
        single_point_cloud = o3d.geometry.PointCloud()
        single_point_cloud.points = o3d.utility.Vector3dVector(p[:, :3])  # Assuming x, y, z coordinates are the first three columns
        batch_point_cloud += single_point_cloud
        print("Ground Truth Label: ", testset.get_shape(target[idx].item()))
        i += 1

(32, 1024, 3)
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range

In [11]:
def get_image(point_cloud_o3d):
    
    point_cloud_np = np.asarray(point_cloud_o3d.points)

    # Create a Scatter3d trace
    trace = go.Scatter3d(
        x=point_cloud_np[:, 0], y=point_cloud_np[:, 1], z=point_cloud_np[:, 2],
        mode='markers',
        marker=dict(
            size=5,
            opacity=0.8,
        )
    )    
    
    fig = go.Figure(data=[trace])
    
    # convert to image
    img_bytes = fig.to_image(format="png")
    
    return img_bytes

In [25]:
# get the first data point from test_loader
points, target = next(iter(test_loader))
single_point_cloud = o3d.geometry.PointCloud()
single_point_cloud.points = o3d.utility.Vector3dVector(points[0][:, :3])
img_bytes = get_image(single_point_cloud)

# read the image with PIL
from PIL import Image
import io

img = Image.open(io.BytesIO(img_bytes))
# get the dimensions of the image
img.size

(700, 500)

In [24]:
# apply the model on a point cloud from the testset
images = []
i = 0

for idx, data in enumerate(test_loader):
    print(i)
    
    if i > 500:
        break
    
    points, target = data
    target = target[:,0]
    # Save the point cloud image to a file
    points_np = points.detach().cpu().numpy()
    print(points_np.shape)
    # Combine all point clouds in the batch into one for saving
    batch_point_cloud = o3d.geometry.PointCloud()
    
    for p in points_np:
        i += 1
        # get the embedding of p
        single_point_cloud = o3d.geometry.PointCloud()
        single_point_cloud.points = o3d.utility.Vector3dVector(p[:, :3])  # Assuming x, y, z coordinates are the first three columns
        batch_point_cloud += single_point_cloud
        images.append(get_image(single_point_cloud))
        print("Ground Truth Label: ", testset.get_shape(target[idx].item()))

0
(32, 1024, 3)
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  range_hood
Ground Truth Label:  ran

In [20]:
images = images[:738]
len(images)

738

In [23]:
type(images[1])

plotly.graph_objs._figure.Figure