In [1]:
import torch.optim as optim
import torch
import torch.nn as nn
from tqdm import tqdm
import json
import numpy as np
import open3d as o3d


class PointCloudNet(nn.Module):
    """Regress a fixed-size vector from an unordered point cloud.

    Each point is passed through the same stack of 1x1 convolutions, the
    per-point features are max-pooled over the point axis, and the pooled
    512-d feature is mapped to ``output_dim`` values by three linear layers.

    Args:
        input_dim: Number of channels per point (default 3).
        output_dim: Length of the regressed vector (default 9).
    """

    def __init__(self, input_dim=3, output_dim=9):
        super().__init__()

        # kernel_size=1 means every point is transformed independently with
        # shared weights.
        self.conv1 = nn.Conv1d(input_dim, 64, kernel_size=1)
        self.conv2 = nn.Conv1d(64, 128, kernel_size=1)
        self.conv3 = nn.Conv1d(128, 256, kernel_size=1)
        self.conv4 = nn.Conv1d(256, 512, kernel_size=1)

        # Regression head applied to the pooled feature.
        self.fc1 = nn.Linear(512, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, output_dim)

        self.relu = nn.ReLU()
        self.global_pool = nn.AdaptiveMaxPool1d(1)

    def forward(self, x):
        """Run the network.

        Args:
            x: Tensor of shape (batch_size, n_points, input_dim).

        Returns:
            Tensor of shape (batch_size, output_dim); no activation is applied
            to the final layer.
        """
        x = x.transpose(1, 2)  # Conv1d expects (batch_size, channels, n_points)

        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        x = self.relu(self.conv3(x))
        x = self.relu(self.conv4(x))

        x = self.global_pool(x)  # (batch_size, 512, 1)
        x = x.view(x.size(0), -1)  # (batch_size, 512)

        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)

        return x

    @staticmethod
    def _match_target_shape(outputs, labels):
        """Return ``labels`` reshaped to ``outputs.shape`` when only the layout differs.

        Targets that carry an extra singleton dimension, e.g. (batch, 1, dim)
        against outputs of (batch, dim), would otherwise be broadcast by the
        loss to (batch, batch, dim) and compare every prediction with every
        target. Labels whose element count differs are returned unchanged so
        the criterion behaves exactly as it did before.
        """
        if labels.shape != outputs.shape and labels.numel() == outputs.numel():
            return labels.reshape(outputs.shape)
        return labels

    def train_model(self, train_loader, val_loader, num_epochs, criterion, optimizer):
        """Train for ``num_epochs`` epochs, printing the validation loss after each.

        Args:
            train_loader: Iterable of (inputs, labels) batches used for optimisation.
            val_loader: Iterable of (inputs, labels) batches used for evaluation only.
            num_epochs: Number of passes over ``train_loader``.
            criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
            optimizer: Optimizer holding this model's parameters.

        The printed validation loss is the sum of the per-batch losses.
        """
        for epoch in tqdm(range(num_epochs)):
            self.train()
            for inputs, labels in train_loader:
                optimizer.zero_grad()
                outputs = self(inputs)
                loss = criterion(outputs, self._match_target_shape(outputs, labels))
                loss.backward()
                optimizer.step()

            self.eval()
            val_loss = 0.0
            with torch.no_grad():  # no gradients needed while evaluating
                for inputs, labels in val_loader:
                    outputs = self(inputs)
                    loss = criterion(outputs, self._match_target_shape(outputs, labels))
                    val_loss += loss.item()

            print(f'Epoch {epoch}, Val loss: {val_loss}')


# Example usage: push one random batch through an untrained model to check
# that the forward pass runs and produces the expected output shape.
batch_size = 32
n_points = 1000
input_dim = 3

model = PointCloudNet()
input_tensor = torch.randn(batch_size, n_points, input_dim)
output = model(input_tensor)
print(output.shape)  # Expected output shape: (batch_size, 9), i.e. the default output_dim

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.
torch.Size([32, 9])


In [2]:
from scipy.spatial.transform import Rotation as R

def extract_description_from_obb(corners):
    """Describe a box given by its corners as centre, edge lengths and Euler angles.

    The three box edges meeting at the first corner are identified as the only
    mutually orthogonal triple among the vectors from that corner to the
    others. Their lengths become the dimensions and their directions the
    rotation. Of the equivalent labellings of a box's axes, the one whose
    rotation is closest to the identity is returned, so an axis-aligned box
    yields its x/y/z extents and a zero rotation.

    Args:
        corners: Array-like reshapeable to (n, 3); expected to hold the 8
            corners of a box in any order.

    Returns:
        ``((cx, cy, cz), (length, width, height), (rx, ry, rz))`` where the
        centre is the mean of the corners and the angles are extrinsic
        ``'xyz'`` Euler angles in degrees, the convention used by
        ``create_obb_from_description``.

    Raises:
        ValueError: If ``corners`` is empty.

    If no orthogonal edge triple can be found (too few points or a degenerate
    box) the function falls back to axis-aligned extents with zero rotation.
    """
    from itertools import combinations, permutations, product

    pts = np.asarray(corners, dtype=float).reshape(-1, 3)
    if pts.shape[0] == 0:
        raise ValueError("corners must contain at least one point")

    centroid = pts.mean(axis=0)

    # Vectors from the first corner to every other distinct corner.
    offsets = pts[1:] - pts[0]
    lengths = np.linalg.norm(offsets, axis=1)
    distinct = lengths > 1e-12
    offsets, lengths = offsets[distinct], lengths[distinct]
    units = offsets / lengths[:, None]

    # Among edges, face diagonals and the space diagonal, only the three edges
    # are pairwise orthogonal; pick the triple with the smallest worst-case cosine.
    edge_ids, worst_cos = None, np.inf
    for trio in combinations(range(len(units)), 3):
        cos = max(abs(units[a] @ units[b]) for a, b in combinations(trio, 2))
        if cos < worst_cos:
            edge_ids, worst_cos = trio, cos

    if edge_ids is None or worst_cos > 1e-2:
        # Not a recognisable box: report the axis-aligned extents, no rotation.
        dimensions = pts.max(axis=0) - pts.min(axis=0)
        rotation_matrix = np.eye(3)
    else:
        edge_dirs = units[list(edge_ids)]
        edge_lens = lengths[list(edge_ids)]

        # Any signed permutation of the edges with determinant +1 describes the
        # same box; keep the one with the largest trace (smallest rotation angle).
        rotation_matrix, dimensions, best_trace = None, None, -np.inf
        for order in permutations(range(3)):
            for signs in product((1.0, -1.0), repeat=3):
                # Columns of the candidate are the box axes in world coordinates.
                candidate = (edge_dirs[list(order)] * np.array(signs)[:, None]).T
                if np.linalg.det(candidate) <= 0:
                    continue  # reflections are not rotations
                trace = np.trace(candidate)
                if trace > best_trace + 1e-12:
                    rotation_matrix = candidate
                    dimensions = edge_lens[list(order)]
                    best_trace = trace

    # Extract centroid, dimensions, and rotations
    cx, cy, cz = centroid
    length, width, height = dimensions
    rx, ry, rz = R.from_matrix(rotation_matrix).as_euler('xyz', degrees=True)

    return (cx, cy, cz), (length, width, height), (rx, ry, rz)

def create_obb_from_description(centroid, dimensions, rotations):
    """Build the 8 corner points of a box from its centre, size and orientation.

    Args:
        centroid: ``(cx, cy, cz)`` centre of the box.
        dimensions: ``(length, width, height)`` full edge lengths along the
            box's local x, y and z axes.
        rotations: ``(rx, ry, rz)`` extrinsic ``'xyz'`` Euler angles in degrees.

    Returns:
        ``(8, 3)`` array of corners: the four corners of the local -z face
        first, then the four corners of the +z face.
    """
    cx, cy, cz = centroid
    length, width, height = dimensions
    rx, ry, rz = rotations

    # Half-extents along each local axis.
    half_x, half_y, half_z = length / 2, width / 2, height / 2

    # Corners of the box while it is still axis-aligned and centred at the origin.
    bottom_face = [
        [-half_x, -half_y, -half_z],
        [ half_x, -half_y, -half_z],
        [ half_x,  half_y, -half_z],
        [-half_x,  half_y, -half_z],
    ]
    top_face = [
        [-half_x, -half_y, half_z],
        [ half_x, -half_y, half_z],
        [ half_x,  half_y, half_z],
        [-half_x,  half_y, half_z],
    ]
    local_corners = np.array(bottom_face + top_face)

    # Orient the box, then move it to its centre.
    orientation = R.from_euler('xyz', [rx, ry, rz], degrees=True)
    return orientation.apply(local_corners) + np.array([cx, cy, cz])

In [3]:
# Load the samples and turn the flat coordinate lists into (n, 3) arrays.
# The context manager closes the file instead of leaving the handle open.
with open('dataset.json') as dataset_file:
    dataset = json.load(dataset_file)
for obj in dataset:
    dataset[obj]['points'] = np.array(dataset[obj]['points']).reshape(-1, 3)
    dataset[obj]['box'] = np.array(dataset[obj]['box']).reshape(-1, 3)

In [4]:
def normalize_point_cloud_and_obb(point_cloud, obb_vertices):
    """Centre and uniformly scale a point cloud, applying the same transform to its box.

    The translation is the centroid of ``point_cloud`` and the scale is the
    largest distance of any cloud point from that centroid, so the normalised
    cloud fits inside the unit sphere. The box vertices are not used to compute
    the transform; they are only mapped through it.

    Args:
        point_cloud: Array-like of shape (n, 3).
        obb_vertices: Array-like of shape (m, 3).

    Returns:
        ``(scaled_pc, scaled_obb)`` as numpy arrays.
    """
    point_cloud = np.asarray(point_cloud)
    obb_vertices = np.asarray(obb_vertices)

    # Centroid of the point cloud (not of the box).
    centroid = np.mean(point_cloud, axis=0)

    # Move both the cloud and the box so the cloud's centroid is the origin.
    centered_obb = obb_vertices - centroid
    centered_pc = point_cloud - centroid

    # Radius of the centred cloud.
    max_distance = np.max(np.linalg.norm(centered_pc, axis=1))

    # When every point coincides the radius is zero; leave the scale at 1
    # rather than dividing by zero and filling the outputs with NaN/inf.
    scale = max_distance if max_distance > 0 else 1.0

    scaled_obb = centered_obb / scale
    scaled_pc = centered_pc / scale

    return scaled_pc, scaled_obb

In [5]:
# Normalise every sample, then replace its box corners with the flat
# [centroid, dimensions, rotations] vector used as the regression target.
# Note: this rewrites the entries of `dataset` in place.

for obj in dataset.values():
    normalized = normalize_point_cloud_and_obb(obj['points'], obj['box'])
    obj['points'] = normalized[0]
    obj['box'] = normalized[1]

    centroid, dimensions, rotations = extract_description_from_obb(obj['box'])
    print(rotations)

    box = np.concatenate([centroid, dimensions, rotations])
    obj['box'] = box



(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.0, 0.0)
(0.0, 0.

In [6]:
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset over the values of a ``{key: sample}`` dictionary.

    Each sample is a mapping with a ``'points'`` entry and a ``'box'`` entry;
    both are converted to float32 tensors on access.
    """

    def __init__(self, dataset):
        # Keys are dropped; samples are addressed by position from here on.
        self.dataset = [sample for sample in dataset.values()]

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(points, box)``; ``box`` gets a leading dimension of size 1."""
        sample = self.dataset[idx]
        points = torch.tensor(sample['points'], dtype=torch.float32)
        box = torch.tensor(sample['box'], dtype=torch.float32)
        return points, box.unsqueeze(0)

In [7]:
def draw(points, box):
    """Show a point cloud together with its bounding box in an Open3D window.

    Args:
        points: Point coordinates accepted by ``o3d.utility.Vector3dVector``.
        box: Indexable whose first element is the 9-value description
            ``[centroid(3), dimensions(3), rotations(3)]``.
    """
    description = box[0]
    corners = create_obb_from_description(
        description[:3],   # centroid
        description[3:6],  # dimensions
        description[6:],   # rotations
    )

    cloud = o3d.geometry.PointCloud()
    cloud.points = o3d.utility.Vector3dVector(points)

    # Open3D fits an oriented box to the 8 reconstructed corners.
    obb = o3d.geometry.OrientedBoundingBox.create_from_points(
        o3d.utility.Vector3dVector(corners)
    )
    obb.color = (1, 0, 0)  # red

    o3d.visualization.draw_geometries([cloud, obb])

In [8]:
from sklearn.model_selection import train_test_split
# Wrap the samples and hold out 20% of them for validation. The split is
# seeded so the same samples land in each subset on every run; without it the
# validation loss is not comparable between runs.
data = Dataset(dataset)
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)
len(train_data), len(test_data)

(80, 21)

In [9]:
# Display the first sample as a (points, box) pair of tensors.
data[0]

(tensor([[ 0.0830, -0.0233,  0.0065],
         [ 0.0307, -0.0196, -0.0026],
         [ 0.0029, -0.0217,  0.0019],
         ...,
         [-0.3067, -0.0010,  0.1429],
         [-0.0510, -0.0086,  0.4314],
         [-0.1018, -0.0062,  0.0038]]),
 tensor([[ 0.0803, -0.0413, -0.0250,  1.7846,  0.2566,  1.0685,  0.0000,  0.0000,
           0.0000]]))

In [10]:
# Visual sanity check: render sample 38 with its box.
draw(*data[38])

In [14]:
# One sample per batch for both loaders; only the training loader shuffles.
# NOTE(review): batch_size=1 is presumably needed because samples have
# different numbers of points — confirm before increasing it.
loader_cls = torch.utils.data.DataLoader
train_loader = loader_cls(train_data, shuffle=True, batch_size=1)
test_loader = loader_cls(test_data, shuffle=False, batch_size=1)

In [17]:
# Fresh, untrained network for this run
model = PointCloudNet()

# Mean-squared-error regression objective, optimised with Adam
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train, reporting the loss on the held-out loader after every epoch
num_epochs = 100
model.train_model(
    train_loader,
    test_loader,
    num_epochs=num_epochs,
    criterion=criterion,
    optimizer=optimizer,
)
TODO: Use PointNet++ as the backbone.

  1%|          | 1/100 [00:00<00:58,  1.70it/s]

Epoch 0, Val loss: 4.322236713021994


  2%|▏         | 2/100 [00:01<00:58,  1.67it/s]

Epoch 1, Val loss: 4.0553891896270216


  3%|▎         | 3/100 [00:01<00:57,  1.69it/s]

Epoch 2, Val loss: 4.039977286010981


  4%|▍         | 4/100 [00:02<00:58,  1.65it/s]

Epoch 3, Val loss: 4.3987835962325335


  5%|▌         | 5/100 [00:02<00:55,  1.70it/s]

Epoch 4, Val loss: 4.856437522917986


  6%|▌         | 6/100 [00:03<00:55,  1.69it/s]

Epoch 5, Val loss: 4.8407608941197395


  7%|▋         | 7/100 [00:04<00:55,  1.69it/s]

Epoch 6, Val loss: 3.7917432822287083


  8%|▊         | 8/100 [00:04<00:55,  1.67it/s]

Epoch 7, Val loss: 4.073192936368287


  9%|▉         | 9/100 [00:05<00:57,  1.57it/s]

Epoch 8, Val loss: 4.1452627228572965


 10%|█         | 10/100 [00:06<01:00,  1.50it/s]

Epoch 9, Val loss: 3.690708543173969


 11%|█         | 11/100 [00:06<01:01,  1.44it/s]

Epoch 10, Val loss: 4.123603390529752


 12%|█▏        | 12/100 [00:07<01:00,  1.46it/s]

Epoch 11, Val loss: 3.8690594099462032


 13%|█▎        | 13/100 [00:08<01:04,  1.36it/s]

Epoch 12, Val loss: 3.4088249425403774


 14%|█▍        | 14/100 [00:09<01:06,  1.29it/s]

Epoch 13, Val loss: 3.5049380399286747


 14%|█▍        | 14/100 [00:10<01:02,  1.38it/s]


KeyboardInterrupt: 

In [14]:
# Display sample 20 as a (points, box) pair of tensors.
data[20]

(tensor([[-0.0696,  0.2045,  0.5444],
         [ 0.0677,  0.1841,  0.5366],
         [ 0.4748, -0.8714, -0.1235],
         [ 0.4535, -0.8574, -0.1840],
         [-0.0831,  0.2040,  0.6457],
         [ 0.0690,  0.1841,  0.6429],
         [ 0.2478, -0.4939, -0.0447],
         [ 0.1023, -0.2801,  0.0536],
         [-0.0401, -0.2186,  0.0734],
         [-0.0021, -0.5592, -0.0832],
         [-0.2359,  0.2864,  0.6172],
         [ 0.3210, -0.6023, -0.1964],
         [-0.2326,  0.3065,  0.5662],
         [-0.0333, -0.1604,  0.1082],
         [ 0.0577, -0.1751,  0.1093],
         [ 0.1078,  0.0517,  0.4179],
         [ 0.3151, -0.5975, -0.1408],
         [-0.0048,  0.0986,  0.4082],
         [-0.0440, -0.0502,  0.0692],
         [ 0.0542, -0.0641,  0.0660],
         [ 0.2524, -0.3195, -0.1716],
         [ 0.1591, -0.1089, -0.1709],
         [-0.2506,  0.0429, -0.0371],
         [ 0.3504, -0.3053, -0.2211],
         [-0.1644,  0.2775,  0.0275],
         [ 0.2457, -0.4271, -0.1717],
         [ 0