# Overall flow 

1. Get the ground truth bounding boxes

2. Find the dimensions of the scene / the 8 corners of the scene

3. With that, find the center of the scene

4. Partition the scene into **n** spatial zones: divide the length, breadth and height by **2n** to get the per-zone increment outwards from the center along each axis.
    * Output: array of shape (n, 3, 2), where n is the number of zones, 3 is the number of axes, and 2 holds the upper and lower bounds for each zone.

5. Check which zone a GT bounding box falls under
    * Input: center of a GT bounding box
    * Output: int indicating the zone

6.  For a scene:
    * For each GT bounding box in the scene:
        * Get the center of the bounding box
        * Call function to find which zone the GT bounding box falls under
        * Have a mapping of Zone: GT bounding boxes

7. Plot lines that demarcate the n spatial zones in a scene.

8. Plot the pcd and GT bounding boxes to verify that the allocation of spatial zones is accurate.

## Getting the GT Bounding boxes

In [3]:
import numpy as np
import matplotlib.pyplot as plt
from scannet_val import ScannetValDataset
import open3d as o3d
import os

# Class names in label order: an integer semantic class label is turned into
# a name by indexing this list.
classes = [
    'bathtub',
    'bed',
    'bookshelf',
    'cabinet',
    'chair',
    'counter',
    'curtain',
    'desk',
    'door',
    'otherfurniture',
    'picture',
    'refrigerator',
    'showercurtain',
    'sink',
    'sofa',
    'table',
    'toilet',
    'window',
]

# Reference box size per class name: one length-3 float array per class.
# NOTE(review): presumably mean (x, y, z) extents in metres that a per-box
# size residual is added to -- confirm axis order and units against the
# dataset code.
box_sizes = {'cabinet': np.array([0.76966726, 0.81160211, 0.92573741]),
                       'bed': np.array([1.876858, 1.84255952, 1.19315654]),
                       'chair': np.array([0.61327999, 0.61486087, 0.71827014]),
                       'sofa': np.array([1.39550063, 1.51215451, 0.83443565]),
                       'table': np.array([0.97949596, 1.06751485, 0.63296875]),
                       'door': np.array([0.53166301, 0.59555772, 1.75001483]),
                       'window': np.array([0.96247056, 0.72462326, 1.14818682]),
                       'bookshelf': np.array([0.83221924, 1.04909355, 1.68756634]),
                       'picture': np.array([0.21132214, 0.4206159 , 0.53728459]),
                       'counter': np.array([1.44400728, 1.89708334, 0.26985747]),
                       'desk': np.array([1.02942616, 1.40407966, 0.87554322]),
                       'curtain': np.array([1.37664116, 0.65521793, 1.68131292]),
                       'refrigerator': np.array([0.66508189, 0.71111926, 1.29885307]),
                       'showercurtain': np.array([0.41999174, 0.37906947, 1.75139715]),
                       'toilet': np.array([0.59359559, 0.59124924, 0.73919014]),
                       'sink': np.array([0.50867595, 0.50656087, 0.30136236]),
                       'bathtub': np.array([1.15115265, 1.0546296 , 0.49706794]),
                       'otherfurniture': np.array([0.47535286, 0.49249493, 0.58021168])
                        }

# Palette: colour name -> [r, g, b] with each component a float in 0.0..1.0.
# The names are the lookup keys used by the class -> colour-name table.
color_name_to_rgb = {
    'red': [1.0, 0.0, 0.0],
    'green': [0.0, 1.0, 0.23529411764705882],
    'blue': [0.0, 0.4705882352941176, 1.0],
    'purple': [0.7058823529411765, 0.0, 1.0],
    'orange': [1.0, 0.23529411764705882, 0.0],
    'lime': [0.23529411764705882, 1.0, 0.0],
    'cyan': [0.0, 0.7058823529411765, 1.0],
    'magenta': [0.4705882352941176, 0.0, 1.0],
    'pink': [1.0, 0.0, 0.4705882352941176],
    'yellow': [0.4705882352941176, 1.0, 0.0],
    'skyblue': [0.0, 0.9411764705882353, 1.0],
    'indigo': [0.23529411764705882, 0.0, 1.0],
    'hotpink': [1.0, 0.0, 0.7058823529411765],
    'chartreuse': [0.7058823529411765, 1.0, 0.0],
    'aquamarine': [0.0, 1.0, 0.7058823529411765],
    'royalblue': [0.0, 0.23529411764705882, 1.0],
    'violet': [0.9411764705882353, 0.0, 1.0],
    'gold': [1.0, 0.4705882352941176, 0.0],
    'black': [0.0, 0.0, 0.0],  # not a vivid hue like the others; assigned to 'bathtub'
}

# Display colour for each object class, given as a colour *name*; the name is
# resolved to an RGB triple through the colour-name palette.
class_to_color_name = dict(
    bathtub='black',
    bed='green',
    bookshelf='blue',
    cabinet='purple',
    chair='orange',
    counter='lime',
    curtain='cyan',
    desk='magenta',
    door='pink',
    otherfurniture='yellow',
    picture='skyblue',
    refrigerator='indigo',
    showercurtain='hotpink',
    sink='chartreuse',
    sofa='aquamarine',
    table='royalblue',
    toilet='violet',
    window='gold',
)
    

def flip_axis_to_camera(pc):
    ''' Convert points from the depth frame (X-right, Y-forward, Z-up) to the
    camera frame (X-right, Y-down, Z-forward): cam X,Y,Z = depth X,-Z,Y.
    Input and output are both (N,3) array; the input is left untouched.
    '''
    cam = np.array(pc, copy=True)
    depth_y = cam[..., 1].copy()   # keep the old Y before it is overwritten
    cam[..., 1] = cam[..., 2]      # camera Y takes depth Z ...
    cam[..., 2] = depth_y          # ... and camera Z takes depth Y
    cam[..., 1] *= -1              # camera Y points down, so negate it
    return cam

def flip_axis_to_depth(pc):
    ''' Swap the Y and Z columns of the points and negate the new Z:
    depth X,Y,Z = cam X,Z,-Y.
    Returns a new array; the input is left untouched.
    '''
    depth = np.array(pc, copy=True)
    cam_y = depth[..., 1].copy()   # keep the old Y before it is overwritten
    depth[..., 1] = depth[..., 2]  # depth Y takes camera Z ...
    depth[..., 2] = cam_y          # ... and depth Z takes camera Y
    depth[..., 2] *= -1            # then the new Z is negated
    return depth

# Function to create a bounding box LineSet from center, heading, and size
def create_bbox(center, heading, size, color):
    ''' Build an Open3D LineSet wireframe for a rotated 3D box.

    Input:
        center: length-3 box center
        heading: rotation angle in radians (fed to np.cos / np.sin)
        size: length-3 full extents of the box along its local axes
        color: RGB triple applied to every edge
    Output:
        o3d.geometry.LineSet with 8 corner points and 12 coloured edges
    '''
    # Corner offsets from the box center: sign pattern times half extents.
    # Rows 0-3 are the local -z face, rows 4-7 the local +z face.
    half_extents = np.asarray(size) / 2.0
    corner_signs = np.array([
        [-1, -1, -1],
        [ 1, -1, -1],
        [ 1,  1, -1],
        [-1,  1, -1],
        [-1, -1,  1],
        [ 1, -1,  1],
        [ 1,  1,  1],
        [-1,  1,  1],
    ])
    offsets = corner_signs * half_extents

    # Rotate the offsets by the heading angle.
    # NOTE(review): this matrix rotates about the X axis; confirm that this is
    # the axis the heading angle is defined around for the frame being plotted.
    cos_theta = np.cos(heading)
    sin_theta = np.sin(heading)
    rotation_matrix = np.array([
        [1, 0, 0],
        [0, cos_theta, -sin_theta],
        [0, sin_theta, cos_theta]
    ])
    rotated_offsets = offsets @ rotation_matrix.T

    # Translate to the box center. The offsets are ADDED: subtracting them
    # point-reflects the box, which draws the same wireframe but puts vertices
    # 0-3 on the +z side, contradicting the face labels on the edge list below.
    corners = np.asarray(center) + rotated_offsets

    # Create LineSet
    lines = [
        [0, 1], [1, 2], [2, 3], [3, 0],  # Bottom face
        [4, 5], [5, 6], [6, 7], [7, 4],  # Top face
        [0, 4], [1, 5], [2, 6], [3, 7]   # Vertical lines
    ]
    colors = [color] * len(lines)  # Same caller-supplied color for all lines
    line_set = o3d.geometry.LineSet(
        points=o3d.utility.Vector3dVector(corners),
        lines=o3d.utility.Vector2iVector(lines),
    )
    line_set.colors = o3d.utility.Vector3dVector(colors)

    return line_set


# Function to create a bounding box LineSet from corners
def create_bbox_from_corners(corners, color):
    ''' Build an Open3D LineSet wireframe from 8 precomputed box corners.

    Input:
        corners: 8 corner points; the edge list below indexes them as
                 0-3 (one face) and 4-7 (the opposite face)
        color: RGB triple applied to every edge
    Output:
        o3d.geometry.LineSet with 12 edges, all in the same color
    '''
    bottom_face = [[0, 1], [1, 2], [2, 3], [3, 0]]
    top_face = [[4, 5], [5, 6], [6, 7], [7, 4]]
    verticals = [[0, 4], [1, 5], [2, 6], [3, 7]]
    edges = bottom_face + top_face + verticals

    wireframe = o3d.geometry.LineSet(
        points=o3d.utility.Vector3dVector(corners),
        lines=o3d.utility.Vector2iVector(edges),
    )
    # One color entry per edge, all identical
    wireframe.colors = o3d.utility.Vector3dVector([color] * len(edges))

    return wireframe

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [8]:
def get_gt_bboxes(img_id):
    ''' Collect the center and class name of every GT bounding box in a scene.

    Input:
        img_id: index into ScannetValDataset (and into its scan_names list)
    Output:
        list of (index, center, classname) tuples, one per entry found in
        ../val_pred_bboxes/<scanname>_gt.npy
    '''
    # NOTE: the dataset is rebuilt on every call.
    test_dataset = ScannetValDataset(all_classes=True,
                                    num_novel_class=9,
                                    num_points=40000,
                                    augment=False)

    data = test_dataset[img_id]
    center_label = data["center_label"]
    sem_cls_label = data["sem_cls_label"]

    # Depth -> camera -> depth round trip; only the resulting shape is reported.
    pcd_flipped = flip_axis_to_depth(flip_axis_to_camera(data["point_clouds"]))
    print("pcd_flipped shape: ", pcd_flipped.shape)

    # The GT file is used only to learn how many boxes the scene has.
    scanname = test_dataset.scan_names[img_id]
    gt_filepath = f"../val_pred_bboxes/{scanname}_gt.npy"
    print(gt_filepath)

    # allow_pickle is needed because the file holds per-box dict objects;
    # unpickling can execute code, so only load files from a trusted source.
    gt = np.load(gt_filepath, allow_pickle=True)
    num_gt_bboxes = len(gt)
    print("No. of GT bboxes: ", num_gt_bboxes)

    # NOTE(review): assumes the first num_gt_bboxes rows of the dataset label
    # arrays describe the same boxes as the GT file -- confirm.
    return [
        (i, center_label[i], classes[sem_cls_label[i]])
        for i in range(num_gt_bboxes)
    ]
    
    
    
    

In [9]:
# Collect the GT box centers for dataset index 46.
centers = get_gt_bboxes(46)     # List( (index, center, classname) )

VALIDATION_DATA_PATH: /home/peisheng/SDCoT/scannet/scannet_val_detection_data
Testing classes: ['bathtub', 'bed', 'bookshelf', 'cabinet', 'chair', 'counter', 'curtain', 'desk', 'door', 'otherfurniture', 'picture', 'refrigerator', 'showercurtain', 'sink', 'sofa', 'table', 'toilet', 'window'] | number of scenes: 312
pcd_flipped shape:  (40000, 3)
../val_pred_bboxes/scene0146_00_gt.npy
No. of GT bboxes:  4


## Finding the dimensions and center of the scene

In [18]:
def get_scene_dimensions_center(img_id):
    ''' Measure the axis-aligned extent of a scene's point cloud, print it,
    and plot the cloud with a red sphere marking the scene center.

    Input:
        img_id: index into ScannetValDataset
    Output:
        (dimensions, center): per-axis extent (max - min) and the midpoint
        between the per-axis minimum and maximum
    '''
    dataset = ScannetValDataset(all_classes=True,
                                num_novel_class=9,
                                num_points=40000,
                                augment=False)

    points = flip_axis_to_depth(flip_axis_to_camera(dataset[img_id]["point_clouds"]))

    # Axis-aligned bounds of the cloud
    lower = points.min(axis=0)
    upper = points.max(axis=0)

    # Extent along each axis (length, breadth, height)
    dimensions = upper - lower
    x, y, z = dimensions
    print(f"x: {x}")
    print(f"y: {y}")
    print(f"z: {z}")

    # Midpoint of the bounds
    center = (upper + lower) / 2
    print(f"Center: {center}")

    # Point cloud to display
    cloud = o3d.geometry.PointCloud()
    cloud.points = o3d.utility.Vector3dVector(points)

    # Red sphere of radius 0.1 placed at the scene center
    marker = o3d.geometry.TriangleMesh.create_sphere(radius=0.1)
    marker.translate(center)
    marker.paint_uniform_color([1, 0, 0])

    o3d.visualization.draw_plotly([cloud, marker])

    return dimensions, center

In [19]:
# Bare call as the last expression of the cell: the returned
# (dimensions, center) tuple is echoed as the cell's output.
get_scene_dimensions_center(46)

VALIDATION_DATA_PATH: /home/peisheng/SDCoT/scannet/scannet_val_detection_data
Testing classes: ['bathtub', 'bed', 'bookshelf', 'cabinet', 'chair', 'counter', 'curtain', 'desk', 'door', 'otherfurniture', 'picture', 'refrigerator', 'showercurtain', 'sink', 'sofa', 'table', 'toilet', 'window'] | number of scenes: 312
x: 2.0394976139068604
y: 3.0807061195373535
z: 2.0108137130737305
Center: [ 0.0605692  -0.12161553  0.96765846]


(array([2.0394976, 3.0807061, 2.0108137], dtype=float32),
 array([ 0.0605692 , -0.12161553,  0.96765846], dtype=float32))