Format of the file:
```
{
  "scenes": [
    {
      "scene_id": "XXXXXXXXX",
      "scene_matrix": [20, 309], #  --> (text_emb, location, euler_angles, sizes)
      "graph_objects": [20, 300], # --> (text_emb)
      "graph_edges": [2, 50], # --> (o1, o2)
      "graph_relationships": [50] # --> int
    },
    {
      "scene_id": "YYYYYYYYYYY",
      ...
    }
    ...
  ]
}

```

In [7]:
import os
import json
import numpy as np
from scipy.spatial.transform import Rotation
#from text_encoder.py import FastTextEncoder

In [18]:
# Specify the file path where you want to save the JSON data
save_path = 'path/to/your/file.json'

# File path to scene folders
path_to_scene_folders = '3RScan/data/train'
path_to_relationships = '3RScan/data/relationships.json'

We filter:
* max number of objects per scene = 20
* allowed object labels: 
('chair', 'pillow', 'box', 'shelf', 'lamp', 'table', 'door', 'curtain', 'picture', 'cabinet', 'bag', 'light', 'armchair', 'clothes', 'stool', 'kitchen cabinet', 'towel', 'sink', 'blanket', 'commode', 'trash can', 'heater', 'wardrobe', 'bed', 'bench', 'desk', 'sofa', 'monitor', 'basket', 'cushion', 'tv', 'nightstand', 'coffee table', 'mirror', 'bath cabinet', 'rack', 'toilet', 'kitchen counter', 'shoes', 'radiator', 'clutter', 'frame', 'decoration', 'backpack', 'stand', 'bucket', 'counter', 'couch', 'kitchen appliance', 'pc', 'stove', 'tv stand', 'vase', 'side table', 'clothes dryer', 'showcase', 'plate', 'oven', 'refrigerator', 'flower', 'book', 'washing machine', 'plank', 'pillar', 'clock', 'candle', 'bottle', 'telephone', 'bin', 'microwave', 'puf', 'couch table', 'whiteboard', 'laptop', 'shower', 'toilet paper', 'bowl', 'dining chair', 'cupboard', 'roll', 'suitcase', 'desk chair', 'bathtub', 'stairs', 'organizer', 'shower curtain', 'pipe', 'bookshelf', 'bedside table', 'printer', 'boxes', 'toilet brush', 'kitchen towel', 'laundry basket', 'kettle', 'pack', 'stuffed animal', 'carpet', 'soap dispenser', 'ottoman')

In [19]:
def extract_and_filter_semseg(scan_folder):
    scan_folder_path = os.path.join(path_to_scene_folders, scan_folder)
    
    # Check if the folder contains semseg.v2.json file
    semseg_file = os.path.join(scan_folder_path, 'semseg.v2.json')
    if not os.path.isfile(semseg_file):
        print("This folder has no semseg file: ", scan_folder)

    # Read and parse the semseg.v2.json file
    with open(semseg_file, 'r') as file:
        semseg_data = json.load(file)
        
    # Define allowed labels
    allowed_labels = ['chair', 'pillow', 'box', 'shelf', 'lamp', 'table', 'door', 'curtain', 'picture', 'cabinet', 'bag', 'light', 'armchair', 'clothes', 'stool', 'kitchen cabinet', 'towel', 'sink', 'blanket', 'commode', 'trash can', 'heater', 'wardrobe', 'bed', 'bench', 'desk', 'sofa', 'monitor', 'basket', 'cushion', 'tv', 'nightstand', 'coffee table', 'mirror', 'bath cabinet', 'rack', 'toilet', 'kitchen counter', 'shoes', 'radiator', 'clutter', 'frame', 'decoration', 'backpack', 'stand', 'bucket', 'counter', 'couch', 'kitchen appliance', 'pc', 'stove', 'tv stand', 'vase', 'side table', 'clothes dryer', 'showcase', 'plate', 'oven', 'refrigerator', 'flower', 'book', 'washing machine', 'plank', 'pillar', 'clock', 'candle', 'bottle', 'telephone', 'bin', 'microwave', 'puf', 'couch table', 'whiteboard', 'laptop', 'shower', 'toilet paper', 'bowl', 'dining chair', 'cupboard', 'roll', 'suitcase', 'desk chair', 'bathtub', 'stairs', 'organizer', 'shower curtain', 'pipe', 'bookshelf', 'bedside table', 'printer', 'boxes', 'toilet brush', 'kitchen towel', 'laundry basket', 'kettle', 'pack', 'stuffed animal', 'carpet', 'soap dispenser', 'ottoman']

    seg_groups = semseg_data['segGroups']
    
    extracted_objects = []
    number_objects = 0

    # Extract individual objects
    for object_data in seg_groups:
        obj = {}
    
        
        if (object_data['label'] in allowed_labels) and number_objects < 20:
            obj['label'] = object_data['label']
            obj['id'] = object_data['objectId']
            obj['location'] = object_data['obb']['centroid']
            obj['size'] = object_data['obb']['axesLengths']

            normalized_axes = object_data['obb']['normalizedAxes']
            rotation_matrix = np.transpose(np.array(normalized_axes).reshape(3, 3))
            rotation = Rotation.from_matrix(rotation_matrix)
            euler_angles = rotation.as_euler('xyz', degrees=False) # degrees in radians
            obj['euler_angles'] = euler_angles
            
            extracted_objects.append(obj)
            number_objects += 1

    
    return extracted_objects

In [54]:
def extract_and_filter_relationships(scan_folder, extracted_objects):
    allowed_object_ids = {obj['id'] for obj in extracted_objects}
    
    with open(path_to_relationships) as f:
        relationships_data = json.load(f)

    scene_relationships = relationships_data['scans']

    # Find the relationships for the scene with the given id
    scene_relationships = [r for r in scene_relationships if r['scan'] == scan_folder]
    print('Found {} relationships dictionary for scene {}'.format(len(scene_relationships), scan_folder))
    scene_relationships = scene_relationships[0]['relationships']

    filtered_relationships = [item for item in scene_relationships if item[0] in allowed_object_ids and item[1] in allowed_object_ids]

    print(f"Found {len(filtered_relationships)} relationships for scene {scan_folder}")


    print(scene_relationships)


In [55]:
def build_scene_matrix(extracted_objects):
    '''
    # Label Embedding
    text_encoder = FastTextEncoder()
    # Extract the labels from the data
    labels = [obj['label'] for obj in extracted_objects]
    # Encode the labels using text_encoder.encode()
    embeddings = [text_encoder.encode(label) for label in labels]
    '''
    
    # Extract values from the dataset
    locations = np.array([obj['location'] for obj in extracted_objects])
    euler_angles = np.array([obj['euler_angles'] for obj in extracted_objects])
    sizes = np.array([obj['size'] for obj in extracted_objects])

    # Create the matrix
    stacked_matrix = np.hstack((locations, euler_angles, sizes))
    
    # Determine the desired number of rows
    desired_rows = 20

    # Add rows of zeros to each matrix
    scene_matrix = np.pad(stacked_matrix, [(0, desired_rows - stacked_matrix.shape[0]), (0, 0)], mode='constant')
    
    return scene_matrix


    

In [56]:
scenes = []
i=0

# Iterate over the folders
for scan_folder in os.listdir(path_to_scene_folders):
    if i>2:
        break
    if os.path.isdir(os.path.join(path_to_scene_folders, scan_folder)):
        scene_data = {}
        
        extracted_objects = extract_and_filter_semseg(scan_folder)
        extracted_relationships = extract_and_filter_relationships(scan_folder, extracted_objects)
        #print(extracted_objects)
        
        scene_data['scene_id'] = scan_folder
        scene_data['scene_matrix'] = build_scene_matrix(extracted_objects)
        scene_data['graph_objects'] = 2
        scene_data['graph_edges'] = 3
        scene_data['graph_relationships'] = 4
        scenes.append(scene_data)
    i+=1

data = {"scenes": scenes}



Found 1 relationships dictionary for scene f4f31600-8408-2255-971c-b8c20605563a
Found 134 relationships for scene f4f31600-8408-2255-971c-b8c20605563a
[[1, 2, 14, 'attached to'], [1, 7, 14, 'attached to'], [1, 8, 14, 'attached to'], [1, 9, 14, 'attached to'], [2, 11, 14, 'attached to'], [3, 2, 17, 'hanging on'], [4, 5, 15, 'standing on'], [5, 11, 15, 'standing on'], [6, 11, 15, 'standing on'], [7, 11, 14, 'attached to'], [8, 11, 14, 'attached to'], [9, 11, 14, 'attached to'], [10, 11, 15, 'standing on'], [12, 11, 15, 'standing on'], [13, 11, 15, 'standing on'], [14, 13, 15, 'standing on'], [15, 12, 15, 'standing on'], [16, 11, 15, 'standing on'], [17, 9, 14, 'attached to'], [18, 40, 21, 'belonging to'], [19, 11, 16, 'lying on'], [20, 11, 15, 'standing on'], [27, 16, 16, 'lying on'], [28, 16, 16, 'lying on'], [29, 16, 16, 'lying on'], [30, 16, 16, 'lying on'], [31, 11, 16, 'lying on'], [32, 11, 16, 'lying on'], [33, 11, 16, 'lying on'], [34, 11, 15, 'standing on'], [35, 10, 23, 'standin

Found 1 relationships dictionary for scene b8837e3a-57ec-29c6-8b54-d440ca79a11f
Found 106 relationships for scene b8837e3a-57ec-29c6-8b54-d440ca79a11f
[[3, 1, 15, 'standing on'], [4, 1, 15, 'standing on'], [6, 4, 16, 'lying on'], [8, 4, 16, 'lying on'], [9, 4, 16, 'lying on'], [10, 4, 16, 'lying on'], [11, 1, 15, 'standing on'], [19, 4, 16, 'lying on'], [20, 4, 16, 'lying on'], [19, 6, 3, 'right'], [19, 8, 3, 'right'], [19, 8, 5, 'behind'], [19, 9, 3, 'right'], [19, 10, 3, 'right'], [19, 10, 6, 'close by'], [19, 20, 6, 'close by'], [19, 20, 2, 'left'], [3, 4, 6, 'close by'], [3, 4, 2, 'left'], [3, 11, 3, 'right'], [3, 11, 6, 'close by'], [4, 3, 3, 'right'], [4, 3, 6, 'close by'], [4, 11, 3, 'right'], [4, 11, 6, 'close by'], [6, 19, 2, 'left'], [6, 8, 4, 'front'], [6, 8, 6, 'close by'], [6, 8, 2, 'left'], [6, 9, 6, 'close by'], [6, 9, 2, 'left'], [6, 10, 4, 'front'], [6, 10, 2, 'left'], [8, 19, 4, 'front'], [8, 19, 2, 'left'], [8, 6, 3, 'right'], [8, 6, 6, 'close by'], [8, 6, 5, 'behind