Format of the file:
```
{
  "scenes": [
    {
      "scene_id": "XXXXXXXXX",
      "scene_matrix": [20, 309], #  --> (text_emb, location, euler_angles, sizes)
      "graph_objects": [20, 300], # --> (text_emb)
      "graph_edges": [2, 50], # --> (o1, o2)
      "graph_relationships": [50] # --> int
    },
    {
      "scene_id": "YYYYYYYYYYY",
      ...
    }
    ...
  ]
}

```

In [1]:
import os
import json
import numpy as np
from scipy.spatial.transform import Rotation
#from text_encoder.py import FastTextEncoder

In [48]:
# Specify the file path where you want to save the JSON data
save_path = '3RScan/data/test.json'

# File path to scene folders
path_to_scene_folders = '3RScan/data/train'
path_to_relationships = '3RScan/data/relationships.json'

In [None]:
# Specify Text Encoder
text_encoder = FastTextEncoder()

We filter:
* max number of objects per scene = 20
* allowed object labels: 
('chair', 'pillow', 'box', 'shelf', 'lamp', 'table', 'door', 'curtain', 'picture', 'cabinet', 'bag', 'light', 'armchair', 'clothes', 'stool', 'kitchen cabinet', 'towel', 'sink', 'blanket', 'commode', 'trash can', 'heater', 'wardrobe', 'bed', 'bench', 'desk', 'sofa', 'monitor', 'basket', 'cushion', 'tv', 'nightstand', 'coffee table', 'mirror', 'bath cabinet', 'rack', 'toilet', 'kitchen counter', 'shoes', 'radiator', 'clutter', 'frame', 'decoration', 'backpack', 'stand', 'bucket', 'counter', 'couch', 'kitchen appliance', 'pc', 'stove', 'tv stand', 'vase', 'side table', 'clothes dryer', 'showcase', 'plate', 'oven', 'refrigerator', 'flower', 'book', 'washing machine', 'plank', 'pillar', 'clock', 'candle', 'bottle', 'telephone', 'bin', 'microwave', 'puf', 'couch table', 'whiteboard', 'laptop', 'shower', 'toilet paper', 'bowl', 'dining chair', 'cupboard', 'roll', 'suitcase', 'desk chair', 'bathtub', 'stairs', 'organizer', 'shower curtain', 'pipe', 'bookshelf', 'bedside table', 'printer', 'boxes', 'toilet brush', 'kitchen towel', 'laundry basket', 'kettle', 'pack', 'stuffed animal', 'carpet', 'soap dispenser', 'ottoman')

In [73]:
def extract_and_filter_semseg(scan_folder):
    scan_folder_path = os.path.join(path_to_scene_folders, scan_folder)
    
    # Check if the folder contains semseg.v2.json file
    semseg_file = os.path.join(scan_folder_path, 'semseg.v2.json')
    if not os.path.isfile(semseg_file):
        print("This folder has no semseg file: ", scan_folder)

    # Read and parse the semseg.v2.json file
    with open(semseg_file, 'r') as file:
        semseg_data = json.load(file)
        
    # Define allowed labels
    allowed_labels = ['chair', 'pillow', 'box', 'shelf', 'lamp', 'table', 'door', 'curtain', 'picture', 'cabinet', 'bag', 'light', 'armchair', 'clothes', 'stool', 'kitchen cabinet', 'towel', 'sink', 'blanket', 'commode', 'trash can', 'heater', 'wardrobe', 'bed', 'bench', 'desk', 'sofa', 'monitor', 'basket', 'cushion', 'tv', 'nightstand', 'coffee table', 'mirror', 'bath cabinet', 'rack', 'toilet', 'kitchen counter', 'shoes', 'radiator', 'clutter', 'frame', 'decoration', 'backpack', 'stand', 'bucket', 'counter', 'couch', 'kitchen appliance', 'pc', 'stove', 'tv stand', 'vase', 'side table', 'clothes dryer', 'showcase', 'plate', 'oven', 'refrigerator', 'flower', 'book', 'washing machine', 'plank', 'pillar', 'clock', 'candle', 'bottle', 'telephone', 'bin', 'microwave', 'puf', 'couch table', 'whiteboard', 'laptop', 'shower', 'toilet paper', 'bowl', 'dining chair', 'cupboard', 'roll', 'suitcase', 'desk chair', 'bathtub', 'stairs', 'organizer', 'shower curtain', 'pipe', 'bookshelf', 'bedside table', 'printer', 'boxes', 'toilet brush', 'kitchen towel', 'laundry basket', 'kettle', 'pack', 'stuffed animal', 'carpet', 'soap dispenser', 'ottoman']

    # Define number of objects
    max_N = 5
    
    seg_groups = semseg_data['segGroups']
    
    extracted_objects = []
    number_objects = 0

    # Extract individual objects
    for object_data in seg_groups:
        obj = {}
    
        
        if (object_data['label'] in allowed_labels) and number_objects < max_N:
            obj['label'] = object_data['label']
            obj['id'] = object_data['objectId']
            obj['location'] = object_data['obb']['centroid']
            obj['size'] = object_data['obb']['axesLengths']

            normalized_axes = object_data['obb']['normalizedAxes']
            rotation_matrix = np.transpose(np.array(normalized_axes).reshape(3, 3))
            rotation = Rotation.from_matrix(rotation_matrix)
            euler_angles = rotation.as_euler('xyz', degrees=False) # degrees in radians
            obj['euler_angles'] = euler_angles
            
            extracted_objects.append(obj)
            number_objects += 1

    
    return extracted_objects

What we filter:
* max number of relationships
* only objects we also have in extracted objects

In [74]:
def extract_and_filter_relationships(scan_folder, extracted_objects):
    # Define max. number of edges
    max_numb_edges = 10
    
    allowed_object_ids = {obj['id'] for obj in extracted_objects}
    object_vector = np.array(sorted(allowed_object_ids))
    
    with open(path_to_relationships) as f:
        relationships_data = json.load(f)

    scene_relationships = relationships_data['scans']

    # Find the relationships for the scene with the given id
    scene_relationships = [r for r in scene_relationships if r['scan'] == scan_folder]
    print('Found {} relationships dictionary for scene {}'.format(len(scene_relationships), scan_folder))
    scene_relationships = scene_relationships[0]['relationships']
    
    # Filter out relationships with objects that are not in the scene_matrix
    filtered_relationships = [item for item in scene_relationships if item[0] in allowed_object_ids and item[1] in allowed_object_ids]
    print(f"Found {len(filtered_relationships)} relationships for scene {scan_folder}")
    
    # Clip the relationships to max number
    clipped_relationships = filtered_relationships[:max_numb_edges]
    
    # Enumerate objects in relationship data from 0 to ...
    mapping_numbers = {number: replacement for replacement, number in enumerate(object_vector, start=0)}
    modified_relationships = [[mapping_numbers[obj[0]], mapping_numbers[obj[1]], obj[2], obj[3]] for obj in clipped_relationships]
    
    # Create object matrix
    mapping_labels = {obj['id']: obj['label'] for obj in extracted_objects}
    label_vector = [mapping_labels[num] for num in object_vector]
    #object_matrix = [text_encoder.encode(label) for label in label_vector]
    
    # Create edge matrix
    edge_matrix_np = np.stack([obj[:2] for obj in modified_relationships]).T
    edge_matrix = edge_matrix_np.tolist()

    # Create relationship matrix
    relationship_vector = [obj[2] for obj in modified_relationships]

    #return(object_matrix, )
    return(edge_matrix, relationship_vector)


In [79]:
def build_scene_matrix(extracted_objects):
    '''
    # Label Embedding
    # Extract the labels from the data
    labels = [obj['label'] for obj in extracted_objects]
    # Encode the labels using text_encoder.encode()
    embeddings = [text_encoder.encode(label) for label in labels]
    '''
    
    # Extract values from the dataset
    locations = np.array([obj['location'] for obj in extracted_objects])
    euler_angles = np.array([obj['euler_angles'] for obj in extracted_objects])
    sizes = np.array([obj['size'] for obj in extracted_objects])

    # Create the matrix
    stacked_matrix = np.hstack((locations, euler_angles, sizes))
    
    # Determine the desired number of rows
    desired_rows = 5

    # Add rows of zeros to each matrix
    scene_matrix_np = np.pad(stacked_matrix, [(0, desired_rows - stacked_matrix.shape[0]), (0, 0)], mode='constant')
    scene_matrix = scene_matrix_np.tolist()
    
    return scene_matrix


    

In [80]:
scenes = []
i=0

# Iterate over the folders
for scan_folder in os.listdir(path_to_scene_folders):
    if i>2:
        break
    if os.path.isdir(os.path.join(path_to_scene_folders, scan_folder)):
        scene_data = {}
        
        extracted_objects = extract_and_filter_semseg(scan_folder)
        edge_matrix, relationship_vector = extract_and_filter_relationships(scan_folder, extracted_objects)
        #print(extracted_objects)
        
        scene_data['scene_id'] = scan_folder
        scene_data['scene_matrix'] = build_scene_matrix(extracted_objects)
        scene_data['graph_objects'] = 2
        scene_data['graph_edges'] = edge_matrix
        scene_data['graph_relationships'] = relationship_vector
        scenes.append(scene_data)
    i+=1
    #print(scene_data)

data = {"scenes": scenes}
print(data)



Found 1 relationships dictionary for scene f4f31600-8408-2255-971c-b8c20605563a
Found 11 relationships for scene f4f31600-8408-2255-971c-b8c20605563a
Found 1 relationships dictionary for scene b8837e3a-57ec-29c6-8b54-d440ca79a11f
Found 27 relationships for scene b8837e3a-57ec-29c6-8b54-d440ca79a11f
Found 1 relationships dictionary for scene b901681d-e754-293c-8cb3-22aae72dbd42
Found 4 relationships for scene b901681d-e754-293c-8cb3-22aae72dbd42
{'scenes': [{'scene_id': 'f4f31600-8408-2255-971c-b8c20605563a', 'scene_matrix': [[1.2627571815287477, 3.5120839649475206, -0.13000007808022507, 1.5707963267948968, 0.0, -0.34177145477279264, 0.9916166806447296, 2.719999967813491, 0.5723202049896078], [0.4879435954765271, 3.5344160595066842, -0.4616025621067208, 1.5707963267948968, 0.0, -0.5440600955940643, 0.8708287148221838, 0.4967949726403881, 0.46749919049829614], [0.4083856755914834, 3.5062693147846034, -1.0000000336002597, 1.5707963267948968, 0.0, -0.5518394953826442, 0.783675935902371, 0.

In [81]:
# Save data as a JSON file
with open(save_path, "w") as json_file:
    json.dump(data, json_file)