Format of the file:
```
{
  "scenes": [
    {
      "scene_id": "XXXXXXXXX",
      "scene_matrix": [20, 315], #  --> (text_emb, location, normalized_axes, sizes)
      "graph_objects": [20, 300], # --> (text_emb)
      "graph_edges": [2, X], # --> (o1, o2), X<=25
      "graph_relationships": [X] # --> int
    },
    {
      "scene_id": "YYYYYYYYYYY",
      ...
    }
    ...
  ]
}
```

Possible Disadvantages of this approach:
* We have some scenes with very few objects but still a high number of edges (e.g. 3 objects, 14 edges)
* When trying to find one relationship for every object in the scene, we always start searching from the beginning, therefore relationships with objects with low object id are naturally overrepresented
* When 'filling up' remaining edges, we simply start from the beginning, therefore edge connections to objects with low object ids are overrepresented
* a non-neglectable share of scenes has no relationships after all filtering steps and therefore needs to be taken out

In [1]:
import os
import json
import numpy as np
from scipy.spatial.transform import Rotation
from text_encoder import FastTextEncoder

In [2]:
# Specify the file path where you want to save the JSON data
save_path = '3RScan/data/test_small.json'

# File path to scene folders
path_to_scene_folders = '3RScan/data/train'
path_to_relationships = '3RScan/data/relationships.json'

In [3]:
# Specify Text Encoder
text_encoder = FastTextEncoder()



We filter:
* max number of objects per scene = 20
* objects with non-realistic locations
* allowed object labels: 
('chair', 'shelf', 'lamp', 'table', 'cabinet', 'light', 'armchair', 'stool', 'kitchen cabinet', 'sink', 'commode', 'trash can', 'heater', 'wardrobe', 'bed', 'bench', 'desk', 'sofa', 'monitor', 'tv', 'nightstand', 'coffee table', 'bath cabinet', 'rack', 'toilet', 'kitchen counter', 'radiator', 'stand', 'counter', 'couch', 'pc', 'stove', 'tv stand', 'side table', 'clothes dryer', 'showcase', 'oven', 'refrigerator', 'washing machine', 'bin', 'couch table', 'shower', 'dining chair', 'cupboard', 'desk chair', 'bathtub', 'organizer', 'shower curtain', 'bookshelf', 'bedside table', 'carpet')
--> ACHTUNG: drastically reduced to 51 objects!!!!!!

In [26]:
def onehot(label_vector):
    object_labels = ['chair', 'shelf', 'lamp', 'table', 'cabinet', 'light', 'armchair', 'stool', 'kitchen cabinet', 'sink', 'commode', 'trash can', 'heater', 'wardrobe', 'bed', 'bench', 'desk', 'sofa', 'monitor', 'tv', 'nightstand', 'coffee table', 'bath cabinet', 'rack', 'toilet', 'kitchen counter', 'radiator', 'stand', 'counter', 'couch', 'pc', 'stove', 'tv stand', 'side table', 'clothes dryer', 'showcase', 'oven', 'refrigerator', 'washing machine', 'bin', 'couch table', 'shower', 'dining chair', 'cupboard', 'desk chair', 'bathtub', 'organizer', 'shower curtain', 'bookshelf', 'bedside table', 'carpet']
    num_labels = len(object_labels)
    encoded_vector = np.zeros((len(label_vector), num_labels))
    label_indices = {label: index for index, label in enumerate(object_labels)}
    
    for i, obj in enumerate(label_vector):
        if obj in label_indices:
            label_index = label_indices[obj]
            encoded_vector[i, label_index] = 1
    
    return encoded_vector

In [27]:
def extract_and_filter_semseg(scan_folder):
    
    '''
    input:
        scan_folder name
        
    output:
        extracted_objects = [{'label', 'id', 'location', 'size', 'normalized_axes'}, {'label',...},...] --> N<=20 objects
        
    what happens:
        - load semseg.v2.json file that corresponds to the specific scene_id
        - extract max. 20 objects, which have labels that are allowed
        - object data: label, id, location, size, normalized axes
    '''
    
    scan_folder_path = os.path.join(path_to_scene_folders, scan_folder)
    
    # Check if the folder contains semseg.v2.json file
    semseg_file = os.path.join(scan_folder_path, 'semseg.v2.json')
    if not os.path.isfile(semseg_file):
        print("This folder has no semseg file: ", scan_folder)

    # Read and parse the semseg.v2.json file
    with open(semseg_file, 'r') as file:
        semseg_data = json.load(file)
        
    # Define allowed labels
    allowed_labels = ['chair', 'shelf', 'lamp', 'table', 'cabinet', 'light', 'armchair', 'stool', 'kitchen cabinet', 'sink', 'commode', 'trash can', 'heater', 'wardrobe', 'bed', 'bench', 'desk', 'sofa', 'monitor', 'tv', 'nightstand', 'coffee table', 'bath cabinet', 'rack', 'toilet', 'kitchen counter', 'radiator', 'stand', 'counter', 'couch', 'pc', 'stove', 'tv stand', 'side table', 'clothes dryer', 'showcase', 'oven', 'refrigerator', 'washing machine', 'bin', 'couch table', 'shower', 'dining chair', 'cupboard', 'desk chair', 'bathtub', 'organizer', 'shower curtain', 'bookshelf', 'bedside table', 'carpet']

    # Define number of objects
    max_N = 20
    
    seg_groups = semseg_data['segGroups']
    
    extracted_objects = []
    number_objects = 0

    # Extract individual objects
    for object_data in seg_groups:
        obj = {}
    
        
        if (object_data['label'] in allowed_labels) and number_objects < max_N \
        and (np.abs(object_data['obb']['centroid'][0])<20 and np.abs(object_data['obb']['centroid'][1])<20 and np.abs(object_data['obb']['centroid'][2])<20):
            obj['label'] = object_data['label']
            obj['id'] = object_data['objectId']
            obj['location'] = object_data['obb']['centroid']
            obj['size'] = object_data['obb']['axesLengths']
            obj['normalized_axes'] = object_data['obb']['normalizedAxes']
            
            extracted_objects.append(obj)
            number_objects += 1

    
    return extracted_objects

In [28]:
def choose_relationships(object_vector, filtered_relationships):
    
    '''
    input:
        object_vector --> N<=20 --> ids of all objects in our scene
        filtered_relationships --> [[o1, 02, r, 'relationship'], [o1, ...], ...] --> unlimited number of relationships
    
    output:
        subset --> [[o1, 02, r, 'relationship'], [o1, ...], ...] --> number of relationships <=25
    
    what happens:
        idea:
            we want all objects to be connected to at least one other object in the scene
        implementation:
            - for every object in our scene we try to find a relationship that goes from or to this object
            - as long as the number of relationships in our subset is below 25 we try to fill up remaining spots with other relationships
    '''
    subset = []
    
    # Define max number of relationships
    max_R = 25
    
    # Filter the data based on the vector
    for value in object_vector:
        for obj in filtered_relationships:
            if (obj[0] == value or obj[1] == value) and obj not in subset:
                subset.append(obj)
                break


        if len(subset) >= max_R:
            break

    # Check if subset needs to be filled with remaining objects
    if len(subset) < max_R and len(subset) < len(filtered_relationships):
        for obj in filtered_relationships:
            if obj not in subset:
                subset.append(obj)
                if len(subset) >= max_R or len(subset) >= len(filtered_relationships):
                    break

    return subset


In [29]:
def map_values_to_indices(relationship_list, all_relationships):
    
    '''
    input:
        relationship_list ['relationship', 'relationship',...] --> number of objects <= 25
        all_relationships ['relationship', 'relationship',...] --> number of objects = 23
    
    output:
        mapped_indices [int, int, ...] --> N <= 25
        
    what happens:
        - all allowed relationship types are enumerated
        - we map the relationships of our list to the corresponding numbers
    '''
    
    value_indices = {}
 
    for i, value in enumerate(all_relationships):
        value_indices[value] = i+1
    
    mapped_indices = []
    for value in relationship_list:
        if value in value_indices:
            mapped_indices.append(value_indices[value])
    
    return mapped_indices


What we filter:
* max number of relationships
* only objects we also have in extracted objects
* allowed relationships:
[ 'left', 'right', 'close by', 'behind', 'front', 'attached to', 'standing on', 'lower than', 'higher than', 'lying on', 'smaller than', 'bigger than', 'hanging on', 'supported by', 'standing in', 'leaning against', 'build in', 'lying in', 'connected to', 'belonging to', 'cover', 'part of', 'hanging in']

In [30]:
def extract_and_filter_relationships(scan_folder, extracted_objects):
    
    '''
    input:
        scan_folder
        exracted_objects = [{'label', 'id', 'location', 'size', 'normalized_axes'}, {'label',...},...] --> N<=20 objects
    
    output:
        object_matrix --> [20, 300]
        edge_matrix --> [2, X] X<=25
        relationship_vector --> [X]
        
    what happens:
        - we load the relationship data corresponding to our scene id
        - we only consider relationships, where both objects are in the extracted objects and where the relationship type is within the allowed set
        - we create a relationship subset with max. 25 relationships
        - we map the object ids to an enumeration to create a useful edge matrix
        - create object matrix by encoding the labels and pad until we get 20 rows
        - create edge matrix from relationship subset
        - create relationship vector by mapping the relationship to an integer
    '''
    
    allowed_object_ids = {obj['id'] for obj in extracted_objects}
    allowed_relationships = ['left', 'right', 'close by', 'behind', 'front', 'attached to', 'standing on', 'lower than', 'higher than', 'lying on', 'smaller than', 'bigger than', 'hanging on', 'supported by', 'standing in', 'leaning against', 'build in', 'lying in', 'connected to', 'belonging to', 'cover', 'part of', 'hanging in']
    
    object_vector = np.array(sorted(allowed_object_ids))
    
    with open(path_to_relationships) as f:
        relationships_data = json.load(f)

    scene_relationships = relationships_data['scans']

    # Find the relationships for the scene with the given id
    scene_relationships = [r for r in scene_relationships if r['scan'] == scan_folder]
    scene_relationships = scene_relationships[0]['relationships']
    
    # Filter out relationships with objects that are not in the scene_matrix
    filtered_relationships = [item for item in scene_relationships if item[0] in allowed_object_ids and item[1] in allowed_object_ids and item[3] in allowed_relationships]
    
    # Clip the relationships to max number
    relationship_subset = choose_relationships(object_vector, filtered_relationships)
    
    # Enumerate objects in relationship data from 0 to ...
    mapping_numbers = {number: replacement for replacement, number in enumerate(object_vector, start=0)}
    modified_relationships = [[mapping_numbers[obj[0]], mapping_numbers[obj[1]], obj[2], obj[3]] for obj in relationship_subset]
    
    if len(modified_relationships) != 0:
        ok = True
        
        # Create object matrix
        mapping_labels = {obj['id']: obj['label'] for obj in extracted_objects}
        label_vector = [mapping_labels[num] for num in object_vector]
        # Text embeddings of Label Vector
        label_encodings = np.asarray([text_encoder.encode(label)[0] for label in label_vector])
        # Determine the desired number of rows
        desired_rows = 20
        # Add rows of zeros to each matrix
        object_matrix_np = np.pad(label_encodings, [(0, desired_rows - label_encodings.shape[0]), (0, 0)], mode='constant')
        object_matrix = object_matrix_np.tolist()

        # Create edge matrix
        edge_matrix_np = np.stack([obj[:2] for obj in modified_relationships]).T
        edge_matrix = edge_matrix_np.tolist()

        # Create relationship matrix
        relationship_list = [obj[3] for obj in modified_relationships]
        relationship_vector = map_values_to_indices(relationship_list, allowed_relationships)
    
    else:
        ok = False
        object_matrix = 0
        edge_matrix = 0
        relationship_vector = 0


    return(object_matrix, edge_matrix, relationship_vector, ok)


In [31]:
def build_scene_matrix(extracted_objects):
    
    '''
    input:
        extracted_objects = [{'label', 'id', 'location', 'size', 'normalized_axes'}, {'label',...},...] --> N<=20 objects
    
    output:
        scene_matrix --> [20, 315] -->(label_encodings, locations, normalized_axes, sizes)
        
    what happens:
    - extract the data from extracted_objects that's relevant for the scene matrix
    - encode label with text encoder
    - stack embedding together with locations, normalized axes and sizes in a matrix
    - pad rows of the matrix such that we reach N=20
    
    '''

    # Label Embedding
    # Extract the labels from the data
    labels = [obj['label'] for obj in extracted_objects]
    # Encode the labels using text_encoder
    # Alt. 1 Text Embedding of Labels
    label_encodings = np.asarray([text_encoder.encode(label)[0] for label in labels])
    # Alt. 2 One-hot Encoding of Labels
    #label_encodings = onehot(labels)
    
    # Extract values from the dataset
    locations = np.array([obj['location'] for obj in extracted_objects])
    normalized_axes = np.array([obj['normalized_axes'] for obj in extracted_objects])
    sizes = np.array([obj['size'] for obj in extracted_objects])

    # Create the matrix
    stacked_matrix = np.hstack((label_encodings, locations, normalized_axes, sizes))
    
    # Determine the desired number of rows
    desired_rows = 20

    # Add rows of zeros to each matrix
    scene_matrix_np = np.pad(stacked_matrix, [(0, desired_rows - stacked_matrix.shape[0]), (0, 0)], mode='constant')
    scene_matrix = scene_matrix_np.tolist()
    
    return scene_matrix


    

In [32]:
scenes = []
i=0

# Iterate over the folders
for scan_folder in os.listdir(path_to_scene_folders):
    print(scan_folder)
  
    if os.path.isdir(os.path.join(path_to_scene_folders, scan_folder)):
        scene_data = {}
        
        extracted_objects = extract_and_filter_semseg(scan_folder)
        
        if len(extracted_objects) == 0:
            print('No objects in ', scan_folder)
            continue
            
        object_matrix, edge_matrix, relationship_vector, ok = extract_and_filter_relationships(scan_folder, extracted_objects)
        
        if not ok:
            print('No relations in ', scan_folder)
            continue
        
        scene_data['scene_id'] = scan_folder
        scene_data['scene_matrix'] = build_scene_matrix(extracted_objects)
        scene_data['graph_objects'] = object_matrix
        scene_data['graph_edges'] = edge_matrix
        scene_data['graph_relationships'] = relationship_vector
        scenes.append(scene_data)
    i+=1

data = {"scenes": scenes}




f4f31600-8408-2255-971c-b8c20605563a
{'chair': 0, 'shelf': 1, 'lamp': 2, 'table': 3, 'cabinet': 4, 'light': 5, 'armchair': 6, 'stool': 7, 'kitchen cabinet': 8, 'sink': 9, 'commode': 10, 'trash can': 11, 'heater': 12, 'wardrobe': 13, 'bed': 14, 'bench': 15, 'desk': 16, 'sofa': 17, 'monitor': 18, 'tv': 19, 'nightstand': 20, 'coffee table': 21, 'bath cabinet': 22, 'rack': 23, 'toilet': 24, 'kitchen counter': 25, 'radiator': 26, 'stand': 27, 'counter': 28, 'couch': 29, 'pc': 30, 'stove': 31, 'tv stand': 32, 'side table': 33, 'clothes dryer': 34, 'showcase': 35, 'oven': 36, 'refrigerator': 37, 'washing machine': 38, 'bin': 39, 'couch table': 40, 'shower': 41, 'dining chair': 42, 'cupboard': 43, 'desk chair': 44, 'bathtub': 45, 'organizer': 46, 'shower curtain': 47, 'bookshelf': 48, 'bedside table': 49, 'carpet': 50}
b8837e3a-57ec-29c6-8b54-d440ca79a11f
{'chair': 0, 'shelf': 1, 'lamp': 2, 'table': 3, 'cabinet': 4, 'light': 5, 'armchair': 6, 'stool': 7, 'kitchen cabinet': 8, 'sink': 9, 'commo

{'chair': 0, 'shelf': 1, 'lamp': 2, 'table': 3, 'cabinet': 4, 'light': 5, 'armchair': 6, 'stool': 7, 'kitchen cabinet': 8, 'sink': 9, 'commode': 10, 'trash can': 11, 'heater': 12, 'wardrobe': 13, 'bed': 14, 'bench': 15, 'desk': 16, 'sofa': 17, 'monitor': 18, 'tv': 19, 'nightstand': 20, 'coffee table': 21, 'bath cabinet': 22, 'rack': 23, 'toilet': 24, 'kitchen counter': 25, 'radiator': 26, 'stand': 27, 'counter': 28, 'couch': 29, 'pc': 30, 'stove': 31, 'tv stand': 32, 'side table': 33, 'clothes dryer': 34, 'showcase': 35, 'oven': 36, 'refrigerator': 37, 'washing machine': 38, 'bin': 39, 'couch table': 40, 'shower': 41, 'dining chair': 42, 'cupboard': 43, 'desk chair': 44, 'bathtub': 45, 'organizer': 46, 'shower curtain': 47, 'bookshelf': 48, 'bedside table': 49, 'carpet': 50}
4acaebc0-6c10-2a2a-852e-0226d6539299
{'chair': 0, 'shelf': 1, 'lamp': 2, 'table': 3, 'cabinet': 4, 'light': 5, 'armchair': 6, 'stool': 7, 'kitchen cabinet': 8, 'sink': 9, 'commode': 10, 'trash can': 11, 'heater': 1

No relations in  55551077-36f1-29c0-89ec-2e7690991cb2
10b1795f-3938-2467-8aa6-c985377d32fb
{'chair': 0, 'shelf': 1, 'lamp': 2, 'table': 3, 'cabinet': 4, 'light': 5, 'armchair': 6, 'stool': 7, 'kitchen cabinet': 8, 'sink': 9, 'commode': 10, 'trash can': 11, 'heater': 12, 'wardrobe': 13, 'bed': 14, 'bench': 15, 'desk': 16, 'sofa': 17, 'monitor': 18, 'tv': 19, 'nightstand': 20, 'coffee table': 21, 'bath cabinet': 22, 'rack': 23, 'toilet': 24, 'kitchen counter': 25, 'radiator': 26, 'stand': 27, 'counter': 28, 'couch': 29, 'pc': 30, 'stove': 31, 'tv stand': 32, 'side table': 33, 'clothes dryer': 34, 'showcase': 35, 'oven': 36, 'refrigerator': 37, 'washing machine': 38, 'bin': 39, 'couch table': 40, 'shower': 41, 'dining chair': 42, 'cupboard': 43, 'desk chair': 44, 'bathtub': 45, 'organizer': 46, 'shower curtain': 47, 'bookshelf': 48, 'bedside table': 49, 'carpet': 50}
5341b7c3-8a66-2cdd-8651-225a7489523a


In [33]:
# Save data as a JSON file
with open(save_path, "w") as json_file:
    json.dump(data, json_file)