In [1]:
# Overridable notebook settings. A value left as None (or any falsy value) is
# replaced by a default in the configuration cell further down.
# NOTE(review): looks like a papermill-style parameters cell — confirm.

# Output file name for the merged keyframe metadata, and the folder that holds
# the individual keyframe metadata JSON files.
combined_keyframes_metadata_filename = None
keyframes_metadata_dir = None

# Output file name for the merged object-detection metadata, and the root
# folder that is searched for `*_detection.json` files.
combined_object_extraction_filename = None
object_extraction_dir = None

# Output file name for the final file that joins both combined files.
final_metadata_filename = None

In [2]:
import os
import json

# Base directory for the default paths built below: the kernel's current
# working directory at the time this cell runs.
dir_path = os.getcwd()

In [3]:
# Locations derived from the working directory.
data_extraction_path = f'{dir_path}/data_extraction'
dataset_path = f'{data_extraction_path}/dataset/AIC_Video'

# Any setting that is still unset (None or otherwise falsy) falls back to its
# default; a value supplied earlier is kept as-is.
combined_keyframes_metadata_filename = (
    combined_keyframes_metadata_filename or 'keyframes_metadata.json'
)
keyframes_metadata_dir = (
    keyframes_metadata_dir
    or f'{data_extraction_path}/transnet/Keyframes_Metadata'
)
combined_object_extraction_filename = (
    combined_object_extraction_filename or 'object_extraction_metadata.json'
)
object_extraction_dir = (
    object_extraction_dir
    or f'{data_extraction_path}/metadata/object_extraction/object_detection'
)
final_metadata_filename = final_metadata_filename or 'final_metadata.json'

# Combine keyframe metadata

In [4]:
def combine_keyframe_metadata_json_files(directory, output_file):
    """Merge every ``.json`` file directly inside ``directory`` into one file.

    If ``output_file`` already exists, its contents seed the result, so keys
    written by an earlier run are kept unless an input file redefines them.
    Input files are applied in sorted file-name order; when two files define
    the same top-level key, the later file wins.

    Args:
        directory: Folder whose top-level ``.json`` files are merged.
            Sub-folders are not visited.
        output_file: Path of the combined JSON file to create or extend.

    Raises:
        OSError: If ``directory`` cannot be listed or a file cannot be opened.
        json.JSONDecodeError: If a file that is read does not contain valid JSON.
    """
    combined_data = {}

    if os.path.exists(output_file):
        with open(output_file, 'r', encoding='utf-8') as existing_file:
            combined_data = json.load(existing_file)

    output_abspath = os.path.abspath(output_file)

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # winner of a key collision the same on every run and every platform.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.json'):
            continue

        file_path = os.path.join(directory, filename)

        # The previous output was already loaded above. Re-reading it as an
        # input would let its stale values overwrite fresher ones.
        if os.path.abspath(file_path) == output_abspath:
            continue

        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)

        combined_data.update(data)

    with open(output_file, 'w', encoding='utf-8') as outfile:
        json.dump(combined_data, outfile)

    print(f'Combined keyframe metadata successful: {output_file}')
    

# Merge the keyframe metadata files into the combined keyframe file.
combine_keyframe_metadata_json_files(
    directory=keyframes_metadata_dir,
    output_file=combined_keyframes_metadata_filename,
)

Combined keyframe metadata successful: keyframes_metadata.json


# Combine object extraction metadata

In [5]:
def preprocess_object_detection(data):
    """Group a flat list of detections by label.

    Args:
        data: Iterable of mappings, each providing ``'label'``, ``'score'``
            and ``'box'``. Any other keys are dropped.

    Returns:
        A dict with two entries, both keyed by label in first-seen order:
        ``'objects'`` maps a label to its list of ``{"score", "box"}`` dicts,
        and ``'counts'`` maps a label to how many detections carry it.
    """
    grouped = {}
    for detection in data:
        label = detection['label']
        entry = {
            "score": detection['score'],
            "box": detection['box']
        }
        grouped.setdefault(label, []).append(entry)

    # Every detection lands in exactly one list, so a label's count is simply
    # the length of its list.
    tallies = {label: len(entries) for label, entries in grouped.items()}

    return {'objects': grouped, 'counts': tallies}

def combine_object_extraction_metadata_json_files(directory, output_file):
    """Collect every ``*_detection.json`` file under ``directory`` into one file.

    The tree is walked recursively in sorted order. Each matching file is
    stored under the key ``{parent}_{folder}_{frame}_detection``, where
    ``folder`` is the directory containing the file, ``parent`` is that
    directory's parent, and ``frame`` is the file name up to its first
    underscore. The file's content is passed through
    ``preprocess_object_detection`` before being stored.

    Args:
        directory: Root folder to search.
        output_file: Path of the combined JSON file; overwritten if present.

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If a matching file does not contain valid JSON.
    """
    combined_data = {}

    for root, dirs, files in os.walk(directory):
        # Sorting in place makes os.walk descend in a stable order, so the
        # key order of the output does not depend on the file system.
        dirs.sort()

        for file in sorted(files):
            if not file.endswith('_detection.json'):
                continue

            file_path = os.path.join(root, file)

            # os.path helpers accept every separator the platform allows and
            # yield '' when a component is missing, whereas indexing
            # root.split(os.sep) raised IndexError on a one-component root.
            video_folder = os.path.basename(root)  # V001, V002, etc.
            l_folder = os.path.basename(os.path.dirname(root))  # L01, L02, etc.
            frame_id = file.split('_')[0]  # 000139, etc.
            key = f"{l_folder}_{video_folder}_{frame_id}_detection"

            # JSON text is UTF-8; do not depend on the platform default.
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            combined_data[key] = preprocess_object_detection(data)

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(combined_data, f)

    print(f'Combined object detection metadata successful: {output_file}')
    

# Merge the per-frame detection files into the combined object-extraction file.
combine_object_extraction_metadata_json_files(
    directory=object_extraction_dir,
    output_file=combined_object_extraction_filename,
)

Combined object detection metadata successful: object_extraction_metadata.json


# Combine into the final metadata file

In [6]:
def combined_json_file(json_files, output_file):
    """Join several keyed JSON files into one file grouped by a shared key.

    Args:
        json_files: Mapping of a display name to a dict with ``'path'`` (the
            JSON file to read) and an optional ``'key_extension'``. When the
            extension is non-empty, every ``_<extension>`` occurrence is
            removed from each key of that file to obtain the shared key.
        output_file: Path of the JSON file to write. Its content maps each
            shared key to ``{<extension>: <value from that source>}``.
    """
    merged = {}

    for source_name, source in json_files.items():
        source_path = source['path']
        extension = source.get('key_extension', '')

        with open(source_path, 'r') as handle:
            records = json.load(handle)
        print(f"Preprocessing metadata file: {source_name}")

        for raw_key, payload in records.items():
            # Strip the source-specific marker so entries from different
            # sources that describe the same item end up under one key.
            shared_key = raw_key.replace(f'_{extension}', '') if extension else raw_key
            merged.setdefault(shared_key, {})[extension] = payload

    with open(output_file, 'w') as handle:
        json.dump(merged, handle)

    print(f'Combined final metadata successful: {output_file}')

In [7]:
# One entry per combined file: where to read it and which key marker to strip
# so that both sources line up on the same key.
json_files = {
    'keyframe_metadata': dict(
        path=combined_keyframes_metadata_filename,
        key_extension='keyframe',
    ),
    'object_extraction': dict(
        path=combined_object_extraction_filename,
        key_extension='detection',
    ),
}

combined_json_file(json_files=json_files, output_file=final_metadata_filename)

Preprocessing metadata file: keyframe_metadata
Preprocessing metadata file: object_extraction


Combined final metadata successful: final_metadata.json
