# Annotation Generation Tools

In [48]:
import json
import os
from collections import OrderedDict
import fnmatch


In [49]:
# Only referenced by the commented-out `output_file` alternative at the bottom of this cell.
output_dir = '../../Annotations'
# Directory that receives the annotation file written by the later cells.
temp_output_dir = './temp'

# Selects the task-specific post-processing performed by later cells
# (keystep filtering, trial pruning, gap filling, overlap splitting).
task = "classification" # "classification" or "segmentation" or "cpr_quality"

# NOTE: process_directory only indexes directories whose absolute path splits
# into exactly 11 components, so changing the depth of root_dir changes what is found.
# root_dir = '/standard/UVA-DSA/NIST EMS Project Data/EgoExoEMS_CVPR2025/Dataset/Final'  # Replace with your directory path
root_dir = '/standard/UVA-DSA/NIST EMS Project Data/DataCollection_Spring_2025/CARS/organized'  # Replace with your directory path

# Every later cell reads and rewrites this single file in place.
# output_file = f'{temp_output_dir}/main_annotation.json' # temp
output_file = f'{temp_output_dir}/cars_main_annotation_{task}.json'
# output_file = f'{output_dir}/main_annotation_{task}.json'


In [50]:
import os
import fnmatch
from collections import OrderedDict

# Names of stream folders (last path component) that process_directory indexes;
# directories with any other name are skipped.
accepted_stream_types = [ 'Kinect', 'kinect', 'GoPro', 'gopro', 'distance_sensor_data', 'smartwatch_data', 'audio', 'i3d_flow', 'i3d_rgb', 'resnet50', 'resnet50-exo', 'clip_ego', 'clip_exo']

# Function to map stream names to stream types
def get_stream_type(directory_name):
    """Translate a stream folder name into its canonical stream type.

    Folder names without a known alias are returned unchanged.
    """
    aliases = dict.fromkeys(('Kinect', 'kinect'), 'exocam_rgbd')
    aliases.update(dict.fromkeys(('GoPro', 'gopro'), 'egocam_rgb_audio'))
    aliases['distance_sensor_data'] = 'vl6180_ToF_depth'
    aliases['smartwatch_data'] = 'smartwatch_imu'
    # Add more aliases above if necessary.

    try:
        return aliases[directory_name]
    except KeyError:
        return directory_name

def parse_file(file_path, stream_type):
    """Build the annotation record for one file, or reject it.

    Each stream type accepts only certain files, decided from the
    lower-cased extension and the file name without extension:

    * ``egocam_rgb_audio``: ``.mp4`` files whose name ends with "720p",
      or any ``.json`` file.
    * ``exocam_rgbd``: ``.mkv`` files whose name ends with "trimmed".
    * ``vl6180_ToF_depth``: ``.csv`` files.
    * ``audio``: ``.mp3`` or ``.wav`` files.
    * ``smartwatch_imu``: ``.csv`` files whose name contains "sync".
    * ``i3d_flow`` / ``i3d_rgb``: ``.npy`` files whose name contains
      "flow" / "rgb" respectively.
    * ``resnet50``, ``resnet50-exo``, ``clip_ego``, ``clip_exo``: ``.npy`` files.
    * any other stream type: every file is accepted.

    Returns a dict with ``file_id``, ``file_path`` and a fixed ``protocol``
    string, or ``None`` when the file is rejected.
    """
    file_id, ext = os.path.splitext(os.path.basename(file_path))
    suffix = ext.lower()

    if stream_type == 'egocam_rgb_audio':
        if suffix == '.mp4':
            accepted = file_id.endswith("720p")
        else:
            accepted = suffix == '.json'  # the .json file carries the annotations
    elif stream_type == 'exocam_rgbd':
        print("*" * 50)
        print("EXOCAM")
        print(file_id)
        accepted = suffix == '.mkv' and file_id.endswith("trimmed")
    elif stream_type == 'vl6180_ToF_depth':
        accepted = suffix == '.csv'
    elif stream_type == 'audio':
        accepted = suffix in ('.mp3', '.wav')
    elif stream_type == 'smartwatch_imu':
        accepted = suffix == '.csv' and 'sync' in file_id
    elif stream_type in ('i3d_flow', 'i3d_rgb'):
        required_tag = 'flow' if stream_type == 'i3d_flow' else 'rgb'
        accepted = suffix == '.npy' and required_tag in file_id
    elif stream_type in ('resnet50', 'resnet50-exo', 'clip_ego', 'clip_exo'):
        accepted = suffix == '.npy'
    else:
        accepted = True

    if not accepted:
        return None

    return {
        "file_id": file_id,
        "file_path": file_path,
        "protocol": "Cardiac Arrest - 2-1",
    }

def normalize_subject_key(subject_key):
    """Return the subject key to store in the annotation file.

    Currently a pass-through: the key is returned exactly as given.
    (An earlier variant joined the first two underscore-separated parts.)
    """
    normalized = subject_key
    return normalized

def process_files(files):
    """Drop ``*.txt`` and ``*.jpg`` entries from a directory's file list.

    The incoming list is printed for debugging. When nothing matches either
    pattern, the list is returned as received.
    """
    print(f"Files found for processing: {files}")  # Debugging statement

    ignored_patterns = ('*.txt', '*.jpg')

    def is_ignored(name):
        return any(fnmatch.fnmatch(name, pattern) for pattern in ignored_patterns)

    if not any(is_ignored(name) for name in files):
        return files

    return [name for name in files if not is_ignored(name)]

def process_directory(path):
    """Walk *path* and index the stream files found under it.

    A directory is treated as a stream folder only when its path splits into
    exactly 11 components. For such a directory the subject is the fourth
    component from the end, the trial the second from the end, and the stream
    the last one. Streams not listed in ``accepted_stream_types`` and subjects
    whose key starts with 'ld' are skipped.

    NOTE(review): the third component from the end is not part of the trial
    key, so stream folders that differ only in that component are merged into
    the same trial entry - confirm this is intended.

    Returns a list of subject entries sorted by ``subject_id``, each holding
    its trials sorted by ``trial_id``. Returns an empty list when *path* does
    not exist.
    """
    subjects = []

    if not os.path.exists(path):
        print(f"Path does not exist: {path}")
        return subjects

    for root, _dirs, files in os.walk(path):
        parts = root.split(os.sep)
        print("*" * 50)  # Debugging statement

        print(f"Current directory: {root}")  # Debugging statement
        print(f"Files in directory: {files}")  # Debugging statement

        # Anything that is not at stream-folder depth carries no files to index.
        if len(parts) != 11:
            continue

        subject_key, trial_key, stream = parts[-4], parts[-2], parts[-1]
        if stream not in accepted_stream_types:
            continue

        stream_type = get_stream_type(stream)

        print(f"Processing {root}")
        print(f"Stream type: {stream_type}")

        if subject_key:
            subject_key = normalize_subject_key(subject_key)

        if not (subject_key and trial_key and stream_type):
            continue

        if subject_key.startswith('ld'):
            continue

        # Reuse the subject entry when one exists, otherwise register a new one.
        for subject_entry in subjects:
            if subject_entry['subject_id'] == subject_key:
                break
        else:
            subject_entry = OrderedDict([
                ("subject_id", subject_key),
                ("trials", []),
                ("expertise_level", "EMT"),
            ])
            subjects.append(subject_entry)

        # Same lookup-or-create step for the trial within that subject.
        for trial_entry in subject_entry["trials"]:
            if trial_entry['trial_id'] == trial_key:
                break
        else:
            trial_entry = OrderedDict([
                ("trial_id", trial_key),
                ("streams", OrderedDict()),
                ("keysteps", OrderedDict()),
                ("interventions", OrderedDict()),
            ])
            subject_entry["trials"].append(trial_entry)

        print("Stream type: ", stream_type)  # Debugging statement

        # Files of one stream type are collected in a list under the trial.
        stream_level = trial_entry['streams'].setdefault(stream_type, [])

        print(f"Stream level: {stream_level}")  # Debugging statement
        print(f"Files found: {files}")  # Debugging statement

        for file in sorted(process_files(files)):
            file_info = parse_file(os.path.join(root, file), stream_type)
            if file_info:
                stream_level.append(file_info)
                print(f"Added file info: {file_info} to stream: {stream_type}")  # Debugging statement

    # Deterministic ordering: subjects by id, then each subject's trials by id.
    subjects.sort(key=lambda entry: entry['subject_id'])
    for subject in subjects:
        subject['trials'].sort(key=lambda entry: entry['trial_id'])

    print("*" * 50)  # Debugging statement

    return subjects

def generate_json_structure(root_directory, version="v1.2024.08.10"):
    """Assemble the top-level annotation document for *root_directory*.

    The result maps "subjects" to the output of ``process_directory`` and
    "version" to *version*, in that order.
    """
    json_structure = OrderedDict()
    json_structure["subjects"] = process_directory(root_directory)
    json_structure["version"] = version
    return json_structure

# Example usage:
# root_directory = "/standard/UVA-DSA/NIST EMS Project Data/EgoExoEMS_CVPR2025/Dataset/Final"
# json_structure = generate_json_structure(root_directory)
# print(json_structure)


In [51]:
! pwd

/sfs/gpfs/tardis/home/cjh9fw/Desktop/2024/repos/EgoExoEMS/Tools/annotation_generation


In [52]:

# Build the annotation index for root_dir and write it to output_file.
json_data = generate_json_structure(root_dir)

# sort the json structure (top-level keys only)
json_data = dict(sorted(json_data.items()))

# The target directory may not exist yet on a fresh checkout; create it so
# the write below does not fail with FileNotFoundError.
output_parent = os.path.dirname(output_file)
if output_parent:
    os.makedirs(output_parent, exist_ok=True)

with open(output_file, 'w') as json_file:
    json.dump(json_data, json_file, indent=4)

print(f"JSON structure saved to {output_file}")


**************************************************
Current directory: /standard/UVA-DSA/NIST EMS Project Data/DataCollection_Spring_2025/CARS/organized
Files in directory: ['dir_creator.sh']
**************************************************
Current directory: /standard/UVA-DSA/NIST EMS Project Data/DataCollection_Spring_2025/CARS/organized/cars_9
Files in directory: []
**************************************************
Current directory: /standard/UVA-DSA/NIST EMS Project Data/DataCollection_Spring_2025/CARS/organized/cars_9/stroke
Files in directory: []
**************************************************
Current directory: /standard/UVA-DSA/NIST EMS Project Data/DataCollection_Spring_2025/CARS/organized/cars_9/stroke/0
Files in directory: []
**************************************************
Current directory: /standard/UVA-DSA/NIST EMS Project Data/DataCollection_Spring_2025/CARS/organized/cars_9/stroke/0/smartwatch_data
Files in directory: ['synchronized_smartwatch_02.csv']
Processi

### Populate Key Steps using VIA Annotations

In [53]:
def add_keysteps_to_json(existing_json, keystep_json_path, subject_id="ng1", trial_id="1", gopro_file_name="gopro_1.mp4"):
    """Attach keysteps from a VIA annotation project to one trial.

    Args:
        existing_json: Annotation structure with a ``subjects`` list; it is
            modified in place and also returned.
        keystep_json_path: Path to the VIA project JSON. Its ``file`` section
            is searched for an entry whose ``fname`` contains
            *gopro_file_name*; only ``metadata`` entries that reference that
            entry's id are used.
        subject_id: ``subject_id`` of the subject to update.
        trial_id: ``trial_id`` of the trial to update.
        gopro_file_name: Video file name to look for in the VIA project.

    Returns:
        *existing_json*, with ``keysteps`` of every matching trial replaced by
        the extracted list (which is empty when nothing matched).

    Labels absent from the ``keysteps`` section of ``./class_id_mappings.json``
    are skipped. When the module-level ``task`` is "cpr_quality", only
    ``chest_compressions`` keysteps are kept.
    """
    # Load the keystep annotation JSON file
    with open(keystep_json_path, 'r') as f:
        keystep_data = json.load(f)

    # Load class id mapping json
    with open('./class_id_mappings.json', 'r') as f:
        class_id_mapping = json.load(f)
    keystep_class_ids = class_id_mapping['keysteps']

    print("Adding keysteps to the JSON structure")
    print(f"Subject ID: {subject_id}, Trial ID: {trial_id}, GoPro file name: {gopro_file_name}")

    # Find the VIA file entry that corresponds to this trial's GoPro video.
    # For CARS data the GoPro file name differs, so the fname in the VIA
    # project has to be renamed to match it (e.g. ..._synced_720p.mp4).
    # If several entries match, the last one wins.
    correct_vid_id = None
    for key, value in keystep_data['file'].items():
        if gopro_file_name in value['fname']:
            print("Found the file")
            correct_vid_id = key

    if correct_vid_id is None:
        # Without a matching video no metadata entry can be selected; make
        # the resulting empty keystep list visible instead of silent.
        print(f"Warning: no video entry matching {gopro_file_name} found in {keystep_json_path}; no keysteps will be added.")

    keysteps = []
    for key, value in keystep_data['metadata'].items():
        if value['vid'] != correct_vid_id:
            continue

        try:
            start_t, end_t = value['z']
            label = value['av']['1']
        except (KeyError, TypeError, ValueError):
            # Missing fields, or a segment without exactly two timestamps.
            print("Error in annotation metadata. Verify the annotation keysteps (start_t, end_t, label) are present.")
            continue

        # check if label is keystep or intervention
        if label not in keystep_class_ids:
            print(f"Skipping label {label}. Not a keystep.")
            continue

        if task == "cpr_quality" and label != 'chest_compressions':
            print(f"Skipping label {label}. Not a chest compression keystep.")
            continue

        keysteps.append({
            "keystep_id": key,
            "start_t": start_t,
            "end_t": end_t,
            "label": label,
            "class_id": keystep_class_ids[label]
        })

    # Add keysteps to the existing JSON structure
    for subject in existing_json['subjects']:
        if subject['subject_id'] != subject_id:
            continue
        for trial in subject['trials']:
            if trial['trial_id'] == trial_id:
                trial['keysteps'] = keysteps
                print(f"Found subject {subject_id}")
                print(f"Added keysteps to trial {trial_id}")
                print(f"Keysteps: {keysteps}")

    return existing_json





In [54]:

# Iterate over the subjects and trials in the annotation file and attach the
# VIA keysteps to every trial whose egocam stream holds both a VIA project
# (.json) and a GoPro video.
with open(output_file, 'r') as json_file:
    data = json.load(json_file)
updated_json = data

for subject in data['subjects']:
    print("*" * 50)
    # if(subject['subject_id'] != 'ng8'):
    #     continue
    for trial in subject['trials']:
        files = trial['streams'].get('egocam_rgb_audio')
        if files is None:
            continue
        if isinstance(files, dict):
            # The stream was already collapsed to a single object by a later cell.
            files = [files]

        annotation_file = None
        gopro_file_name = None
        for file in files:
            file_path = file['file_path']
            if file_path.endswith('.json'):
                annotation_file = file_path
                print(f"Subject: {subject['subject_id']}, Trial: {trial['trial_id']}")
                print(f"Annotation ID: {file['file_id']}, Annotation File Path: {file['file_path']}")

            if file_path.endswith(('encoded_trimmed.mp4', '720p.mp4')):
                gopro_file_name = file['file_id'] + '.mp4'
                print(f"GoPro file name: {gopro_file_name}")

        # Run once per trial, after every file has been inspected, rather
        # than again for each remaining file in the stream.
        if annotation_file and gopro_file_name:
            updated_json = add_keysteps_to_json(updated_json, annotation_file, subject_id=subject['subject_id'], trial_id=trial['trial_id'], gopro_file_name=gopro_file_name)

            # The VIA project is not a media stream; drop it from the stream listing.
            trial['streams']['egocam_rgb_audio'] = [entry for entry in files if not entry['file_path'].endswith('.json')]

# The read handle is closed before the same file is rewritten.
with open(output_file, 'w') as json_file:
    json.dump(updated_json, json_file, indent=4)
print(f"JSON structure saved to {output_file}")
print("*" * 50)


**************************************************
GoPro file name: GX010015_synced_720p.mp4
Subject: cars_1, Trial: 0
Annotation ID: via_project_05May2025_11h53m00s, Annotation File Path: /standard/UVA-DSA/NIST EMS Project Data/DataCollection_Spring_2025/CARS/organized/cars_1/stroke/0/GoPro/via_project_05May2025_11h53m00s.json
Adding keysteps to the JSON structure
Subject ID: cars_1, Trial ID: 0, GoPro file name: GX010015_synced_720p.mp4
Found the file
Found subject cars_1
Added keysteps to trial 0
Keysteps: [{'keystep_id': '2_dUxrq3SP', 'start_t': 244.317, 'end_t': 252.58725, 'label': 'approach_patient', 'class_id': 0}, {'keystep_id': '2_FyboeYXP', 'start_t': 252.587, 'end_t': 259.75462, 'label': 'assess_patient', 'class_id': 16}, {'keystep_id': '2_IV7VtaTQ', 'start_t': 260.411, 'end_t': 270.43286, 'label': 'check_heart_rate', 'class_id': 44}, {'keystep_id': '2_mpyRrWQH', 'start_t': 318.435, 'end_t': 325.38357, 'label': 'face_droop_check', 'class_id': 47}, {'keystep_id': '2_qcyVUXfZ'

## Convert any list to singular objects in stream

In [32]:

# Replace every non-empty stream list in the annotation file by its first
# entry; empty lists and values that are not lists are left untouched.
with open(output_file, 'r') as json_file:
    data = json.load(json_file)
updated_json = data

for subject in updated_json['subjects']:
    print("*" * 50)
    for trial in subject['trials']:
        streams = trial['streams']
        for stream_type, files in streams.items():
            print(stream_type, files, len(files))
            if isinstance(files, list) and files:
                streams[stream_type] = files[0]

            print("updated", streams[stream_type])

with open(output_file, 'w') as json_file:
    json.dump(updated_json, json_file, indent=4)
print(f"JSON structure saved to {output_file}")
print("*" * 50)


**************************************************
smartwatch_imu [{'file_id': 'sync_smartwatch', 'file_path': '/standard/UVA-DSA/NIST EMS Project Data/DataCollection_Spring_2025/CARS/organized/cars_1/chest_pain/0/smartwatch_data/sync_smartwatch.csv', 'protocol': 'Cardiac Arrest - 2-1'}] 1
updated {'file_id': 'sync_smartwatch', 'file_path': '/standard/UVA-DSA/NIST EMS Project Data/DataCollection_Spring_2025/CARS/organized/cars_1/chest_pain/0/smartwatch_data/sync_smartwatch.csv', 'protocol': 'Cardiac Arrest - 2-1'}
exocam_rgbd [] 0
updated []
egocam_rgb_audio [{'file_id': 'GX010015_synced_720p', 'file_path': '/standard/UVA-DSA/NIST EMS Project Data/DataCollection_Spring_2025/CARS/organized/cars_1/stroke/0/GoPro/GX010015_synced_720p.mp4', 'protocol': 'Cardiac Arrest - 2-1'}] 1
updated {'file_id': 'GX010015_synced_720p', 'file_path': '/standard/UVA-DSA/NIST EMS Project Data/DataCollection_Spring_2025/CARS/organized/cars_1/stroke/0/GoPro/GX010015_synced_720p.mp4', 'protocol': 'Cardiac Arre

# Update Expertise Levels

In [33]:
# Per-subject expertise levels live under the 'EXPERTISE_LEVELS' key.
expertise_level_file = './subject_expertise_level.json'
with open(expertise_level_file) as json_file:
    expertise_level_data = json.load(json_file)['EXPERTISE_LEVELS']

print(expertise_level_data)

# Overwrite the expertise level of every subject that has an entry in the
# mapping; subjects without one keep their current value.
with open(output_file, 'r') as json_file:
    data = json.load(json_file)
updated_json = data

for subject in updated_json['subjects']:
    print("*" * 50)
    subject_id = subject['subject_id']
    expertise_level = expertise_level_data.get(subject_id)
    if not expertise_level:
        print(f"Expertise level not found for subject {subject_id}")
        continue
    subject['expertise_level'] = expertise_level
    print(f"Updated expertise level for subject {subject_id} to {expertise_level}")

with open(output_file, 'w') as json_file:
    json.dump(updated_json, json_file, indent=4)
print(f"JSON structure saved to {output_file}")
print("*" * 50)

{'ms1': 'EMT', 'ms2': 'EMT', 'ng1': 'EMT', 'ng2': 'EMT', 'ng3': 'EMT', 'ng4': 'EMT', 'ng5': 'EMT', 'ng6': 'EMT', 'ng7': 'Not certified', 'ng8': 'Not certified', 'ng9': 'Not certified', 'ng10': 'EMT', 'ng11': 'Not certified', 'wa0': 'Paramedic', 'wa1': 'EMT', 'wa2': 'EMT', 'wa4': 'EMT', 'wa5': 'EMT', 'wa6': 'EMT', 'wa7': 'EMT', 'P0': 'Not certified', 'P1': 'Not certified', 'P2': 'Not certified', 'P3': 'Not certified', 'P4': 'Not certified', 'P5': 'Not certified', 'P6': 'Not certified', 'P7': 'Not certified', 'P8': 'Not certified', 'P9': 'Not certified', 'P10': 'Not certified', 'P11': 'Not certified', 'P12': 'Not certified', 'P13': 'Not certified', 'P14': 'Not certified', 'P15': 'Not certified', 'P16': 'Not certified', 'P17': 'Not certified', 'P18': 'Not certified', 'P19': 'Not certified', 'P20': 'Not certified', 'P21': 'Not certified', 'P22': 'Not certified', 'P23': 'Not certified', 'opvrs_1': 'EMT', 'opvrs_2': 'EMT', 'opvrs_3': 'EMT', 'opvrs_4': 'EMT', 'opvrs_5': 'EMT', 'opvrs_6': 'EMT

# Remove trials that have no keysteps

In [34]:
# For the cpr_quality task only: drop trials without keysteps, then drop
# subjects that are left without any trial.
if task == "cpr_quality":
    with open(output_file, 'r') as json_file:
        data = json.load(json_file)
    updated_json = data

    subjects_with_trials = []
    for subject in updated_json['subjects']:
        print("*" * 50)

        kept_trials = [trial for trial in subject['trials'] if trial['keysteps']]
        subject['trials'] = kept_trials

        if not kept_trials:
            print(f"Removing subject {subject['subject_id']} as all trials are empty")
            continue

        subjects_with_trials.append(subject)
        for trial in kept_trials:
            print(f"Trial {trial['trial_id']} has keysteps")

    updated_json['subjects'] = subjects_with_trials

    # Save the modified JSON structure back to the file
    with open(output_file, 'w') as json_file:
        json.dump(updated_json, json_file, indent=4)
    print(f"JSON structure saved to {output_file}")
    print("*" * 50)


# Add scenario key

# Fill gaps of annotations with no_action

In [None]:
import json

def fill_no_action_keysteps(trial, no_action_class_id=15):
    """Insert "no_action" keysteps into the gaps between a trial's keysteps.

    The keysteps are ordered by ``start_t``. Wherever a keystep starts after
    the previous one ended, a filler keystep spanning exactly that gap is
    inserted between them.

    Args:
        trial: Trial entry with a ``keysteps`` collection; updated in place.
        no_action_class_id: ``class_id`` written into every filler keystep.

    Returns:
        The same *trial*. A trial without keysteps is returned unmodified.
    """
    keysteps = sorted(trial["keysteps"], key=lambda step: step["start_t"])
    if not keysteps:
        return trial

    filled_keysteps = []
    # Pair every keystep with its successor; the last one is appended afterwards.
    for index, (current, following) in enumerate(zip(keysteps, keysteps[1:])):
        filled_keysteps.append(current)

        if following["start_t"] > current["end_t"]:
            filled_keysteps.append({
                "keystep_id": f"no_action_{index}",
                "start_t": current["end_t"],
                "end_t": following["start_t"],
                "label": "no_action",
                "class_id": no_action_class_id
            })

    filled_keysteps.append(keysteps[-1])

    trial["keysteps"] = filled_keysteps
    return trial

def process_subject_trials(subject):
    """Fill the keystep gaps of every trial of *subject* in place and return it."""
    for trial in subject["trials"]:
        # The trial is updated in place; the return value is the same object.
        fill_no_action_keysteps(trial)
    return subject

def process_all_subjects(data):
    """Run ``process_subject_trials`` on every subject of *data* and return *data*."""
    for subject in data["subjects"]:
        # Subjects are updated in place.
        process_subject_trials(subject)
    return data


# Gap filling only applies to the segmentation task; for other tasks the
# annotation file is left untouched by this cell.
if task == "segmentation":
    # Load the JSON data
    with open(output_file, "r") as f:
        data = json.load(f)

    # Insert "no_action" keysteps between non-adjacent keysteps of every trial
    data = process_all_subjects(data)

    # Save the updated JSON with filled "no_action" steps (overwrites output_file)
    with open(output_file, "w") as f:
        json.dump(data, f, indent=4)

    print("Gaps filled with 'no_action' keysteps successfully.")


# Remove overlapping segments


In [None]:
# import json

# def remove_overlapping_keysteps(trials):
#     for trial in trials:
#         # Sort keysteps within each trial by their start time
#         trial["keysteps"] = sorted(trial["keysteps"], key=lambda x: x['start_t'])
#         non_overlapping_keysteps = []

#         for current_keystep in trial["keysteps"]:
#             if not non_overlapping_keysteps:
#                 non_overlapping_keysteps.append(current_keystep)
#             else:
#                 last_keystep = non_overlapping_keysteps[-1]
#                 # Check for overlap
#                 if current_keystep['start_t'] < last_keystep['end_t']:
#                     # Keep the larger keystep (the one with a longer duration)
#                     current_duration = current_keystep['end_t'] - current_keystep['start_t']
#                     last_duration = last_keystep['end_t'] - last_keystep['start_t']
#                     if current_duration > last_duration:
#                         non_overlapping_keysteps[-1] = current_keystep
#                 else:
#                     non_overlapping_keysteps.append(current_keystep)

#         # Replace the keysteps in the trial with the filtered ones
#         trial["keysteps"] = non_overlapping_keysteps


# if task == "segmentation":
    # # Load the JSON file
    # with open(output_file, 'r') as f:
    #     data = json.load(f)

    # # Process each subject's trials
    # for subject in data["subjects"]:
    #     remove_overlapping_keysteps(subject["trials"])

    # # Save the modified data back to a new JSON file
    # with open(output_file, 'w') as f:
    #     json.dump(data, f, indent=4)

    # print("Overlapping keysteps have been removed and saved to 'main_annotation_no_overlap.json'.")


In [None]:
import json

def split_keystep(last_keystep, current_keystep):
    """
    Trim *last_keystep* in place so it ends where *current_keystep* starts.

    Nothing changes when the two keysteps do not overlap. The (possibly
    shortened) *last_keystep* is returned.
    """
    boundary = current_keystep['start_t']
    if last_keystep['end_t'] > boundary:
        # Overlap: cut the tail of the earlier keystep at the boundary.
        last_keystep['end_t'] = boundary
    return last_keystep

def remove_and_split_overlapping_keysteps(trials):
    """Make the keysteps of every trial non-overlapping, in place.

    Keysteps are ordered by ``start_t``; whenever one starts before its
    predecessor has ended, the predecessor is shortened (via
    ``split_keystep``) to end at that start time. No keystep is dropped.
    """
    for trial in trials:
        ordered = sorted(trial["keysteps"], key=lambda step: step['start_t'])
        trial["keysteps"] = ordered

        resolved = []
        for step in ordered:
            if resolved and step['start_t'] < resolved[-1]['end_t']:
                # Trim the previous keystep so it stops where this one begins.
                resolved[-1] = split_keystep(resolved[-1], step)
            resolved.append(step)

        # Replace the keysteps in the trial with the non-overlapping sequence.
        trial["keysteps"] = resolved


# Overlap removal only applies to the segmentation task.
if task == "segmentation":
    # Load the JSON file
    with open(output_file, 'r') as f:
        data = json.load(f)

    # Process each subject's trials
    for subject in data["subjects"]:
        remove_and_split_overlapping_keysteps(subject["trials"])

    # Save the modified data back to the same JSON file
    with open(output_file, 'w') as f:
        json.dump(data, f, indent=4)

    # Report the file that was actually written.
    print(f"Overlapping keysteps have been split and saved to '{output_file}'.")


# Load JSON and Test

In [None]:
import json


# Load JSON data from file (the handle is closed as soon as parsing finishes)
with open(output_file, 'r') as json_file:
    data = json.load(json_file)

# Print a per-subject summary: number of trials and keysteps per trial

print("Total subjects: ", len(data['subjects']))

for subject in data['subjects']:
    print("*" * 50)
    subject_id = subject['subject_id']
    trials = subject['trials']
    print(f"Subject ID: {subject_id}")
    print(f"Total trials: {len(trials)}")

    for trial in trials:
        print(f"Trial ID: {trial['trial_id']}")
        print(f"Total keysteps: {len(trial['keysteps'])}")
    print("*" * 50)

In [None]:
import json
import pandas as pd

def get_keystep_distribution(subjects):
    """Count how often each keystep label and class id occurs.

    Args:
        subjects: Iterable of subject entries, each with a ``trials`` list
            whose items carry a ``keysteps`` collection of dicts with
            ``label`` and ``class_id``.

    Returns:
        A ``(label_distribution, class_distribution)`` tuple of pandas Series
        as produced by ``value_counts``. Both are empty when there are no
        keysteps at all.
    """
    keysteps_data = [
        {'label': keystep['label'], 'class_id': keystep['class_id']}
        for subject in subjects
        for trial in subject['trials']
        for keystep in trial['keysteps']
    ]

    # Naming the columns explicitly keeps them present for an empty input, so
    # the lookups below do not raise KeyError when no keystep exists.
    df = pd.DataFrame(keysteps_data, columns=['label', 'class_id'])

    label_distribution = df['label'].value_counts()
    class_distribution = df['class_id'].value_counts()

    return label_distribution, class_distribution

# Load the annotation file written by the earlier cells
with open(output_file, 'r') as f:
    data = json.load(f)

# Count keysteps per label and per class id across all subjects and trials
label_distribution, class_distribution = get_keystep_distribution(data['subjects'])

# Display the results
print("Keystep Distribution by Label:")
print(label_distribution)

print("\nKeystep Distribution by Class ID:")
print(class_distribution)


# Plot keysteps

In [None]:
import json
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.colors as mcolors

def plot_keysteps_for_trial(trial, subject_id):
    """Show a horizontal-bar timeline of one trial's keysteps.

    Every keystep becomes a bar on the row of its label, starting at
    ``start_t`` and spanning ``end_t - start_t``. Bars sharing a label share
    a color. *subject_id* is only used in the plot title.
    """
    rows = [
        {
            'label': step['label'],
            'start': step['start_t'],
            'end': step['end_t'],
            'duration': step['end_t'] - step['start_t'],
        }
        for step in trial['keysteps']
    ]
    df = pd.DataFrame(rows)

    # One Tableau color per distinct label, cycling when labels outnumber colors.
    palette = list(mcolors.TABLEAU_COLORS.values())
    color_map = {}
    for position, label in enumerate(df['label'].unique()):
        color_map[label] = palette[position % len(palette)]

    plt.figure(figsize=(10, 6))
    for _, segment in df.iterrows():
        label = segment['label']
        plt.barh(label, segment['duration'], left=segment['start'], height=0.4, color=color_map[label], label=label)

    plt.xlabel('Time (seconds)')
    plt.ylabel('Keysteps')
    plt.title(f'Subject {subject_id} Trial {trial["trial_id"]} Keystep Timeline')
    plt.grid(True)
    plt.tight_layout()

    plt.show()

def plot_keysteps_for_all_trials(subjects, max_subjects=1):
    """Plot the keystep timeline of every trial of the first subjects.

    Args:
        subjects: Sequence of subject entries (``subject_id`` and ``trials``).
        max_subjects: Number of leading subjects to plot. The default of 1
            plots only the first subject; pass ``None`` to plot all of them.
    """
    for index, subject in enumerate(subjects):
        if max_subjects is not None and index >= max_subjects:
            break
        subject_id = subject['subject_id']
        for trial in subject['trials']:
            plot_keysteps_for_trial(trial, subject_id)

# Load the annotation file written by the earlier cells
with open(output_file, 'r') as f:
    data = json.load(f)

# Plotting is disabled by default; uncomment to show the timelines.
# plot_keysteps_for_all_trials(data['subjects'])


# Legacy: Don't Execute

In [None]:
import json
import csv

def extract_keysteps_to_csv(json_file_path, output_csv_path, stream_name):
    """
    Write the keysteps stored under one stream of every trial to a CSV file.

    Args:
        json_file_path (str): Path to the JSON annotation file.
        output_csv_path (str): Path of the CSV file to create.
        stream_name (str): Key of the stream entry to read; trials without
            that stream are skipped.

    Each row holds the stream's ``file_path`` followed by the keystep's
    ``start_t``, ``end_t`` and ``label``.
    """
    with open(json_file_path, 'r') as source:
        annotations = json.load(source)

    with open(output_csv_path, 'w', newline='') as target:
        writer = csv.writer(target)
        writer.writerow(['VIDEO_PATH', 'START_TIME', 'END_TIME', 'LABEL'])

        for subject in annotations['subjects']:
            for trial in subject['trials']:
                if stream_name not in trial['streams']:
                    continue
                stream = trial['streams'][stream_name]
                video_path = stream['file_path']
                # A stream without a 'keysteps' entry contributes no rows.
                for keystep in stream.get('keysteps', []):
                    writer.writerow([
                        video_path,
                        keystep['start_t'],
                        keystep['end_t'],
                        keystep['label'],
                    ])

    print(f"Keysteps have been extracted to {output_csv_path}")

# Example usage (legacy)
# NOTE(review): extract_keysteps_to_csv reads keysteps from inside the stream
# entry, whereas add_keysteps_to_json stores them on the trial - confirm the
# input file still has the layout this cell expects.
json_file_path = './output_structure.json'  # Path to your JSON file
# NOTE(review): CSV rows are written to this path although it ends in .json.
output_csv_path = './video_annotations.json'  # Path to save the CSV file
stream_name = 'egocam_rgb_audio'  # Replace with your specific stream name

extract_keysteps_to_csv(json_file_path, output_csv_path, stream_name)


In [None]:
import json
import csv
import cv2

def extract_keysteps_to_csv_with_frames(json_file_path, output_csv_path, stream_name):
    """
    Write the keysteps stored under one stream of every trial to a CSV file,
    expressed as frame numbers.

    Args:
        json_file_path (str): Path to the JSON annotation file.
        output_csv_path (str): Path of the CSV file to create.
        stream_name (str): Key of the stream entry to read; trials without
            that stream are skipped.

    The frame rate is queried from the stream's video through OpenCV and each
    time is converted with ``int(time * fps)``.
    """
    with open(json_file_path, 'r') as source:
        annotations = json.load(source)

    with open(output_csv_path, 'w', newline='') as target:
        writer = csv.writer(target)
        writer.writerow(['VIDEO_PATH', 'START_FRAME', 'END_FRAME', 'LABEL'])

        for subject in annotations['subjects']:
            for trial in subject['trials']:
                if stream_name not in trial['streams']:
                    continue
                stream = trial['streams'][stream_name]
                video_path = stream['file_path']

                # The capture is opened only to read the frame rate.
                capture = cv2.VideoCapture(video_path)
                fps = capture.get(cv2.CAP_PROP_FPS)
                capture.release()

                for keystep in stream.get('keysteps', []):
                    writer.writerow([
                        video_path,
                        int(keystep['start_t'] * fps),
                        int(keystep['end_t'] * fps),
                        keystep['label'],
                    ])

    print(f"Keysteps with frame numbers have been extracted to {output_csv_path}")

# Example usage (legacy)
# NOTE(review): the function reads keysteps from inside the stream entry and
# opens every referenced video with OpenCV - confirm the input layout and
# that the video paths are reachable before running.
json_file_path = './output_structure.json'  # Path to your JSON file
output_csv_path = './video_annotations.csv'  # Path to save the CSV file
stream_name = 'egocam_rgb_audio'  # Replace with your specific stream name

extract_keysteps_to_csv_with_frames(json_file_path, output_csv_path, stream_name)
