# Annotation Generation Tools

In [41]:
import json
import os
from collections import OrderedDict
import fnmatch


In [42]:
output_dir = '../Annotations'

In [45]:
# Function to map stream names to stream types
def get_stream_type(directory_name):
    stream_mapping = {
        'RGBD_camera': 'exocam_rgbd',
        'depth_sensor': 'vl6180_ToF_depth',
        'ego_camera': 'egocam_rgb_audio',
        # Add more mappings if necessary
    }
    return stream_mapping.get(directory_name, directory_name)

def parse_video_file(file_path):
    file_name = os.path.basename(file_path)
    video_id, ext = os.path.splitext(file_name)
    if ext.lower() not in ['.mp4', '.mov', '.mkv', '.txt','.npy' ]: #
        return None
    
    return {
        "file_id": video_id,
        "file_path": file_path,
        "protocol": "Cardiac Arrest - 2-1",
        "interventions": [],
        "keysteps": []
    }

def normalize_subject_key(subject_key):
    if '_' in subject_key:
        return subject_key.split('_')[0]
    return subject_key

# Assuming `files` is a list of filenames in the current directory
def process_files(files):
    # Use fnmatch to check for the presence of .txt and .mkv files
    has_txt = any(fnmatch.fnmatch(file, '*.txt') for file in files)
    has_mkv = any(fnmatch.fnmatch(file, '*.mkv') for file in files)
    has_mp4 = any(fnmatch.fnmatch(file, '*.MP4') for file in files)

    if (has_txt and has_mkv) or (has_txt and has_mp4):
        # Remove all .txt files from the list
        files = [file for file in files if not fnmatch.fnmatch(file, '*.txt')]
    
    return files

def process_directory(path):
    subjects = []
    
    if not os.path.exists(path):
        print(f"Path does not exist: {path}")
        return subjects

    for root, dirs, files in os.walk(path):
        parts = root.split(os.sep)
        stream_type = get_stream_type(parts[-3]) if len(parts) > 3 else None
        subject_key = None
        trial_key = None
        
        if 'ng' in parts[-2]:  # Handling subject folders
            subject_key = parts[-2]
            trial_key = parts[-1]
        elif 'ng' in parts[-3]:  # Handling trial folders within subject
            subject_key = parts[-3]
            trial_key = parts[-2]
        
        if subject_key:
            subject_key = normalize_subject_key(subject_key)
        
        if not subject_key:
            continue
        
        # Find or create the subject entry in the list
        subject_entry = next((subject for subject in subjects if subject['subject_id'] == subject_key), None)
        if not subject_entry:
            subject_entry = OrderedDict({
                "subject_id": subject_key,
                "trials": [],
                "expertise_level": "EMT"
            })
            subjects.append(subject_entry)
        
        current_level = subject_entry["trials"]
        
        # Find or create the trial entry in the trials array
        trial_entry = next((trial for trial in current_level if trial['trial_id'] == trial_key), None)
        if not trial_entry:
            trial_entry = OrderedDict({
                "trial_id": trial_key,
                "streams": OrderedDict()
            })
            current_level.append(trial_entry)
        
        # Access or create the stream level within the trial
        if stream_type:
            if stream_type not in trial_entry['streams']:
                trial_entry['streams'][stream_type] = OrderedDict()
            stream_level = trial_entry['streams'][stream_type]
        else:
            stream_level = trial_entry['streams']
        
        # Check if both txt and mkv files are present in the files list
        files = process_files(files)
        
        for file in sorted(files):
            file_info = parse_video_file(os.path.join(root, file))
            if file_info:
                trial_entry['streams'][stream_type] = file_info

    # Sort the subjects array by subject_id for consistency
    subjects = sorted(subjects, key=lambda x: x['subject_id'])
    
    # Sort the trials array by trial_id for consistency
    for subject in subjects:
        subject['trials'] = sorted(subject['trials'], key=lambda x: x['trial_id'])

    return subjects


def generate_json_structure(root_directory, version="v1.2024.08.10"):
    json_structure = OrderedDict({
        "subjects": process_directory(root_directory),
        "version": version
    })
    return json_structure





In [46]:
root_dir = '/standard/UVA-DSA/NIST EMS Project Data/CognitiveEMS_Datasets/North_Garden/May_2024/May24_updated_structure'  # Replace with your directory path
output_file = f'{output_dir}/main_annotation.json'

json_data = generate_json_structure(root_dir)

# sort the json structure
json_data = dict(sorted(json_data.items()))
with open(output_file, 'w') as json_file:
    json.dump(json_data, json_file, indent=4)

print(f"JSON structure saved to {output_file}")


JSON structure saved to ../Annotations/main_annotation.json


### Populate Key Steps using VIA Annotations

In [36]:
def add_keysteps_to_json(existing_json, keystep_json_path, subject_id="ng1", trial_id="1", stream="vl6180_ToF_depth"):
    # Load the keystep annotation JSON file
    with open(keystep_json_path, 'r') as f:
        keystep_data = json.load(f)
    
    # Extract the relevant metadata from the keystep JSON
    keysteps = []
    metadata = keystep_data['metadata']
    
    for key, value in metadata.items():
        start_t, end_t = value['z']
        label = value['av']['1']
        keysteps.append({
            "keystep_id": key,
            "start_t": start_t,
            "end_t": end_t,
            "label": label
        })
    
    # Add keysteps to the existing JSON structure
    for subject in existing_json['subjects']:
        if subject['subject_id'] == subject_id:
            for trial in subject['trials']:
                if trial['trial_id'] == trial_id:
                    if stream in trial['streams']:
                        trial['streams'][stream]['keysteps'] = keysteps
        
    return existing_json


# Add keysteps to the existing JSON structure
updated_json = add_keysteps_to_json(json_data, './via_video_annotator/via-3.0.13/via_project_28Aug2024_11h02m15s.json', trial_id="1", stream="egocam_rgb_audio")

# Print or save the updated JSON structure
with open(output_file, 'w') as json_file:
    json.dump(json_data, json_file, indent=4)

print(f"JSON structure saved to {output_file}")


JSON structure saved to output_structure.json


# Load JSON and Test

In [68]:
import json



# Load JSON data from file
data = json.loads(open('output_structure.json').read())

# Iterate through the trials of a specific subject (e.g., 'ng1')
subject_id = 'ng1'  # Specify the subject ID you want to iterate through

if subject_id in data['subject']:
    trials = data['subject'][subject_id]['trials']
    
    for trial_id, trial_data in trials.items():
        print(f"Trial ID: {trial_id}")
        for stream_type, stream_data in trial_data.items():
            try:
                print(f"  Stream Type: {stream_type}")
                print(f"    File ID: {stream_data['file_id']}")
                print(f"    File Path: {stream_data['file_path']}")
            except KeyError:
                print(f"    No file data found for stream type '{stream_type} in trial '{trial_id}' of subject '{subject_id}'")
else:
    print(f"Subject ID '{subject_id}' not found in the JSON data.")


Trial ID: 3
  Stream Type: vl6180_ToF_depth
    File ID: depth_sensor_2024-05-23-20_14_15
    File Path: /standard/UVA-DSA/NIST EMS Project Data/CognitiveEMS_Datasets/North_Garden/May_2024/May24_updated_structure/depth_sensor/ng1/3/depth_sensor_2024-05-23-20_14_15.txt
  Stream Type: exocam_rgbd
    File ID: 2024-05-23-20-14-12
    File Path: /standard/UVA-DSA/NIST EMS Project Data/CognitiveEMS_Datasets/North_Garden/May_2024/May24_updated_structure/RGBD_camera/ng1/3/2024-05-23-20-14-12.mkv
Trial ID: 2
  Stream Type: vl6180_ToF_depth
    File ID: depth_sensor_2024-05-23-20_10_58
    File Path: /standard/UVA-DSA/NIST EMS Project Data/CognitiveEMS_Datasets/North_Garden/May_2024/May24_updated_structure/depth_sensor/ng1/2/depth_sensor_2024-05-23-20_10_58.txt
  Stream Type: exocam_rgbd
    File ID: 2024-05-23-20-10-54
    File Path: /standard/UVA-DSA/NIST EMS Project Data/CognitiveEMS_Datasets/North_Garden/May_2024/May24_updated_structure/RGBD_camera/ng1/2/2024-05-23-20-10-54.mkv
Trial ID: 4


In [30]:
import json
import csv

def extract_keysteps_to_csv(json_file_path, output_csv_path, stream_name):
    """
    Extract keysteps from the specified stream in the JSON annotation file and save them to a CSV file.

    Args:
        json_file_path (str): Path to the JSON annotation file.
        output_csv_path (str): Path to save the output CSV file.
        stream_name (str): The stream from which to extract keystep annotations.
    """
    # Load the JSON data
    with open(json_file_path, 'r') as json_file:
        data = json.load(json_file)

    # Open the CSV file for writing
    with open(output_csv_path, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        # Write the header
        writer.writerow(['VIDEO_PATH', 'START_TIME', 'END_TIME', 'LABEL'])

        # Iterate through the subjects and trials in the JSON
        for subject in data['subjects']:
            for trial in subject['trials']:
                if stream_name in trial['streams']:
                    video_path = trial['streams'][stream_name]['file_path']
                    keysteps = trial['streams'][stream_name].get('keysteps', [])
                    # Write each keystep to the CSV file
                    for keystep in keysteps:
                        writer.writerow([
                            video_path,
                            keystep['start_t'],
                            keystep['end_t'],
                            keystep['label']
                        ])

    print(f"Keysteps have been extracted to {output_csv_path}")

# Example usage
json_file_path = './output_structure.json'  # Path to your JSON file
output_csv_path = './video_annotations.json'  # Path to save the CSV file
stream_name = 'egocam_rgb_audio'  # Replace with your specific stream name

extract_keysteps_to_csv(json_file_path, output_csv_path, stream_name)


Keysteps have been extracted to ./video_annotations.json


In [37]:
import json
import csv
import cv2

def extract_keysteps_to_csv_with_frames(json_file_path, output_csv_path, stream_name):
    """
    Extract keysteps from the specified stream in the JSON annotation file,
    convert start/end times to frame numbers, and save them to a CSV file.

    Args:
        json_file_path (str): Path to the JSON annotation file.
        output_csv_path (str): Path to save the output CSV file.
        stream_name (str): The stream from which to extract keystep annotations.
    """
    # Load the JSON data
    with open(json_file_path, 'r') as json_file:
        data = json.load(json_file)

    # Open the CSV file for writing
    with open(output_csv_path, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        # Write the header
        writer.writerow(['VIDEO_PATH', 'START_FRAME', 'END_FRAME', 'LABEL'])

        # Iterate through the subjects and trials in the JSON
        for subject in data['subjects']:
            for trial in subject['trials']:
                if stream_name in trial['streams']:
                    video_path = trial['streams'][stream_name]['file_path']

                    # Retrieve the video frame rate using OpenCV
                    video_capture = cv2.VideoCapture(video_path)
                    fps = video_capture.get(cv2.CAP_PROP_FPS)
                    video_capture.release()

                    keysteps = trial['streams'][stream_name].get('keysteps', [])
                    # Write each keystep to the CSV file with converted frame numbers
                    for keystep in keysteps:
                        start_frame = int(keystep['start_t'] * fps)
                        end_frame = int(keystep['end_t'] * fps)
                        writer.writerow([
                            video_path,
                            start_frame,
                            end_frame,
                            keystep['label']
                        ])

    print(f"Keysteps with frame numbers have been extracted to {output_csv_path}")

# Example usage
# Example usage
json_file_path = './output_structure.json'  # Path to your JSON file
output_csv_path = './video_annotations.csv'  # Path to save the CSV file
stream_name = 'egocam_rgb_audio'  # Replace with your specific stream name

extract_keysteps_to_csv_with_frames(json_file_path, output_csv_path, stream_name)


Keysteps with frame numbers have been extracted to ./video_annotations.csv
