# Extract Frame-Level Labels from .json Annotations 

This script reads frame-level annotations from .json files generated with Label Studio for a set of surgical procedure videos. Each patient has a corresponding .json file that contains annotation data from one or more annotators.

The script:

- Filters annotations by annotator ID (1 or 2)

- Converts time-based annotations into frame-level labels at 1 FPS

- Maps phase labels to numeric class IDs

- Saves the results as .csv files for use in model training

In [1]:
import json
import pandas as pd
import os

def extract_labels(data: list, annotator_id: int, patient_id: str) -> pd.DataFrame:
    """
    Convert time-based labels from Label Studio .json into frame-level labels at 1 fps.

    Parameters:
        data (list): json content for one patient, iterated as a sequence of
            per-video entries. Each entry is a dict with an 'annotator' key
            and a 'tricks' list of segments carrying 'start', 'end' and
            'labels'.
        annotator_id (int): ID of the annotator (1 or 2).
        patient_id (str): Patient identifier (e.g., '1075').

    Returns:
        pd.DataFrame: DataFrame with columns [frame, id, label], one row per
            labelled frame, in the order the frames were first encountered.

    Notes:
        - The video number embedded in each id is the 1-based position of the
          entry within `data`, counting entries of every annotator, not only
          the ones that match `annotator_id`.
        - 'start' and 'end' are truncated with int() and the range is
          inclusive of 'end' (presumably both are in seconds -- confirm
          against the Label Studio export).
        - When segments overlap, the first label seen for a frame is kept.
    """
    # frame id -> (frame number, label). A dict gives O(1) duplicate checks and
    # preserves insertion order, so the DataFrame can be built once at the end
    # instead of being re-scanned and re-copied for every single frame.
    rows_by_id = {}

    for idx, video in enumerate(data, start=1):
        if video['annotator'] != annotator_id:
            continue

        video_num = f"{patient_id}_{str(idx).zfill(2)}"

        for segment in video['tricks']:
            label = segment['labels'][0]  # Assumes only one label per segment
            first = int(segment['start'])
            last = int(segment['end'])

            for t in range(first, last + 1):
                if t == 0:
                    continue  # Frame 0 is not extracted at 1 fps

                frame_id = f"{video_num}_{str(t).zfill(4)}"
                # setdefault keeps the first label assigned to a frame.
                rows_by_id.setdefault(frame_id, (t, label))

    records = [
        (frame, frame_id, label)
        for frame_id, (frame, label) in rows_by_id.items()
    ]
    return pd.DataFrame(records, columns=['frame', 'id', 'label'])




In [None]:
# Path to the directory containing all JSON annotation files
json_dir = '/home/json_labels/'

# Keep only regular .json files. The output folder created below lives inside
# json_dir, so on a re-run a bare os.listdir() would also return that folder
# (and any other stray entry) and open() would fail on it. Sorting makes the
# processing order deterministic.
json_files = sorted(
    name for name in os.listdir(json_dir)
    if name.lower().endswith('.json')
    and os.path.isfile(os.path.join(json_dir, name))
)

# Annotator ID to extract (1 or 2)
annotator_id = 1

# Output directory for the CSV files
output_dir = f'/home/json_labels/Annotator {annotator_id}'
os.makedirs(output_dir, exist_ok=True)

# Mapping of phase labels (Spanish) to numeric IDs
label_map = {
    'Fase 1': 1,
    'Fase 2': 2,
    'Fase 3': 3,
    'Fase 4': 4,
    'Fase 5': 5,
    'Fase 6': 6,
    'Fase 7': 7,
    'Fase 8': 8,
    'No step': 0,
}

# Process each patient's JSON file
for file_name in json_files:
    patient_id, _ = os.path.splitext(file_name)
    file_path = os.path.join(json_dir, file_name)

    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    df = extract_labels(data, annotator_id, patient_id)

    # Labels missing from label_map are written unchanged; flag them so they
    # do not slip into the training data unnoticed.
    unmapped = sorted(set(df['label']) - set(label_map), key=str)
    if unmapped:
        print(f"Warning: unmapped labels for patient {patient_id}: {unmapped}")

    # Work on an explicit copy so the assignment below does not write into a
    # slice of df (which triggers pandas' SettingWithCopyWarning).
    df_csv = df[['id', 'label']].copy()
    df_csv['label'] = df_csv['label'].map(lambda name: label_map.get(name, name))

    output_path = os.path.join(output_dir, f"{patient_id}.csv")
    df_csv.to_csv(output_path, index=False)

    print(f"CSV label file created for patient {patient_id}")