# Labelbox annotations Sample Processing code

## This notebook contains sample code that processes the NDJSON file exported from a Labelbox project and converts its annotations into the ActivityNet JSON format required by ActionFormer

### Sample ActivityNet JSON format

In [10]:
# Sample entry in the target format (illustration only; the string is not assigned to any variable)
"""
{
  "video1": {
      "duration_second": 211.53,
      "duration_frame": 6337,
      "annotations": [
          {
              "segment": [
                  30.025882995319815,
                  205.2318595943838
              ],
              "label": "Rock climbing"
          }
      ],
      "feature_frame": 6336,
      "fps": 30.0,
      "rfps": 29.9579255898
  },
  
"""

'\n{\n  "video1": {\n      "duration_second": 211.53,\n      "duration_frame": 6337,\n      "annotations": [\n          {\n              "segment": [\n                  30.025882995319815,\n                  205.2318595943838\n              ],\n              "label": "Rock climbing"\n          }\n      ],\n      "feature_frame": 6336,\n      "fps": 30.0,\n      "rfps": 29.9579255898\n  },\n\n'

### Import the required Libraries

In [1]:
import ndjson
import json
import os
import requests

In [2]:
# List the working directory without relying on a shell command
# (`ls` is not available in the default Windows shell).
sorted(os.listdir())


In [3]:
# Show the working directory in a platform-independent way
# (`cd` without arguments prints the directory only on Windows).
print(os.getcwd())

e:\VS Code Folders\ActionFormer\code files


### Specify the paths

In [11]:
# Labelbox export (NDJSON) that is read by the data processing loop
META_DATA_PATH = "Export  project - YOLO incorrect predicted videos 24.6.2025 - 7_15_2025.ndjson"

# Folder that receives the downloaded video files
VIDEOS_DIRECTORY = "videos"

# JSON file in which the processed annotations are stored
ANNOTATIONS_FILE = "activitynet_annotations.json"

In [5]:
# Make sure the download folder is present; an already existing folder is not an error
os.makedirs(VIDEOS_DIRECTORY, exist_ok=True)

### Specify the activity labels to extract

In [4]:
# Only classifications whose value appears in this list are turned into segments
activities_to_track = ["crash", "drift", "jump"]

### Dictionary to store the processed data

In [5]:
# Filled by the data processing loop: one record per video, keyed by the video name
processed_data = dict()

### Function to download the videos

In [6]:
def download_video(video_url, video_name, video_path, timeout=60):
    """
    Download a video from ``video_url`` and store it at ``video_path``.

    The data is streamed into a temporary ``<video_path>.part`` file that is
    renamed to ``video_path`` only after the whole body has been received.
    An interrupted download therefore never leaves a truncated file behind
    that a later call would mistake for a finished download.

    Args:
        video_url (str): The URL of the video to be downloaded.
        video_name (str): The name of the video; only used in log messages.
        video_path (str): The full path of the file the video is saved to.
        timeout (float | tuple): Timeout in seconds passed to ``requests.get``
            so that a stalled connection cannot block forever.

    Returns:
        bool: True if the video is available at ``video_path`` (it already
        existed or was downloaded successfully), False otherwise.
    """
    if os.path.exists(video_path):
        print(f"Video '{video_path}' already exists. Skipping download.")
        return True

    partial_path = f"{video_path}.part"
    try:
        # The context manager releases the streamed connection in every case.
        with requests.get(video_url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Error downloading video '{video_name}': {response.status_code}")
                return False
            with open(partial_path, 'wb') as file:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    if chunk:  # skip empty keep-alive chunks
                        file.write(chunk)
        # Publish the file under its final name only once it is complete.
        os.replace(partial_path, video_path)
    except requests.exceptions.RequestException as e:
        print(f"Error downloading video '{video_name}': {e}")
        return False
    finally:
        # After a successful rename the partial file no longer exists, so this
        # only cleans up after a failed or aborted download.
        if os.path.exists(partial_path):
            os.remove(partial_path)

    print(f"Video '{video_name}' downloaded successfully.")
    return True

### Data processing loop

In [None]:
# Read the whole export first so the metadata file is closed before the
# (potentially slow) video downloads start. The encoding is given explicitly
# because the platform default is not UTF-8 on every system.
with open(META_DATA_PATH, 'r', encoding='utf-8') as file:
    metadata = ndjson.load(file)

for item in metadata:
    video_url = item['data_row']['row_data']
    video_name = item['data_row']['external_id']

    # Strip the file extension whatever its length ('.mp4', '.webm', ...)
    video_id = os.path.splitext(video_name)[0]

    media_attributes = item['media_attributes']
    frame_count = media_attributes['frame_count']
    video_frame_rate = media_attributes['frame_rate']

    video_entry = {
        'duration_second': frame_count / video_frame_rate,
        'duration_frame': frame_count,
        'annotations': [],
    }
    processed_data[video_id] = video_entry

    video_path = os.path.join(VIDEOS_DIRECTORY, video_name)
    download_video(video_url, video_name, video_path)

    # Get the first (and usually only) project key
    project_dict = item.get("projects", {})
    project_key = next(iter(project_dict), None)

    if project_key is None:
        raise ValueError(f"No project key found for video: {video_name}")

    labels = project_dict[project_key]["labels"]
    if not labels:
        # Nothing was labelled for this video: keep the record, without segments.
        print(f"Warning: no labels found for video '{video_name}'; annotations left empty.")
        continue

    frames = labels[0]["annotations"]["frames"]

    # Collect, per tracked activity, the timestamps (in seconds) of all frames
    # that carry a classification with that value.
    activities = {}
    for frame, annotations in frames.items():
        timestamp = round(int(frame) / video_frame_rate, 2)
        for classification in annotations.get("classifications", []):
            label_value = classification["value"]
            if label_value not in activities_to_track:
                continue
            activities.setdefault(label_value, []).append(timestamp)

    # Consecutive timestamps of one activity are read as (start, end) pairs.
    for label, times_array in activities.items():
        times_array.sort()
        if len(times_array) % 2 != 0:
            # A trailing timestamp has no partner; report it instead of
            # crashing with an IndexError while building the last segment.
            print(
                f"Warning: video '{video_name}' has an odd number of '{label}' "
                f"timestamps ({len(times_array)}); ignoring unpaired time {times_array[-1]}."
            )

        # zip() stops at the shorter slice, which drops an unpaired last entry.
        for start, end in zip(times_array[0::2], times_array[1::2]):
            video_entry['annotations'].append({'segment': [start, end], 'label': label})


In [14]:
# Display the processed records for a visual sanity check
processed_data

{'vid_549': {'duration_second': 7.6,
  'duration_frame': 190,
  'annotations': [{'segment': [1.32, 1.88], 'label': 'jump'}]},
 'vid_550': {'duration_second': 6.6,
  'duration_frame': 165,
  'annotations': [{'segment': [0.56, 4.64], 'label': 'drift'}]},
 'vid_551': {'duration_second': 8.88,
  'duration_frame': 222,
  'annotations': [{'segment': [1.2, 2.4], 'label': 'drift'}]},
 'vid_552': {'duration_second': 8.56,
  'duration_frame': 214,
  'annotations': [{'segment': [0.68, 1.56], 'label': 'drift'},
   {'segment': [2.44, 3.28], 'label': 'drift'},
   {'segment': [4.16, 5.16], 'label': 'drift'}]},
 'vid_553': {'duration_second': 10.56,
  'duration_frame': 264,
  'annotations': [{'segment': [1.32, 2.48], 'label': 'drift'},
   {'segment': [5.76, 6.92], 'label': 'drift'}]},
 'vid_556': {'duration_second': 9.32,
  'duration_frame': 233,
  'annotations': [{'segment': [0.92, 2.08], 'label': 'drift'}]},
 'vid_557': {'duration_second': 7.6,
  'duration_frame': 190,
  'annotations': [{'segment': 

In [15]:
# Number of records (one per video key) in the processed data
len(processed_data)

116

### Save the processed annotations

In [None]:
# Step 1: Load existing JSON data if file exists, otherwise start with empty dict
try:
    with open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as file:
        existing_data = json.load(file)
except FileNotFoundError:
    existing_data = {}

# Step 2: Update existing data with new annotations
# (entries for videos that are already present are replaced)
existing_data.update(processed_data)

# Step 3: Write the merged data to a temporary file and swap it in afterwards.
# Opening ANNOTATIONS_FILE directly with 'w' would truncate it before the data
# is serialized, so a failure during json.dump would destroy the annotations
# collected by earlier runs.
temp_annotations_file = f"{ANNOTATIONS_FILE}.tmp"
with open(temp_annotations_file, 'w', encoding='utf-8') as file:
    json.dump(existing_data, file, indent=4)
os.replace(temp_annotations_file, ANNOTATIONS_FILE)