In [2]:
import ndjson
import json
import os
from audio_extract import extract_audio
import requests

In [3]:
# Labelbox export (NDJSON, one JSON object per video) that the conversion cell parses.
META_DATA_PATH = 'Audio_data_labelbox/Action video labels - 2_14_2025.ndjson'
# Directory joined with each video's file name to build its local video path.
VIDEOS_DIRECTORY = 'Videos'
# Directory joined with each derived .mp3 file name to build its local audio path.
AUDIOS_DIRECTORY = 'Audios'
# NOTE(review): not referenced by any cell visible here — presumably the intended
# output file for the converted annotations; confirm or remove.
TIMESTAMP_ANNOTATIONS = 'activitynet_annotations.json'

In [None]:
# Sample entry illustrating the target annotation format (excerpt; the closing brace is omitted)
"""
{
  "video1": {
      "duration_second": 211.53,
      "duration_frame": 6337,
      "annotations": [
          {
              "segment": [
                  30.025882995319815,
                  205.2318595943838
              ],
              "label": "Rock climbing"
          }
      ],
      "feature_frame": 6336,
      "fps": 30.0,
      "rfps": 29.9579255898
  },
"""

In [7]:
# Convert the Labelbox NDJSON export into a dict keyed by video id (the file
# name without its extension). Each entry holds the video duration in seconds,
# the frame count, and a list of {'segment': [start, end], 'label': ...} items
# with times in seconds.

# Labelbox project id under which each exported row stores its labels.
LABELBOX_PROJECT_ID = "clvksmh1x038b07z80gv75tec"

processed_data = {}

# Only the load needs the file handle; close it before the processing loop.
with open(META_DATA_PATH, 'r', encoding='utf-8') as file:
    metadata = ndjson.load(file)

for item in metadata:
    video_url = item['data_row']['row_data']
    video_name = item['data_row']['external_id']
    # NOTE(review): this video is excluded on purpose, but the reason is not
    # recorded here — confirm whether the exclusion is still needed.
    if video_name == 'vid_771.mp4':
        continue

    # Strip the extension properly instead of assuming it is 3 characters long.
    video_id = os.path.splitext(video_name)[0]
    media_attributes = item['media_attributes']
    frame_count = media_attributes['frame_count']
    video_frame_rate = media_attributes['frame_rate']

    video_path = os.path.join(VIDEOS_DIRECTORY, video_name)
    # download_video(video_url, video_name, video_path)

    audio_name = video_id + '.mp3'
    audio_path = os.path.join(AUDIOS_DIRECTORY, audio_name)
    # video_to_audio(video_path, audio_path)

    frames = item["projects"][LABELBOX_PROJECT_ID]["labels"][0]["annotations"]["frames"]

    # Collect, per label value, the timestamp (seconds, 2 decimals) of every
    # frame that carries a classification with that value.
    activities = {}
    for frame, annotations in frames.items():
        timestamp = round(int(frame) / video_frame_rate, 2)
        for classification in annotations.get("classifications", []):
            activities.setdefault(classification["value"], []).append(timestamp)

    # Consecutive sorted timestamps of a label are paired as [start, end].
    segments = []
    for label, times_array in activities.items():
        times_array.sort()
        if len(times_array) % 2 != 0:
            # An odd count cannot be fully paired. Previously this raised
            # IndexError and aborted the whole conversion; now the trailing
            # timestamp is reported and dropped.
            print("{}: label {!r} has {} keyframes (odd); dropping unpaired last timestamp {}".format(
                video_name, label, len(times_array), times_array[-1]))
        # zip() stops at the shorter slice, so an unpaired last value is skipped.
        for start, end in zip(times_array[0::2], times_array[1::2]):
            segments.append({'segment': [start, end], 'label': label})

    processed_data[video_id] = {
        'duration_second': frame_count / video_frame_rate,
        'duration_frame': frame_count,
        'annotations': segments,
    }


In [8]:
# Bare expression: the notebook displays the converted annotations for a visual check.
processed_data

{'vid_554': {'duration_second': 6.0,
  'duration_frame': 150,
  'annotations': [{'segment': [0.96, 5.08], 'label': 'turn'},
   {'segment': [3.36, 4.68], 'label': 'drift'},
   {'segment': [0.84, 5.04], 'label': 'high_speed'},
   {'segment': [0.28, 6.0], 'label': 'long_noise'}]},
 'vid_555': {'duration_second': 3.8,
  'duration_frame': 95,
  'annotations': [{'segment': [1.72, 2.0], 'label': 'crash'},
   {'segment': [1.08, 1.88], 'label': 'jump'},
   {'segment': [1.72, 2.12], 'label': 'drift'},
   {'segment': [1.92, 3.8], 'label': 'long_noise'}]},
 'vid_561': {'duration_second': 3.48,
  'duration_frame': 87,
  'annotations': [{'segment': [0.08, 3.48], 'label': 'long_noise'}]},
 'vid_562': {'duration_second': 7.32,
  'duration_frame': 183,
  'annotations': [{'segment': [0.88, 2.08], 'label': 'drift'},
   {'segment': [2.64, 4.68], 'label': 'drift'},
   {'segment': [0.88, 2.08], 'label': 'turn'},
   {'segment': [2.56, 5.16], 'label': 'turn'},
   {'segment': [0.88, 5.16], 'label': 'high_speed

In [None]:
# Destination file for the converted annotations.
PROCESSED_DATA_PATH = "processed_data.json"

# Persist the annotation dict as JSON indented by 4 spaces.
with open(PROCESSED_DATA_PATH, "w") as out_file:
    out_file.write(json.dumps(processed_data, indent=4))