In [2]:
import ndjson
import json
import os
from audio_extract import extract_audio
import requests

In [2]:
# NDJSON export that is parsed with ndjson.load() to obtain one record per video
# (video URL, file name, frame rate and per-frame label annotations).
META_DATA_PATH = 'Audio_data_labelbox/Action video labels - 2_14_2025.ndjson'
# Directory that video file names are joined onto to build local video paths.
VIDEOS_DIRECTORY = 'Videos'
# Directory that audio file names are joined onto to build local audio paths.
AUDIOS_DIRECTORY = 'Audios'
# NOTE(review): not referenced in the cells visible here; presumably the JSON file
# the processed timestamp annotations are written to -- TODO confirm.
TIMESTAMP_ANNOTATIONS = 'latest_timestamp_annotations.json'

In [3]:
def download_video(video_url, video_name, video_path, timeout=30, chunk_size=1024 * 1024):
    """
    Download a video from ``video_url`` and save it at ``video_path``.

    The download is skipped when ``video_path`` already exists. Data is streamed
    into a temporary ``<video_path>.part`` file that is renamed to ``video_path``
    only after the whole body has been received, so an interrupted download never
    leaves a truncated file that a later run would mistake for a complete one.

    Args:
        video_url (str): The URL of the video to be downloaded.
        video_name (str): Name of the video, used only in progress/error messages.
        video_path (str): Full path of the file the video is saved to.
        timeout (float | tuple): Timeout in seconds passed to ``requests.get`` so a
            stalled server cannot hang the notebook forever.
        chunk_size (int): Number of bytes requested per streamed chunk.

    Returns:
        None. Outcomes (skip, success, HTTP error, request error) are reported
        with ``print``.

    Raises:
        OSError: If the destination cannot be created or written. Only
            ``requests`` errors are caught and reported.
    """
    if os.path.exists(video_path):
        print(f"Video '{video_path}' already exists. Skipping download.")
        return

    partial_path = f"{video_path}.part"
    try:
        # The context manager releases the streamed connection on every exit path.
        with requests.get(video_url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Error downloading video '{video_name}': {response.status_code}")
                return

            parent_directory = os.path.dirname(video_path)
            if parent_directory:
                os.makedirs(parent_directory, exist_ok=True)

            with open(partial_path, 'wb') as file:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    if chunk:  # skip empty keep-alive chunks
                        file.write(chunk)

        # Publish the file only once it is complete.
        os.replace(partial_path, video_path)
        print(f"Video '{video_name}' downloaded successfully.")
    except requests.exceptions.RequestException as e:
        print(f"Error downloading video '{video_name}': {e}")
    finally:
        # Remove leftovers of a failed or interrupted transfer.
        if os.path.exists(partial_path):
            os.remove(partial_path)

In [1]:
def video_to_audio(video_path, audio_path):
    """
    Extract the audio track of a video file into an audio file.

    The conversion is skipped when ``audio_path`` already exists. The actual work
    is delegated to ``extract_audio``; this function does not choose the output
    format itself, it only forwards ``audio_path``.

    Args:
        video_path (str): Path to the input video file.
        audio_path (str): Path the extracted audio is saved to.

    Returns:
        None. Failures raised by ``extract_audio`` are reported with ``print``
        rather than propagated, so one bad file does not stop a batch run.
    """
    if os.path.exists(audio_path):
        print(f"Audio '{audio_path}' already exists. Skipping conversion.")
        return

    try:
        extract_audio(input_path=video_path, output_path=audio_path)
    except Exception as e:
        # Name the failing input so errors in a batch run can be traced to a file.
        print(f"An error occurred while converting '{video_path}': {e}")

In [3]:
# One-off call of video_to_audio on a single test video, writing an .mp3 file.
video_to_audio('Videos/test_video.mp4', 'Audios/test_audio.mp3')

Success : audio file has been saved to "d:\VS Code Folders\audio_activity_detection\Audios\test_audio.mp3".


In [9]:
# Key under item["projects"] whose labels are read for every exported row.
PROJECT_ID = "clvksmh1x038b07z80gv75tec"


def _collect_label_times(frames, frame_rate):
    """Map each classification value to the times (s, 1 decimal) of its annotated frames."""
    label_times = {}
    for frame_number, annotations in frames.items():
        # Frame numbers are dict keys (strings); convert to seconds via the frame rate.
        seconds = round(int(frame_number) / frame_rate, 1)
        for classification in annotations.get("classifications", []):
            label_times.setdefault(classification["value"], []).append(seconds)
    return label_times


def _pair_timestamps(times):
    """Sort timestamps and group consecutive ones into [start, end] pairs.

    With an odd number of timestamps the last group holds a single value.
    """
    ordered = sorted(times)
    return [ordered[i: i + 2] for i in range(0, len(ordered), 2)]


# Result: {audio file name: {label: [[start, end], ...]}} with times in seconds.
processed_data = {}
# Explicit encoding: the platform default is not UTF-8 everywhere (e.g. Windows).
with open(META_DATA_PATH, 'r', encoding='utf-8') as file:
    metadata = ndjson.load(file)
    for item in metadata:
        video_url = item['data_row']['row_data']
        video_name = item['data_row']['external_id']

        video_path = os.path.join(VIDEOS_DIRECTORY, video_name)
        # Currently disabled; uncomment to fetch the video files.
        # download_video(video_url, video_name, video_path)

        # splitext handles extensions of any length, unlike slicing off 3 characters.
        audio_name = os.path.splitext(video_name)[0] + '.mp3'
        audio_path = os.path.join(AUDIOS_DIRECTORY, audio_name)

        # Currently disabled; uncomment to extract the audio tracks.
        # video_to_audio(video_path, audio_path)
        video_frame_rate = item['media_attributes']['frame_rate']

        labels = item["projects"][PROJECT_ID]["labels"]
        if not labels:
            # Presumably an unlabeled row; report it instead of failing on labels[0].
            print(f"{video_name}: no labels found; skipping")
            continue
        frames = labels[0]["annotations"]["frames"]

        activities = {}
        for label, times in _collect_label_times(frames, video_frame_rate).items():
            if len(times) % 2 != 0:
                # An odd count means one start/end marker has no partner.
                print(
                    f"{video_name}: label '{label}' has an odd number of "
                    f"timestamps ({len(times)}); its last interval is unpaired"
                )
            activities[label] = _pair_timestamps(times)

        processed_data[audio_name] = activities

vid_771.mp4


In [6]:
# Display the mapping built in the preceding cell:
# {audio file name: {label: [[start, end], ...]}}, with times in seconds.
processed_data

{'vid_554.mp3': {'turn': [[1.0, 5.1]],
  'drift': [[3.4, 4.7]],
  'high_speed': [[0.8, 5.0]],
  'long_noise': [[0.3, 6.0]]},
 'vid_555.mp3': {'crash': [[1.7, 2.0]],
  'jump': [[1.1, 1.9]],
  'drift': [[1.7, 2.1]],
  'long_noise': [[1.9, 3.8]]},
 'vid_561.mp3': {'long_noise': [[0.1, 3.5]]},
 'vid_562.mp3': {'drift': [[0.9, 2.1], [2.6, 4.7]],
  'turn': [[0.9, 2.1], [2.6, 5.2]],
  'high_speed': [[0.9, 5.2]],
  'long_noise': [[0.7, 7.3]]},
 'vid_563.mp3': {'drift': [[0.9, 2.0]],
  'jump': [[1.2, 2.0]],
  'crash': [[1.4, 2.1]],
  'long_noise': [[0.6, 3.5]]},
 'vid_564.mp3': {'long_noise': [[0.3, 8.0]],
  'jump': [[1.5, 2.4]],
  'turn': [[5.2, 5.8]]},
 'vid_565.mp3': {'long_noise': [[0.6, 8.6]],
  'spin': [[1.4, 2.4]],
  'turn': [[5.4, 6.6]]},
 'vid_566.mp3': {'long_noise': [[0.1, 5.7]], 'spin': [[0.9, 2.6]]},
 'vid_567.mp3': {'long_noise': [[0.4, 11.2]], 'spin': [[6.0, 7.2]]},
 'vid_568.mp3': {'spin': [[1.0, 2.3]],
  'long_noise': [[0.0, 12.2]],
  'turn': [[3.0, 4.2]],
  'crowd_noise': [[1.