In [None]:
import ndjson
import json
import os
from audio_extract import extract_audio
import requests

In [None]:
META_DATA_PATH = 'LabelBox/crowd_noise_videos.ndjson'
VIDEOS_DIRECTORY = 'Videos'
AUDIOS_DIRECTORY = 'Audios'
TIMESTAMP_ANNOTATIONS = 'timestamp_annotations.json'

In [None]:
def download_video(video_url, video_name, video_path):
    """
    Downloads a video from the given URL and saves it with the specified name in the videos folder.
    
    Args:
        video_url (str): The URL of the video to be downloaded.
        video_name (str): The name of the video file.
        videos_folder (str): The path to the folder where the video will be saved.
    """
    if os.path.exists(video_path):
        print(f"Video '{video_path}' already exists. Skipping download.")
        return
    
    
    try:
        response = requests.get(video_url, stream=True)
        if response.status_code == 200:
            with open(video_path, 'wb') as file:
                for chunk in response.iter_content(chunk_size=1024):
                    file.write(chunk)
            print(f"Video '{video_name}' downloaded successfully.")
        else:
            print(f"Error downloading video '{video_name}': {response.status_code}")
    except requests.exceptions.RequestException as e:
        print(f"Error downloading video '{video_name}': {e}")

In [46]:
def video_to_audio(video_path, audio_path):
    """
    Convert a video file to an audio file in .wav format.
    Args:
        video_path (str): Path to the input video file.
        audio_path (str): Path to save the output .wav file.
    """
    try:
        extract_audio(input_path=video_path, output_path=audio_path)

    except Exception as e:
        print(f"An error occurred: {e}")

In [50]:
processed_data = {}
with open (META_DATA_PATH, 'r') as file:
    metadata = ndjson.load(file)
    for item in metadata:
        video_url = item['data_row']['row_data']
        video_name = item['data_row']['external_id']
        
        video_path = os.path.join(VIDEOS_DIRECTORY, video_name)
        download_video(video_url, video_name, video_path)
        
        audio_name = video_name[:-3] + 'mp3'
        audio_path = os.path.join(AUDIOS_DIRECTORY, audio_name)
        
        video_to_audio(video_path, audio_path)
        video_frame_rate = item['media_attributes']['frame_rate']
        frames = item["projects"]["cloq6ol0d05hp07xvfiy99gg5"]["labels"][0]["annotations"]["frames"]
        
        # Loop through and extract frame numbers
        activities = {}
        for frame, annotations in frames.items():
            frame = int(frame)
            classifications = annotations.get("classifications", [])
            for classification in classifications:
                label_value = classification["value"]
                if label_value not in activities:
                    activities[label_value] = []
                activities[label_value].append(round(frame / video_frame_rate, 1))
        
        for key, times_array in activities.items():
            times_array.sort()
            
            activities[key] = [times_array[i: i + 2] for i in range(0, len(times_array), 2)]
        
        processed_data[audio_name] = activities

https://storage.labelbox.com/cloq65e7f04db07zja4dya06y%2F0cf4fef1-0875-4a85-8321-8e88fe76511a-vid_1.mp4?Expires=1733409535986&KeyName=labelbox-assets-key-3&Signature=W8_0KY_8f7l0s0aJGA7v-5q5rBc
vid_1.mp4
Videos\vid_1.mp4
Video 'Videos\vid_1.mp4' already exists. Skipping download.
An error occurred: File already exists in output path: d:\VS Code Folders\audio_activity_detection\Audios\vid_1.mp3.
https://storage.labelbox.com/cloq65e7f04db07zja4dya06y%2F70001f41-5720-1261-235f-8d4cef947f4a-vid_5.mp4?Expires=1733409535987&KeyName=labelbox-assets-key-3&Signature=jE5oZznYR-Z3Om2jJFAFwZ3_q68
vid_5.mp4
Videos\vid_5.mp4
Video 'vid_5.mp4' downloaded successfully.
Success : audio file has been saved to "d:\VS Code Folders\audio_activity_detection\Audios\vid_5.mp3".
https://storage.labelbox.com/cloq65e7f04db07zja4dya06y%2F55f787e3-8d66-ce08-abe1-b658028cd581-vid_7.mp4?Expires=1733409535987&KeyName=labelbox-assets-key-3&Signature=SYiVmg4ZHktP1WyRk7Jg7gyhw1Q
vid_7.mp4
Videos\vid_7.mp4
Video 'vid_7.m

In [None]:
with open(TIMESTAMP_ANNOTATIONS, 'w') as output_file:
    json.dump(processed_data, output_file)