# Set the Working Directory

In [None]:
import os

# The process's current directory serves as the working directory for
# every path built later in this notebook.
working_dir = os.getcwd()

# Guard clause: stop immediately when the path is not an existing directory.
if not os.path.isdir(working_dir):
    raise ValueError("Working directory does not exist")

print("Working directory is ready!")

# Set the Trace Directory

In [None]:
import yaml

# Load configuration file
config_path = os.path.join(working_dir, "configuration.yaml")
# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default locale encoding.
with open(config_path, "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# yaml.safe_load returns None for an empty document and may return a
# non-mapping for other content; fail with a message that names the file
# instead of an opaque TypeError/KeyError on the lookup below.
if not isinstance(config, dict) or "working_trace" not in config:
    raise KeyError(f"❌ 'working_trace' key not found in {config_path}")

# Resolve trace directory: <working_dir>/data/<working_trace>
trace_dir = os.path.join(working_dir, "data", config["working_trace"])

if os.path.isdir(trace_dir):
    print(f"✅ Trace directory ready: {trace_dir}")
else:
    raise FileNotFoundError(f"❌ Trace directory not found: {trace_dir}\n")

In [None]:
from utils import read_file_ids

# Ask the project helper for the file IDs under the trace directory,
# passing overwrite=False.
file_ids = read_file_ids(
    overwrite=False,
    trace_dir=trace_dir,
)

# Report how many IDs came back.
print(f"📁 Total video files found: {len(file_ids)}")

# Convert Video to Audio Using `ffmpeg`

In [None]:
import subprocess

# Input videos are read from <trace_dir>/videos and the extracted audio is
# written to <trace_dir>/audios (created on demand).
video_dir = os.path.join(trace_dir, "videos")
audio_dir = os.path.join(trace_dir, "audios")
os.makedirs(audio_dir, exist_ok=True)

# Extract one MP3 per file ID, skipping IDs whose audio file already exists.
for idx, file_id in enumerate(file_ids, start=1):
    audio_path = os.path.join(audio_dir, f"{file_id}.mp3")

    if os.path.exists(audio_path):
        print(f"✔️ Audio already exists: {file_id}.mp3")
        continue

    print(f"🔄 [{idx}/{len(file_ids)}] Extracting audio: {file_id}.mp3")

    video_path = os.path.join(video_dir, f"{file_id}.mp4")

    # Pass an argument list without a shell so that paths containing quotes,
    # spaces, `$` or backticks reach ffmpeg verbatim instead of being
    # re-parsed (and possibly executed) by the shell.
    command = ["ffmpeg", "-y", "-i", video_path, audio_path]
    try:
        result = subprocess.run(command, capture_output=True)
    except FileNotFoundError:
        # Without a shell, a missing executable raises instead of yielding a
        # non-zero return code; every remaining file would fail the same way.
        print("❌ ffmpeg executable not found on PATH")
        break

    if result.returncode != 0:
        print(f"❌ Failed to extract audio for {file_id}.mp4")
        # ffmpeg's stderr is not guaranteed to be valid UTF-8.
        print(result.stderr.decode(errors="replace"))
        # Remove any partial output so the "already exists" check above does
        # not mistake it for a finished extraction on the next run.
        if os.path.exists(audio_path):
            os.remove(audio_path)
    else:
        print(f"✅ Audio extracted: {file_id}.mp3")