1. Install required packages

In [None]:
!pip install azure-storage-blob
!apt-get update
!apt-get install -y ffmpeg

2. Set up Azure Blob storage client

In [None]:
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
import os

def _require_env(var_name):
    """Return the value of the environment variable ``var_name``.

    Args:
        var_name: Name of the environment variable to read.

    Returns:
        The variable's non-empty string value.

    Raises:
        RuntimeError: If the variable is unset or empty. Failing here gives a
            clear message instead of passing ``None`` on to the Azure client.
    """
    value = os.getenv(var_name)
    if not value:
        raise RuntimeError(
            f"Environment variable {var_name!r} is not set or is empty; "
            "set it before running this cell."
        )
    return value


# Set your Azure Storage connection string (use a secret store or environment variable in real use)
# NOTE(review): "TBD" is a placeholder variable name and is currently shared by
# all three lookups below, so they would all receive the same value. Replace
# each "TBD" with its own environment variable name before running.
AZURE_STORAGE_CONNECTION_STRING = _require_env("TBD")

# Define containers
input_container_name = _require_env("TBD")
output_container_name = _require_env("TBD")

# Initialize blob service and one client per container
blob_service_client = BlobServiceClient.from_connection_string(AZURE_STORAGE_CONNECTION_STRING)
input_container = blob_service_client.get_container_client(input_container_name)
output_container = blob_service_client.get_container_client(output_container_name)

3. Process all mp4 files

In [None]:
import subprocess

# Convert every .mp4 blob in the input container to an .mp3 blob in the output
# container: download the video, extract its audio with ffmpeg, upload the
# result under the same blob name with the extension swapped.
VIDEO_SUFFIX = ".mp4"
AUDIO_SUFFIX = ".mp3"
TEMP_DIR = "temp"

# Create local temp directory
os.makedirs(TEMP_DIR, exist_ok=True)

for blob in input_container.list_blobs():
    if not blob.name.endswith(VIDEO_SUFFIX):
        continue

    video_blob_name = blob.name
    # Swap only the trailing extension. str.replace() would also rewrite any
    # earlier ".mp4" inside the blob name (e.g. in a virtual directory part).
    audio_output_name = video_blob_name[: -len(VIDEO_SUFFIX)] + AUDIO_SUFFIX
    local_video_path = os.path.join(TEMP_DIR, os.path.basename(video_blob_name))
    local_audio_path = os.path.join(TEMP_DIR, os.path.basename(audio_output_name))

    print(f"Processing {video_blob_name}...")

    try:
        # Download video, streaming straight to disk instead of buffering the
        # whole file in memory with readall().
        with open(local_video_path, "wb") as download_file:
            input_container.download_blob(video_blob_name).readinto(download_file)

        # Extract audio using ffmpeg.
        #   -nostdin : never wait for interactive input from the notebook
        #   -y       : overwrite a stale output file left by an earlier run
        #   -q:a 0   : audio quality setting passed through to the encoder
        #   -map a   : keep only the audio stream(s)
        # check=True raises CalledProcessError if ffmpeg exits non-zero.
        subprocess.run(
            [
                "ffmpeg", "-nostdin", "-y",
                "-i", local_video_path,
                "-q:a", "0",
                "-map", "a",
                local_audio_path,
            ],
            check=True,
        )

        # Upload the extracted audio to the output container
        with open(local_audio_path, "rb") as data:
            output_container.upload_blob(name=audio_output_name, data=data, overwrite=True)
        print(f"Uploaded {audio_output_name} to {output_container_name}")
    finally:
        # Clean up temp files even when download, conversion or upload fails,
        # so a failed run does not leave partial files behind.
        for temp_path in (local_video_path, local_audio_path):
            if os.path.exists(temp_path):
                os.remove(temp_path)

print("✅ All .mp4 files processed.")
