In [None]:
# Audioset downloader
# Uses youtube-dl to look up each clip's audio stream URL and ffmpeg to save the
# segment as WAV; clips that fail are logged to a CSV file.

import csv
import subprocess
import os
import datetime

# Input CSV file with video details
input_file = 'audioset/balanced_train_segments.csv'
# Output CSV file for logging failures
failures_file = 'audioset/balanced_train_segments_FAILED.csv'

# Create a directory to store audio files
audio_directory = 'audioset/data/download-test'
os.makedirs(audio_directory, exist_ok=True)

# Read the segment list and start a fresh failure log for this run.
# (newline='' is what the csv module expects for files it reads or writes.)
with open(input_file, 'r', newline='') as csvfile, open(failures_file, 'w', newline='') as failurefile:
    reader = csv.reader(csvfile)
    failure_writer = csv.writer(failurefile)

    # Write the header for the failure file
    failure_writer.writerow(['YTID', 'error'])

    # Skip the three header rows at the top of the input file
    for _ in range(3):
        next(reader, None)

    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines; there is nothing to download.
            continue

        # Columns used: 0 = YouTube id, 1 = segment start, 2 = segment end
        # (the end is only needed to derive the clip length for ffmpeg's -t).
        # Fields are stripped because they are now passed as exact arguments;
        # previously the shell swallowed any surrounding whitespace.
        ytid = row[0].strip()
        start_seconds = row[1].strip()
        duration = float(row[2]) - float(row[1])

        video_url = f'https://www.youtube.com/watch?v={ytid}'
        output_audio_file = os.path.join(audio_directory, f'{ytid}.wav')

        # Get the URL for the best audio stream.
        # Only stdout is captured: subprocess.getoutput() merges stderr into the
        # result, so a youtube-dl error message used to be mistaken for a URL and
        # handed to ffmpeg. The exit status is checked for the same reason.
        # Arguments are passed as a list (no shell), so nothing needs quoting.
        # A missing youtube-dl executable raises FileNotFoundError here.
        url_lookup = subprocess.run(
            ['youtube-dl', '-g', '-f', 'bestaudio', video_url],
            stdout=subprocess.PIPE,
            universal_newlines=True,  # decode stdout to str (spelled `text=True` on 3.7+)
        )
        audio_url = url_lookup.stdout.strip() if url_lookup.returncode == 0 else ''

        if not audio_url:
            failure_writer.writerow([ytid, 'URL retrieval failure'])
            print(f'Failed to retrieve URL for video {ytid}')
            continue

        # Download and convert the audio segment using ffmpeg.
        # -nostdin: never wait for interactive input inside the notebook.
        # -y: overwrite an existing (possibly partial) file from an earlier run
        #     instead of refusing and being logged as a failure.
        ffmpeg_command = [
            'ffmpeg', '-nostdin', '-y',
            '-ss', start_seconds,
            '-i', audio_url,
            '-t', str(duration),
            '-acodec', 'pcm_s16le',
            '-ar', '44100',
            output_audio_file,
        ]
        download_result = subprocess.run(ffmpeg_command).returncode

        if download_result != 0:
            failure_writer.writerow([ytid, 'Download or conversion failure'])
            print(f'Failed to process video {ytid}')
            continue

        print(f'Successfully processed video {ytid}')

In [2]:
# Audiocaps downloader
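# Uses youtube-dl to look up each clip's audio stream URL and ffmpeg to save the
# segment as WAV; successes and failures are each logged to their own CSV file.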

import csv
import subprocess
import os

# Input CSV file with video details
input_file = 'audiocaps/dataset/train.csv'
# Output CSV file for metadata of successfully downloaded clips
success_file = 'audiocaps/dataset/train_download_success.csv'
# Output CSV file for logging failures
failures_file = 'audiocaps/dataset/train_download_fail.csv'

# Create a directory to store audio files
audio_directory = 'audiocaps/data/dataset/train'
os.makedirs(audio_directory, exist_ok=True)

# Length of every extracted clip, in ffmpeg's HH:MM:SS notation (10 seconds).
duration = "00:00:10"

# Open the input file for reading and start fresh failure/success logs for this run.
# (newline='' is what the csv module expects for files it reads or writes.)
with open(input_file, 'r', newline='') as csvfile, open(failures_file, 'w', newline='') as failurefile, open(success_file, 'w', newline='') as successfile:
    reader = csv.reader(csvfile)
    failure_writer = csv.writer(failurefile)
    success_writer = csv.writer(successfile)

    # Write the header for the files
    failure_writer.writerow(['audiocap_id', 'youtube_id', 'error'])
    success_writer.writerow(['audiocap_id', 'youtube_id', 'caption'])

    # Skip the header row
    next(reader, None)

    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines; there is nothing to download.
            continue

        # Columns used: 0 = AudioCaps id, 1 = YouTube id, 2 = clip start, 3 = caption.
        # The id and start are stripped because they are now passed as exact
        # arguments; previously the shell swallowed any surrounding whitespace.
        audiocap_id = row[0]
        ytid = row[1].strip()
        start_seconds = row[2].strip()
        caption = row[3]

        video_url = f'https://www.youtube.com/watch?v={ytid}'
        output_audio_file = os.path.join(audio_directory, f'{ytid}.wav')

        # Get the URL for the best audio stream.
        # Only stdout is captured: subprocess.getoutput() merges stderr into the
        # result, so a youtube-dl error message used to be mistaken for a URL and
        # handed to ffmpeg. The exit status is checked for the same reason.
        # Arguments are passed as a list (no shell), so nothing needs quoting.
        # A missing youtube-dl executable raises FileNotFoundError here.
        url_lookup = subprocess.run(
            ['youtube-dl', '-g', '-f', 'bestaudio', video_url],
            stdout=subprocess.PIPE,
            universal_newlines=True,  # decode stdout to str (spelled `text=True` on 3.7+)
        )
        audio_url = url_lookup.stdout.strip() if url_lookup.returncode == 0 else ''

        if not audio_url:
            failure_writer.writerow([audiocap_id, ytid, 'URL retrieval failure'])
            print(f'Failed to retrieve URL for video {ytid}')
            continue

        # Download and convert the audio segment using ffmpeg.
        # -nostdin: never wait for interactive input inside the notebook.
        # -y: overwrite an existing (possibly partial) file from an earlier run
        #     instead of refusing and being logged as a failure.
        ffmpeg_command = [
            'ffmpeg', '-nostdin', '-y',
            '-ss', start_seconds,
            '-i', audio_url,
            '-t', duration,
            '-acodec', 'pcm_s16le',
            '-ar', '44100',
            output_audio_file,
        ]
        download_result = subprocess.run(ffmpeg_command).returncode

        if download_result != 0:
            # Keep all three columns so the row lines up with the failure-file header.
            failure_writer.writerow([audiocap_id, ytid, 'Download or conversion failure'])
            print(f'Failed to process video {ytid}')
            continue

        print(f'Successfully processed video {ytid}')
        success_writer.writerow([audiocap_id, ytid, caption])
        

ffmpeg version 4.3 Copyright (c) 2000-2020 the FFmpeg developers
  built with gcc 7.3.0 (crosstool-NG 1.23.0.449-a04d0)
  configuration: --prefix=/opt/conda/conda-bld/ffmpeg_1597178665428/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placeh --cc=/opt/conda/conda-bld/ffmpeg_1597178665428/_build_env/bin/x86_64-conda_cos6-linux-gnu-cc --disable-doc --disable-openssl --enable-avresample --enable-gnutls --enable-hardcoded-tables --enable-libfreetype --enable-libopenh264 --enable-pic --enable-pthreads --enable-shared --disable-static --enable-version3 --enable-zlib --enable-libmp3lame
  libavutil      56. 51.100 / 56. 51.100
  libavcodec     58. 91.100 / 58. 91.100
  libavformat    58. 45.100 / 58. 45.100
  libavdevice    58. 10.100 / 58. 10.100
  libavfilter     7. 85.100 /  7. 85.100
  libavresample   4.  0.  0 /  4.  0.  0
  libsw