# Script to download audioset
Based on https://github.com/marl/audiosetdl/blob/master/notebooks/download.ipynb

In [8]:
# Location of the ffmpeg executable
ffmpeg_path = '/usr/local/bin/ffmpeg'

# Output directories -- these need to exist
videos_path, audio_path = 'data/videos/', 'data/audio/'
# Intermediate flac files live in a sub-folder of the audio directory
flac_path = '{}flac/'.format(audio_path)

%matplotlib inline
import sys
import os.path
# Make sure ffmpeg's directory is on the executable search path so sk-video can find it.
# sys.path only controls Python module lookup, so PATH has to be extended as well;
# the sys.path entry is kept so existing behaviour is unchanged.
_ffmpeg_dir = os.path.dirname(ffmpeg_path)
sys.path.append(_ffmpeg_dir)
if _ffmpeg_dir not in os.environ.get('PATH', '').split(os.pathsep):
    os.environ['PATH'] = os.pathsep.join(
        part for part in (_ffmpeg_dir, os.environ.get('PATH', '')) if part)
import skvideo.io
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pafy
import soundfile as sf
import subprocess as sp
from IPython.display import Video

In [9]:
# Output settings: (codec, container) used for the audio and video files
audio_codec, audio_container = 'flac', 'flac'
video_codec, video_container = 'h264', 'mp4'

### Sorting out what data to download based on segments csv

In [10]:
# Labels to download, as {folder name: label code}

labels_dict = dict(
    car_passing_by='/t/dd00134',
    # outside_urban='/t/dd00128',  # this will actually include engine sound so might be confusing
    # car='/m/0k4j',  # general car sounds
    outside_rural='/t/dd00129',
)

In [11]:
# Other possible sound labels:

# /t/dd00066,"Medium engine (mid frequency)"
# /t/dd00067,"Heavy engine (low frequency)"
# /m/01h82_,"Engine knocking"
# /t/dd00130,"Engine starting"
# /m/07pb8fc,"Idling"
# /m/07q2z82,"Accelerating, revving, vroom"
# /m/02mk9,"Engine"
# /m/0ltv,"Race car, auto racing"
# /m/07r04,"Truck"
# /m/07rknqz,"Skidding"
# /m/0h9mv,"Tire squeal"
# /m/012f08,"Motor vehicle (road)"
# /m/0k4j,"Car"

In [12]:
def get_video_list(code, dataset):
    '''
    Get the list of video info for all videos for a given label
    ---
    Keywords
    - code: label code to look for, e.g. '/t/dd00134'. It has to equal one of the
            labels of a segment; a mere substring of a longer code does not count.
    - dataset: which segments csv to read: 'train', 'eval' or 'unbal'.

    Returns a list with one entry per matching csv line. Each entry is the line
    split on ', ' with every field left as a string; the last field is the raw
    (still quoted) labels string.

    Raises ValueError when dataset is not one of the known names.
    '''

    dataset_dict = {'train': 'data/balanced_train_segments.csv',
                    'eval': 'data/eval_segments.csv',
                    'unbal': 'data/unbalanced_train_segments.csv'}

    if dataset not in dataset_dict:
        raise ValueError('Unknown dataset {!r}; expected one of {}'.format(
            dataset, sorted(dataset_dict)))

    with open(dataset_dict[dataset]) as f:
        lines = f.readlines()

    dl_list = []
    # The first three lines of the csv are skipped (treated as header)
    for line in lines[3:]:
        row = line.strip().split(', ')
        # The labels field looks like "code1,code2,...": drop the quotes and
        # compare whole codes so that one code cannot match inside another
        row_labels = [lab.strip() for lab in row[-1].strip('"').split(',')]
        if code in row_labels:
            dl_list.append(row)
    print('Videos available for this label: ', len(dl_list))
    print("")
    return dl_list

In [13]:
# Print how many 'unbal' segments carry the 'outside_rural' label; the returned list is discarded
_ = get_video_list(labels_dict.get('outside_rural'), 'unbal')

Videos available for this label:  35291



In [1]:
# Print how many 'unbal' segments carry the 'car_passing_by' label; the returned list is discarded
_ = get_video_list(labels_dict.get('car_passing_by'), 'unbal')

### We can load the AudioSet training set and grab a video to download:

In [16]:
# Select a YouTube video from the training set
def select_video(video_info):
    '''
    Unpack one segment entry and report it.

    video_info holds four items: YouTube ID, start time, end time and labels.
    The two times are converted to float; the labels are not used here.
    Returns (ytid, ts_start, ts_end, duration) where duration = ts_end - ts_start.
    '''
    video_id, raw_start, raw_end, _labels = video_info
    start = float(raw_start)
    end = float(raw_end)

    print("YouTube ID: " + video_id)
    print("Trim Window: ({}, {})".format(start, end))

    return video_id, start, end, end - start

### After getting the YouTube ID and trim window for the video we want, we can use `pafy` to get the download URL to the best quality video and audio:

In [17]:
# Get the URL to the video page

def get_urls(ytid):
    '''
    Look up the direct stream URLs for a YouTube video through pafy.

    Returns (best_video_url, best_audio_url): the .url of the streams returned
    by pafy's getbestvideo() and getbestaudio() for the video's watch page.
    '''
    video = pafy.new('https://www.youtube.com/watch?v={}'.format(ytid))

    # Evaluated left to right: the video stream first, then the audio stream
    return video.getbestvideo().url, video.getbestaudio().url

In [18]:
def make_audio_filepath(audio_path, label, dataset, basename_fmt):
    '''
    Creates the final audio_filepath in a folder within audio_path corresponding to the label
    ---
    Keywords
    - audio_path: root folder for audio output, with or without a trailing separator
    - label: name of the label, used as the innermost folder name
    - dataset: name of the dataset, used as the folder between audio_path and label
    - basename_fmt: file name without extension

    The folder audio_path/dataset/label is created when missing.
    Returns the path of the wav file inside that folder.
    '''
    # os.path.join inserts separators only where needed, so audio_path no longer
    # has to end with '/'
    newpath = os.path.join(audio_path, dataset, label)
    # exist_ok avoids the check-then-create race of a separate os.path.exists test
    os.makedirs(newpath, exist_ok=True)

    return os.path.join(newpath, basename_fmt + '.' + 'wav')

In [19]:
def get_paths(ytid, ts_start, ts_end, label, dataset):
    '''
    Get output audio filepaths
    ---
    Keywords
    - ytid: YouTube ID, first part of the file name
    - ts_start, ts_end: trim window in seconds; written into the file name in milliseconds
    - label, dataset: forwarded to make_audio_filepath to pick the wav folder

    Returns (flac_filepath, audio_filepath): the intermediate flac file inside
    flac_path and the final wav file as built by make_audio_filepath.
    '''

    # round() before int(): a float product such as 1.005 * 1000 evaluates to
    # 1004.9999999999999 and a bare int() would truncate it to the wrong millisecond
    start_ms = int(round(ts_start * 1000))
    end_ms = int(round(ts_end * 1000))
    basename_fmt = '{}_{}_{}'.format(ytid, start_ms, end_ms)
    flac_filepath = os.path.join(flac_path, basename_fmt + '.' + audio_container)
    audio_filepath = make_audio_filepath(audio_path, label, dataset, basename_fmt)

    return flac_filepath, audio_filepath

### We can then use `ffmpeg` to download and transcode to the format desired:
Where `-y` is passed to ffmpeg, an existing output file is overwritten. Use `-n` in the argument list instead to keep existing files.

In [20]:
# Download the video
def download_video(ffmpeg_path, ts_start, best_video_url, duration, video_container, video_codec, video_filepath):
    '''
    Download a trimmed piece of a video with ffmpeg and transcode it
    ---
    Keywords
    - ffmpeg_path: path of the ffmpeg executable
    - ts_start: beginning of the trim window
    - best_video_url: input video URL
    - duration: duration of the output
    - video_container, video_codec: output format (container) and encoding
    - video_filepath: where the output is written; an existing file is overwritten

    On failure ffmpeg's stderr is printed. Returns True when ffmpeg exited
    with status 0 and False otherwise.
    '''

    video_dl_args = [ffmpeg_path,
        '-y',                   # Overwrite the output instead of prompting
        '-ss', str(ts_start),   # The beginning of the trim window
        '-i', best_video_url,   # Specify the input video URL
        '-t', str(duration),    # Specify the duration of the output
        '-f', video_container,  # Specify the format (container) of the video
        '-framerate', '30',     # Specify the framerate
        '-vcodec', video_codec, # Specify the output encoding
        video_filepath]

    proc = sp.Popen(video_dl_args, stdout=sp.PIPE, stderr=sp.PIPE)
    stdout, stderr = proc.communicate()
    if proc.returncode != 0:
        # Decode so the log is readable text rather than a bytes repr
        print(stderr.decode('utf-8', errors='replace'))
        return False
    print("Downloaded video to " + video_filepath)
    return True

In [21]:
# Download the audio in flac
def download_audio(ffmpeg_path, ts_start, best_audio_url, duration, audio_container, audio_codec, flac_filepath):
    '''
    Download a trimmed piece of audio with ffmpeg and transcode it
    ---
    Keywords
    - ffmpeg_path: path of the ffmpeg executable
    - ts_start: beginning of the trim window
    - best_audio_url: input URL
    - duration: duration of the output
    - audio_container, audio_codec: output format (container) and encoding
    - flac_filepath: where the output is written; an existing file is overwritten,
                     so a partial file left by an interrupted run gets replaced

    On failure ffmpeg's stderr is printed. Returns True when ffmpeg exited
    with status 0 and False otherwise.
    '''

    audio_dl_args = [ffmpeg_path,
        '-y',                    # Overwrite the output instead of prompting
        '-ss', str(ts_start),    # The beginning of the trim window
        '-i', best_audio_url,    # Specify the input video URL
        '-t', str(duration),     # Specify the duration of the output
        '-f', audio_container,   # Specify the format (container) of the audio
        '-vn',                   # Suppress the video stream
        '-ac', '2',              # Set the number of channels
        '-sample_fmt', 's16',    # Specify the bit depth
        '-acodec', audio_codec,  # Specify the output encoding
        '-ar', '44100',          # Specify the audio sample rate
        flac_filepath]

    proc = sp.Popen(audio_dl_args, stdout=sp.PIPE, stderr=sp.PIPE)
    stdout, stderr = proc.communicate()
    if proc.returncode != 0:
        # Decode so the log is readable text rather than a bytes repr
        print(stderr.decode('utf-8', errors='replace'))
        return False
    print("Downloaded audio to " + flac_filepath)
    return True

In [22]:
def conv_wav(ffmpeg_path, flac_filepath, audio_filepath):
    '''
    Convert flac to wav
    -y option overwrites files if they exist
    ---
    Keywords
    - ffmpeg_path: path of the ffmpeg executable
    - flac_filepath: input flac file
    - audio_filepath: output file

    On failure ffmpeg's stderr is printed. Returns True when ffmpeg exited
    with status 0 and False otherwise.
    '''
    conv_args = [ffmpeg_path, '-y',
        '-i', flac_filepath,   # Specify the input flac filepath
        audio_filepath]

    proc = sp.Popen(conv_args, stdout=sp.PIPE, stderr=sp.PIPE)
    stdout, stderr = proc.communicate()
    if proc.returncode != 0:
        # Decode so the log is readable text rather than a bytes repr
        print(stderr.decode('utf-8', errors='replace'))
        return False
    print("Converted flac to wav, saved to " + audio_filepath)
    return True

In [23]:
def get_audio_file(video_info, label, dataset):
    '''
    Putting all the functions together
    ---
    Keywords
    - video_info: one entry of the list returned by get_video_list
    - label: label name, selects the output folder
    - dataset: dataset name, selects the output folder

    Nothing is downloaded when the wav file is already on disk. Otherwise the
    audio is fetched as flac and converted to wav.

    Raises OSError when the download leaves no flac file behind, so that the
    failure is reported to the caller instead of converting a missing file.
    '''

    ytid, ts_start, ts_end, duration = select_video(video_info)
    flac_filepath, audio_filepath = get_paths(ytid, ts_start, ts_end, label, dataset)
    if os.path.isfile(audio_filepath):
        print('File already exists')
        print("")
        return

    # Resolve the stream URLs only once a download is really needed: this skips
    # a network round trip for files that are already present
    best_video_url, best_audio_url = get_urls(ytid)
    download_audio(ffmpeg_path, ts_start, best_audio_url, duration, audio_container, audio_codec, flac_filepath)
    if not os.path.isfile(flac_filepath):
        raise OSError('Audio download failed, no file at {}'.format(flac_filepath))
    conv_wav(ffmpeg_path, flac_filepath, audio_filepath)
    print("")

In [24]:
def download_all_audio(dataset, labels_dict, sample=None):
    '''
    Run get_audio_file over every video of every label in labels_dict, then
    empty the flac folder.
    ---
    Keywords
    - dataset: name of the segments list to read, passed on to get_video_list
    - labels_dict: dictionary mapping a label name to its label code; the name
                   becomes the folder the audio files are saved in
    - sample: maximum number of videos to process per label, taken from the
              start of the list. None processes all of them.

    OSError and AttributeError raised for a video are counted and printed, and
    processing moves on to the next video.
    '''

    for label_name, label_code in labels_dict.items():
        print('Processing label {}'.format(label_name))
        print("")
        candidates = get_video_list(label_code, dataset)
        if sample is not None:
            candidates = candidates[:sample]
        failed = 0
        for position, info in enumerate(candidates, start=1):
            print('Processing video no {}'.format(position))
            try:
                get_audio_file(info, label_name, dataset)
            except (OSError, AttributeError) as err:
                failed += 1
                print("OS error: {}".format(err))
        print('There were {} errors'.format(failed))
        print("")

    # Remove the intermediate flac files; sub-folders are left alone
    print('Cleaning flac folder')
    for entry in os.listdir(flac_path):
        target = os.path.join(flac_path, entry)
        try:
            if os.path.isfile(target):
                os.remove(target)
        except Exception as exc:
            print(exc)

In [2]:
# Process at most 3684 videos per label from the 'unbal' segments list
download_all_audio('unbal', labels_dict, sample=3684)

In [None]:
# Process at most 60 videos per label from the 'train' segments list
download_all_audio('train', labels_dict, sample=60)

In [None]:
# Process at most 60 videos per label from the 'eval' segments list
download_all_audio('eval', labels_dict, sample=60)