# 1) Create source collection dataset

In [1]:
import os
import pandas as pd
import numpy as np
import freesound
from IPython.display import display

# The Freesound API key is a secret: read it from the environment instead of
# storing it in the notebook. Get one at https://freesound.org/apiv2/apply/
FREESOUND_API_KEY = os.environ.get('FREESOUND_API_KEY', '')
if not FREESOUND_API_KEY:
    raise RuntimeError(
        'FREESOUND_API_KEY is not set. Get a key at https://freesound.org/apiv2/apply/ '
        'and export it as an environment variable before running this notebook.'
    )
# Metadata fields requested for every sound; they also drive the data frame columns
FREESOUND_STORE_METADATA_FIELDS = ['id', 'name', 'username', 'duration', 'previews', 'license']
FILES_DIR = 'files'  # Will be relative to the current folder
N_SOUNDS_PER_QUERY = 10  # Max 150
DATAFRAME_FILENAME = 'dataframe.csv'

# Token-authenticated client shared by the helper functions below
freesound_client = freesound.FreesoundClient()
freesound_client.set_token(FREESOUND_API_KEY)
# Create the download folder; exist_ok makes re-running this cell a no-op
os.makedirs(FILES_DIR, exist_ok=True)

In [2]:
def query_freesound(query):
    """Run a Freesound text search for `query` and return the hits as a list.

    The search is restricted with the filter 'duration:[0 TO 30]', asks only
    for the fields in FREESOUND_STORE_METADATA_FIELDS, and uses a page size of
    N_SOUNDS_PER_QUERY. The returned list holds whatever iterating the pager
    yields (presumably a single page of results -- confirm against the
    freesound client).
    """
    search_options = {
        'query': query,
        'filter': 'duration:[0 TO 30]',  # Take sounds lasting less than 30 seconds
        'fields': ','.join(FREESOUND_STORE_METADATA_FIELDS),
        'group_by_pack': 1,  # presumably collapses hits from the same pack -- TODO confirm
        'page_size': N_SOUNDS_PER_QUERY,
    }
    results_page = freesound_client.text_search(**search_options)

    found = []
    for hit in results_page:
        found.append(hit)
    return found

def retrieve_sound_preview(sound, directory):
    """Download the `preview_hq_ogg` preview of `sound` into `directory`.

    The local file is named after the last path segment of the preview URL.
    Returns whatever `freesound.FSRequest.retrieve` returns.
    """
    preview_url = sound.previews.preview_hq_ogg
    local_name = preview_url.split('/')[-1]
    destination = os.path.join(directory, local_name)
    return freesound.FSRequest.retrieve(preview_url, freesound_client, destination)

def make_pandas_record(fs_object):
    """Build a flat metadata dict for one sound, suitable as a data frame row.

    The record holds every field listed in FREESOUND_STORE_METADATA_FIELDS
    except 'previews' (the previews dict is not stored) and 'id', which is
    stored under the key 'freesound_id' instead. A 'path' entry is added that
    points to the downloaded preview file inside FILES_DIR.

    Raises KeyError if the sound's metadata lacks one of the requested fields.
    """
    metadata = fs_object.as_dict()  # convert once instead of once per field
    record = {
        key: metadata[key]
        for key in FREESOUND_STORE_METADATA_FIELDS
        if key not in ('id', 'previews')
    }
    record['freesound_id'] = metadata['id']
    preview_filename = fs_object.previews.preview_hq_ogg.split('/')[-1]
    # Built from FILES_DIR so the stored path follows the configured folder;
    # a literal '/' keeps the stored value identical on every platform.
    record['path'] = '{0}/{1}'.format(FILES_DIR, preview_filename)
    return record


# Define freesound queries and get sounds
animal_sounds = ["dog bark", "cat meow", "lion roar", "nightingale"]
# Flatten the per-query result lists into one list in a single pass
# (sum(lists, []) copies the accumulated list on every addition)
sounds = [sound for category in animal_sounds for sound in query_freesound(category)]

# Download sounds into the configured folder
for count, sound in enumerate(sounds):
    print('Downloading sound with id {0} [{1}/{2}]'.format(sound.id, count + 1, len(sounds)))
    retrieve_sound_preview(sound, FILES_DIR)

# Make pandas data frame with retrieved sound metadata and save it
df = pd.DataFrame([make_pandas_record(s) for s in sounds])
df.to_csv(DATAFRAME_FILENAME)
print('Saved dataframe with {0} entries! {1}'.format(len(df), DATAFRAME_FILENAME))

Downloading sound with id 337101 [1/40]
Downloading sound with id 327666 [2/40]
Downloading sound with id 413758 [3/40]
Downloading sound with id 163459 [4/40]
Downloading sound with id 456943 [5/40]
Downloading sound with id 418106 [6/40]
Downloading sound with id 420448 [7/40]
Downloading sound with id 236016 [8/40]
Downloading sound with id 115536 [9/40]
Downloading sound with id 160093 [10/40]
Downloading sound with id 415209 [11/40]
Downloading sound with id 213889 [12/40]
Downloading sound with id 341545 [13/40]
Downloading sound with id 365061 [14/40]
Downloading sound with id 110011 [15/40]
Downloading sound with id 61259 [16/40]
Downloading sound with id 412017 [17/40]
Downloading sound with id 268795 [18/40]
Downloading sound with id 66511 [19/40]
Downloading sound with id 274989 [20/40]
Downloading sound with id 212764 [21/40]
Downloading sound with id 232289 [22/40]
Downloading sound with id 174466 [23/40]
Downloading sound with id 415878 [24/40]
Downloading sound with id 6

In [3]:
# Show data frame contents: `df` is the sound metadata frame built in the
# previous cell, rendered here as a visual check of what was saved
display(df)

Unnamed: 0,duration,freesound_id,license,name,path,username
0,21.7587,337101,http://creativecommons.org/publicdomain/zero/1.0/,Group_of_Dogs_Barking.WAV,files/337101_3474310-hq.ogg,ivolipa
1,6.47247,327666,http://creativecommons.org/licenses/by-nc/3.0/,Dog Bark.wav,files/327666_5632380-hq.ogg,Juan_Merie_Venter
2,8.86712,413758,http://creativecommons.org/publicdomain/zero/1.0/,dogbarking.mp3,files/413758_7958399-hq.ogg,lala_davis554
3,1.95388,163459,http://creativecommons.org/publicdomain/zero/1.0/,LBS_FX DOG Small Alert Bark001.wav,files/163459_2965892-hq.ogg,LittleBigSounds
4,29.0,456943,http://creativecommons.org/publicdomain/zero/1.0/,Blossom Bark 29sec mix.wav,files/456943_3194431-hq.ogg,Zajjman
5,0.599796,418106,http://creativecommons.org/licenses/by/3.0/,single dog bark 2,files/418106_6078577-hq.ogg,crazymonke9
6,1.10599,420448,http://creativecommons.org/publicdomain/zero/1.0/,Barking 3.wav,files/420448_2402876-hq.ogg,Mrthenoronha
7,0.545669,236016,http://creativecommons.org/licenses/by-nc/3.0/,Jazz the Dog Howl & Bark (131).wav,files/236016_180659-hq.ogg,delphidebrain
8,2.964403,115536,http://creativecommons.org/licenses/by/3.0/,Two Barks.wav,files/115536_1956076-hq.ogg,Puniho
9,0.303129,160093,http://creativecommons.org/licenses/by/3.0/,Dog bark 2,files/160093_2888453-hq.ogg,jorickhoofd
