# Intro
This notebook shows how to use the freesound API to retrieve sounds to use in ML models. This notebook assumes you have followed the instructions in the github repo to set up an account with freesound and have updated the fs_config.py file with your id and secret

Import the required packages

In [22]:
import freesound
from requests_oauthlib import OAuth2Session
import os
import shutil
import time
import pickle
from requests_ratelimiter import LimiterSession

import fs_config

Set configuration variables using fs_config.py

# Authenticate

In [23]:


authorization_base_url = 'https://freesound.org/apiv2/oauth2/authorize/'
token_url = 'https://freesound.org/apiv2/oauth2/access_token/'
client_id = fs_config.fs_cid # grab from fs_config.py
client_secret = fs_config.fs_client_secret # grab from fs_config.py

Authenticate
The code below will open a link in your browser. You must authorize, and then copy the code into the python input window to continue

In [24]:
# do the OAuth dance
oauth = OAuth2Session(client_id)

authorization_url, state = oauth.authorization_url(
    "https://freesound.org/apiv2/oauth2/authorize/"
)
print(f"Please go to {authorization_url} and authorize access.")

authorization_code = input("Please enter the authorization code:")
oauth_token = oauth.fetch_token(
    "https://freesound.org/apiv2/oauth2/access_token/",
    authorization_code,
    client_secret=client_secret,
)

# oauth_token
client = freesound.FreesoundClient()
client.set_token(oauth_token["access_token"], "oauth")

Please go to https://freesound.org/apiv2/oauth2/authorize/?response_type=code&client_id=7yB3RiAb8ipn6BQuwvyn&state=HaXy1tXyWpbZ3igV5UOSGBuwSl0Dxl and authorize access.


Example of how to get a sound using the API

In [25]:
# Get sound info example
print("Sound info:")
print("-----------")
sound = client.get_sound(96541)
print("Getting sound:", sound.name)
print("Url:", sound.url)
print("Description:", sound.description)
print("Tags:", " ".join(sound.tags))
print()

Sound info:
-----------
Getting sound: coming soon.wav
Url: https://freesound.org/people/tim.kahn/sounds/96541/
Description: For the user "Stealth Inc.", my girlfriend saying "Coming soon". Recorded with an AT2020 microphone into an Apogee Duet and edited with Record.
Tags: request female girl talk vocal voice american woman english speak



# About the data:
For this project, we are going to use data uploaded by the Music Technology Group (MTG) of the Universitat Pompeu Fabra in Barcelona. Each sound is roughly 5 seconds of a single instrument playing either notes or scales with different dynamics. We are going to iterate through the list of sounds uploaded by the user MTG and retrieve information about the sounds

In [26]:
# dir(user_sounds[0])

def fs_download(sound):
    """For a given freesound sound object, download audio file + metadata + analysis"""
    # basic info
    sound_info = sound.as_dict()

    # download sound:
    # get basic info
    # check if sound is in file system
    # downloaded_sounds = [x[0] for x in os.walk("./freesound")]
    downloaded_sounds = []

    for root, dirs, files in os.walk("./freesound"):
        for directory in dirs:
            downloaded_sounds.append(directory)
    # if in file_system:
    if str(sound_info['id']) in downloaded_sounds:
        # r"C:\Users\{}\Documents\file.txt".format(username)
        # skip
        print('folder already created. skipping')
        pass
    else:
        # make directory
        sound_dir = f"./freesound/{sound_info['id']}"
        os.mkdir(sound_dir)
        
        # save basic info
        info_pkl = f"{sound_dir}/sound_metadata.pkl"
        with open(info_pkl, 'wb') as f:
            pickle.dump(sound_info, f)
        # save analysis
        sound_analysis = sound.get_analysis().as_dict()
        analysis_pkl = f"{sound_dir}/sound_analysis.pkl"
        with open(analysis_pkl,'wb') as f:
            pickle.dump(sound_analysis,f)
            
        # download
        name_string = f"{sound_info['name']}.{sound_info['type']}"
        name_string = name_string.replace('\r', '')
        sound.retrieve(directory=sound_dir,name = name_string)
        

Note that freesound API has a rate limit of 60 requests/minute and 5000 requests per 24 hours. So we can download a sample of the sounds in one day

In [27]:
from requests_ratelimiter import LimiterSession

# Apply a rate-limit (59 requests per minute) to all requests
client.session = LimiterSession(per_minute=59)

In [28]:
# for simplicity, lets start with all MTG sounds - very clean, usually single instruments
# search for MTG
user = client.get_user("MTG")
# print(dir(user))
print(user.num_sounds)
user_sounds = user.get_sounds(page_size = 100,fields = "id,name,tags,created,license,type,channels,filesize,bitrate,bitdepth,duration,samplerate,username",
                              sort = 'rating_desc')


8117


We have 8117 sounds uploaded by the user. The sounds are presented in a freesound.pager object. The following code iterates through the pager object to download the sounds.

In [29]:
num_pages = user.num_sounds // 10 + 1
print(num_pages)
# num_pages = 2 # temp for testing

812


In [30]:
page_num = 1
while page_num < num_pages:
    for sound in user_sounds:
        print("\t-", sound.name, "by", sound.username, "tags", sound.tags)
        fs_download(sound)
    time.sleep(5)
    tries = 0
    while tries < 10:
        try:
            user_sounds = user_sounds.next_page()
            tries = 10
        except freesound.FreesoundException as e:
            print('bad gateway, trying again')
            tries += 1
            time.sleep(3)
    
    # increment page
    page_num += 1

	- Happy 15th birthday Freesound! by MTG tags ['synthetized', 'Freesound15Years', 'voice-synthesis', 'voctrolabs', 'happy-birthday', 'choir', 'birthday', 'freesound', 'mtg']
folder already created. skipping
	- Flute - G5 - other by MTG tags ['G5', 'flute', 'neumann-U87', 'multisample', 'single-note', 'good-sounds']
folder already created. skipping
	- Flute - D#4 - bad-stability-timbre by MTG tags ['flute', 'Dsharp4', 'neumann-U87', 'multisample', 'single-note', 'good-sounds']
folder already created. skipping
	- Flute - C5 - bad-richness by MTG tags ['good-sounds', 'C5', 'single-note', 'multisample', 'neumann-U87', 'flute']
folder already created. skipping
	- Flute - F#6 - bad-stability-pitch by MTG tags ['Fsharp6', 'flute', 'neumann-U87', 'multisample', 'single-note', 'good-sounds']
folder already created. skipping
	- Flute - C#5 - bad-stability-pitch by MTG tags ['Csharp5', 'flute', 'neumann-U87', 'multisample', 'single-note', 'good-sounds']
folder already created. skipping
	- Flute -

FreesoundException: <FreesoundException: code=429, detail="Too Many Requests">

In [None]:
# audit file system

def audit_fs(base_dir='./freesound'):
    """
    Ensure the underlying file structure matches the expected format.
    
    In each folder inside the base directory, there should be:
    - sound_analysis.pkl
    - sound_metadata.pkl
    - One .wav file (which can have any name)
    
    Args:
        base_dir (str): The base directory to audit. Defaults to './freesound'.
        
    Returns:
        dict: A dictionary containing folder names as keys and a list of missing or extra files as values.
    """
    audit_results = {}

    # Ensure the base directory exists
    if not os.path.isdir(base_dir):
        return {base_dir: ['Base directory does not exist']}
    
    # Iterate over all items in the base directory
    for folder_name in os.listdir(base_dir):
        folder_path = os.path.join(base_dir, folder_name)
        
        # Check if the item is a directory
        if os.path.isdir(folder_path):
            expected_files = {'sound_analysis.pkl', 'sound_metadata.pkl'}
            wav_files = []
            present_files = set()
            
            # Check each file in the subdirectory
            for file_name in os.listdir(folder_path):
                file_path = os.path.join(folder_path, file_name)
                if os.path.isfile(file_path):
                    present_files.add(file_name)
                    if file_name.endswith('.wav'):
                        wav_files.append(file_name)
            
            # Check for missing and extra files
            missing_files = expected_files - present_files
            extra_files = present_files - expected_files - set(wav_files)
            
            if len(wav_files) != 1:
                extra_files.update(wav_files if len(wav_files) > 1 else [])
                missing_files.update(['one .wav file'] if len(wav_files) == 0 else [])
            
            # Record issues in the audit results
            if missing_files or extra_files:
                audit_results[folder_name] = {
                    'missing_files': list(missing_files),
                    'extra_files': list(extra_files)
                }
    
    return audit_results

# # Example usage
# if __name__ == "__main__":
#     results = audit_fs('./freesound')
#     for folder, issues in results.items():
#         print(f"Issues in folder '{folder}':")
#         if issues['missing_files']:
#             print(f"  Missing files: {', '.join(issues['missing_files'])}")
#         if issues['extra_files']:
#             print(f"  Extra files: {', '.join(issues['extra_files'])}")
#         print()
fs_audit_res = audit_fs()
fs_audit_res
# for dir_ in list(fs_audit_res.keys()):
#     dir_path = os.path.join('./freesound', dir_)
    
#     shutil.rmtree(dir_path)
#     # os.rmdir(dir_path)
#     print(dir_path)
    


./freesound\247952
./freesound\249008
./freesound\249009
./freesound\249010
./freesound\249011
./freesound\249012
./freesound\249013
./freesound\249014
./freesound\249015
./freesound\249016
./freesound\249017
./freesound\249018
./freesound\249019
./freesound\249020
./freesound\249021
./freesound\249022
./freesound\249023
./freesound\249024
./freesound\249025
./freesound\249026
./freesound\249027
./freesound\249028
./freesound\249029
./freesound\249030
./freesound\249031
./freesound\249032
./freesound\249033
./freesound\249034
./freesound\249035
./freesound\249036
./freesound\249037
./freesound\249038
./freesound\249039
./freesound\249040
./freesound\249041
./freesound\249042
./freesound\249043
./freesound\249044
./freesound\249045
./freesound\249046
./freesound\249047
./freesound\249048
./freesound\249049
./freesound\249050
./freesound\249051
./freesound\249052
./freesound\249053
./freesound\249054
./freesound\249055
./freesound\249056
./freesound\249057
./freesound\249058
./freesound\

In [None]:
# audit
# next steps:
# add relevant directories to gitignore
# add git ignore file to github dknapp17/audio_ML
# manually add config and notebooks and requirements.txt to github
# write out workflow for freesound API and running freesound notebook
# document notebook, add cool visuals
# resubmit to github