In [None]:
from collections import defaultdict
from os.path import join, exists, splitext
from os import listdir, symlink, makedirs
from shutil import copyfile
from praatio import tgio
import pandas as pd

In [None]:
# Root folder of the ESC-50 dataset; the raw and processed paths are derived from it.
data_root = "/data/esc-50/"

In [None]:
# Source ("raw") and destination ("processed") folders, both directly under data_root.
load_dir, save_root = (join(data_root, sub) for sub in ('raw', 'processed'))

In [None]:
# Create the destination folder; exist_ok makes this cell safe to rerun.
makedirs(name=save_root, exist_ok=True)

In [None]:
# Sub-folders of the raw data: audio clips and their annotations.
load_audio_dir, load_annotation_dir = (
    join(load_dir, sub) for sub in ('audio', 'annotations')
)

In [None]:
# Collect the audio clips to link. listdir returns every entry of the folder
# in arbitrary order, so keep only `.wav` files (skipping stray entries such
# as hidden files or sub-folders) and sort them so reruns are deterministic.
files = sorted(
    name for name in listdir(load_audio_dir)
    if name.lower().endswith('.wav')
)

In [None]:
# Folder under save_root that receives the audio links; created if it is missing.
save_audio_dir = join(save_root, 'audio')
makedirs(name=save_audio_dir, exist_ok=True)

Create symlinks to the original `.wav` files

In [None]:
# Link every raw audio file into the processed audio folder.
# The link is attempted directly instead of checking exists(dest) first:
# exists() follows symlinks and reports False for a dangling link, in which
# case symlink() would raise FileExistsError and abort the loop on a rerun.
for file in files:
    dest = join(save_audio_dir, file)
    try:
        symlink(join(load_audio_dir, file), dest)
    except FileExistsError:
        # dest is already present (file, link or dangling link): leave it as is
        pass

In [None]:
# Copy the ESC-50 metadata table into the processed folder as attributes.csv.
meta_csv_path = join(load_dir, 'meta/esc50.csv')
attributes_path = join(save_root, 'attributes.csv')
copyfile(meta_csv_path, attributes_path)

In [None]:
# Load the raw ESC-50 metadata table.
df = pd.read_csv(
    join(load_dir, 'meta/esc50.csv'),
)

In [None]:
# Preview the first rows of the metadata table.
df.head(n=5)

In [None]:
# The annotation format allows several classification targets per file, so
# each file gets a *list* of labels, here always holding the single category.
classification_targets = [[label] for label in df.category.values]
# File identifiers are the clip filenames without their extension.
# NOTE: this rebinds `files`, which earlier held the directory listing.
files = [splitext(filename)[0] for filename in df['filename'].values]

In [None]:
# Assemble the annotation table: one row per clip with its list of labels.
# NOTE: this rebinds `df`, replacing the metadata table loaded earlier.
df = pd.DataFrame(dict(file=files, classification=classification_targets))

In [None]:
# Preview the first rows of the annotation table.
df.head(n=5)

In [None]:
# Write the annotation table to annotation.csv in the processed folder,
# leaving out the pandas row index.
annotation_save_path = join(save_root, 'annotation.csv')
df.to_csv(path_or_buf=annotation_save_path, index=False)

In [None]:
# Human-readable summary of the processed dataset (written to description.txt).
# The text is assembled from explicit lines so that every line break is
# intentional: each entry (including 'audio/*.wav') sits on its own line and
# no line carries stray leading or trailing whitespace.
valid_labels = [
    'airplane', 'breathing', 'brushing_teeth', 'can_opening', 'car_horn',
    'cat', 'chainsaw', 'chirping_birds', 'church_bells', 'clapping',
    'clock_alarm', 'clock_tick', 'coughing', 'cow', 'crackling_fire',
    'crickets', 'crow', 'crying_baby', 'dog', 'door_wood_creaks',
    'door_wood_knock', 'drinking_sipping', 'engine', 'fireworks',
    'footsteps', 'frog', 'glass_breaking', 'hand_saw', 'helicopter', 'hen',
    'insects', 'keyboard_typing', 'laughing', 'mouse_click', 'pig',
    'pouring_water', 'rain', 'rooster', 'sea_waves', 'sheep', 'siren',
    'sneezing', 'snoring', 'thunderstorm', 'toilet_flush', 'train',
    'vacuum_cleaner', 'washing_machine', 'water_drops', 'wind',
]

description = '\n'.join([
    'Annotation columns:',
    # the list repr renders as ['airplane', 'breathing', ...] on a single line
    '`classification`: valid labels = {}'.format(valid_labels),
    '',
    'File format:',
    "'audio/*.wav'",
    '2000 audio recordings in WAV format (5 seconds, 44.1 kHz, mono) with the following naming convention:',
    '`{FOLD}-{CLIP_ID}-{TAKE}-{TARGET}.wav`',
    '- `{FOLD}` - index of the cross-validation fold,',
    '- `{CLIP_ID}` - ID of the original Freesound clip,',
    '- `{TAKE}` - letter disambiguating between different fragments from the same Freesound clip,',
    '- `{TARGET}` - class in numeric format [0, 49].',
])

In [None]:
# Persist the dataset description next to the processed data.
description_path = join(save_root, 'description.txt')
with open(description_path, 'w') as handle:
    handle.write(description)