##### Importación de librerías

In [1]:
import librosa
import librosa.display
import tqdm
import os
import scipy

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

##### Carga de los datasets

In [2]:
# Root folder of the UrbanSound8K dataset, given as a relative path.
urbansound8k_path = "../../data/raw/UrbanSound8K/"

# First dataset - folder containing the audio files
# (presumably meant to be appended to urbansound8k_path; verify against its users).
urbansound8k_audio_path = "audio/"

# Second dataset - CSV file with the audio metadata
# (appended to urbansound8k_path when the file is loaded).
urbansound8k_metadata_path_file = "metadata/UrbanSound8K.csv"


In [3]:
def listar_archivos(directorio, nivel=0, max_nivel=None, max_elementos=5):
    """Print an indented tree preview of a directory.

    Each entry is printed as ``|-- name``, indented by four spaces per
    nesting level. Sub-directories are explored recursively.

    Parameters
    ----------
    directorio : str
        Path of the directory to list.
    nivel : int, optional
        Current nesting depth; controls the indentation. Callers normally
        leave it at 0, the recursion increments it.
    max_nivel : int or None, optional
        Deepest level to print. ``None`` (default) means no depth limit.
    max_elementos : int or None, optional
        Maximum number of entries printed per directory (default 5).
        ``None`` prints every entry.

    Returns
    -------
    None
        The tree is written to standard output.

    Notes
    -----
    A directory that cannot be read because of a ``PermissionError`` is
    reported with a message and skipped; other errors propagate.
    """
    # Stop descending once the requested depth has been exceeded.
    if max_nivel is not None and nivel > max_nivel:
        return

    try:
        # os.listdir returns entries in arbitrary order; sorting makes the
        # truncated preview reproducible across runs and file systems.
        contenidos = sorted(os.listdir(directorio))
    except PermissionError:
        print(f"No se pudo acceder a {directorio}")
        return

    sangria = ' ' * (nivel * 4)
    # Slicing with None keeps the whole list, so None disables the cap.
    for contenido in contenidos[:max_elementos]:
        ruta_contenido = os.path.join(directorio, contenido)
        print(sangria + '|-- ' + contenido)
        if os.path.isdir(ruta_contenido):
            listar_archivos(ruta_contenido, nivel + 1, max_nivel, max_elementos)

# Root directory of the audio files, built from the configured dataset paths
# instead of repeating the 'audio/' literal.
directorio_base = urbansound8k_path + urbansound8k_audio_path

# Print a truncated tree preview of the audio folder.
listar_archivos(directorio_base)

|-- .DS_Store
|-- fold1
    |-- .DS_Store
    |-- 101415-3-0-2.wav
    |-- 101415-3-0-3.wav
    |-- 101415-3-0-8.wav
    |-- 102106-3-0-0.wav
|-- fold10
    |-- .DS_Store
    |-- 100648-1-0-0.wav
    |-- 100648-1-1-0.wav
    |-- 100648-1-2-0.wav
    |-- 100648-1-3-0.wav
|-- fold2
    |-- .DS_Store
    |-- 100652-3-0-0.wav
    |-- 100652-3-0-1.wav
    |-- 100652-3-0-2.wav
    |-- 100652-3-0-3.wav
|-- fold3
    |-- .DS_Store
    |-- 102105-3-0-0.wav
    |-- 103199-4-0-0.wav
    |-- 103199-4-0-3.wav
    |-- 103199-4-0-4.wav


In [4]:
# Load the metadata CSV located under the dataset root and preview its first rows.
metadata_urbansound8k_df = pd.read_csv(
    f"{urbansound8k_path}{urbansound8k_metadata_path_file}",
    sep=",",
)
display(metadata_urbansound8k_df.head())

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,children_playing
2,100263-2-0-121.wav,100263,60.5,64.5,1,5,2,children_playing
3,100263-2-0-126.wav,100263,63.0,67.0,1,5,2,children_playing
4,100263-2-0-137.wav,100263,68.5,72.5,1,5,2,children_playing


UrbanSound8K
============

Created By
----------

Justin Salamon*^, Christopher Jacoby* and Juan Pablo Bello*
* Music and Audio Research Lab (MARL), New York University, USA
^ Center for Urban Science and Progress (CUSP), New York University, USA
http://serv.cusp.nyu.edu/projects/urbansounddataset
http://marl.smusic.nyu.edu/
http://cusp.nyu.edu/

Version 1.0


Description
-----------

This dataset contains 8732 labeled sound excerpts (<=4s) of urban sounds from 10 classes: air_conditioner, car_horn, 
children_playing, dog_bark, drilling, engine_idling, gun_shot, jackhammer, siren, and street_music. The classes are 
drawn from the urban sound taxonomy described in the following article, which also includes a detailed description of 
the dataset and how it was compiled:

J. Salamon, C. Jacoby and J. P. Bello, "A Dataset and Taxonomy for Urban Sound Research", 
22nd ACM International Conference on Multimedia, Orlando USA, Nov. 2014.

All excerpts are taken from field recordings uploaded to www.freesound.org. The files are pre-sorted into ten folds
(folders named fold1-fold10) to help in the reproduction of and comparison with the automatic classification results
reported in the article above.

In addition to the sound excerpts, a CSV file containing metadata about each excerpt is also provided.


Audio Files Included
--------------------

8732 audio files of urban sounds (see description above) in WAV format. The sampling rate, bit depth, and number of 
channels are the same as those of the original file uploaded to Freesound (and hence may vary from file to file).


Meta-data Files Included
------------------------

UrbanSound8K.csv

This file contains meta-data information about every audio file in the dataset. This includes:

* slice_file_name: 
The name of the audio file. The name takes the following format: [fsID]-[classID]-[occurrenceID]-[sliceID].wav, where:
[fsID] = the Freesound ID of the recording from which this excerpt (slice) is taken
[classID] = a numeric identifier of the sound class (see description of classID below for further details)
[occurrenceID] = a numeric identifier to distinguish different occurrences of the sound within the original recording
[sliceID] = a numeric identifier to distinguish different slices taken from the same occurrence

* fsID:
The Freesound ID of the recording from which this excerpt (slice) is taken

* start:
The start time of the slice in the original Freesound recording

* end:
The end time of the slice in the original Freesound recording

* salience:
A (subjective) salience rating of the sound. 1 = foreground, 2 = background.

* fold:
The fold number (1-10) to which this file has been allocated.

* classID:
A numeric identifier of the sound class:
0 = air_conditioner
1 = car_horn
2 = children_playing
3 = dog_bark
4 = drilling
5 = engine_idling
6 = gun_shot
7 = jackhammer
8 = siren
9 = street_music

* class:
The class name: air_conditioner, car_horn, children_playing, dog_bark, drilling, engine_idling, gun_shot, jackhammer, 
siren, street_music.


Please Acknowledge UrbanSound8K in Academic Research
----------------------------------------------------

When UrbanSound8K is used for academic research, we would highly appreciate it if scientific publications of works 
partly based on the UrbanSound8K dataset cite the following publication:

J. Salamon, C. Jacoby and J. P. Bello, "A Dataset and Taxonomy for Urban Sound Research", 
22nd ACM International Conference on Multimedia, Orlando USA, Nov. 2014.

The creation of this dataset was supported by a seed grant by NYU's Center for Urban Science and Progress (CUSP).


Conditions of Use
-----------------

Dataset compiled by Justin Salamon, Christopher Jacoby and Juan Pablo Bello. All files are excerpts of recordings
uploaded to www.freesound.org. Please see FREESOUNDCREDITS.txt for an attribution list.
 
The UrbanSound8K dataset is offered free of charge for non-commercial use only under the terms of the Creative Commons
Attribution Noncommercial License (by-nc), version 3.0: http://creativecommons.org/licenses/by-nc/3.0/
 
The dataset and its contents are made available on an "as is" basis and without warranties of any kind, including 
without limitation satisfactory quality and conformity, merchantability, fitness for a particular purpose, accuracy or 
completeness, or absence of errors. Subject to any liability that may not be excluded or limited by law, NYU is not 
liable for, and expressly excludes, all liability for loss or damage however and whenever caused to anyone by any use of
the UrbanSound8K dataset or any part of it.


Feedback
--------

Please help us improve UrbanSound8K by sending your feedback to: justin.salamon@nyu.edu or justin.salamon@gmail.com
In case of a problem report please include as many details as possible.
