In [12]:
from torch.utils.data import Dataset
import torchaudio
import pandas as pd
import os

We can create a custom dataset by subclassing the `Dataset` class and overriding:
- \_\_len\_\_(): it's what len(data) should return
- \_\_getitem\_\_(): it's what data[i] should return

In [3]:
class UrbanSoundDataset(Dataset):
    """Map-style dataset over audio clips listed in an annotations CSV.

    Each row of the annotations table describes one clip. The clip's file
    name, fold number and label are read from the row by column position
    (see the ``*_COLUMN`` constants below), and the audio file is looked up
    at ``<audio_dir>/fold<fold>/<file name>``.

    Indexing the dataset returns a ``(signal, label)`` pair, where ``signal``
    is the first value returned by ``torchaudio.load``.
    """

    # Positional indices of the annotation columns this class reads.
    # Kept as class attributes so a subclass (or instance) can override them
    # when the CSV uses a different column order.
    FILENAME_COLUMN = 0
    FOLD_COLUMN = 5
    LABEL_COLUMN = 6

    def __init__(self, annotations_path, audio_dir):
        """Load the annotations table and remember where the audio lives.

        Args:
            annotations_path: Location of the annotations CSV; passed
                unchanged to ``pandas.read_csv``.
            audio_dir: Directory that contains the ``fold<N>`` sub-folders.
        """
        self.annotations = pd.read_csv(annotations_path)
        self.audio_dir = audio_dir

    def __len__(self):
        """Return the number of annotation rows (one per sample)."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Load the clip at row ``index`` and return ``(signal, label)``."""
        audio_sample_path = self._get_audio_sample_path(index)
        label = self._get_audio_sample_label(index)
        # torchaudio.load returns a second value as well (bound to `sr` in the
        # original code); it is not needed here, so it is discarded.
        signal, _ = torchaudio.load(audio_sample_path)
        return signal, label

    def _get_audio_sample_path(self, index):
        """Build ``<audio_dir>/fold<fold>/<file name>`` for row ``index``."""
        row = self.annotations.iloc[index]
        folder = f'fold{row.iloc[self.FOLD_COLUMN]}'
        filename = row.iloc[self.FILENAME_COLUMN]
        return os.path.join(self.audio_dir, folder, filename)

    def _get_audio_sample_label(self, index):
        """Return the value stored in the label column for row ``index``."""
        return self.annotations.iloc[index, self.LABEL_COLUMN]


Let's test the implementation on the UrbanSound8K dataset, which I downloaded from [Kaggle](https://www.kaggle.com/datasets/chrisfilo/urbansound8k).

In [15]:
# Keep the dataset location in one place so the CSV path and the audio
# directory cannot drift apart; adjust DATA_DIR to match your own download.
DATA_DIR = '../../../Downloads/archive(4)'
ANNOTATIONS_PATH = os.path.join(DATA_DIR, 'UrbanSound8K.csv')

usd = UrbanSoundDataset(annotations_path=ANNOTATIONS_PATH, audio_dir=DATA_DIR)

In [19]:
# Sanity check for __len__: one sample per annotation row, so this should be
# around 8k for the entire dataset.
len(usd)

8732

In [22]:
# Sanity check for __getitem__: usd[0] is a (signal, label) pair, so element 1
# is the label of the first sample, which should be 3.
usd[0][1]

3