# Audio Data Block

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
#Export

#Internal dependencies
from exp.nb_AudioCommon import * 
from exp.nb_DataAugmentation import * 

In [3]:
#Export

#External dependencies
import mimetypes
from fastai.vision import *
import torchaudio
from torchaudio import transforms

#for jupyter Display
from IPython.display import Audio

## Sample data for test

In [4]:
# Standard path notation for fast.ai
# The files willbe saved on $HOME/.fastai/data/timit/
path = getFastAiWorkingDirectory('timit')

Working directory: /home/ubuntu/.fastai/data/timit


## Data Block classes

In [5]:
DataBunch

fastai.basic_data.DataBunch

### AudioItem
This is the base class of fast.ai Audio tht holds a reference to and AudioData object.

In [6]:
#Export        
class AudioItem(ItemBase):
    def __init__(self, data:AudioData, **kwargs):
        self.data = data # Always flatten out to single dimension signal!
        self.kwargs = kwargs

    def __str__(self):
        return 'REPRESENTATION'
        if isinstance(self.data, AudioData): return f'Duration: {self.duration} seconds.'
        else: return f'{type(self.data)}: {self.data.shape}'
    def __len__(self): return self.data.sig.shape[0]
    def _repr_html_(self): return f'{self.__str__()}<br />{self.ipy_audio._repr_html_()}'
    
    def show(self, title:Optional[str]=None, **kwargs):
        "Show sound on `ax` with `title`, using `cmap` if single-channel, overlaid with optional `y`"
        self.hear(title=title)

    def hear(self, title=None):
        if title is not None: print(title)
        display(self.ipy_audio)

    def apply_tfms(self, tfms):
        for tfm in tfms:
            self.data = tfm(self.data)
        return self
        
    @property
    def shape(self):
        return self.data.sig.shape
    
    @property
    def ipy_audio(self):
        return Audio(data=self.data.sig, rate=self.data.sr)

    @property
    def duration(self): return len(self.data.sig)/self.data.sr

In [7]:
def test_AudioItem_create_from_data():
    signal,samplerate = torchaudio.load(str(path/'TRAIN/DR1/MDPK0/SA1.WAV'))
    a = AudioItem(AudioData(signal,samplerate))
    assert 1 == len(a.data.sig.shape), 'Single dimension data'
    assert a.data.sig.shape[0] > 100, 'Has data'
    assert 16000 == a.data.sr
    display(a)

test_AudioItem_create_from_data()

In [8]:
def test_AudioItem_create_from_audio_file():
    a = AudioItem(AudioData.load(str(path/'TRAIN/DR1/MDPK0/SA1.WAV')))
    assert 1 == len(a.data.sig.shape), 'Single dimension data'
    assert a.data.sig.shape[0] > 100, 'Has data'
    assert 16000 == a.data.sr
    display(a)
    
test_AudioItem_create_from_audio_file()

## AudioDataBunch

In [9]:
#Export
class AudioDataBunch(DataBunch):
    def hear_ex(self, rows:int=3, ds_type:DatasetType=DatasetType.Valid, **kwargs):
        batch = self.dl(ds_type).dataset[:rows]
        self.train_ds.hear_xys(batch.x, batch.y, **kwargs)

### AudioList
This class is responsible to contain a list of AudioItem.

In [10]:
#Export
class AudioList(ItemList):
    _bunch = AudioDataBunch
    
    # TODO: __REPR__    
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
            
    def get(self, i):
        item = self.items[i]
        try:         
            if isinstance(item, (Path, str)):
                return AudioItem(AudioData.load(str(item)))
            if isinstance(item, tuple): #data,sr
                return AudioItem(AudioData(item[0],item[1]))
        except AppError as error:
            print('Format not supported!', file=sys.stderr)
            raise
    
    def reconstruct(self, t:Tensor): return Image(t.transpose(1,2))

    def hear_xys(self, xs, ys, **kwargs):
        for x, y in zip(xs, ys): x.hear(title=y, **kwargs)
            
    # TODO: example with from_folder
    @classmethod
    def from_folder(cls, path:PathOrStr='.', extensions:Collection[str]=None, **kwargs)->ItemList:
        extensions = ifnone(extensions, AUDIO_EXTENSIONS)
        return super().from_folder(path=path, extensions=extensions, **kwargs)


In [11]:
def test_AudioList_from_df_file_names():
    import glob
    #Create Data Frame
    df = pd.DataFrame(glob.glob(str(path/'**/*.WAV'), recursive=True)[:10])
    df.columns = ['FileName']
    display(df.head())

    #Crete AudioList
    ils = AudioList.from_df(df, path, cols=['FileName'])
    
    #Test a item
    i=5
    print(f'FileName: {df.FileName[i]}')
    a = ils.get(i)
    print(a.data.sig.shape, a.data.sr)
    display(a)

test_AudioList_from_df_file_names()

Unnamed: 0,FileName
0,/home/ubuntu/.fastai/data/timit/TRAIN/DR6/MSAT...
1,/home/ubuntu/.fastai/data/timit/TRAIN/DR6/MSAT...
2,/home/ubuntu/.fastai/data/timit/TRAIN/DR6/MSAT...
3,/home/ubuntu/.fastai/data/timit/TRAIN/DR6/MSAT...
4,/home/ubuntu/.fastai/data/timit/TRAIN/DR6/MSAT...


FileName: /home/ubuntu/.fastai/data/timit/TRAIN/DR6/MSAT1/SI1703.WAV
torch.Size([42599]) 16000


In [12]:
def test_AudioList_from_folder():
    p = path/'TRAIN'

    #Create AudioList
    ils = AudioList.from_folder(p)
    
    #Test an item
    i=4
    a = ils.get(i)
    print(a.data.sig.shape, a.data.sr)
    display(a)

test_AudioList_from_folder()

torch.Size([39117]) 16000


In [13]:
def test_AudioList_from_df_data_and_sr():
    import glob
    #Create Data Frame
    df = pd.DataFrame(glob.glob(str(path/'**/*.WAV'), recursive=True)[:10])
    df.columns = ['FileName']
    df['SampleAndSr']=df['FileName'].apply(lambda n: torchaudio.load(n))
    #df = df['tmp'].drop()
    
    display(df.head())

    #Create AudioList
    ils = AudioList.from_df(df, path, cols=['SampleAndSr'])
    
    #Test a item
    i=4
    print(f'FileName: {df.FileName[i]}')
    a = ils.get(i)
    print(a.data.sig.shape, a.data.sr)
    display(a)

test_AudioList_from_df_data_and_sr()

Unnamed: 0,FileName,SampleAndSr
0,/home/ubuntu/.fastai/data/timit/TRAIN/DR6/MSAT...,"([[tensor(-6.1035e-05), tensor(-9.1553e-05), t..."
1,/home/ubuntu/.fastai/data/timit/TRAIN/DR6/MSAT...,"([[tensor(0.0002), tensor(0.0001), tensor(9.15..."
2,/home/ubuntu/.fastai/data/timit/TRAIN/DR6/MSAT...,"([[tensor(0.0002), tensor(0.0001), tensor(9.15..."
3,/home/ubuntu/.fastai/data/timit/TRAIN/DR6/MSAT...,"([[tensor(0.0001), tensor(0.0002), tensor(9.15..."
4,/home/ubuntu/.fastai/data/timit/TRAIN/DR6/MSAT...,"([[tensor(0.0001), tensor(9.1553e-05), tensor(..."


FileName: /home/ubuntu/.fastai/data/timit/TRAIN/DR6/MSAT1/SX263.WAV
torch.Size([39117]) 16000


### Sample AudioDataBunch Usage

In [14]:
import glob

def process_phn_file(p_file, sig, sr, delimiter=' '):
    df = pd.read_csv(p_file, delimiter=delimiter, header=None)
    df.columns = ['Start', 'End', 'Phn']
    df['SampleAndSr'] = df.apply(lambda x : (sig[-1][x['Start']: x['End']], sr), axis=1)
    return df

def create_phn_df(path, count=100):
    phns = []
    final = pd.DataFrame()

    for phn_file in glob.glob(str(path/'**/*.PHN'), recursive=True)[:count]:
        sig,sr = torchaudio.load(str(phn_file.replace('PHN', 'WAV')))
        df = process_phn_file(phn_file, sig, sr, delimiter=' ')
        df['Source'] = phn_file
        final = final.append(df, ignore_index=True)
    return df

df = create_phn_df(path/'TRAIN')
df.head()

Unnamed: 0,Start,End,Phn,SampleAndSr,Source
0,0,2480,h#,"([tensor(0.), tensor(-3.0518e-05), tensor(0.),...",/home/ubuntu/.fastai/data/timit/TRAIN/DR6/MJRK...
1,2480,3310,ix,"([tensor(-0.0001), tensor(0.0009), tensor(0.00...",/home/ubuntu/.fastai/data/timit/TRAIN/DR6/MJRK...
2,3310,4680,z,"([tensor(-0.0028), tensor(-0.0015), tensor(0.0...",/home/ubuntu/.fastai/data/timit/TRAIN/DR6/MJRK...
3,4680,5720,kcl,"([tensor(-3.0518e-05), tensor(-6.1035e-05), te...",/home/ubuntu/.fastai/data/timit/TRAIN/DR6/MJRK...
4,5720,6910,k,"([tensor(0.0005), tensor(0.0005), tensor(0.000...",/home/ubuntu/.fastai/data/timit/TRAIN/DR6/MJRK...


In [15]:
step0 = AudioList.from_df(df, path, cols=['SampleAndSr']); print(type(step0))

<class '__main__.AudioList'>


In [16]:
step0

AudioList (46 items)
REPRESENTATION,REPRESENTATION,REPRESENTATION,REPRESENTATION,REPRESENTATION
Path: /home/ubuntu/.fastai/data/timit

In [17]:
step1 = step0.split_by_rand_pct(0.1, seed=1);
print(f'Result type: {type(step1)}, Type of Items: {type(step1.lists[0])}')

Result type: <class 'fastai.data_block.ItemLists'>, Type of Items: <class '__main__.AudioList'>


In [18]:
step2 = step1.label_from_df('Phn');
print(f'Result type: {type(step2)}')

Result type: <class 'fastai.data_block.LabelLists'>


Your valid set contained the following unknown labels, the corresponding items have been discarded.
tcl, r
  if getattr(ds, 'warn', False): warn(ds.warn)


In [19]:
#Export
def get_audio_transforms(spectro:bool=False,
                         white_noise:bool=True,
                         modulate_volume:bool=True,
                         random_cutout:bool=True,
                         pad_with_silence:bool=True,
                         pitch_warp:bool=True,
                         down_and_up:bool=True,
                         mx_to_pad:int=1000,
                         xtra_tfms:Optional[Collection[Transform]]=None,
                         **kwargs)->Collection[Transform]:
    "Utility func to easily create a list of audio transforms."
    res = []
    if white_noise: res.append(partial(tfm_add_white_noise, noise_scl=0.005, **kwargs))
    if modulate_volume: res.append(partial(tfm_modulate_volume, lower_gain=.1, upper_gain=1.2, **kwargs))
    if random_cutout: res.append(partial(tfm_random_cutout, pct_to_cut=.15, **kwargs))
    if pad_with_silence: res.append(partial(tfm_pad_with_silence, pct_to_pad=.15, min_to_pad=None, max_to_pad=None, **kwargs))
    if pitch_warp: res.append(partial(tfm_pitch_warp, shift_by_pitch=None, bins_per_octave=12, **kwargs))
    if down_and_up: res.append(partial(tfm_down_and_up, sr_divisor=2, **kwargs))
    res.append(partial(tfm_pad_to_max, mx=mx_to_pad))
    final_transform = tfm_extract_signal
    if spectro: final_transform = tfm_spectro
    res.append(final_transform)
    #       train                   , valid
    return (res + listify(xtra_tfms), [partial(tfm_pad_to_max, mx=mx_to_pad), final_transform])


In [20]:
batch_size = 8

In [21]:
tfms = get_audio_transforms()
step3 = step2.transform(tfms);
print(type(step3))

<class 'fastai.data_block.LabelLists'>


In [22]:
step4 = step3.databunch(bs=batch_size);
print(type(step4))

<class '__main__.AudioDataBunch'>


In [23]:
step4 = step3.databunch(bs=batch_size);
step4

AudioDataBunch;

Train: LabelList (42 items)
x: AudioList
REPRESENTATION,REPRESENTATION,REPRESENTATION,REPRESENTATION,REPRESENTATION
y: CategoryList
h#,ix,kcl,k,ow
Path: /home/ubuntu/.fastai/data/timit;

Valid: LabelList (2 items)
x: AudioList
REPRESENTATION,REPRESENTATION
y: CategoryList
dx,z
Path: /home/ubuntu/.fastai/data/timit;

Test: None

In [24]:
max_len = 1000
datablock = step4
batch = datablock.one_batch()
print(len(batch[0]), batch[0].shape)
assert batch_size == len(batch[0])
assert max_len == batch[0].shape[1]

8 torch.Size([8, 1000])


In [25]:
# datablock.show_batch()

# Export

In [26]:
!python notebook2script.py DataBlock.ipynb

Converted DataBlock.ipynb to nb_DataBlock.py
