# Audio Data Block

In [155]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [156]:
#Export

#Internal dependencies
from exp.nb_AudioCommon import * 

In [157]:
#Export

#External dependencies
import mimetypes
from fastai.vision import *
import torchaudio
from torchaudio import transforms

#for jupyter Display
from IPython.display import Audio

## Sample data for test

In [158]:
# Standard path notation for fast.ai
# The files willbe saved on $HOME/.fastai/data/timit/
path = getFastAiWorkingDirectory('timit')

Working directory: /home/jupyter/.fastai/data/timit


## Data Block classes

### AudioItem
This is the base class of fast.ai Audio tht holds a reference to and AudioData object.

In [159]:
#Export        
class AudioItem(ItemBase):
    def __init__(self, data:AudioData, **kwargs):
        self.data = data # Always flatten out to single dimension signal!
        self.kwargs = kwargs

    def __str__(self): return f'Duration: {self.duration} seconds.'
    def __len__(self): return self.data.sig.shape[0]
    def _repr_html_(self): return f'{self.__str__()}<br />{self.ipy_audio._repr_html_()}'
    
    def show(self, title:Optional[str]=None, **kwargs):
        "Show sound on `ax` with `title`, using `cmap` if single-channel, overlaid with optional `y`"
        self.hear(title=title)

    def hear(self, title=None):
        if title is not None: print(title)
        display(self.ipy_audio)

    def apply_tfms(self, tfms):
        for tfm in tfms:
            self.data = tfm(self.data)
        return self
        
    @property
    def shape(self):
        return self.data.sig.shape
    
    @property
    def ipy_audio(self): 
        return Audio(data=self.data.sig, rate=self.data.sr)

    @property
    def duration(self): return len(self.data.sig)/self.data.sr

In [160]:
def test_AudioItem_create_from_data():
    signal,samplerate = torchaudio.load(str(path/'TRAIN/DR1/MDPK0/SA1.WAV'))
    a = AudioItem(AudioData(signal,samplerate))
    assert 1 == len(a.data.sig.shape), 'Single dimension data'
    assert a.data.sig.shape[0] > 100, 'Has data'
    assert 16000 == a.data.sr
    display(a)

test_AudioItem_create_from_data()

In [161]:
def test_AudioItem_create_from_audio_file():
    a = AudioItem(AudioData.load(str(path/'TRAIN/DR1/MDPK0/SA1.WAV')))
    assert 1 == len(a.data.sig.shape), 'Single dimension data'
    assert a.data.sig.shape[0] > 100, 'Has data'
    assert 16000 == a.data.sr
    display(a)
    
test_AudioItem_create_from_audio_file()

## AudioDataBunch

In [162]:
#Export
class AudioDataBunch(DataBunch):
    def hear_ex(self, rows:int=3, ds_type:DatasetType=DatasetType.Valid, **kwargs):
        batch = self.dl(ds_type).dataset[:rows]
        self.train_ds.hear_xys(batch.x, batch.y, **kwargs)

### AudioList
This class is responsible to contain a list of AudioItem.

In [163]:
#Export
class AudioList(ItemList):
    _bunch = AudioDataBunch
    
    # TODO: __REPR__    
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
            
    def get(self, i):
        item = self.items[i]
        try:            
            if isinstance(item, str):
                return AudioItem(AudioData.load(item))
            if isinstance(item, tuple): #data,sr
                return AudioItem(AudioData(item[0],item[1]))
        except AppError as error:
            print('Format not supported!', file=sys.stderr)
            raise
    
    def reconstruct(self, t:Tensor): return Image(t.transpose(1,2))

    def hear_xys(self, xs, ys, **kwargs):
        for x, y in zip(xs, ys): x.hear(title=y, **kwargs)
            
    # TODO: example with from_folder
    @classmethod
    def from_folder(cls, path:PathOrStr='.', extensions:Collection[str]=None, **kwargs)->ItemList:
        extensions = ifnone(extensions, AUDIO_EXTENSIONS)
        return super().from_folder(path=path, extensions=extensions, **kwargs)


In [164]:
def test_AudioList_from_df_file_names():
    import glob
    #Create Data Frame
    df = pd.DataFrame(glob.glob(str(path/'**/*.WAV'), recursive=True)[:10])
    df.columns = ['FileName']
    display(df.head())

    #Crete AudioList
    ils = AudioList.from_df(df, path, cols=['FileName'])
    
    #Test a item
    i=5
    print(f'FileName: {df.FileName[i]}')
    a = ils.get(i)
    print(a.data.sig.shape, a.data.sr)
    display(a)

test_AudioList_from_df_file_names()

Unnamed: 0,FileName
0,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...
1,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...
2,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...
3,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...
4,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...


FileName: /home/jupyter/.fastai/data/timit/TEST/DR1/FAKS0/SX43.WAV
torch.Size([39220]) 16000


In [165]:
def test_AudioList_from_df_data_and_sr():
    import glob
    #Create Data Frame
    df = pd.DataFrame(glob.glob(str(path/'**/*.WAV'), recursive=True)[:10])
    df.columns = ['FileName']
    df['SampleAndSr']=df['FileName'].apply(lambda n: torchaudio.load(n))
    #df = df['tmp'].drop()
    
    display(df.head())

    #Create AudioList
    ils = AudioList.from_df(df, path, cols=['SampleAndSr'])
    
    #Test a item
    i=4
    print(f'FileName: {df.FileName[i]}')
    a = ils.get(i)
    print(a.data.sig.shape, a.data.sr)
    display(a)

test_AudioList_from_df_data_and_sr()

Unnamed: 0,FileName,SampleAndSr
0,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...,"([[tensor(0.0002), tensor(-0.0002), tensor(-3...."
1,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...,"([[tensor(6.1035e-05), tensor(0.0002), tensor(..."
2,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...,"([[tensor(9.1553e-05), tensor(0.0002), tensor(..."
3,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...,"([[tensor(9.1553e-05), tensor(6.1035e-05), ten..."
4,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...,"([[tensor(3.0518e-05), tensor(0.0002), tensor(..."


FileName: /home/jupyter/.fastai/data/timit/TEST/DR1/FAKS0/SX223.WAV
torch.Size([49562]) 16000


### Sample AudioDataBunch Usage

In [166]:
import glob

In [167]:
def process_phn_file(p_file, sig, sr, delimiter=' '):
    df = pd.read_csv(p_file, delimiter=delimiter, header=None)
    df.columns = ['Start', 'End', 'Phn']
    df['SampleAndSr'] = df.apply(lambda x : (sig[-1][x['Start']: x['End']], sr), axis=1)
    return df

def create_phn_df(path, count=100):
    phns = []
    final = pd.DataFrame()

    for phn_file in glob.glob(str(path/'**/*.PHN'), recursive=True)[:count]:
        sig,sr = torchaudio.load(str(phn_file.replace('PHN', 'WAV')))
        df = process_phn_file(phn_file, sig, sr, delimiter=' ')
        df['Source'] = phn_file
        final = final.append(df, ignore_index=True)
    return df

df = create_phn_df(path/'TRAIN')
df.head()

Unnamed: 0,Start,End,Phn,SampleAndSr,Source
0,0,2040,h#,"([tensor(-0.0004), tensor(-3.0518e-05), tensor...",/home/jupyter/.fastai/data/timit/TRAIN/DR1/MRW...
1,2040,2631,w,"([tensor(3.0518e-05), tensor(0.0001), tensor(0...",/home/jupyter/.fastai/data/timit/TRAIN/DR1/MRW...
2,2631,3258,ih,"([tensor(0.0015), tensor(-0.0002), tensor(-0.0...",/home/jupyter/.fastai/data/timit/TRAIN/DR1/MRW...
3,3258,3509,nx,"([tensor(0.0076), tensor(0.0072), tensor(0.005...",/home/jupyter/.fastai/data/timit/TRAIN/DR1/MRW...
4,3509,6015,ao,"([tensor(0.0397), tensor(0.0306), tensor(0.022...",/home/jupyter/.fastai/data/timit/TRAIN/DR1/MRW...


In [168]:
step0 = AudioList.from_df(df, path, cols=['SampleAndSr']); print(type(step0))

<class '__main__.AudioList'>


In [169]:
step1 = step0.split_by_rand_pct(0.1, seed=1);
print(f'Result type: {type(step1)}, Type of Items: {type(step1.lists[0])}')

Result type: <class 'fastai.data_block.ItemLists'>, Type of Items: <class '__main__.AudioList'>


In [170]:
step2 = step1.label_from_df('Phn');
print(f'Result type: {type(step2)}')

Result type: <class 'fastai.data_block.LabelLists'>


In [171]:
def pad_to_max(ad:AudioData, mx=1000, value=0):
    """Pad tensor with `value` until it reaches length `mx`"""
    if ad.sig.shape[0] == mx: return ad
    return AudioData(F.pad(ad.sig, (0,mx-ad.sig.shape[0]), value=value), ad.sr)

def tfm_log(x):
    '''Fake transformation that logs x shape'''
    print(f'Shape of transform input: {x.shape}')
    return x

def tfm_flatten(x):
    return x.reshape(-1)

def tfm_extract_signal(ad:AudioData):
    return ad.sig

In [172]:
max_len = 500
tfms = [partial(pad_to_max, mx=max_len), tfm_extract_signal, tfm_log]
batch_size = 8

In [173]:
step3 = step2.transform([tfms, tfms]);
print(type(step3))

Shape of transform input: torch.Size([500])
Shape of transform input: torch.Size([500])
<class 'fastai.data_block.LabelLists'>


In [174]:
step4 = step3.databunch(bs=batch_size);
print(type(step4))

Shape of transform input: torch.Size([500])
Shape of transform input: torch.Size([500])
Shape of transform input: torch.Size([500])
Shape of transform input: torch.Size([500])
Shape of transform input: torch.Size([500])
Shape of transform input: torch.Size([500])
Shape of transform input: torch.Size([500])
Shape of transform input: torch.Size([500])
Shape of transform input: torch.Size([500])
<class '__main__.AudioDataBunch'>


In [175]:
datablock = step4
batch = datablock.one_batch()
print(len(batch[0]), batch[0].shape)
assert batch_size == len(batch[0])
assert max_len == batch[0].shape[1]

Shape of transform input: torch.Size([500])
Shape of transform input: torch.Size([500])
Shape of transform input: torch.Size([500])
Shape of transform input: torch.Size([500])
Shape of transform input: torch.Size([500])
Shape of transform input: torch.Size([500])
Shape of transform input: torch.Size([500])
Shape of transform input: torch.Size([500])
Shape of transform input: torch.Size([500])
Shape of transform input: torch.Size([500])
Shape of transform input: torch.Size([500])
Shape of transform input: torch.Size([500])
Shape of transform input: torch.Size([500])
Shape of transform input: torch.Size([500])
Shape of transform input: torch.Size([500])
Shape of transform input: torch.Size([500])
8 torch.Size([8, 500])


# Export

In [176]:
!python notebook2script.py DataBlock.ipynb

Converted DataBlock.ipynb to nb_DataBlock.py
