# Audio Data Block

In [509]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [510]:
#Export

#Internal dependencies
from exp.nb_AudioCommon import * 
from exp.nb_DataAugmentation import * 

In [511]:
#Export

#External dependencies
import mimetypes
from fastai.vision import *
from fastai.text import *
import torchaudio
from torchaudio import transforms

#for jupyter Display
from IPython.display import Audio

## Sample data for test

In [512]:
# Standard path notation for fast.ai
# The files willbe saved on $HOME/.fastai/data/timit/
path = getFastAiWorkingDirectory('timit')

Working directory: /home/jupyter/.fastai/data/timit


## Data Block classes

In [513]:
DataBunch

fastai.basic_data.DataBunch

### AudioItem
This is the base class of fast.ai Audio tht holds a reference to and AudioData object.

In [514]:
#Export        
class AudioItem(ItemBase):
    def __init__(self, data:AudioData, **kwargs):
        self.data = data # Always flatten out to single dimension signal!
        self.kwargs = kwargs

    @property
    def sig(self):
        return self.data[0]
    
    @property
    def sr(self):
        return self.data[1]
    
    def apply_tfms(self, tfms):
        for tfm in tfms:
            self.data = tfm(self.data)
        return self

    def __str__(self):
        return self.data.__str__()
    
    def __len__(self): return self.data.__len__()
    
    def _repr_html_(self): return self.data._repr_html_()
    
    def show(self, title:Optional[str]=None, **kwargs):
        "Show sound on `ax` with `title`, using `cmap` if single-channel, overlaid with optional `y`"
        return self.data.show(title=title)

    def hear(self, title=None):
        return self.data.hear(title=title)

    @property
    def shape(self):
        return self.data.shape
    
    @property
    def ipy_audio(self):
        return self.data.ipy_audio

    @property
    def duration(self): return self.data.duration    


In [515]:
def test_AudioItem_method_test():
    signal,samplerate = torchaudio.load(str(path/'TRAIN/DR1/MDPK0/SA1.WAV'))
    a = AudioItem(AudioData(signal,samplerate))
    # TODO: Real asserts here
    a.duration
    str(a)
    a.shape
    len(a)
    a.show()
    a.hear()
    a.apply_tfms([])
    a.ipy_audio
    display(a)
    return a

test_AudioItem_method_test()

In [516]:
def test_AudioItem_create_from_data():
    signal,samplerate = torchaudio.load(str(path/'TRAIN/DR1/MDPK0/SA1.WAV'))
    a = AudioItem(AudioData(signal,samplerate))
    assert 1 == len(a.data.sig.shape), 'Single dimension data'
    assert a.data.sig.shape[0] > 100, 'Has data'
    assert 16000 == a.data.sr
    display(a)

test_AudioItem_create_from_data()

In [517]:
def test_AudioItem_create_from_audio_file():
    a = AudioItem(AudioData.load(str(path/'TRAIN/DR1/MDPK0/SA1.WAV')))
    assert 1 == len(a.data.sig.shape), 'Single dimension data'
    assert a.data.sig.shape[0] > 100, 'Has data'
    assert 16000 == a.data.sr
    display(a)
    
test_AudioItem_create_from_audio_file()

## AudioDataBunch

In [518]:
#Export
class AudioDataBunch(DataBunch):
    def hear_ex(self, rows:int=3, ds_type:DatasetType=DatasetType.Valid, **kwargs):
        batch = self.dl(ds_type).dataset[:rows]
        self.train_ds.hear_xys(batch.x, batch.y, **kwargs)

## AudioSequenceItem

For sequences of data (for use with models like RNNs), we need a separate AudioItem. This Item organizes data into sequences of `AudioData`.

In [519]:
# p = getFastAiWorkingDirectory('timit')
# wavs = glob.glob(str(p/'**/*.WAV'), recursive=True)
# sig,sr = torchaudio.load(wavs[0])
# sig = sig.squeeze()
# AudioSequenceItem((sig, sr))

In [520]:
#Export        
class AudioSequenceItem(ItemBase):
    def __init__(self,
                 sig_and_sr,
                 sample_len=401,
                 stride_len=200,
                 max_seqs=20,
                 **kwargs):
        # chopping up one signal item to [0,1,2], [1,2,3], [2,3,4]...
        chopped = []
        sig,sr=sig_and_sr
        numOfChunks = ((sig.shape[0]-sample_len)//stride_len)+1
        
        for i in range(0,numOfChunks*stride_len, stride_len):
            if (len(chopped) >= max_seqs): break
            chop = sig[i:i+sample_len].clone()
            chopped.append(chop)

        self.data = (chopped, sr)
        self.kwargs = kwargs
    
    @property
    def sr(self):
        return self.data[1]
    
    @property
    def seq(self):
        return self.data[0]
    
    def __str__(self):
        return f'Length: {len(self.seq)} | Shape: {self.seq[0].shape} | Sample Rate: {self.sr}'
    
    @property
    def size(self):
        return self.data[0].size()
    
#     def __len__(self): return self.data.__len__()
    
#     def _repr_html_(self): return self.data._repr_html_()
    
#     def show(self, title:Optional[str]=None, **kwargs):
#         "Show sound on `ax` with `title`, using `cmap` if single-channel, overlaid with optional `y`"
#         return self.data.show(title=title)

#     def hear(self, title=None):
#         return self.data.hear(title=title)

    def apply_tfms(self, tfms):
        modified = self.data
        for tfm in tfms:
            modified = tfm(modified)
        return modified

    @property
    def shape(self):
        return self.seq[0].shape
    
#     @property
#     def ipy_audio(self):
#         return self.data.ipy_audio

#     @property
#     def duration(self): return self.data.duration    


## AudioSequenceList

In [521]:
#Export

def _maybe_squeeze(arr): return (arr if is1d(arr) else np.squeeze(arr))

class AudioSequenceList(ItemList):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
    
    @classmethod
    def from_df(cls, df:DataFrame, path:PathOrStr='.', **kwargs)->'ItemList':
        "Create an `ItemList` in `path` from the inputs in the `cols` of `df`."
        inputs = df.iloc[:,] # FIX ME
        assert inputs.isna().sum().sum() == 0, f"You have NaN values in column(s) of your dataframe, please fix it." 
        res = cls(items=_maybe_squeeze(inputs.values), path=path, inner_df=df, **kwargs)
        return res

In [522]:
signal,sr = torchaudio.load(str(path/'TRAIN/DR1/MDPK0/SA1.WAV'))
seq = torch.cat([signal, signal, signal])
seq_item = AudioSequenceItem((seq, sr)); seq_item

IndexError: list index out of range

## AudioSequenceDataBunch

In [523]:
#Export
def reorder_for_rnn(t):
    t = t[:,None,:]
    return t

In [524]:
#Export
class AudioSequenceDataBunch(DataBunch):
    @classmethod
    def from_df(cls,
                path:PathOrStr,
                train_df:DataFrame,
                valid_df:DataFrame,
                audio_cols=[],
                **kwargs) -> DataBunch:
        src = ItemLists(path, AudioSequenceList.from_df(train_df, path), AudioSequenceList.from_df(valid_df, path))
        # TODO: toggle classifier or LM here (toggle labels)
        labeled = src.label_const(0)
        extract_seq = lambda x: x[0]
        tfms = [[extract_seq], [extract_seq]]
        labeled.transform(tfms)
        return labeled.databunch(**kwargs)

### AudioList
This class is responsible to contain a list of AudioItem.

In [525]:
#Export
class AudioList(ItemList):
    _bunch = AudioDataBunch
    
    # TODO: __REPR__    
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
            
    def get(self, i):
        item = self.items[i]
        if isinstance(item, (Path, str)):
            return AudioItem(AudioData.load(str(item)))
        if isinstance(item, (tuple, np.ndarray)): #data,sr 
            return AudioItem(AudioData(item[0],item[1]))
        print('Format not supported!', file=sys.stderr)
        raise

    def reconstruct(self, t:Tensor): return Image(t.transpose(1,2))

    def hear_xys(self, xs, ys, **kwargs):
        for x, y in zip(xs, ys): x.hear(title=y, **kwargs)
            
    # TODO: example with from_folder
    @classmethod
    def from_folder(cls, path:PathOrStr='.', extensions:Collection[str]=None, **kwargs)->ItemList:
        extensions = ifnone(extensions, AUDIO_EXTENSIONS)
        return super().from_folder(path=path, extensions=extensions, **kwargs)

In [526]:
def test_AudioList_from_df_file_names():
    import glob
    #Create Data Frame
    df = pd.DataFrame(glob.glob(str(path/'**/*.WAV'), recursive=True)[:10])
    df.columns = ['FileName']
    display(df.head())

    #Crete AudioList
    ils = AudioList.from_df(df, path, cols=['FileName'])
    
    #Test a item
    i=5
    print(f'FileName: {df.FileName[i]}')
    a = ils.get(i)
    print(a.data.sig.shape, a.data.sr)
    display(a)

test_AudioList_from_df_file_names()

Unnamed: 0,FileName
0,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...
1,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...
2,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...
3,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...
4,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...


FileName: /home/jupyter/.fastai/data/timit/TEST/DR1/FAKS0/SX43.WAV
torch.Size([39220]) 16000


In [527]:
def test_AudioList_from_folder():
    p = path/'TRAIN'

    #Create AudioList
    ils = AudioList.from_folder(p)
    
    #Test an item
    i=4
    a = ils.get(i)
    print(a.data.sig.shape, a.data.sr)
    display(a)

test_AudioList_from_folder()

torch.Size([37991]) 16000


In [528]:
def test_AudioList_from_df_data_and_sr():
    import glob
    #Create Data Frame
    df = pd.DataFrame(glob.glob(str(path/'**/*.WAV'), recursive=True)[:10])
    df.columns = ['FileName']
    df['SampleAndSr']=df['FileName'].apply(lambda n: torchaudio.load(n))
    #df = df['tmp'].drop()
    
    display(df.head())

    #Create AudioList
    ils = AudioList.from_df(df, path, cols=['SampleAndSr'])
    
    #Test a item
    i=4
    print(f'FileName: {df.FileName[i]}')
    a = ils.get(i)
    print(a.data.sig.shape, a.data.sr)
    display(a)

test_AudioList_from_df_data_and_sr()

Unnamed: 0,FileName,SampleAndSr
0,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...,"([[tensor(0.0002), tensor(-0.0002), tensor(-3...."
1,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...,"([[tensor(6.1035e-05), tensor(0.0002), tensor(..."
2,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...,"([[tensor(9.1553e-05), tensor(0.0002), tensor(..."
3,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...,"([[tensor(9.1553e-05), tensor(6.1035e-05), ten..."
4,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...,"([[tensor(3.0518e-05), tensor(0.0002), tensor(..."


FileName: /home/jupyter/.fastai/data/timit/TEST/DR1/FAKS0/SX223.WAV
torch.Size([49562]) 16000


### Sample AudioDataBunch Usage

In [529]:
import glob

def process_phn_file(p_file, sig, sr, delimiter=' '):
    df = pd.read_csv(p_file, delimiter=delimiter, header=None)
    df.columns = ['Start', 'End', 'Phn']
    df['SampleAndSr'] = df.apply(lambda x : (sig[-1][x['Start']: x['End']], sr), axis=1)
    return df

def create_phn_df(path, count=100):
    phns = []
    final = pd.DataFrame()

    for phn_file in glob.glob(str(path/'**/*.PHN'), recursive=True)[:count]:
        sig,sr = torchaudio.load(str(phn_file.replace('PHN', 'WAV')))
        df = process_phn_file(phn_file, sig, sr, delimiter=' ')
        df['Source'] = phn_file
        final = final.append(df, ignore_index=True)
    return df

df = create_phn_df(path/'TRAIN')
df.head()

Unnamed: 0,Start,End,Phn,SampleAndSr,Source
0,0,2040,h#,"([tensor(-0.0004), tensor(-3.0518e-05), tensor...",/home/jupyter/.fastai/data/timit/TRAIN/DR1/MRW...
1,2040,2631,w,"([tensor(3.0518e-05), tensor(0.0001), tensor(0...",/home/jupyter/.fastai/data/timit/TRAIN/DR1/MRW...
2,2631,3258,ih,"([tensor(0.0015), tensor(-0.0002), tensor(-0.0...",/home/jupyter/.fastai/data/timit/TRAIN/DR1/MRW...
3,3258,3509,nx,"([tensor(0.0076), tensor(0.0072), tensor(0.005...",/home/jupyter/.fastai/data/timit/TRAIN/DR1/MRW...
4,3509,6015,ao,"([tensor(0.0397), tensor(0.0306), tensor(0.022...",/home/jupyter/.fastai/data/timit/TRAIN/DR1/MRW...


In [530]:
step0 = AudioList.from_df(df, path, cols=['SampleAndSr']); print(type(step0))

<class '__main__.AudioList'>


In [531]:
step0

AudioList (21 items)
REPRESENTATION,REPRESENTATION,REPRESENTATION,REPRESENTATION,REPRESENTATION
Path: /home/jupyter/.fastai/data/timit

In [532]:
step1 = step0.split_by_rand_pct(0.1, seed=1);
print(f'Result type: {type(step1)}, Type of Items: {type(step1.lists[0])}')

Result type: <class 'fastai.data_block.ItemLists'>, Type of Items: <class '__main__.AudioList'>


In [533]:
step2 = step1.label_from_df('Phn');
print(f'Result type: {type(step2)}')

Result type: <class 'fastai.data_block.LabelLists'>


In [534]:
#Export
def get_audio_transforms(spectro:bool=False,
                         white_noise:bool=True,
                         modulate_volume:bool=True,
                         random_cutout:bool=True,
                         pad_with_silence:bool=True,
                         pitch_warp:bool=True,
                         down_and_up:bool=True,
                         mx_to_pad:int=1000,
                         xtra_tfms:Optional[Collection[Transform]]=None,
                         **kwargs)->Collection[Transform]:
    "Utility func to easily create a list of audio transforms."
    res = []
    if white_noise: res.append(partial(tfm_add_white_noise, noise_scl=0.005, **kwargs))
    if modulate_volume: res.append(partial(tfm_modulate_volume, lower_gain=.1, upper_gain=1.2, **kwargs))
    if random_cutout: res.append(partial(tfm_random_cutout, pct_to_cut=.15, **kwargs))
    if pad_with_silence: res.append(partial(tfm_pad_with_silence, pct_to_pad=.15, min_to_pad=None, max_to_pad=None, **kwargs))
    if pitch_warp: res.append(partial(tfm_pitch_warp, shift_by_pitch=None, bins_per_octave=12, **kwargs))
    if down_and_up: res.append(partial(tfm_down_and_up, sr_divisor=2, **kwargs))
    res.append(partial(tfm_pad_to_max, mx=mx_to_pad))
    final_transform = tfm_extract_signal
    if spectro: final_transform = tfm_spectro
    res.append(final_transform)
    #       train                   , valid
    return (res + listify(xtra_tfms), [partial(tfm_pad_to_max, mx=mx_to_pad), final_transform])


In [535]:
batch_size = 8

In [536]:
tfms = get_audio_transforms()
step3 = step2.transform(tfms);
print(type(step3))

<class 'fastai.data_block.LabelLists'>


In [537]:
step4 = step3.databunch(bs=batch_size);
print(type(step4))

<class '__main__.AudioDataBunch'>


In [538]:
step4 = step3.databunch(bs=batch_size);
step4;

In [539]:
max_len = 1000
datablock = step4
batch = datablock.one_batch()
print(len(batch[0]), batch[0].shape)
assert batch_size == len(batch[0])
assert max_len == batch[0].shape[1]

8 torch.Size([8, 1000])


# Export

In [540]:
!python notebook2script.py DataBlock.ipynb

Converted DataBlock.ipynb to nb_DataBlock.py
