# Audio Sequence Data Block

Use of the fastai DataBlock API with audio sequences.

In [9]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
#Export

#Internal dependencies
from exp.nb_AudioCommon import * 
from exp.nb_DataBlock import *
from exp.nb_DataAugmentation import * 

In [11]:
#Export

#External dependencies
import mimetypes
from fastai.vision import *
from fastai.text import *
import torchaudio
from torchaudio import transforms

#for jupyter Display
from IPython.display import Audio

## Sample data for test

## AudioSequenceItem

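For sequence models (such as RNNs) we need a separate item type. `AudioSequenceItem` chops the signal of a single `AudioData` into a sequence of fixed-length windows; with the default settings (`sample_len=401`, `stride_len=200`) consecutive windows overlap.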

In [12]:
#Export        
class AudioSequenceItem(ItemBase):
    """An audio signal chopped into a sequence of fixed-length windows.

    Window ``k`` is ``data.sig[k*stride_len : k*stride_len + sample_len]``, so
    consecutive windows overlap whenever ``stride_len < sample_len``. Only
    complete windows are kept, and at most ``max_seqs`` of them.

    ``self.data`` is the tuple ``(windows, sample_rate)``; any extra keyword
    arguments are stored untouched in ``self.kwargs``.

    Parameters
    ----------
    data : AudioData
        Source audio; ``data.sig`` is sliced along its first dimension and
        ``data.sr`` is kept as the sample rate.
        NOTE(review): assumes ``data.sig`` is 1-D (samples only) - confirm.
    sample_len : int
        Number of samples in every window.
    stride_len : int
        Offset, in samples, between the starts of consecutive windows.
    max_seqs : int or None
        Upper bound on the number of windows; ``None`` means no bound.

    Raises
    ------
    ValueError
        If ``sample_len`` or ``stride_len`` is not strictly positive.
    """

    def __init__(self,
                 data:AudioData,
                 sample_len=401,
                 stride_len=200,
                 max_seqs=20,
                 **kwargs):
        if sample_len <= 0 or stride_len <= 0:
            raise ValueError(
                f'sample_len and stride_len must be positive, got {sample_len} and {stride_len}')

        # Number of complete windows that fit in the signal; a signal shorter
        # than `sample_len` yields zero windows (never a negative count).
        total = data.sig.shape[0]
        n_windows = max(0, (total - sample_len)//stride_len + 1)
        if max_seqs is not None:
            n_windows = min(n_windows, max(0, max_seqs))

        # chopping up one signal item to [0,1,2], [1,2,3], [2,3,4]...
        # Each window is cloned so it does not share storage with `data.sig`.
        chopped = [data.sig[start:start+sample_len].clone()
                   for start in range(0, n_windows*stride_len, stride_len)]

        self.data = (chopped, data.sr)
        self.kwargs = kwargs

    @property
    def sr(self):
        "Sample rate taken from the source `AudioData`."
        return self.data[1]

    @property
    def seq(self):
        "List of window tensors."
        return self.data[0]

    @property
    def num_seqs(self):
        "Number of windows in the sequence."
        return len(self.seq)

    def __str__(self):
        # An item built from a too-short signal has no window to describe;
        # report that instead of failing with an IndexError.
        if not self.seq:
            return f'Length: 0 | Shape: None | Sample Rate: {self.sr}'
        return f'Length: {len(self.seq)} | Shape: {self.seq[0].shape} | Sample Rate: {self.sr}'

    @property
    def size(self):
        "Size of the first window (raises `IndexError` when there are no windows)."
        return self.seq[0].size()

    def apply_tfms(self, tfms, **kwargs):
        """Feed `self.data` through every callable in `tfms`, in order, and return the result.

        The value returned is whatever the last transform produced (or
        `self.data` itself when `tfms` is empty or None), not a new item.
        Extra keyword arguments are accepted so the method can be called the
        way fastai calls `ItemBase.apply_tfms`; they are ignored.
        """
        modified = self.data
        for tfm in (tfms or []):
            modified = tfm(modified)
        return modified

    @property
    def shape(self):
        "Shape of the first window (raises `IndexError` when there are no windows)."
        return self.seq[0].shape

    @classmethod
    def from_file(cls, fp:Path, **kwargs):
        "Load the audio file `fp` and build an item from it; `kwargs` go to the constructor."
        # Use `cls` so that subclasses get an instance of their own type.
        return cls(AudioData.load(fp), **kwargs)

In [13]:
import glob
from fastai.basics import url2name, datapath4file, untar_data
# Location of the 'timit' data as resolved by fastai's `datapath4file`.
path = datapath4file('timit')
# Recursively collect every path below `path` that matches '*.WAV'.
wavs = glob.glob(str(path/'**/*.WAV'), recursive=True)
# Number of files found (displayed as the cell output).
len(wavs)

6300

In [14]:
def test_AudioSequenceItem_create_from_audio_file(f):
    "Build an `AudioSequenceItem` with default settings from the audio file `f` and check it."
    item = AudioSequenceItem(AudioData.load(f))
    windows = item.seq
    assert len(windows) >= 1, 'Has a sequence'
    assert windows[0].shape[0] > 100, 'Has data'
    assert item.sr == 16000, 'Has sample rate of default 16000'
    assert item.size[0] == 401, 'Sequence contains a tensor of default length'
    display(item)

test_AudioSequenceItem_create_from_audio_file(wavs[0])

AudioSequenceItem Length: 20 | Shape: torch.Size([401]) | Sample Rate: 16000

## AudioSequenceList

In [15]:
#Export

def _maybe_squeeze(arr):
    "Return `arr` as is when `is1d` accepts it, otherwise the result of `np.squeeze(arr)`."
    if is1d(arr):
        return arr
    return np.squeeze(arr)

class AudioSequenceList(ItemList):
    """`ItemList` whose items are taken from the columns of a DataFrame.

    NOTE(review): no `get` override is defined here, so turning an item into
    an `AudioSequenceItem` is left to whatever the base class / caller does.
    """

    @classmethod
    def from_df(cls, df:DataFrame, path:PathOrStr='.', cols=None, **kwargs)->'ItemList':
        """Create an `ItemList` in `path` from the inputs in the `cols` of `df`.

        `cols` may be a column name, a column position, or a collection of
        either. When it is None (the default) every column of `df` is used.
        Remaining `kwargs` are forwarded to the constructor.

        Raises `AssertionError` if the selected columns contain NaN values.
        """
        if cols is None:
            inputs = df.iloc[:, :]
        else:
            wanted = [cols] if isinstance(cols, (str, int)) else list(cols)
            # Names are translated to positions so that names and positions
            # can both be handled by a single `iloc` lookup.
            positions = [df.columns.get_loc(c) if isinstance(c, str) else c for c in wanted]
            inputs = df.iloc[:, positions]
        assert not inputs.isna().any().any(), "You have NaN values in column(s) of your dataframe, please fix it."
        # `_maybe_squeeze` drops the singleton column axis of a one-column selection.
        return cls(items=_maybe_squeeze(inputs.values), path=path, inner_df=df, **kwargs)

## AudioSequenceDataBunch

In [16]:
#Export
def _extract_seq(x):
    "Return the first element of `x`."
    # Module-level function rather than a lambda so that the transform list
    # can be pickled (lambdas cannot).
    return x[0]


class AudioSequenceDataBunch(DataBunch):
    "Factory for a `DataBunch` built from training and validation DataFrames."

    @classmethod
    def from_df(cls,
                path:PathOrStr,
                train_df:DataFrame,
                valid_df:DataFrame,
                audio_cols=None,
                **kwargs) -> DataBunch:
        """Build a `DataBunch` from `train_df` and `valid_df`.

        Both frames are wrapped with `AudioSequenceList.from_df`, every item is
        given the constant label 0, and `_extract_seq` is registered as the
        only transform for both the training and the validation set.

        `audio_cols` is accepted for interface compatibility but is not used.
        `kwargs` are forwarded to `databunch()`.
        """
        src = ItemLists(path,
                        AudioSequenceList.from_df(train_df, path),
                        AudioSequenceList.from_df(valid_df, path))
        # TODO: toggle classifier or LM here (toggle labels)
        labeled = src.label_const(0)
        # One transform list for the training set, one for the validation set.
        tfms = [[_extract_seq], [_extract_seq]]
        labeled.transform(tfms)
        return labeled.databunch(**kwargs)

In [17]:
def test_AudioList_from_df_file_names():
    "Build an `AudioList` from a DataFrame of file names and show one of its items."
    import glob
    # Data frame with the first ten wav paths in a single 'FileName' column
    wav_files = glob.glob(str(path/'**/*.WAV'), recursive=True)[:10]
    files_df = pd.DataFrame(wav_files)
    files_df.columns = ['FileName']
    display(files_df.head())

    # Create the AudioList from the file-name column
    audio_list = AudioList.from_df(files_df, path, cols=['FileName'])

    # Inspect a single item
    idx = 5
    print(f'FileName: {files_df.FileName[idx]}')
    item = audio_list.get(idx)
    print(item.data.sig.shape, item.data.sr)
    display(item)

test_AudioList_from_df_file_names()

Unnamed: 0,FileName
0,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...
1,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...
2,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...
3,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...
4,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...


FileName: /home/jupyter/.fastai/data/timit/TEST/DR1/FAKS0/SX43.WAV
torch.Size([39220]) 16000


In [18]:
def test_AudioList_from_folder():
    "Build an `AudioList` from the 'TRAIN' folder below `path` and show one of its items."
    train_dir = path/'TRAIN'

    # Create the AudioList from every file found in the folder
    audio_list = AudioList.from_folder(train_dir)

    # Inspect a single item
    idx = 4
    item = audio_list.get(idx)
    print(item.data.sig.shape, item.data.sr)
    display(item)

test_AudioList_from_folder()

torch.Size([37991]) 16000


In [19]:
def test_AudioList_from_df_data_and_sr():
    "Build an `AudioList` from a DataFrame column of `torchaudio.load` results and show one item."
    import glob
    # Data frame with the first ten wav paths plus what `torchaudio.load` returns for each
    wav_files = glob.glob(str(path/'**/*.WAV'), recursive=True)[:10]
    audio_df = pd.DataFrame(wav_files)
    audio_df.columns = ['FileName']
    audio_df['SampleAndSr'] = audio_df['FileName'].apply(torchaudio.load)

    display(audio_df.head())

    # Create the AudioList from the already-loaded data column
    audio_list = AudioList.from_df(audio_df, path, cols=['SampleAndSr'])

    # Inspect a single item
    idx = 4
    print(f'FileName: {audio_df.FileName[idx]}')
    item = audio_list.get(idx)
    print(item.data.sig.shape, item.data.sr)
    display(item)

test_AudioList_from_df_data_and_sr()

Unnamed: 0,FileName,SampleAndSr
0,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...,"([[tensor(0.0002), tensor(-0.0002), tensor(-3...."
1,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...,"([[tensor(6.1035e-05), tensor(0.0002), tensor(..."
2,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...,"([[tensor(9.1553e-05), tensor(0.0002), tensor(..."
3,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...,"([[tensor(9.1553e-05), tensor(6.1035e-05), ten..."
4,/home/jupyter/.fastai/data/timit/TEST/DR1/FAKS...,"([[tensor(3.0518e-05), tensor(0.0002), tensor(..."


FileName: /home/jupyter/.fastai/data/timit/TEST/DR1/FAKS0/SX223.WAV
torch.Size([49562]) 16000


# Export

In [20]:
!python notebook2script.py DataBlockSequence.ipynb

Converted DataBlockSequence.ipynb to nb_DataBlockSequence.py
