In [2]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [3]:
import torch


In [74]:
import IPython.display as ipd
from pathlib import Path
from multiprocessing import Pool

import librosa
import numpy as np

from fastai.imports import *
from fastai.transforms import compose

from data_loading_utils import load_audio_files, read_file, write_file
from helpers import pad_to_longest

**Note**

This notebook writes transformed copies of the audio files to disk; they are used when the `use_tfms` flag is set on `AudioFilesDataset` in `helpers.py`.

It didn't seem to improve performance significantly, though it might be worth experimenting with later on.


In [11]:
# Dataset root and the folder the source clips are read from.
PATH = Path('data')
SRC_PATH = PATH.joinpath('audio_train_24000')

# Sample rate in Hz; RandomPitchShift uses it as its default.
sample_rate = 24000

In [81]:
# Folder the transformed copies are written to.
DST_PATH = PATH/'audio_train_24000_tsfms'
# Create it up front so a fresh run has a directory to list and write into;
# `exist_ok=True` makes re-running this cell a no-op.
DST_PATH.mkdir(exist_ok=True)

In [82]:
# Number of entries currently in the output folder.
sum(1 for _ in DST_PATH.iterdir())

94730

In [56]:
class RandomTimeStretch():
    """Randomly speed up or slow down an audio signal.

    Each call draws a stretch rate uniformly between `min_rate` and `max_rate`
    and hands it to `librosa.effects.time_stretch`.
    """
    def __init__(self, min_rate=0.7, max_rate=1.3):
        self.min_rate = min_rate
        self.max_rate = max_rate
    def __call__(self, x, y=None):
        """Return `(stretched_x, y)`; `y` is passed through untouched."""
        rate = np.random.uniform(self.min_rate, self.max_rate)
        # `rate` is keyword-only in librosa >= 0.10; the keyword form also works
        # on older releases and matches how RandomPitchShift calls librosa.
        x = librosa.effects.time_stretch(x, rate=rate)
        return x, y

In [57]:
class RandomPitchShift():
    """Augmentation that pitch-shifts audio by a random, possibly fractional, number of steps."""
    def __init__(self, sample_rate=sample_rate, max_steps=3):
        self.sample_rate, self.max_steps = sample_rate, max_steps
    def __call__(self, x, y=None):
        """Shift `x` by a uniform draw between -max_steps and +max_steps; `y` is returned as-is."""
        limit = self.max_steps
        shift = np.random.uniform(-limit, limit)
        return librosa.effects.pitch_shift(x, sr=self.sample_rate, n_steps=shift), y

In [58]:
class Transforms():
    """Callable wrapper around a list of transforms, applied through fastai's `compose`."""
    def __init__(self, tfms):
        self.tfms = tfms
    def __repr__(self):
        # Show the wrapped list itself so the pipeline is readable in cell output.
        return str(self.tfms)
    def __call__(self, x, y=None):
        """Hand `x`, the optional `y`, and the stored transforms to `compose` and return its result."""
        result = compose(x, y, self.tfms)
        return result

In [59]:
# Load the training metadata and take the entry labelled 1 as a listening example.
train = pd.read_csv(PATH / 'train.csv')
fn1 = train['fname'][1]

# read_file hands back the samples together with their sample rate.
file, sr = read_file(fn1, SRC_PATH)
(file.shape, sr)

((247681,), 24000)

In [61]:
# Listen to the untouched clip.
ipd.Audio(data=file, rate=sr)

In [62]:
# Pipeline used for every generated copy: random time stretch, then random pitch shift.
tfms = Transforms([
    RandomTimeStretch(),
    RandomPitchShift(),
])

In [63]:
# Run the example clip through the pipeline once and listen to the result.
file2 = tfms(file)
ipd.Audio(data=file2, rate=sr)

In [75]:
# All training file names as an array.
fnames = train['fname'].values

In [76]:
# One (file name, copy index) pair per output file: 10 copies of every clip.
vals = [
    (fname, i)
    for fname in fnames
    for i in range(10)
]
len(vals)

94730

In [78]:
def write_tsfm_file(fn_i, src_path=SRC_PATH, dst_path=DST_PATH, sr=sr):
    """Write one randomly transformed copy of an audio file, unless it already exists.

    fn_i: `(fname, i)` pair; the output is named `<fname minus its last 4 chars>_<i>.wav`.
    src_path: folder the source file is read from.
    dst_path: folder that is checked for an existing output and written to.
    sr: sample rate passed on to `write_file`.
    """
    fn, i = fn_i
    fn_out = f'{fn[:-4]}_{i}.wav'  # drops the last 4 characters (presumably a '.wav' suffix)
    if not (dst_path/fn_out).exists():
        file, _ = read_file(fn, src_path)
        ft = tfms(file)  # module-level transform pipeline
        # Write into the same folder that was just checked. Previously this used the
        # global DST_PATH, so a caller-supplied `dst_path` was ignored for the write.
        write_file(ft, fn_out, path=dst_path, sample_rate=sr)

In [79]:
tfms_per_file = 10
processes = 8

# Forked worker processes start with an identical copy of NumPy's global RNG state,
# so without re-seeding, different workers draw the same "random" stretch/pitch
# values. `np.random.seed()` with no argument re-seeds from OS entropy; running it
# as the pool initializer gives every worker its own random stream.
with Pool(processes=processes, initializer=np.random.seed) as pool:
    total_tfms = tfms_per_file * len(fnames)
    # Lazily yield one (file name, copy index) task per output file.
    fname_index_gen = ((fname, i) for fname in fnames for i in range(tfms_per_file))
    
    with tqdm(total=total_tfms) as pbar:
        # Results are not needed; iterate only to advance the progress bar per finished task.
        for _ in pool.imap_unordered(write_tsfm_file, fname_index_gen):
            pbar.update()

100%|██████████| 94730/94730 [1:28:35<00:00, 17.82it/s]


In [80]:
# Sanity check: entries in the output folder once generation has finished.
sum(1 for _ in DST_PATH.iterdir())

94730