In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# (Re)load the autoreload extension; mode 2 re-imports all modules before each
# cell is executed, so edits to library code are picked up without a restart.
%reload_ext autoreload
%autoreload 2

In [None]:
import os
import time

from paderbox.database.iterator import AudioReader
from paderbox.database.timit import Timit
from pytorch_sanity.transforms import Compose, Spectrogram
from torch.utils.data import DataLoader

In [None]:
# TIMIT database object built with default arguments; every benchmark below
# pulls its 'train' iterator from it.
db = Timit()

In [None]:
# Spectrogram settings shared by every benchmark below, so each variant does
# the same per-example work and only the loading strategy differs.
# NOTE(review): frame_step / frame_length / fft_length are presumably given in
# samples (i.e. 20 ms hop and 60 ms window at 16 kHz) -- confirm against the
# Spectrogram transform.
transform_config = {
    'sample_rate': 16000,
    'frame_step': 320,
    'frame_length': 960,
    'fft_length': 1024,
    'window': 'hann',
    'padded': False,
    'n_mels': 64,
    'log': True,
    'fmin': 20,
    'fmax': None,
}

# Serial

In [None]:
# Serial baseline: plain map() with no num_workers/backend, so no worker pool
# is requested and the transform is applied as the examples are iterated.
# This is the reference the parallel variants below are compared against.
it = db.get_iterator_by_names('train')
transform = Compose(AudioReader(), Spectrogram(config=transform_config))
it = it.map(transform)

In [None]:
%%timeit -n 1 -r 1
for i, ex in enumerate(it):
    pass

# Parallel Map with multiprocessing

In [None]:
# Parallel map on the "mp" (multiprocessing) backend: 8 workers, buffer of 100.
transform = Compose(AudioReader(), Spectrogram(config=transform_config))
it = db.get_iterator_by_names('train').map(
    transform,
    num_workers=8,
    buffer_size=100,
    backend="mp",
)

In [None]:
%%timeit -n 1 -r 1
for i, ex in enumerate(it):
    pass

# Parallel Map with multithreading

In [None]:
# Parallel map on the "t" (thread) backend: 8 workers, buffer of 100.
transform = Compose(AudioReader(), Spectrogram(config=transform_config))
it = db.get_iterator_by_names('train').map(
    transform,
    num_workers=8,
    buffer_size=100,
    backend="t",
)

In [None]:
%%timeit -n 1 -r 1
for i, ex in enumerate(it):
    pass

# Prefetch with multithreading

In [None]:
# Map without workers, then add parallelism afterwards via prefetch() on the
# "t" (thread) backend with 8 workers and buffer_size=100.
transform = Compose(AudioReader(), Spectrogram(config=transform_config))
mapped = db.get_iterator_by_names('train').map(transform)
it = mapped.prefetch(num_workers=8, buffer_size=100, backend="t")
del mapped  # keep the notebook namespace as it was: only `it` and `transform`

In [None]:
%%timeit -n 1 -r 1
for i, ex in enumerate(it):
    pass

# PyTorch DataLoader

## Serial

In [None]:
# DataLoader baseline: num_workers=0 loads the data in the main process.
# batch_size=1 together with an identity collate_fn hands each "batch" back
# as the one-element list of examples, so no collation work is added.
# (DataLoader is imported in the import cell at the top of the notebook.)
it = db.get_iterator_by_names('train')
transform = Compose(AudioReader(), Spectrogram(config=transform_config))
it = it.map(transform)
it = DataLoader(it, num_workers=0, batch_size=1, collate_fn=lambda x: x)

In [None]:
%%timeit -n 1 -r 1
for i, ex in enumerate(it):
    pass

## Parallel

In [None]:
# Parallel DataLoader: same pipeline as the DataLoader baseline, but with
# num_workers=8 so loading happens in 8 worker subprocesses.
# batch_size=1 with an identity collate_fn returns each batch as the
# one-element list of examples.
# (DataLoader is imported in the import cell at the top of the notebook.)
it = db.get_iterator_by_names('train')
transform = Compose(AudioReader(), Spectrogram(config=transform_config))
it = it.map(transform)
it = DataLoader(it, num_workers=8, batch_size=1, collate_fn=lambda x: x)

In [None]:
%%timeit -n 1 -r 1
for i, ex in enumerate(it):
    pass