In [1]:
import importlib
import torch
import torchaudio
import librosa

import torch.utils.benchmark as benchmark

import prev_versions.transforms_prev as transforms_prev
import transforms
importlib.reload(transforms)
import functional
importlib.reload(functional)
import utils.local_fairseq as local_fairseq

from utils.plots import plot_spectrogram

2023-02-23 19:44:21 | INFO | fairseq.tasks.text_to_speech | Please install tensorboardX: pip install tensorboardX


In [None]:
# Load 'audio_data/lex_6.wav' and compute a librosa mel spectrogram from the
# first row of the loaded tensor (the first channel with torchaudio's default
# channels-first layout).
# NOTE(review): the next load cell rebinds torch_data, sr and librosa_mel
# before anything reads them — confirm this cell is still needed.
torch_data, sr = torchaudio.load('audio_data/lex_6.wav')
librosa_mel = librosa.feature.melspectrogram(
    y=torch_data[0].numpy(),
    sr=sr,
    n_fft=2048,
    win_length=1024,
)

# Benchmark

In [None]:
# Benchmark input: load 'audio_data/lex_30.wav' and turn the first row of the
# waveform tensor into a mel spectrogram with librosa. `librosa_mel` is the
# array every timer below is fed (directly or wrapped in a torch tensor).
torch_data, sr = torchaudio.load('audio_data/lex_30.wav')
mel_kwargs = dict(sr=sr, n_fft=2048, win_length=1024)
librosa_mel = librosa.feature.melspectrogram(y=torch_data[0].numpy(), **mel_kwargs)

### SpecAugment

In [None]:
# Build the three SpecAugment implementations that are timed against each other.
# NOTE(review): the settings below are not matched across implementations
# (warp width 150 vs 50 vs 50, different mask counts and mask sizes) — confirm
# this is intended before reading the timings as a like-for-like comparison.

# SpecAugment from prev_versions.transforms_prev.
spec_prev_cfg = dict(
    time_warp_w=150,
    freq_mask_n=2,
    freq_mask_f=10,
    time_mask_n=3,
    time_mask_t=50,
    time_mask_p=1.0,
)
torch_spec_prev = transforms_prev.SpecAugment(**spec_prev_cfg)

# SpecAugment from the current transforms module.
spec_cfg = dict(
    warp_axis=1,
    warp_w=50,
    freq_mask_num=0,
    freq_mask_param=10,
    freq_mask_p=1.0,
    time_mask_num=5,
    time_mask_param=50,
    time_mask_p=1.0,
)
torch_spec = transforms.SpecAugment(**spec_cfg)

# SpecAugmentTransform from utils.local_fairseq.
fairseq_cfg = dict(
    time_warp_w=50,
    freq_mask_n=2,
    freq_mask_f=50,
    time_mask_n=2,
    time_mask_t=10,
    time_mask_p=1.0,
)
fairseq_spec = local_fairseq.SpecAugmentTransform(**fairseq_cfg)

In [None]:
# Number of timed executions per implementation.
n_runs = 500

# Inputs: the torch transforms get tensors built from librosa_mel (the current
# version with an extra leading dimension); the fairseq transform gets
# librosa_mel itself.
torch_tensor = torch.tensor(librosa_mel)
torch_tensor_3d = torch.tensor(librosa_mel).unsqueeze(0)

# Inside this timer the name `torch_spec` is bound to the previous version.
t0 = benchmark.Timer(
    label='* SPECAUGMENT WITH TORCH TIMESTRETCH',
    stmt='augmented = torch_spec(mel)',
    globals=dict(mel=torch_tensor, torch_spec=torch_spec_prev),
)

t1 = benchmark.Timer(
    label='* SPECAUGMENT WITH TORCH INTERPOLATE',
    stmt="augmented = torch_spec(mel)",
    globals=dict(mel=torch_tensor_3d, torch_spec=torch_spec),
)

t2 = benchmark.Timer(
    label='* SPECAUGMENT WITH FAIRSEQ',
    stmt='augmented = fairseq_spec(mel)',
    globals=dict(mel=librosa_mel, fairseq_spec=fairseq_spec),
)

# Run the timers one after another, in the same order as they were defined.
res0, res1, res2 = (timer.timeit(n_runs) for timer in (t0, t1, t2))



In [None]:
# Show the three SpecAugment measurements in the order they were taken.
for measurement in (res0, res1, res2):
    print(measurement)


<torch.utils.benchmark.utils.common.Measurement object at 0x10cef3160>
* SPECAUGMENT WITH TORCH TIMESTRETCH
  4.48 ms
  1 measurement, 500 runs , 1 thread
<torch.utils.benchmark.utils.common.Measurement object at 0x108047490>
* SPECAUGMENT WITH TORCH INTERPOLATE
  461.11 us
  1 measurement, 500 runs , 1 thread
<torch.utils.benchmark.utils.common.Measurement object at 0x14cd3ff70>
* SPECAUGMENT WITH FAIRSEQ
  337.90 us
  1 measurement, 500 runs , 1 thread


### Warp Axis

In [None]:
# Time the two axis-warping helpers from `functional` on the same spectrogram:
# warp_axis_torch is given a torch tensor, warp_axis_cv2 is given librosa_mel
# as is. Both are called with the same trailing arguments (1, 200).
t0 = benchmark.Timer(
    label='* WARP AXIS WITH TORCH INTERPOLATE',
    stmt='augmented = warp_axis_torch(mel, 1, 200)',
    globals=dict(
        mel=torch.tensor(librosa_mel),
        warp_axis_torch=functional.warp_axis_torch,
    ),
)

t1 = benchmark.Timer(
    label='* WARP AXIS WITH CV2 RESIZE',
    stmt='augmented = warp_axis_cv2(mel, 1, 200)',
    globals=dict(
        mel=librosa_mel,
        warp_axis_cv2=functional.warp_axis_cv2,
    ),
)

# 5000 executions each, torch first.
res0 = t0.timeit(5000)
res1 = t1.timeit(5000)

In [None]:
# Show both warp-axis measurements, torch first and cv2 second.
print(res0, res1, sep='\n')

<torch.utils.benchmark.utils.common.Measurement object at 0x150b03160>
* WARP AXIS WITH TORCH INTERPOLATE
  435.73 us
  1 measurement, 5000 runs , 1 thread
<torch.utils.benchmark.utils.common.Measurement object at 0x291f165c0>
* WARP AXIS WITH CV2 RESIZE
  283.40 us
  1 measurement, 5000 runs , 1 thread


### torch.nn.functional.interpolate vs cv2.resize

In [None]:
# Shape of the spectrogram after adding two leading singleton dimensions, i.e.
# the 4-D input later handed to torch's interpolate. It is computed from
# librosa_mel directly because `tensor_mel` is only assigned in a later cell,
# so referencing it here fails on a fresh kernel (Restart & Run All).
torch.tensor(librosa_mel)[(None,)*2].shape

torch.Size([1, 1, 128, 2584])

In [None]:
# cv2.resize is referenced at notebook scope below, but cv2 was only imported
# inside the Timer's `setup` string; import it here so the cell runs on a
# fresh kernel.
import cv2

# Target size, read as (height, width); both timers take it from here instead
# of repeating the literal inside their statements.
new_sz = (800, 800)

t0 = benchmark.Timer(
    stmt='resize(mel, dsize=dsize, interpolation=cv2.INTER_LINEAR)',
    # Kept so that `cv2.INTER_LINEAR` resolves inside the timed statement.
    setup="import cv2",
    label="* CV2 RESIZE",
    # cv2.resize expects dsize as (width, height), hence the reversal.
    globals={"mel": librosa_mel, "resize": cv2.resize, "dsize": new_sz[::-1]})


# torch's interpolate needs leading batch and channel dimensions, so two
# singleton axes are prepended to the 2-D spectrogram.
tensor_mel = torch.tensor(librosa_mel)
tensor_mel = tensor_mel[(None,)*2]
t1 = benchmark.Timer(
    stmt = "interpolate(mel, size=size, mode='bilinear')",
    label="* TORCH INTERPOLATE",
    globals={"mel": tensor_mel, "interpolate": torch.nn.functional.interpolate, "size": new_sz})

# 5000 executions each, cv2 first.
res0 = t0.timeit(5000)
res1 = t1.timeit(5000)

In [None]:
# Show the resize measurements: cv2 first, then torch.
for measurement in (res0, res1):
    print(measurement)

<torch.utils.benchmark.utils.common.Measurement object at 0x105f13040>
* CV2 RESIZE
setup: import cv2
  173.95 us
  1 measurement, 5000 runs , 1 thread
<torch.utils.benchmark.utils.common.Measurement object at 0x294197700>
* TORCH INTERPOLATE
  373.30 us
  1 measurement, 5000 runs , 1 thread
