In [None]:
! pip install -U pip
! pip install -U torch==1.5.1
! pip install -U torchaudio==0.5.1
! pip install -U matplotlib==3.2.1
! pip install -U clearml>=0.16.1
! pip install -U tensorboard==2.2.1

In [None]:
import os
import torch
import torchaudio
from torch.utils.tensorboard import SummaryWriter
import matplotlib.pyplot as plt

from clearml import Task

%matplotlib inline

In [None]:
# Initialise the ClearML task for this notebook.
task = Task.init(project_name='Audio Example', task_name='data pre-processing')

# Connect the default settings to the task so clearml can override them;
# the object returned by connect() is the configuration used from here on.
configuration_dict = task.connect({'number_of_samples': 3})

# Show the configuration actually in effect (after override in remote mode).
print(configuration_dict)

In [None]:
# TensorBoard writer used by the cells below; event files go under ./tensorboard_logs.
tensorboard_writer = SummaryWriter(log_dir='./tensorboard_logs')

In [None]:
# Make sure the dataset directory exists. makedirs(exist_ok=True) replaces the
# isdir()/mkdir() pair, so there is no gap between the check and the creation.
os.makedirs('./data', exist_ok=True)

# YESNO dataset rooted at ./data, with download enabled.
yesno_data = torchaudio.datasets.YESNO('./data', download=True)

In [None]:
def plot_signal(signal, title, cmap=None):
    """Show ``signal`` in a new matplotlib figure.

    A one-dimensional signal is drawn as a line plot; anything else is
    rendered as an image with ``imshow``.

    Args:
        signal: Array-like object exposing ``ndim``.
        title: Title placed on the figure.
        cmap: Colormap forwarded to ``imshow``; not used for line plots.
    """
    plt.figure()
    is_image = signal.ndim != 1
    if is_image:
        plt.imshow(signal, cmap=cmap)
    else:
        plt.plot(signal)
    plt.title(title)
    plt.show()

In [None]:
# Every clip is resampled to this rate before the mel spectrogram is computed.
fixed_sample_rate = 22050

# These transforms do not depend on the individual clip, so they are built once
# instead of on every loop iteration.
melspectogram_transform = torchaudio.transforms.MelSpectrogram(sample_rate=fixed_sample_rate, n_mels=128)
amplitude_to_db_transform = torchaudio.transforms.AmplitudeToDB()

# Clamp to the dataset size so a configuration value larger than the number of
# available clips cannot index past the end of the dataset.
number_of_samples = min(configuration_dict.get('number_of_samples', 3), len(yesno_data))

for n in range(number_of_samples):
    audio, sample_rate, labels = yesno_data[n]
    tensorboard_writer.add_audio('Audio samples/{}'.format(n), audio, n, sample_rate)

    # The resampler depends on the clip's own sample rate, so it stays inside the loop.
    resample_transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=fixed_sample_rate)

    # Resample, then average over dim 0 (kept as a size-1 dimension).
    audio_mono = torch.mean(resample_transform(audio), dim=0, keepdim=True)
    plot_signal(audio_mono[0, :], 'Original waveform')

    melspectogram = melspectogram_transform(audio_mono)
    plot_signal(melspectogram.squeeze().numpy(), 'Mel spectogram', 'hot')
    plot_signal(amplitude_to_db_transform(melspectogram).squeeze().numpy(), 'Mel spectogram DB', 'hot')

# Write out any audio events still pending in the writer.
tensorboard_writer.flush()