In [None]:
import warnings
warnings.filterwarnings("ignore")

import os
import matplotlib.pyplot as plt
import pandas as pd
import torch
import yaml

from torch import multiprocessing
import torchaudio
import torchaudio.transforms as T


%matplotlib inline

In [None]:
# Release GPU memory that PyTorch is still caching from earlier work in this kernel.
torch.cuda.empty_cache()
# Make newly created float tensors default to CUDA float tensors.
torch.set_default_tensor_type('torch.cuda.FloatTensor')
# Worker processes are started with 'spawn'. force=True keeps this cell
# re-runnable: without it a second execution in the same kernel raises
# RuntimeError because the start method has already been set.
multiprocessing.set_start_method('spawn', force=True)

In [None]:
# Load the notebook configuration; later cells read dataset paths and
# spectrogram parameters from this `config` mapping.
# NOTE(review): yaml.Loader can construct arbitrary Python objects, so this is
# only safe while detector_config.yaml is a trusted, locally controlled file.
# If the file holds plain data only, yaml.safe_load would be sufficient.
with open('detector_config.yaml', 'r') as ymlfile:
    config = yaml.load(ymlfile, Loader=yaml.Loader)

In [None]:
# Build the dataset index in one chain:
#   1. read the JSON whose path is the concatenation of two config strings,
#   2. drop unlabeled files (dropna() removes every row that has a missing
#      value in any column),
#   3. add the binary target `y`: 1 for the 'whale+' label, 0 for anything else.
data = (
    pd.read_json(config['DATASET_ROOT'] + config['DATASET_JSON'])
    .dropna()
    .assign(y=lambda frame: frame['label'].eq('whale+').astype('int64'))
)

In [None]:
# Keep only the first five rows; every later cell that uses `data` works on
# this subset. NOTE(review): presumably a quick-run/debug limit - remove this
# cell to process the full dataset.
data = data.iloc[:5]

In [None]:
# Spectrogram transform configured from the YAML settings and moved to the GPU.
# The next cell assigns `transform` again, so when the notebook is run top to
# bottom this object is replaced before it is ever used.
transform = T.Spectrogram(**config['torch_spectrogram_params']).to('cuda')

In [None]:
# Mel-spectrogram transform moved to the GPU; this rebinding is the `transform`
# that the processing cell below passes to spec_transform. n_mels=180 matches
# the `:180` slice applied inside spec_transform.
# NOTE(review): if config['torch_melspec_params'] also defines `n_mels`, this
# call fails with a duplicate-keyword TypeError.
transform = T.MelSpectrogram(n_mels=180, **config['torch_melspec_params']).to('cuda')

In [None]:
def log_normalize(sxx):
    """Log-compress a spectrogram tensor and rescale it to the range [0, 1].

    The values are mapped linearly onto [1, 100], passed through ``log10`` and
    then min-max normalised once more.

    Args:
        sxx: non-empty tensor of spectrogram values.

    Returns:
        A tensor with the same shape as ``sxx``. A constant input
        (``max == min``) yields all zeros instead of the NaNs that the
        0/0 division would otherwise produce.
    """
    lo, hi = sxx.min(), sxx.max()
    if hi == lo:
        # No dynamic range at all (e.g. a silent clip): avoid dividing by zero.
        return torch.zeros_like(sxx)
    # logarithmic transformation mapping to [1..100]
    sxx = 99 * (sxx - lo) / (hi - lo) + 1
    sxx = torch.log10(sxx)
    return (sxx - sxx.min()) / (sxx.max() - sxx.min())  # map to [0..1]


def spec_transform(filename, transform, n_bins=180, n_skip=5, out_dir='./data/'):
    """Compute a normalised spectrogram for one audio file and save it as ``.pt``.

    Args:
        filename: audio file path relative to ``config['DATA_ROOT']``. The
            output file reuses this name with the extension replaced by ``.pt``.
        transform: callable applied to the waveform tensor after it has been
            moved to the GPU.
        n_bins: number of leading entries kept along the second axis of the
            transform output (presumably the frequency bins).
        n_skip: number of leading entries dropped along the last axis
            (presumably the first time frames).
        out_dir: directory the tensor is written to; created if missing.

    Returns:
        None. The result is written to disk as a CPU tensor.
    """
    samples, _ = torchaudio.load(os.path.join(config['DATA_ROOT'], filename))
    # NOTE(review): [::2] strides over the FIRST axis. With torchaudio's default
    # channels-first layout that keeps every other channel rather than every
    # other sample - confirm this is the intended behaviour.
    samples = samples[::2].cuda()
    sxx = log_normalize(transform(samples)[:, :n_bins, n_skip:])

    out_path = os.path.join(out_dir, os.path.splitext(filename)[0] + '.pt')
    out_parent = os.path.dirname(out_path)
    if out_parent:
        # torch.save does not create missing directories (a fresh checkout, or
        # a filename that contains sub-folders, would otherwise fail here).
        os.makedirs(out_parent, exist_ok=True)
    torch.save(sxx.cpu(), out_path)

In [None]:
# Generate and save one spectrogram per listed file. spec_transform is called
# purely for its side effect (writing a .pt file), so a plain loop is used
# instead of Series.apply: no throwaway Series of None is built and `_`, which
# IPython uses for the last cell output, is not rebound.
for audio_filename in data['filename']:
    spec_transform(audio_filename, transform)

In [None]:
# Pick one saved spectrogram for inspection. os.listdir returns entries in
# arbitrary order, so the listing is sorted (and restricted to .pt files) to
# make the choice reproducible across runs and machines.
spec_files = sorted(name for name in os.listdir('data') if name.endswith('.pt'))
if not spec_files:
    raise FileNotFoundError("no .pt spectrograms found in 'data'")
# Index 111 is kept as the preferred sample but clamped to the last available
# file, so the cell still works when only a few spectrograms were generated.
sample_index = min(111, len(spec_files) - 1)
X = torch.load(os.path.join('data', spec_files[sample_index]))
X.shape

In [None]:
# Display the value distribution of the loaded spectrogram as the cell output.
# No bin count is passed, so this relies on torch.histogram's default binning.
torch.histogram(X)

In [None]:
# Render the first slice along the leading axis of X as an image, reversed
# along its row axis so that row 0 is drawn at the bottom of the figure.
plt.imshow(X.cpu()[0].flip(0))
plt.show()