In [None]:
from collections import OrderedDict
import glob

import joypy
import librosa
import librosa.display
import scipy
from scipy import signal
from scipy.io import wavfile
import tftb

from util import *
from util import _unpack_input

# Configure plot sizing for the rest of the notebook
#   - NOTE(review): figsize is not imported by name, so it presumably comes from `from util import *`;
#     the exact meaning of aspect_ratio isn't visible here -- confirm against util
figsize(aspect_ratio=1/6)
# End the cell on None so the previous call's return value isn't echoed as cell output
None

In [None]:
# Load audio paths
#   - Globbed layout: {peterson_dir}/<species_code>/audio/<recording file>
#   - source is the path component just above <species_code> (i.e. the last component of peterson_dir)
#   - glob.glob returns matches in arbitrary order, so sort them to keep row order (and the [:5] dev slices
#     taken from this frame later on) stable across runs and filesystems
audio_paths = pd.DataFrame([
    OrderedDict(
        source=path.split('/')[-4],
        species_code=path.split('/')[-3],
        title=os.path.splitext(path.split('/')[-1])[0],
        path=path,
    )
    for path in sorted(glob.glob(f'{peterson_dir}/*/audio/*'))
])
# Fail with a readable message instead of a KeyError from the groupby below when nothing matched
assert len(audio_paths) > 0, f'No audio files found under {peterson_dir}/*/audio/*'
display(
    audio_paths[:5],
    audio_paths.groupby(['source', 'species_code']).count(),
)

In [None]:
# Load audio from paths
def _load_audio_col(df):
    """Load one audio object per row of `df` (from its `path` column), printing progress as it goes."""
    # Print about five progress lines over the whole frame (every row when there are five or fewer)
    report_every = np.ceil(len(df) / 5) or 1

    def _load(rec):
        # rec.name is the row's positional index, since the frame is re-indexed just before .apply
        if rec.name % report_every == 0:
            print(f'Loading audio {rec.name + 1}/{len(df)}: {rec.path}')
        return audiosegment.from_file(rec.path)

    return df.reset_index(drop=True).apply(_load, axis=1)

recs_2ch = (audio_paths
    [lambda df: df.species_code == 'wlswar']  # For faster dev
    .reset_index(drop=True)
    [:5]  # For faster dev
    .assign(audio=_load_audio_col)
)
recs_2ch.audio[:5]

In [None]:
def _to_mono(audio):
    """Resample `audio` down to a single channel at standard_sample_rate_hz."""
    # Merge stereo to mono so we don't get confused when handling samples (we don't care about stereo vs. mono)
    return audio.resample(channels=1, sample_rate_Hz=standard_sample_rate_hz)
    # audio.set_channels(1)  # TODO Any loss in fidelity by using .resample(channels=1)?

recs = (recs_2ch
    # Replace each recording's audio with its mono, standard-rate version
    .assign(audio=lambda df: df.audio.apply(_to_mono))
    # Materialize audio samples
    .assign(samples=lambda df: df.audio.map(lambda audio: audio.to_numpy_array()))
    # Keep the long path column out of the way, at the end
    .pipe(df_reorder_cols, last=['path'])
)
recs[:5]

In [None]:
# Short handles on the first recording, for easier dev (better autocomplete)
rec0 = recs.iloc[0]
audio0 = rec0['audio']

In [None]:
# Unpack the first recording's audio into the names the cells below work with
#   - `audio` feeds the spectro/melspectro helpers; `x` and `sample_rate` feed the librosa calls
#   - NOTE(review): _unpack_input is a private helper imported from util; the exact types it returns
#     aren't visible from this notebook -- confirm against util
rec, audio, x, sample_rate = _unpack_input(audio0)

In [None]:
# Re-import util's names (kept from the original cell; presumably to pick up edits to util during dev)
from util import *

# Spectrogram params shared by the cells below
nperseg = 1024         # Samples per STFT segment (also used as librosa's n_fft)
overlap = 3/4          # Fraction of each segment shared with the next one
n_mels = nperseg // 4  # Number of mel bands
# Scale factor applied to the scipy magnitude spectro before squaring, in the hand-rolled mel spectro
#   - NOTE(review): looks empirically chosen to line the result up with librosa's -- confirm derivation
magic = nperseg // 2
# Pass sr by keyword: librosa >= 0.10 rejects positional args here, and keywords work on older versions too
mel_basis = librosa.filters.mel(sr=sample_rate, n_fft=nperseg, n_mels=n_mels)
display(mel_basis.shape)

In [None]:
# A non-mel spectro, for comparison
# plt_spectro(audio, nperseg=nperseg, overlap=overlap); plt.show()

In [None]:
# Mel spectro via the util plotting helper -- should look like the reference mel spectros below
plt_melspectro(audio, nperseg=nperseg, overlap=overlap)
plt.show()

In [None]:
# Mel spectro via the util compute helper -- should look like the reference mel spectros below
f, t, S = melspectro(audio, nperseg=nperseg, overlap=overlap)
display(S.shape)

# Distribution of the spectro values (log-scaled counts)
plt.hist(S.flatten(), bins=100, log=True)
plt.show()

# The spectro itself, on raw index axes (t and f are not used here), with the frame hidden
plt.pcolormesh(S)
for spine in plt.gca().spines.values():
    spine.set_visible(False)
plt.show()

In [None]:
# This should match the reference mel spectros below
#   - Hand-rolled mel spectro: magnitude spectro -> rescale -> power -> mel filterbank -> dB
f, t, S = spectro(audio, nperseg=nperseg, overlap=overlap, scaling='spectrum', mode='magnitude')
display(S.shape)
S = S * magic  # Rescale magnitudes before squaring (magic is defined with the spectro params)
S = S**2  # Magnitude -> power
display(S.shape)
S = np.dot(mel_basis, S)  # Project the linear-frequency rows onto the mel bands
display(S.shape)
S = librosa.power_to_db(S)
display(S.shape)

# Distribution of the dB values (log-scaled counts)
plt.hist(S.flatten(), bins=100, log=True)
plt.show()

# Swap the linear frequency axis for n_mels mel-spaced frequencies over the same range
#   - fmin/fmax go by keyword: they are keyword-only in librosa >= 0.10, and keywords work on older versions too
display((f.min(), f.max()))
f = librosa.mel_frequencies(n_mels=n_mels, fmin=f.min(), fmax=f.max())
display((f.min(), f.max()))

plt.pcolormesh(t, f, S)
ax = plt.gca()
# NOTE(review): newer matplotlib renames these kwargs to base/linthresh/linscale -- confirm against the pinned version
ax.set_yscale(value='symlog', basey=2, linthreshy=1024, linscaley=.5)
ax.xaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%ds'))
ax.yaxis.set_major_formatter(mpl.ticker.FuncFormatter(lambda x, pos=None: '%.0fKiHz' % int(x / 1024)))
ax.set_ylabel('')
ax.set_xlabel('')
ax.tick_params(labelsize=8)
[s.set_visible(False) for s in ax.spines.values()]
plt.show()

In [None]:
# Reference: this is a "correct" mel spectro (with plt.pcolormesh instead of librosa.display.specshow)
n_fft = nperseg
hop_length = int(n_fft * (1 - overlap))  # Hop between frames implied by the segment overlap
# Args go by keyword: librosa >= 0.10 makes them keyword-only, and keywords work on older versions too
S = librosa.power_to_db(librosa.feature.melspectrogram(
    y=x.astype(float),
    sr=sample_rate,
    n_fft=n_fft,
    hop_length=hop_length,
    n_mels=n_mels,
    power=2,
))
display(S.shape)

# Distribution of the dB values (log-scaled counts)
plt.hist(S.flatten(), bins=100, log=True)
plt.show()

# The spectro itself, on raw index axes, with the frame hidden
plt.pcolormesh(S)
[s.set_visible(False) for s in plt.gca().spines.values()]
plt.show()

In [None]:
# Reference: this is a "correct" mel spectro (with proper axes)
#   - Relies on n_fft from the previous reference cell
hop_length = int(n_fft * (1 - overlap))  # Hop between frames implied by the segment overlap
# Args go by keyword: librosa >= 0.10 makes them keyword-only, and keywords work on older versions too
S = librosa.feature.melspectrogram(
    y=x.astype(float),
    sr=sample_rate,
    n_fft=n_fft,
    hop_length=hop_length,
    n_mels=n_mels,
    power=2,
)
display(S.shape)
S = librosa.power_to_db(S)
display(S.shape)

# Distribution of the dB values (log-scaled counts)
plt.hist(S.flatten(), bins=100, log=True)
plt.show()

# specshow derives its time and mel axes from sr/hop_length; without them it assumes its own defaults
# (22050 Hz, hop 512) and mislabels the axes for this audio, so pass the values actually used above
librosa.display.specshow(
    S,
    x_axis='time',
    y_axis='mel',
    sr=sample_rate,
    hop_length=hop_length,
    cmap=plt.get_cmap(),
)
ax = plt.gca()
# NOTE(review): newer matplotlib renames these kwargs to base/linthresh/linscale -- confirm against the pinned version
ax.set_yscale(value='symlog', basey=2, linthreshy=1024, linscaley=.5)
ax.xaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%ds'))
ax.yaxis.set_major_formatter(mpl.ticker.FuncFormatter(lambda x, pos=None: '%.0fKiHz' % int(x / 1024)))
# ax.yaxis.set_major_formatter(mpl.ticker.FuncFormatter(lambda x, pos=None: '%.0fHz' % x))
ax.set_ylabel('')
ax.set_xlabel('')
ax.tick_params(labelsize=8)
[s.set_visible(False) for s in ax.spines.values()]
plt.show()