In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

# Select a test signal


In [None]:
import librosa

# Use librosa's demo recording as the test signal.
# `librosa.util.example_audio_file()` was deprecated in librosa 0.8 and removed
# in later releases. On those versions fall back to `librosa.example`, whose
# 'vibeace' entry is the replacement example track (it may be downloaded on
# first use).
try:
    filename = librosa.util.example_audio_file()
except AttributeError:
    filename = librosa.example('vibeace')

# Load the signal

In [None]:
from utils import load_signal
from utils import preprocess_signal

# Load the recording: `y` holds the samples, `sr` the sampling rate
y, sr = load_signal(filename)

# Apply some light preprocessing
y = preprocess_signal(y)

# Plot the signal in the time domain
n = 256*256  # number of samples shown; reused later for the spectrogram excerpt
segment = y[:n]  # may hold fewer than n samples if the recording is short
# Build the time axis from the slice actually plotted so both arrays always
# have the same length (sample index -> milliseconds).
t = np.arange(len(segment)) / sr * 1000
plt.plot(t, segment)
plt.xlabel('Time [ms]');

In [None]:
from IPython.display import display, Audio

# Render an inline audio player for the preprocessed signal
player = Audio(data=y, rate=sr)
display(player)

# Define the main parameters
* The number of frequency channels for the STFT: `stft_channels`
* The hop size in time (in samples): `hop_size`
* The number of mel frequency bins: `n_mels`

In [None]:
from hparams import HParams

# Stick with the default parameters; using them is recommended.
# The trailing comments give the default values noted in the original notebook.
stft_channels, hop_size, n_mels = (
    HParams.stft_channels,  # 1024
    HParams.hop_size,       # 256
    HParams.n_mels,         # 80
)

# Build the time frequency system
The system will be used for the following operations:
* Compute the STFT
* Compute the spectrogram
* Compute the mel spectrogram

In [None]:
from stft import GaussTF

# Single time-frequency system, configured with the parameters chosen above
stft_system = GaussTF(
    stft_channels=stft_channels,
    hop_size=hop_size,
)

# Compute the mel spectrogram

In [None]:
# Spectrogram of the preprocessed signal, computed by the time-frequency system
# built above. NOTE(review): presumably laid out as (frequency, time), since the
# derived log-mel array is later sliced along axis 1 by frame count -- confirm in stft.GaussTF.
Y = stft_system.spectrogram(y)

In [None]:
from transforms import log_mel_spectrogram

# Sound energy is perceived on a logarithmic scale, which is why the
# log-scaled mel spectrogram is used rather than the linear one.
log_Y = log_mel_spectrogram(Y, stft_channels, n_mels)

In [None]:
plt.figure(dpi=200, figsize=(10,3))
# Show only the frames covering the first n samples (the excerpt plotted in the
# time domain): n samples correspond to n // hop_size frames.
plt.imshow(log_Y[:,:n//hop_size])
# Label the axes before adding the colorbar so the labels land on the image axes.
plt.xlabel('Time [frames]')
plt.ylabel('Mel bin')  # assumes rows of log_Y index mel bins -- confirm in transforms
# Trailing semicolon suppresses the Colorbar repr, as in the earlier plotting cell.
plt.colorbar();