In [None]:
import os
import pathlib

import matplotlib.pyplot as plt
from src.config.defaults import *

from src.utils.utils_functions import *

# Run the rest of the notebook from the project working directory.
# PATH_WORK_DIR is not defined in this cell; presumably it arrives through one
# of the wildcard imports above -- TODO confirm.
os.chdir(PATH_WORK_DIR)

# IPython autoreload extension in mode 2: modules are reloaded before each cell
# executes, so edits to imported project code apply without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
# Count the entries inside every instrument folder under config.path_irmas_train
# and show the counts as a bar chart, largest count first.

# Start every known instrument at zero so that instruments without a folder
# still show up in the chart.
instruments_size = {k: 0 for k in INSTRUMENT_TO_IDX.keys()}
instrument_dirs = [f.path for f in os.scandir(config.path_irmas_train) if f.is_dir()]

for instrument_dir in instrument_dirs:
    # The folder name is the instrument key. os.path.basename is used instead of
    # Path(...).stem: `os` is imported explicitly in this notebook, and a folder
    # name containing a dot is kept intact.
    instrument_name = os.path.basename(instrument_dir)
    # os.listdir counts every directory entry, not only audio files.
    num_of_files = len(os.listdir(instrument_dir))
    instruments_size[instrument_name] = num_of_files

instrument_frequency = sorted(instruments_size.items(), key=lambda x: x[1], reverse=True)

plt.figure(figsize=(15, 5), facecolor="w")
plt.bar(
    [INSTRUMENT_TO_FULLNAME[f[0]] for f in instrument_frequency],
    [f[1] for f in instrument_frequency],
)
# A figure should be readable on its own when the notebook is skimmed.
plt.title("Entries per instrument folder")
plt.xlabel("Instrument")
plt.ylabel("Number of files")
# Render explicitly so the cell output is the chart, not the repr of plt.bar().
plt.show()

In [None]:
import torch
import torchaudio
import torchaudio.functional as F
import librosa

# Two example clips, taken from the "vio" and "gac" folders of the training set.
audio_path1 = Path(config.path_irmas_train) / "vio/001__[vio][nod][cou_fol]2194__1.wav"
audio_path2 = Path(config.path_irmas_train) / "gac/[gac][cla]0518__1.wav"

# Decode both clips as mono at 16 kHz; the sample rate returned by librosa.load
# is discarded into `_`.
(audio1, _), (audio2, _) = (
    librosa.load(clip_path, sr=16_000, mono=True)
    for clip_path in (audio_path1, audio_path2)
)

# Stack the two waveforms row-wise into one tensor.
audio = torch.vstack([torch.tensor(audio1), torch.tensor(audio2)])
audio.shape

# Mel spectrogram of each clip, using the project-wide hop length and FFT size.
spec1, spec2 = (
    librosa.feature.melspectrogram(
        y=waveform,
        sr=16_000,
        hop_length=DEFAULT_HOP_LENGTH,
        n_fft=DEFAULT_N_FFT,
    )
    for waveform in (audio1, audio2)
)
spec1.shape

# Batch of spectrograms; the shape on the last line is the cell's output.
specs = torch.stack([torch.tensor(spec1), torch.tensor(spec2)])
specs.shape




In [None]:

from src.config.defaults import *

def spectrogram_batchify(
    spectrograms: np.ndarray | torch.Tensor | list[np.ndarray | torch.Tensor],
) -> np.ndarray:
    """Send one or multiple spectrograms and return them in npy form"""
    if isinstance(spectrograms, list):
        spectrograms = np.array(spectrograms)
    if isinstance(spectrograms, torch.Tensor):
        spectrograms = spectrograms.detach().numpy()
    if not isinstance(spectrograms, np.ndarray):
        assert False, "Invalid type"
    if len(spectrograms.shape) == 2:
        spectrograms = [spectrograms]
    elif len(spectrograms.shape) > 3:
        assert False, "spectrograms has to be 1D or 2D (batch)"
    return spectrograms


def plot_spectrograms(
    spectrograms: np.ndarray | torch.Tensor | list[np.ndarray | torch.Tensor],
    sr=DEFAULT_SAMPLING_RATE,
    titles: list[str] | None = None,
    type="mel",
    hop_length=DEFAULT_HOP_LENGTH,
    n_fft=DEFAULT_N_FFT,
):
    """Plot one or more spectrograms in dB on a grid of subplots.

    Each spectrogram is converted with `librosa.power_to_db` (referenced to its
    own maximum) and drawn with `librosa.display.specshow` plus a colorbar.

    Args:
        spectrograms: A single spectrogram or a batch; normalised through
            `spectrogram_batchify`.
        sr: Sampling rate forwarded to `specshow`.
        titles: One title per spectrogram, or None for untitled subplots.
        type: Frequency-axis type, forwarded to `specshow` as `y_axis`.
        hop_length: Hop length forwarded to `specshow`.
        n_fft: FFT size forwarded to `specshow`.

    Raises:
        AssertionError: If `titles` is given and its length differs from the
            number of spectrograms.
    """
    spectrograms = spectrogram_batchify(spectrograms)
    batch_size = len(spectrograms)
    if titles is not None and len(titles) != batch_size:
        # Explicit raise instead of `assert False`, which `python -O` strips.
        raise AssertionError("There should be n titles or None")

    # Near-square grid, without the trailing rows that would stay empty.
    n_cols = math.ceil(math.sqrt(batch_size))
    n_rows = math.ceil(batch_size / n_cols) if n_cols else 0

    plt.figure(figsize=(13, 9))
    for i, spec in enumerate(spectrograms):
        plt.subplot(n_rows, n_cols, i + 1)
        spec_db = librosa.power_to_db(spec, ref=np.max)

        img = librosa.display.specshow(
            spec_db,
            y_axis=type,
            x_axis="time",
            sr=sr,
            hop_length=hop_length,
            n_fft=n_fft,
        )
        plt.title(titles[i] if titles is not None else "")
        plt.colorbar(img, format="%+2.f dB")
    plt.tight_layout()
    plt.show()
    
def audios_to_mel_spectrograms(
    audio: np.ndarray | torch.Tensor | list[np.ndarray | torch.Tensor],
    sr=DEFAULT_SAMPLING_RATE,
    hop_length=DEFAULT_HOP_LENGTH,
    n_fft=DEFAULT_N_FFT,
):
    """Compute a mel spectrogram for one waveform or for each waveform of a batch.

    Args:
        audio: A single 1D waveform, a 2D batch of waveforms, or a list of
            waveforms. NumPy arrays and torch tensors are accepted, also mixed
            within one list.
        sr: Sampling rate passed to `librosa.feature.melspectrogram`.
        hop_length: Hop length passed to `melspectrogram`.
        n_fft: FFT size passed to `melspectrogram`.

    Returns:
        The per-waveform spectrograms stacked along a new first axis.

    Raises:
        AssertionError: If the input type is unsupported, or the converted
            array is neither 1D nor 2D.
    """
    if isinstance(audio, list):
        # Convert tensors element by element so that tensors which require grad
        # or live on another device are handled before NumPy sees them.
        audio = np.array(
            [
                item.detach().cpu().numpy() if isinstance(item, torch.Tensor) else item
                for item in audio
            ]
        )
    if isinstance(audio, torch.Tensor):
        audio = audio.detach().cpu().numpy()
    if not isinstance(audio, np.ndarray):
        # Explicit raise instead of `assert False`, which `python -O` strips.
        raise AssertionError("Invalid type")
    if audio.ndim == 1:
        # Treat a single waveform as a batch of one.
        audio = audio[np.newaxis, :]
    elif audio.ndim != 2:
        raise AssertionError("Audio has to be 1D or 2D (batch)")
    spectrograms = [
        librosa.feature.melspectrogram(y=waveform, sr=sr, hop_length=hop_length, n_fft=n_fft)
        for waveform in audio
    ]
    return np.stack(spectrograms)


def audio_melspectrogram_plot(
    audio: np.ndarray | torch.Tensor | list[np.ndarray | torch.Tensor],
    sr=DEFAULT_SAMPLING_RATE,
    titles: list[str] | None = None,
):
    """Turn raw audio into mel spectrograms and plot them in one step.

    Args:
        audio: Waveform(s) handed to `audios_to_mel_spectrograms`.
        sr: Sampling rate used for both the conversion and the plot.
        titles: Optional subplot titles handed to `plot_spectrograms`.
    """
    plot_spectrograms(
        audios_to_mel_spectrograms(audio, sr),
        sr=sr,
        titles=titles,
    )

In [None]:
# Waveform batch -> mel spectrograms -> plot.
audio_melspectrogram_plot(audio)

# Plot the precomputed spectrograms. `specs` holds the clip from the "vio"
# folder first and the clip from the "gac" folder second, so the titles follow
# that order. (The original `titles=])` was a syntax error.)
plot_spectrograms(spectrograms=specs, titles=["vio", "gac"])