In [None]:
import librosa
import numpy as np
from math import floor

def compute_modulation_spectrum(audio_path):
    '''
    Compute the log-scaled (dB) mel-spectrogram of a fixed-length excerpt of an audio file.

    The file is loaded as a 12 kHz signal and brought to exactly 29.12 s
    (349440 samples): shorter clips are zero-padded at the end, longer clips
    are cropped around their centre. With a hop of 256 samples this yields
    1366 time frames, so the result has shape (1, 1, 96, 1366), where
    96 == #mel-bins and 1366 == #time frames.

    Parameters
    ----------
    audio_path: str
        Path to the audio file. Any format supported by audioread will work.

    Returns
    -------
    ret: np.ndarray
        The mel-spectrogram in dB relative to its own maximum, with two
        leading singleton axes prepended: (1, 1, n_mels, n_frames).
    '''
    # Spectrogram parameters
    SR = 12000
    N_FFT = 512
    N_MELS = 96
    HOP_LEN = 256
    DURA = 29.12  # to make it 1366 frames

    # The returned sample rate is not needed: the signal is resampled to SR.
    src, _ = librosa.load(audio_path, sr=SR)
    n_sample = src.shape[0]
    n_sample_fit = int(DURA * SR)

    if n_sample < n_sample_fit:
        # Pad with zeros of the signal's own dtype; default float64 zeros would
        # silently upcast the audio and change the dtype of the returned
        # spectrogram for short clips only.
        padding = np.zeros(n_sample_fit - n_sample, dtype=src.dtype)
        src = np.concatenate((src, padding))
    elif n_sample > n_sample_fit:
        # Keep the n_sample_fit samples centred in the clip.
        start = (n_sample - n_sample_fit) // 2
        src = src[start:start + n_sample_fit]

    melgram = librosa.feature.melspectrogram(y=src, sr=SR, hop_length=HOP_LEN, n_fft=N_FFT, n_mels=N_MELS)
    # ref=np.max expresses every bin in dB relative to the loudest bin.
    ret = librosa.power_to_db(melgram, ref=np.max)
    ret = ret[np.newaxis, np.newaxis, :]

    return ret


def compute_modulation_spectrum_multiframe(audio_path, all_song=True):
    '''
    Compute log-scaled (dB) mel-spectrograms for consecutive segments of a song.

    The file is loaded as a 12 kHz signal and split into non-overlapping
    segments of 29.12 s (349440 samples). Each segment is converted to a
    96-bin mel-spectrogram in dB relative to that segment's own maximum.
    Samples left over after the last full segment are discarded. A clip
    shorter than one segment is zero-padded at the end to a single segment.

    Parameters
    ----------
    audio_path: str
        Path to the audio file. Any format supported by audioread will work.
    all_song: bool
        If True, process the entire song. If False, skip the first and last
        20 seconds before segmenting. When the clip is too short for that
        trim to leave any audio (40 s or less), the trim is skipped and the
        whole clip is used instead.

    Returns
    -------
    ret: np.ndarray
        Array of shape (n_segments, 1, 96, 1366) with n_segments >= 1.
    '''
    # Spectrogram parameters
    SR = 12000
    N_FFT = 512
    N_MELS = 96
    HOP_LEN = 256
    DURA = 29.12  # to make it 1366 frames
    DURA_TRASH = 0 if all_song else 20

    # The returned sample rate is not needed: the signal is resampled to SR.
    src, _ = librosa.load(audio_path, sr=SR)
    n_sample_fit = int(DURA * SR)
    n_sample_trash = int(DURA_TRASH * SR)

    # Remove the trash at the beginning and at the end, but only when audio
    # remains afterwards. Trimming a too-short clip would leave an empty
    # signal and a negative length, which breaks the padding below.
    if n_sample_trash > 0 and src.shape[0] > 2 * n_sample_trash:
        src = src[n_sample_trash:src.shape[0] - n_sample_trash]
    n_sample = src.shape[0]

    if n_sample < n_sample_fit:
        # Pad with zeros of the signal's own dtype so the result dtype matches
        # the multi-segment branch (float64 zeros would upcast the audio).
        padding = np.zeros(n_sample_fit - n_sample, dtype=src.dtype)
        segments = [np.concatenate((src, padding))]
    else:
        # `>=` on purpose: a clip of exactly one segment yields one frame
        # instead of falling through and returning an empty array.
        n_segments = n_sample // n_sample_fit
        segments = [
            src[i * n_sample_fit:(i + 1) * n_sample_fit]
            for i in range(n_segments)
        ]

    frames = []
    for segment in segments:
        melgram = librosa.feature.melspectrogram(y=segment, sr=SR, hop_length=HOP_LEN, n_fft=N_FFT, n_mels=N_MELS)
        # ref=np.max expresses every bin in dB relative to the loudest bin.
        frame = librosa.power_to_db(melgram, ref=np.max)
        frames.append(frame[np.newaxis, np.newaxis, :])

    # Stack once at the end instead of re-concatenating inside the loop.
    return np.concatenate(frames, axis=0)


In [2]:
# Colab-only cell: attach the user's Google Drive to the runtime's filesystem
# at /content/drive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
