### Song Classifier

##### Date: 7/11/2024
##### Authors: Edwardia Fosah, Ye Yint Hmine, Zoe Granadoz, Bryan Wang, Manya Tandon

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
from microphone import record_audio
from IPython.display import Audio
from typing import Tuple
import librosa

from numba import njit
from scipy.ndimage.morphology import generate_binary_structure
from scipy.spatial.distance import cdist

%matplotlib inline

  from scipy.ndimage.morphology import generate_binary_structure


In [2]:
# Sampling rate in Hz used throughout this notebook: passed to librosa when
# loading songs, to the spectrogram as `Fs`, and used to size random clips.
SAMPLING_RATE = 44100

**Load audio file from database**

In [3]:
def load_music_file(file_path: str):
    """Read a song from disk as mono audio resampled to ``SAMPLING_RATE``.

    Parameters
    ----------
    file_path : str
        Location of the audio file to load

    Returns
    -------
    samples : np.ndarray
        The audio samples returned by ``librosa.load``

    rate : int
        The sampling rate reported by ``librosa.load`` (requested as
        ``SAMPLING_RATE``)
    """
    samples, rate = librosa.load(file_path, mono=True, sr=SAMPLING_RATE)
    return samples, rate

**Audio to integers**

In [4]:
def convert_mic_frames_to_audio(frames: np.ndarray) -> np.ndarray:
    """Converts frames taken from microphone to 16-bit integers

    Parameters
    ----------
    frames : np.ndarray
        Sequence of byte chunks recorded from a microphone. Each chunk is
        interpreted as packed 16-bit integers. May be empty.

    Returns
    -------
    numpy.ndarray, shape-(N,)
        All chunks decoded to ``np.int16`` and concatenated in order. An
        empty ``frames`` yields an empty ``np.int16`` array.
    """
    chunks = [np.frombuffer(chunk, np.int16) for chunk in frames]

    # np.hstack raises ValueError on an empty list, so a recording that
    # produced no frames is handled explicitly.
    if not chunks:
        return np.array([], dtype=np.int16)

    return np.hstack(chunks)

In [5]:
def record_from_mic(listen_time, path_name=""):
    """Record from the microphone, show a playback widget, optionally save.

    Helper for testing the whole model with a laptop microphone; it can also
    be used to produce test audio files quickly.

    Parameters
    ----------
    listen_time : float
        Passed straight to ``record_audio`` (presumably the recording
        duration in seconds -- confirm against the ``microphone`` module).

    path_name : str, optional
        If non-empty, the recording is written with ``np.save`` to this path
        as a single 1-D array whose first element is the sample rate and
        whose remaining elements are the audio samples.

    Returns
    -------
    None
    """
    # Local import: the file only imports `Audio` from IPython.display.
    from IPython.display import display

    frames, sample_rate = record_audio(listen_time)
    samples = convert_mic_frames_to_audio(frames)

    # A bare `Audio(...)` expression inside a function is discarded, so the
    # player never appears; it has to be displayed explicitly.
    display(Audio(samples, rate=sample_rate))

    if path_name:
        np.save(path_name, np.hstack((sample_rate, samples)))



*optional*

In [8]:
def make_random_clips(samples: np.ndarray, *, desired_length: int, count: int,
                      sampling_rate=None):
    """Takes audio samples and cuts {count} random clips of {desired_length} seconds.

    Parameters
    ----------
    samples: np.ndarray
        Array of audio samples

    desired_length: int
        Length of each clip in seconds

    count: int
        Total number of clips

    sampling_rate: int, optional
        Sampling rate of ``samples`` in Hz. Defaults to the notebook-wide
        ``SAMPLING_RATE``.

    Returns
    -------
    np.ndarray, shape-(count, N)
        2-D array with {count} clips, each ``N = desired_length * sampling_rate``
        samples long. Clips are contiguous slices of ``samples`` starting at
        uniformly random positions (drawn with the ``random`` module).

    Raises
    ------
    ValueError
        If ``count`` is negative, or the requested clip length is negative
        or longer than the audio itself.
    """
    import random

    if sampling_rate is None:
        sampling_rate = SAMPLING_RATE

    samples = np.asarray(samples)
    total_samples = len(samples)

    # Compute the clip length directly; going through duration / percentage
    # is a float round trip that can truncate one sample short.
    samples_per_clip = int(desired_length * sampling_rate)

    if count < 0:
        raise ValueError(f"count must be non-negative, got {count}")
    if not 0 <= samples_per_clip <= total_samples:
        raise ValueError(
            f"clip length of {samples_per_clip} samples does not fit in "
            f"audio of {total_samples} samples"
        )

    # Last valid start index is inclusive, so a clip as long as the whole
    # recording is allowed and the final samples can be selected.
    last_start = total_samples - samples_per_clip

    clips = np.empty((count, samples_per_clip) + samples.shape[1:],
                     dtype=samples.dtype)
    for row in range(count):
        start = random.randint(0, last_start)
        clips[row] = samples[start:start + samples_per_clip]

    return clips

**Plot Spectrogram**

In [None]:
def dig_samp_to_spec(samples):
    """Plot the magnitude spectrogram of digital audio samples.

    Parameters
    ----------
    samples : np.ndarray
        1-D array of audio samples, assumed to be sampled at
        ``SAMPLING_RATE`` Hz.

    Returns
    -------
    fig : matplotlib.figure.Figure
        The figure containing the spectrogram

    ax : matplotlib.axes.Axes
        The axes the spectrogram was drawn on, with the frequency axis
        limited to 0-10 kHz
    """
    fft_size = 4096

    fig, ax = plt.subplots()

    # Plot the function's own argument; the previous body referenced an
    # undefined name `data`. The spectrogram arrays returned by `specgram`
    # are not needed here, only the drawing side effect.
    ax.specgram(
        samples,
        NFFT=fft_size,
        Fs=SAMPLING_RATE,
        window=mlab.window_hanning,
        noverlap=fft_size // 2,
        mode='magnitude'
    )
    ax.set_ylim(0, 10000)
    ax.set_xlabel("time (sec)")
    ax.set_ylabel("frequency (Hz)")
    return fig, ax

**Find Peaks**

In [None]:
@njit
def _peaks(data_2d, rows, cols, amp_min):
    """
    A Numba-optimized 2-D peak-finding algorithm.

    A point is reported as a peak when its value is strictly greater than
    `amp_min` and no neighbor selected by the (rows, cols) offsets has a
    strictly larger value. Ties with neighbors therefore still count as peaks.

    Parameters
    ----------
    data_2d : numpy.ndarray, shape-(H, W)
        The 2D array of data in which local peaks will be detected.

    rows : numpy.ndarray, shape-(N,)
        The 0-centered row indices of the local neighborhood mask

    cols : numpy.ndarray, shape-(N,)
        The 0-centered column indices of the local neighborhood mask

    amp_min : float
        All amplitudes at and below this value are excluded from being local
        peaks.

    Returns
    -------
    List[Tuple[int, int]]
        (row, col) index pair for each local peak location, in the order
        visited (column by column, rows varying fastest).
    """
    peaks = []

    # iterate over the 2-D data in col-major order: the shape is reversed so
    # that ndindex yields (col, row) pairs with the row index varying fastest
    for c, r in np.ndindex(*data_2d.shape[::-1]):
        # values at or below the threshold can never be peaks
        if data_2d[r, c] <= amp_min:
            continue

        # compare the candidate against every offset in the neighborhood
        for dr, dc in zip(rows, cols):
            # skip the candidate itself
            if dr == 0 and dc == 0:
                continue

            # a neighbor that falls off the top/bottom edge is replaced by the
            # mirrored neighbor on the opposite side of the candidate
            # NOTE(review): if the mask's half-height exceeds the data extent,
            # the mirrored index may itself fall outside [0, H) -- confirm inputs
            if not (0 <= r + dr < data_2d.shape[0]):
                dr *= -1

            # same mirroring for neighbors past the left/right edge
            if not (0 <= c + dc < data_2d.shape[1]):
                dc *= -1

            # a strictly larger neighbor disqualifies the candidate
            if data_2d[r, c] < data_2d[r + dr, c + dc]:
                break
        else:
            # for/else: reached only when no neighbor triggered `break`
            peaks.append((r, c))
    return peaks

In [None]:
def local_peak_locations(data_2d, neighborhood, amp_min):
    """
    Locate the local peaks of a 2-D array (e.g. a spectrogram).

    The boolean `neighborhood` mask is converted into row/column offsets
    centered on zero, and `_peaks` is then used to find every point that
    exceeds `amp_min` and is not smaller than any neighbor within the mask.

    Parameters
    ----------
    data_2d : numpy.ndarray, shape-(H, W)
        The 2D array of data in which local peaks will be detected

    neighborhood : numpy.ndarray, shape-(h, w)
        A boolean mask indicating the "neighborhood" in which each
        datum will be assessed to determine whether or not it is
        a local peak. h and w must be odd-valued numbers

    amp_min : float
        All amplitudes at and below this value are excluded from being local
        peaks.

    Returns
    -------
    List[Tuple[int, int]]
        (row, col) index pair for each local peak location.

    Notes
    -----
    The local peaks are returned in column-major order.
    """
    mask_rows, mask_cols = np.where(neighborhood)

    mask_height = neighborhood.shape[0]
    mask_width = neighborhood.shape[1]

    # an odd-sized mask is required so that it has a well-defined center
    assert mask_height % 2 == 1
    assert mask_width % 2 == 1

    # shift the mask indices so that the center of the mask sits at (0, 0)
    row_offsets = mask_rows - mask_height // 2
    col_offsets = mask_cols - mask_width // 2

    return _peaks(data_2d, row_offsets, col_offsets, amp_min=amp_min)