Import libraries

In [1]:
import numpy
import librosa
import librosa.display
import matplotlib.pyplot as plotlib
import os
import statistics
import functools as f

Set up the data structure that holds the sound categories and their sound files

In [2]:
# sound categories
sound_cats = [
    "mic_friction",
    "breathing",
    "mumbles",
    "long_hi_pitched",
]

# Top-level container: one (initially empty) dict per sound category.
sound = dict()
for category in sound_cats:
    sound.update({category: dict()})

Define a function that analyzes a sound file and extracts the data needed for a log spectrogram, mel spectrogram, chromagram, and MFCCs

In [5]:
def getSoundInfo(file_path, n_fft=2048, hop_length=32, win_length=1024, n_mels=128, n_chroma=12, n_mfcc=13):
    """Load one audio file and compute the data behind four representations.

    Parameters
    ----------
    file_path : str
        Path of the audio file, handed to ``librosa.load``.
    n_fft, hop_length, win_length : int
        STFT settings. They are forwarded only to the ``librosa.stft`` call
        that produces ``magsDB``; the mel spectrogram and chromagram calls
        below do not receive them.
    n_mels : int
        Number of mel bands requested from ``librosa.feature.melspectrogram``.
    n_chroma : int
        Number of chroma bins requested from ``librosa.feature.chroma_stft``.
    n_mfcc : int
        Number of coefficients requested from ``librosa.feature.mfcc``.

    Returns
    -------
    dict
        ``instAmps`` : samples returned by ``librosa.load``
        ``sr``       : sample rate returned by ``librosa.load``
        ``magsDB``   : STFT magnitudes converted with ``amplitude_to_db``
        ``melDB``    : mel spectrogram converted with ``power_to_db``
        ``chroma``   : chromagram of the harmonic part of the signal
        ``mfcc``     : MFCCs derived from ``melDB``
    """
    # mono=True / sr=None are passed explicitly so the load settings do not
    # depend on librosa's defaults.
    instAmps, sr = librosa.load(file_path, mono=True, sr=None)

    # Log spectrogram: drop the phase (abs) and express the magnitudes in dB,
    # using the largest magnitude as the reference.
    magsAndPhases = librosa.stft(instAmps, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
    mags = numpy.abs(magsAndPhases)
    magsDB = librosa.amplitude_to_db(mags, ref=numpy.max)

    # Mel spectrogram in dB, using the largest value as the reference.
    melInfo = librosa.feature.melspectrogram(y=instAmps, sr=sr, n_mels=n_mels)
    melDB = librosa.power_to_db(melInfo, ref=numpy.max)

    # Only the harmonic component feeds the chromagram, so request just that
    # instead of running the full harmonic/percussive split and discarding
    # the percussive signal.
    instAmpsHarm = librosa.effects.harmonic(instAmps)
    chroma = librosa.feature.chroma_stft(y=instAmpsHarm, sr=sr, n_chroma=n_chroma)

    # MFCCs are computed from the dB-scaled mel spectrogram, not the raw audio.
    mfcc = librosa.feature.mfcc(S=melDB, n_mfcc=n_mfcc)

    return { "instAmps": instAmps, "sr": sr, "magsDB": magsDB, "melDB": melDB, "chroma": chroma, "mfcc": mfcc }

In [6]:
# Try the extractor on one file from the mic_friction folder; as the cell's
# last expression, the returned dict is what the notebook displays below.
getSoundInfo('./audio/categories/mic_friction/00.wav')

{'chroma': array([[0.8269854 , 0.8573789 , 0.65579335, ..., 0.89052603, 0.8719003 ,
         0.41002861],
        [0.82737227, 0.81259764, 0.60778725, ..., 0.9420317 , 0.92189722,
         0.52817684],
        [0.83149248, 0.74982588, 0.64539651, ..., 0.97400558, 0.97054673,
         0.66319684],
        ...,
        [0.84302425, 0.76443797, 0.85562974, ..., 0.92555478, 0.83425181,
         0.47111582],
        [0.79367264, 0.83619172, 0.79356521, ..., 0.86328526, 0.80761876,
         0.39089898],
        [0.87889296, 0.97900286, 0.83266899, ..., 0.8251621 , 0.79988718,
         0.34817376]]),
 'instAmps': array([0., 0., 0., ..., 0., 0., 0.], dtype=float32),
 'magsDB': array([[-64.97078 , -64.97002 , -64.96423 , ..., -47.804718, -46.69642 ,
         -46.27717 ],
        [-67.28287 , -67.19441 , -66.95633 , ..., -47.020477, -46.74076 ,
         -46.61455 ],
        [-77.966446, -76.44974 , -73.809326, ..., -47.787758, -48.564438,
         -48.91045 ],
        ...,
        [-80.      , -