In [8]:
from pathlib import Path
from tqdm import tqdm
import librosa
import numpy as np

# MFCCs
def preemphasis(signal, coeff=0.97):
    """Apply a first-order pre-emphasis filter to `signal`.

    The output is ``y[0] = x[0]`` and ``y[n] = x[n] - coeff * x[n-1]`` for
    ``n >= 1``.

    Parameters
    ----------
    signal : array_like
        Input samples. Converted with ``np.asarray``, so plain Python
        sequences are accepted as well as NumPy arrays.
    coeff : float, optional
        Pre-emphasis coefficient (default 0.97).

    Returns
    -------
    np.ndarray
        The filtered samples, flattened by ``np.append``. An empty input
        yields an empty array.
    """
    signal = np.asarray(signal)
    # Slice with [:1] instead of indexing with [0] so that an empty signal
    # produces an empty result rather than raising IndexError.
    return np.append(signal[:1], signal[1:] - coeff * signal[:-1])


def extract_MFCCs(in_dir):
    """Extract MFCC + delta + delta-delta features for every WAV file under `in_dir`.

    Each ``*.wav`` file found recursively below `in_dir` is loaded at its
    native sample rate, pre-emphasised, and converted to 13 MFCCs computed
    from 24 mel bands with a 25 ms window and a 10 ms hop. First- and
    second-order deltas are stacked next to the static coefficients.

    Parameters
    ----------
    in_dir : str or pathlib.Path
        Directory that is searched recursively for ``*.wav`` files.

    Returns
    -------
    dict
        Maps each file's stem (file name without extension) to an array with
        one row per frame and 39 columns (13 MFCCs, 13 deltas, 13
        delta-deltas).

    Raises
    ------
    ValueError
        If two files share the same stem; they would otherwise overwrite each
        other in the returned dict.
    """
    # Coerce so that a plain string path works as well as a Path object.
    paths = sorted(Path(in_dir).rglob("*.wav"))

    # Features are keyed by stem, so files with the same name in different
    # sub-directories would collide. Detect this before doing any audio work.
    stem_to_path = {}
    for path in paths:
        if path.stem in stem_to_path:
            raise ValueError(
                f"Duplicate utterance name {path.stem!r}: "
                f"{stem_to_path[path.stem]} and {path}"
            )
        stem_to_path[path.stem] = path

    features = {}
    for path in tqdm(paths, desc="Extracting MFCCs"):
        # sr=None keeps the file's native sample rate instead of resampling.
        wav, sr = librosa.load(path, sr=None)
        wav = preemphasis(wav, coeff=0.97)

        mfcc = librosa.feature.mfcc(
            y=wav, sr=sr, n_mfcc=13, n_mels=24,
            n_fft=int(np.floor(0.025*sr)),       # 25 ms analysis window
            hop_length=int(np.floor(0.01*sr)),   # 10 ms frame shift
            fmin=64,
            # Never ask for mel bands above the Nyquist frequency; for audio
            # sampled at 16 kHz or more this is still 8000 Hz.
            fmax=min(8000, sr / 2),
        )
        # NOTE(review): delta() is called with its default window width; very
        # short clips may have too few frames for it - confirm if such files
        # can occur.
        mfcc_delta = librosa.feature.delta(mfcc)
        mfcc_delta_delta = librosa.feature.delta(mfcc_delta)
        # Transpose to (frames, coefficients) and place the three streams
        # side by side.
        features[path.stem] = np.hstack([mfcc.T, mfcc_delta.T, mfcc_delta_delta.T])

    return features
    
# Root directory that extract_MFCCs searches recursively for ``*.wav`` files.
in_dir = Path("data/librispeech-wav")
# Run the extraction over the whole tree; the resulting dict is keyed by
# each file's stem and holds one feature matrix per file.
mfcc_feat_dict = extract_MFCCs(in_dir)

Extracting MFCCs: 100%|██████████| 2703/2703 [00:24<00:00, 108.87it/s]


In [12]:
# Persist every feature matrix as ``<key>.npy`` inside the output directory,
# creating the directory (and any missing parents) first.
out_dir = Path("features/mfccs/")
out_dir.mkdir(parents=True, exist_ok=True)
for utt_key, utt_feats in mfcc_feat_dict.items():
    np.save(out_dir / f"{utt_key}.npy", utt_feats)