## Extract 64 log-scaled Mel-spectrogram bands with Librosa (stored as `.mfcc.csv`)
- fft window size = 2048

In [None]:
import librosa
import pandas as pd  
import numpy as np
import os

from __future__ import print_function  # for Python 3-style printing

In [2]:
## Takes WAV file path and returns a pandas dataframe containing the log-scaled (dB, relative to peak power) Mel spectrogram, one row per frame

def wav_to_mfcc(audio_path):
    """Load an audio file and return its log-scaled mel spectrogram as a DataFrame.

    Despite the name, no cepstral (DCT) step is applied here: the numeric
    columns are 64 log-power mel bands computed with a 2048-sample FFT window.

    Parameters
    ----------
    audio_path : str
        Path to the audio file. The file name (the part after the last "/")
        is split on "|": the first piece becomes the 'Value' column and the
        last piece, minus a trailing .wav/.mp4/.mp3 extension, becomes the
        'Basename' column.

    Returns
    -------
    pandas.DataFrame
        64 numeric columns plus 'Value' and 'Basename'. Assuming librosa's
        documented (n_mels, frames) layout, each row is one analysis frame.
    """
    y, sr = librosa.load(audio_path)
    # Mel-scaled power (energy-squared) spectrogram.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=64, n_fft=2048)
    # Convert to log scale (dB), using the peak power as reference.
    # NOTE(review): `logamplitude`/`ref_power` is the older librosa API; newer
    # releases expose `power_to_db(S, ref=np.max)` instead -- confirm the
    # librosa version this notebook is pinned to.
    log_S = librosa.logamplitude(S, ref_power=np.max)
    # rot90 with k=3 is equivalent to log_S.T[:, ::-1]: the first axis moves
    # to the columns in REVERSED order (column 0 is the last mel band).
    log_S_rotated = np.rot90(log_S, 3)
    log_S_df = pd.DataFrame(log_S_rotated)

    file_name = audio_path.split("/")[-1]
    log_S_df['Value'] = file_name.split("|")[0]

    # str.strip(".wav") removes any of the characters '.', 'w', 'a', 'v' from
    # both ends (e.g. "wave.wav" -> "e"), so drop the extensions as suffixes.
    basename = file_name.split("|")[-1]
    for extension in (".wav", ".mp4", ".mp3"):
        if basename.endswith(extension):
            basename = basename[:-len(extension)]
    log_S_df['Basename'] = basename
    return log_S_df

## Takes WAV file path and writes the extracted features to `<name>.mfcc.csv` next to the input file

def mfcc_out(audio_path):
    """Compute features for one audio file and save them as CSV.

    The output path is ``audio_path`` with every ".wav" occurrence removed
    and ".mfcc.csv" appended, so the CSV lands next to the input file.
    The DataFrame index is not written.
    """
    features = wav_to_mfcc(audio_path)
    csv_path = audio_path.replace('.wav', '') + ".mfcc.csv"
    features.to_csv(csv_path, index=False)


In [3]:
## Change to directory containing speech tag excerpts in WAV form

wav_dir = os.path.expanduser('~/Dropbox/test_set_50_clips/')
os.chdir(wav_dir)
filenames=os.listdir('./')

## Write a .mfcc.csv for every WAV file that does not already have one

for filename in filenames:
    if ".wav" in filename:
        # Skip clips whose CSV was present when the directory was listed.
        if filename.replace(".wav",".mfcc.csv") not in filenames:
            try:
                mfcc_out(filename)
            # Best-effort batch: report the failure and keep going, but let
            # KeyboardInterrupt/SystemExit propagate and show what went wrong.
            except Exception as err:
                print("error: "+filename+" ("+repr(err)+")")


## Extract 13 Mel-frequency cepstral coefficients + 13 deltas + 13 delta-deltas
- fft window size = 2048

In [4]:
## Takes WAV file path and returns a pandas dataframe containing 13 normalized MFCCs + deltas + delta-deltas

def wav_to_mfcc_deltas(audio_path):
    """Load an audio file and return 13 MFCCs, deltas and delta-deltas.

    Parameters
    ----------
    audio_path : str
        Path to the audio file. The file name (the part after the last "/")
        is split on "|": the first piece becomes the 'Value' column and the
        last piece, minus a trailing .wav/.mp4/.mp3 extension, becomes the
        'Basename' column.

    Returns
    -------
    pandas.DataFrame
        39 numeric columns plus 'Value' and 'Basename'. Because of the
        rotation below, the feature axis is reversed: columns 0-12 hold the
        delta-deltas, 13-25 the deltas and 26-38 the MFCCs, each group with
        the highest coefficient index first. Assuming librosa's documented
        (features, frames) layout, each row is one analysis frame.
    """
    y, sr = librosa.load(audio_path)
    # 128-band mel power spectrogram with a 2048-sample FFT window.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, n_fft=2048)
    # Log scale (dB) relative to the peak power.
    # NOTE(review): `logamplitude`/`ref_power` is the older librosa API; newer
    # releases expose `power_to_db(S, ref=np.max)` instead -- confirm the
    # librosa version this notebook is pinned to.
    log_S = librosa.logamplitude(S, ref_power=np.max)
    mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=13)
    delta_mfcc = librosa.feature.delta(mfcc)
    delta2_mfcc = librosa.feature.delta(mfcc, order=2)

    # Stack the three feature matrices along the first axis.
    mfcc_all = np.vstack([mfcc, delta_mfcc, delta2_mfcc])
    # rot90 with k=3 is equivalent to mfcc_all.T[:, ::-1]: the first axis
    # moves to the columns in reversed order.
    mfcc_rotated = np.rot90(mfcc_all, 3)
    mfcc_df = pd.DataFrame(mfcc_rotated)

    file_name = audio_path.split("/")[-1]
    mfcc_df['Value'] = file_name.split("|")[0]

    # str.strip(".wav") removes any of the characters '.', 'w', 'a', 'v' from
    # both ends (e.g. "wave.wav" -> "e"), so drop the extensions as suffixes.
    basename = file_name.split("|")[-1]
    for extension in (".wav", ".mp4", ".mp3"):
        if basename.endswith(extension):
            basename = basename[:-len(extension)]
    mfcc_df['Basename'] = basename
    return mfcc_df

## Takes WAV file path and writes the resulting CSV next to the input file as `<name>_deltas.mfcc.csv`

def mfcc_out_deltas(audio_path):
    """Compute MFCC + delta + delta-delta features for one file and save as CSV.

    The output path is ``audio_path`` with every ".wav" occurrence removed
    and "_deltas.mfcc.csv" appended. The DataFrame index is not written.
    """
    # Must use the deltas extractor; calling wav_to_mfcc here would write the
    # 64-band spectrogram into the *_deltas.mfcc.csv file instead.
    mfcc_1=wav_to_mfcc_deltas(audio_path)
    mfcc_1.to_csv(audio_path.replace('.wav','')+"_deltas.mfcc.csv", index=False)


In [5]:
wav_dir = os.path.expanduser('~/Dropbox/test_set_50_clips/')
os.chdir(wav_dir)
filenames=os.listdir('./')


# Write a _deltas.mfcc.csv for every WAV file that does not already have one.
for filename in filenames:
    if ".wav" in filename:
        # Skip clips whose CSV was present when the directory was listed.
        if filename.replace(".wav","_deltas.mfcc.csv") not in filenames:
            try:
                mfcc_out_deltas(filename)
            # Best-effort batch: report the failure and keep going, but let
            # KeyboardInterrupt/SystemExit propagate and show what went wrong.
            except Exception as err:
                print("error : "+filename+" ("+repr(err)+")")
