# Extracting MFCC Features from Pathological dataset 2

In [1]:
import numpy as np
import scipy.io.wavfile
from scipy.fftpack import fft, dct, fftshift
import matplotlib.pyplot as plt 
from scipy import signal
import librosa
import librosa.display
import pandas as pd
from scipy.stats import skew, kurtosis

In [7]:
from utils import RAW_DATA_PATH

df = pd.read_csv(RAW_DATA_PATH / 'ASMDD.csv')

df['Pronunciation'] = np.int_(df['Pronunciation'] == 'Wrong')

df

Unnamed: 0,Speaker ID,Gender,Number of Words,Word ID,Pronunciation,Path
0,14,Boy,100,10,0,ASMDD/speaker_14_b_100/10.wav
1,14,Boy,100,9,0,ASMDD/speaker_14_b_100/9.wav
2,14,Boy,100,3,0,ASMDD/speaker_14_b_100/3.wav
3,14,Boy,100,2,0,ASMDD/speaker_14_b_100/2.wav
4,14,Boy,100,8,0,ASMDD/speaker_14_b_100/8.wav
...,...,...,...,...,...,...
5292,99,Boy,50,5,1,ASMDD/speaker_99_b_50/5_N.wav
5293,99,Boy,50,28,1,ASMDD/speaker_99_b_50/28_N.wav
5294,99,Boy,50,31,1,ASMDD/speaker_99_b_50/31_N.wav
5295,99,Boy,50,42,0,ASMDD/speaker_99_b_50/42.wav


In [42]:
right_paths = df[df['Pronunciation'] == 0]['Path'].values

right_paths = str(RAW_DATA_PATH) + '/' + right_paths + '\n'
right_paths[-1] = right_paths[-1].strip('\n')

with open('healthy_wav_files.txt', 'w') as file:
    file.writelines(right_paths)
    

wrong_paths = df[df['Pronunciation'] == 1]['Path'].values
wrong_paths = str(RAW_DATA_PATH) + '/' + wrong_paths + '\n'
wrong_paths[-1] = wrong_paths[-1].strip('\n')

with open('patient_wav_files.txt', 'w') as file:
    file.writelines(wrong_paths)
    

In [18]:
def plotAudio(audio, sample_rate):
    plt.figure(figsize=(17,5))
    plt.plot(np.linspace(0, len(audio) / sample_rate, num=len(audio)), audio)
    plt.grid(True)

In [19]:
def loadAudioFile(filename):
    fs, audioInput = scipy.io.wavfile.read(filename)
    return audioInput, fs

In [20]:
def preemphasis(audioInput):
    alpha = 0.95
    emphasized_audio = np.append(audioInput[0], audioInput[1:] - alpha * audioInput[:-1])
    return emphasized_audio

In [21]:
def frameBlocking(audio, frameSize, overlap):
    frameSize = int(frameSize)
    overlap = int(overlap)
    num_frames = int(np.ceil(len(audio)/(frameSize - overlap))) 

    padding = ((frameSize-overlap)*num_frames) - len(audio) 
    zeros = np.zeros((padding))
    audio = np.append(audio, zeros) 
    
    frames = np.empty((frameSize, num_frames)) 
    start = 0
    for i in range(num_frames):
        frames[:,i] = audio[start:start + frameSize]
        start = (frameSize-overlap)*i 
        
    frames = frames.T
    
    return frames

In [22]:
def applyWindow(frames, frameSize):
    
    window = np.hamming(frameSize)
    windowed_frames = frames * window
    
    return windowed_frames

In [23]:
def findPeriodogram(windowed_frames, frameSize, nfft):
    audio_fft = np.absolute(fft(windowed_frames,nfft))
    audio_fft = audio_fft[:,:nfft//2+1]

    periodogram = ((1.0 / nfft) * ((audio_fft) ** 2))
    
    return periodogram

In [24]:
def createMelFilterBank(numFilters, nfft, fs):
    fmin_mel = 0
    fmax_mel = (2595 * np.log10(1 + (fs // 2) / 700))
    mel = np.linspace(fmin_mel, fmax_mel, numFilters+2)
    hertz = (700 * (10**(mel / 2595) - 1))
    fbins = np.floor((nfft + 1) * hertz / fs)
    fbank = np.zeros((nfft//2+1, numFilters))
    
    for i in range(1,numFilters+1):
        for k in range(int(nfft//2 + 1)):
            if k < fbins[i-1]:
                fbank[k, i-1] = 0
            elif k >= fbins[i-1] and k < fbins[i]:
                fbank[k,i-1] = (k - fbins[i-1])/(fbins[i] - fbins[i-1])
            elif k >= fbins[i] and k < fbins[i+1]:
                fbank[k,i-1] = (fbins[i+1] - k)/(fbins[i+1] - fbins[i])
            else:
                fbank[k,i-1] = 0
    
    return fbank

In [25]:
def filtering(periodogram, fbank):    
    melFiltered = np.log10(np.dot(periodogram, fbank))
    return melFiltered

In [26]:
def findMFCC(melFiltered):
    mel_coeff = dct(melFiltered, type=3)
    return mel_coeff 

In [27]:
def meanNormalisation(mfcc):    
    norm_mfcc = mfcc - (np.mean(mfcc, axis=0) + 1e-8)
    return norm_mfcc

In [37]:
def extractMfcc(flag):
    feat = np.zeros((1,48))
    nfft = 512;
    maxi = -1
    numFilters = 12
    fbank = createMelFilterBank(numFilters, nfft, 44100)
    if(flag == 1):
        file = open('./patient_wav_files.txt').read()
    else:
        file = open('./healthy_wav_files.txt').read()
    audio_files = file.split('\n')
    for num, filename in enumerate(audio_files):
        audioInput, fs = loadAudioFile(filename)
#         highest = 202272
        frameSize = 0.020*fs
        overlap = (frameSize/2)
        emphasized_audio = preemphasis(audioInput)
        frames = frameBlocking(emphasized_audio, frameSize, overlap)
        windowed_frames = applyWindow(frames, frameSize)
        periodogram = findPeriodogram(windowed_frames, frameSize, nfft)
        melFiltered = filtering(periodogram, fbank)
        mfcc = findMFCC(melFiltered)
        mean_normalized_mfcc = meanNormalisation(mfcc)
#         audio_num = str(flag)+str(num)
        mean_normalized_mfcc = np.transpose(mean_normalized_mfcc)
#         print(mean_normalized_mfcc.shape)
        ar = []
        for coefficient in mean_normalized_mfcc:
            cm = np.mean(coefficient)
            cstd = np.std(coefficient)
            cskew = skew(coefficient)
            ckurtosis = kurtosis(coefficient)
            ar.append(cm)
            ar.extend([cstd, cskew, ckurtosis])
#         print(len(ar))
        feat = np.vstack((feat, ar))
    return feat

In [43]:
patient_feature_frames = extractMfcc(1)

  fs, audioInput = scipy.io.wavfile.read(filename)


In [45]:
print(patient_feature_frames[0])

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [46]:
healthy_feature_frames = extractMfcc(0)

  fs, audioInput = scipy.io.wavfile.read(filename)


In [47]:
# patient_feature_frames = np.delete(patient_feature_frames, 0, 0)
# healthy_feature_frames = np.delete(healthy_feature_frames, 0, 0)

# patient_feature_frames = np.delete(patient_feature_frames, 13, 1)
# healthy_feature_frames = np.delete(healthy_feature_frames, 13, 1)

print(patient_feature_frames.shape, healthy_feature_frames.shape)

(758, 48) (4541, 48)


In [48]:
np.savetxt('./mfcc_features_patient.csv', patient_feature_frames)

In [49]:
np.savetxt('./mfcc_features_healthy.csv', healthy_feature_frames)