In [2]:
import librosa
import scipy.io.wavfile as wav
import pandas as pd
import numpy as np
from scipy.signal import hamming
import h5py
import os

## MAKE FEATURE VECTOR REPRESENTATION

In [3]:
N_MFCC = 20
OVERLAP = False
MFCC_WIN_LENGTH = 512
N_MELFILTERS = 40

def calc_feature(audio, rate, win_length = MFCC_WIN_LENGTH, hop_length = None, n_fft = 512, window = hamming, n_mfcc = N_MFCC):
    """
    In this function we calculate MFCC, delta, delta-deltas features
    for signal representation
    
    Library 
        - for feature computing: Librosa
    
    """
    
    if n_fft == None:
        n_fft = win_length

    if hop_length == None:
        if OVERLAP:
            hop_length = int(win_length/4)
        else:
            hop_length = win_length

    # power spectrum        
    D = np.abs(librosa.stft(audio, n_fft=n_fft, hop_length=hop_length))**2

    # mel spectrogram
    S = librosa.feature.melspectrogram(S = D, sr = rate, htk = True)

    MFCC = librosa.feature.mfcc(S = librosa.logamplitude(S), n_mfcc = N_MFCC, n_mels = N_MELFILTERS)
    #+ 1e-8
    
    # substract cepstral_mean
    MFCC = MFCC[1:,:] #exclude 0th mcep
    
    MFCC_D = librosa.feature.delta(MFCC, order = 1)
    MFCC_DD = librosa.feature.delta(MFCC,order = 2)

    return MFCC, MFCC_D, MFCC_DD



In [4]:
#  test script
filename = 'p228_028.wav'

rate, audio = wav.read(filename)
MFCC, MFCC_D, MFCC_DD = calc_feature(audio, rate)

featurevector = np.vstack(calc_feature(audio, rate, n_mfcc = N_MFCC, win_length = MFCC_WIN_LENGTH))
print(featurevector.shape)

(57L, 127L)


# DATABASE FEATURE EXTRACTION

In [6]:
directory = '../../Antispoofing Datasets/ASVSpoof2015/wav/'

def compute(setName):
  
    if setName == 'train':
        protocol = './protocol/cm_train.trn'
    elif setName == 'develop':
        protocol = './protocol/cm_develop.ndx'  
    elif setName == 'evaluation':
        protocol = './protocol/cm_evaluation.ndx'
    else:
        print 'Invalid argument'
        
    data = pd.read_csv(protocol, delimiter=' ', names=["dictor", "name", "algorithm", "sp_hu"], dtype={"name": 'string'}) 
    
    f = h5py.File(setName + '.h5', "w")

    for j in range(len(data)):
        filename = directory + data.iloc[j].dictor + '/' + data.iloc[j]['name'] + '.wav'
        
        path = '/' + data.iloc[j].sp_hu + '/' + data.iloc[j].algorithm + '/' + data.iloc[j]['name']
        
        rate, audio = wav.read(filename)
        featurevector = np.vstack(calc_feature(audio, rate, n_mfcc = N_MFCC, win_length = MFCC_WIN_LENGTH))
        dset = f.create_dataset(path, featurevector.shape, dtype='float64')
        dset[...] = featurevector
        
compute('train')

T2_1000002


In [13]:
f = h5py.File('train.h5','r') 
data = f.get('/human/human/T2_1000001') 
data = np.array(data)
print data

[[ 39.48759521  36.06444562  40.76127196 ...,  64.69276071  70.12437135
   66.56866556]
 [ 28.1883011   32.55624761  33.00336663 ...,  19.81570828  30.59975461
   38.63468956]
 [ 20.92265522  29.43736939  26.29395217 ...,  15.49158634  22.57388951
   23.0439447 ]
 ..., 
 [  0.17935919   0.15266997   0.12884873 ...,   0.52592193   0.788404
    0.85030522]
 [  0.20233498   0.18162991   0.16750161 ...,  -0.08377266   0.20380387
    0.30339294]
 [  0.27861496   0.24582167   0.22015505 ...,  -0.24370179  -0.58880316
   -0.80495669]]
