In [None]:
from python_speech_features import mfcc
import scipy.io.wavfile as wav
import numpy as np

import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
%matplotlib inline

from __future__ import division
from sklearn.cross_validation import StratifiedKFold
from sklearn.externals.six.moves import xrange
from sklearn.mixture import GMM
from sklearn.metrics import accuracy_score, classification_report
from scipy import interp
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import confusion_matrix

import os

### MFCC extraction for audio samples

In [2]:
# Number of cepstral coefficients requested from mfcc() (passed as `numcep`).
nc = 20

def generateMFCC(filename, fileOutput):
    """Average the MFCC matrix of one WAV file and write it as a single CSV row.

    Parameters
    ----------
    filename : str
        Path of the WAV file; it is read with ``scipy.io.wavfile.read``.
    fileOutput : file-like
        Open, writable text stream. Every averaged coefficient is written
        followed by a comma, and the row is terminated with a newline, so the
        row keeps a trailing comma.

    Returns
    -------
    numpy.ndarray
        Column-wise mean of the matrix returned by ``mfcc`` (one value per
        column of that matrix).
    """
    rate, sig = wav.read(filename)
    mfcc_feat = mfcc(sig, rate, numcep=nc)

    # Collapse axis 0 (the rows of the feature matrix) into one mean vector.
    # np.mean replaces the former "allocate, sum, divide in place" sequence,
    # whose initial np.empty allocation was dead code, and avoids shadowing
    # the builtin `sum`.
    mean_mfcc = np.mean(mfcc_feat, axis=0)

    # Same on-disk format as before: "v1,v2,...,vN,\n".
    fileOutput.write(''.join(str(value) + ',' for value in mean_mfcc))
    fileOutput.write('\n')
    return mean_mfcc

### Load the data labels

In [3]:
import pandas as pd

# One label table per split, read from CSV files in the working directory.
# The generator is unpacked in the same order as the file names are listed.
dataframe_train, dataframe_dev, dataframe_eva = (
    pd.read_csv(label_file)
    for label_file in ('train_label.csv', 'dev_label.csv', 'eva_label.csv')
)

In [4]:
def model_dictor(dataframe, directory_in, directory_out, typeSignal):
    """Extract the averaged MFCC vector of every file whose label matches `typeSignal`.

    Rows of `dataframe` whose ``Sp_hu`` column equals `typeSignal` are selected.
    For each selected row the first column is used as a sub-directory name and
    the second column as the file base name:

    * input  : ``directory_in + <col 0> + '\\' + <col 1> + '.wav'``
    * output : ``directory_out + <col 1> + '.csv'``

    Both directory arguments are used as plain string prefixes, so they must
    already end with a path separator.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Label table with a ``Sp_hu`` column; columns 0 and 1 hold strings.
    directory_in : str
        Prefix of the input WAV paths.
    directory_out : str
        Prefix of the per-file CSV outputs written by ``generateMFCC``.
    typeSignal : object
        Value compared against the ``Sp_hu`` column.

    Returns
    -------
    numpy.ndarray
        ``np.array`` of the vectors returned by ``generateMFCC``, one per
        selected row, in row order.
    """
    selected = dataframe[dataframe.Sp_hu == typeSignal]

    # Named `features` rather than `mfcc` so the imported mfcc() is not shadowed.
    features = []

    # itertuples(index=False) yields the column values positionally; it
    # replaces the `.ix[i][k]` lookups, which no longer exist in pandas >= 1.0.
    for row in selected.itertuples(index=False):
        subdir, stem = row[0], row[1]
        fileName = directory_in + subdir + '\\' + stem + '.wav'

        # The context manager closes the CSV even if feature extraction raises.
        with open(directory_out + stem + '.csv', 'w') as fileOut:
            features.append(generateMFCC(fileName, fileOut))

    return np.array(features)

# MFCC of original (human) speakers

## Train set

In [6]:
# Prefix of the input WAV paths; later cells reuse `directory`.
directory = r'F:\Science\Antispoofing\AntispoofingDataset\ASVSpoof2015\wav\\'
# Prefix of the per-file CSVs written for this split.
output_directory = r'F:\Science\Notebook\25.10.16. Next_step\FeatureOriginal_Train\\'

# One averaged MFCC vector per training row labelled 'human'.
mfcc_original_train = model_dictor(
    dataframe=dataframe_train,
    directory_in=directory,
    directory_out=output_directory,
    typeSignal='human',
)
# Keep the stacked matrix as plain text next to the notebook.
np.savetxt(fname='mfcc_original_train.txt', X=mfcc_original_train)

## Development set

In [8]:

# Prefix of the per-file CSVs written for this split; `directory` comes from the train-set cell.
output_directory = r'F:\Science\Notebook\25.10.16. Next_step\FeatureOriginal_Dev\\'

# One averaged MFCC vector per development row labelled 'human'.
mfcc_original_dev = model_dictor(
    dataframe=dataframe_dev,
    directory_in=directory,
    directory_out=output_directory,
    typeSignal='human',
)
# Keep the stacked matrix as plain text next to the notebook.
np.savetxt(fname='mfcc_original_dev.txt', X=mfcc_original_dev)

## Evaluation set

In [10]:
# Prefix of the per-file CSVs written for this split; `directory` comes from the train-set cell.
output_directory = r'F:\Science\Notebook\25.10.16. Next_step\FeatureOriginal_Eva\\'

# One averaged MFCC vector per evaluation row labelled 'human'.
mfcc_original_eva = model_dictor(
    dataframe=dataframe_eva,
    directory_in=directory,
    directory_out=output_directory,
    typeSignal='human',
)
# Keep the stacked matrix as plain text next to the notebook.
np.savetxt(fname='mfcc_original_eva.txt', X=mfcc_original_eva)

# MFCC of spoof samples

In [5]:
# Prefix of the per-file CSVs written for this split; `directory` comes from the train-set cell.
output_directory = r'F:\Science\Notebook\25.10.16. Next_step\FeatureSpoof_Train\\'

# One averaged MFCC vector per training row labelled 'spoof'.
mfcc_spoof_train = model_dictor(
    dataframe=dataframe_train,
    directory_in=directory,
    directory_out=output_directory,
    typeSignal='spoof',
)
# Keep the stacked matrix as plain text next to the notebook.
np.savetxt(fname='mfcc_spoof_train.txt', X=mfcc_spoof_train)

In [None]:
# Prefix of the per-file CSVs written for this split; `directory` comes from the train-set cell.
output_directory = r'F:\Science\Notebook\25.10.16. Next_step\FeatureSpoof_Dev\\'

# One averaged MFCC vector per development row labelled 'spoof'.
mfcc_spoof_dev = model_dictor(
    dataframe=dataframe_dev,
    directory_in=directory,
    directory_out=output_directory,
    typeSignal='spoof',
)
# Keep the stacked matrix as plain text next to the notebook.
np.savetxt(fname='mfcc_spoof_dev.txt', X=mfcc_spoof_dev)

In [None]:
# Prefix of the per-file CSVs written for this split; `directory` comes from the train-set cell.
output_directory = r'F:\Science\Notebook\25.10.16. Next_step\FeatureSpoof_Eva\\'

# One averaged MFCC vector per evaluation row labelled 'spoof'.
mfcc_spoof_eva = model_dictor(
    dataframe=dataframe_eva,
    directory_in=directory,
    directory_out=output_directory,
    typeSignal='spoof',
)
# Keep the stacked matrix as plain text next to the notebook.
np.savetxt(fname='mfcc_spoof_eva.txt', X=mfcc_spoof_eva)