In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pysptk

%matplotlib inline

In [2]:
import features as psf
import scipy.io.wavfile as wav

## MFCC

In [3]:
nc = 20  # value passed as `numcep` to psf.mfcc


def generateMFCC(filename):
    """Return one averaged MFCC vector for a wav file.

    The file is read with ``scipy.io.wavfile.read`` and handed to
    ``psf.mfcc`` together with its sample rate and ``numcep=nc``. The
    resulting matrix is averaged over axis 0 (presumably one row per
    analysis frame -- confirm against the ``features`` module).

    Parameters
    ----------
    filename : str
        Path of the wav file to analyse.

    Returns
    -------
    numpy.ndarray
        1-D array with one entry per column of the ``psf.mfcc`` output.
        If the matrix has no rows the result is NaN-filled (0 / 0), exactly
        as the division by the row count always behaved.
    """
    rate, sig = wav.read(filename)
    mfcc_feat = psf.mfcc(sig, rate, numcep=nc)

    # Column-wise sum divided by the row count, i.e. the mean over axis 0.
    # No scratch buffer is needed and the builtin `sum` is no longer shadowed.
    return np.sum(mfcc_feat, axis=0) / mfcc_feat.shape[0]

def model_dictor( dataframe, directory_in, directory_out):
    """Compute the averaged MFCC vector of every utterance listed in a frame.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Protocol rows. The first column is used as the sub-folder name and
        the second column as the wav file name without extension; both are
        taken by position, not by label.
    directory_in : str
        Root folder that contains the per-speaker sub-folders.
    directory_out : str
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    numpy.ndarray
        The per-file vectors returned by ``generateMFCC`` stacked with
        ``np.array`` (one row per frame row); an empty 1-D array when the
        frame has no rows.
    """
    import os  # local import keeps this cell independent of the import cell

    mfcc = []

    # Iterate rows positionally: `DataFrame.ix` was removed in pandas 1.0 and
    # label-based lookups break on non-unique indexes.
    for row in dataframe.itertuples(index=False, name=None):
        # os.path.join yields the same string as the former '\\' concatenation
        # on Windows when directory_in ends with a separator, and a valid
        # path on other platforms.
        fileName = os.path.join(directory_in, row[0], row[1] + '.wav')
        mfcc.append(generateMFCC(fileName))

    return np.array(mfcc)

In [4]:
# Root folder of the wav corpus; passed as `directory_in` to the feature
# extraction helpers, which append '<first column>' and '<second column>.wav'.
# NOTE(review): absolute, machine-specific path -- adjust before re-running.
directory = r'N://Science//Antispoofing Datasets//ASVSpoof2015//wav//'
# Passed as `directory_out` to the feature extraction helpers.
# NOTE(review): the cells in this notebook write their results under
# './mfcc_all/' and './f0/' and never read this value -- confirm whether it
# is still needed.
output_directory = r'N://Science//Antispoofing Research (ipython notebook)//gmm t-sne//mfcc_all//'

In [5]:
# train data

# Parse the protocol file: one whitespace-separated record per line.
with open("./protocol/cm_train.trn", "r") as myfile:
    # Blank lines (e.g. the one produced by a trailing newline) are skipped;
    # otherwise they become an all-None record, add a `None` label and cause
    # an empty 'mfcc_train_None.txt' to be written.
    data = [line.split() for line in myfile if line.strip()]

dataframe = pd.DataFrame(data, columns=["dictor", "name", "algorithm", "sp_hu"])
uniquelabels = dataframe.algorithm.unique()

# One feature file per value of the `algorithm` column.
for label in uniquelabels:
    mfcc_output = model_dictor(dataframe[dataframe.algorithm == label], directory, output_directory)
    np.savetxt('./mfcc_all/mfcc_train_' + str(label) + '.txt', mfcc_output)


In [None]:
# dev data

# Parse the protocol file: one whitespace-separated record per line.
with open("./protocol/cm_develop.ndx", "r") as myfile:
    # Blank lines (e.g. the one produced by a trailing newline) are skipped;
    # otherwise they become an all-None record, add a `None` label and cause
    # an empty 'mfcc_dev_None.txt' to be written.
    data = [line.split() for line in myfile if line.strip()]

dataframe = pd.DataFrame(data, columns=["dictor", "name", "algorithm", "sp_hu"])
uniquelabels = dataframe.algorithm.unique()

# One feature file per value of the `algorithm` column.
for label in uniquelabels:
    mfcc_output = model_dictor(dataframe[dataframe.algorithm == label], directory, output_directory)
    np.savetxt('./mfcc_all/mfcc_dev_' + str(label) + '.txt', mfcc_output)

In [None]:
# eva data

# Parse the protocol file: one whitespace-separated record per line.
with open("./protocol/cm_evaluation.ndx", "r") as myfile:
    # Blank lines (e.g. the one produced by a trailing newline) are skipped;
    # otherwise they become an all-None record, add a `None` label and cause
    # an empty 'mfcc_eva_None.txt' to be written.
    data = [line.split() for line in myfile if line.strip()]

dataframe = pd.DataFrame(data, columns=["dictor", "name", "algorithm", "sp_hu"])
uniquelabels = dataframe.algorithm.unique()

# One feature file per value of the `algorithm` column.
for label in uniquelabels:
    mfcc_output = model_dictor(dataframe[dataframe.algorithm == label], directory, output_directory)
    np.savetxt('./mfcc_all/mfcc_eva_' + str(label) + '.txt', mfcc_output)

In [42]:
# Reload the per-algorithm MFCC matrices written by the extraction cells.

# --- train partition ---
mfcc_train_human = np.loadtxt('./mfcc_all/mfcc_train_human.txt')
mfcc_train_S1 = np.loadtxt('./mfcc_all/mfcc_train_S1.txt')
mfcc_train_S2 = np.loadtxt('./mfcc_all/mfcc_train_S2.txt')
mfcc_train_S3 = np.loadtxt('./mfcc_all/mfcc_train_S3.txt')
mfcc_train_S4 = np.loadtxt('./mfcc_all/mfcc_train_S4.txt')
mfcc_train_S5 = np.loadtxt('./mfcc_all/mfcc_train_S5.txt')


# --- evaluation partition ---
mfcc_eva_human = np.loadtxt('./mfcc_all/mfcc_eva_human.txt')
mfcc_eva_S1 = np.loadtxt('./mfcc_all/mfcc_eva_S1.txt')
mfcc_eva_S2 = np.loadtxt('./mfcc_all/mfcc_eva_S2.txt')
mfcc_eva_S3 = np.loadtxt('./mfcc_all/mfcc_eva_S3.txt')
mfcc_eva_S4 = np.loadtxt('./mfcc_all/mfcc_eva_S4.txt')
mfcc_eva_S5 = np.loadtxt('./mfcc_all/mfcc_eva_S5.txt')
mfcc_eva_S6 = np.loadtxt('./mfcc_all/mfcc_eva_S6.txt')
mfcc_eva_S7 = np.loadtxt('./mfcc_all/mfcc_eva_S7.txt')
mfcc_eva_S8 = np.loadtxt('./mfcc_all/mfcc_eva_S8.txt')
mfcc_eva_S9 = np.loadtxt('./mfcc_all/mfcc_eva_S9.txt')
mfcc_eva_S10 = np.loadtxt('./mfcc_all/mfcc_eva_S10.txt')


# --- development partition ---
# The human subset must come from the dev file; it previously re-loaded the
# train file, so the dev "human" data was a copy of the training data.
mfcc_dev_human = np.loadtxt('./mfcc_all/mfcc_dev_human.txt')
mfcc_dev_S1 = np.loadtxt('./mfcc_all/mfcc_dev_S1.txt')
mfcc_dev_S2 = np.loadtxt('./mfcc_all/mfcc_dev_S2.txt')
mfcc_dev_S3 = np.loadtxt('./mfcc_all/mfcc_dev_S3.txt')
mfcc_dev_S4 = np.loadtxt('./mfcc_all/mfcc_dev_S4.txt')
mfcc_dev_S5 = np.loadtxt('./mfcc_all/mfcc_dev_S5.txt')

In [45]:
# Pool the per-algorithm matrices of each partition into one spoof matrix
# (rows of S1 first, then S2, ...).
mfcc_spoof_train = np.vstack([mfcc_train_S1, mfcc_train_S2, mfcc_train_S3, mfcc_train_S4, mfcc_train_S5])
mfcc_spoof_dev = np.vstack([mfcc_dev_S1, mfcc_dev_S2, mfcc_dev_S3, mfcc_dev_S4, mfcc_dev_S5])
# S5 and S6 are separate entries: a stray '/' at the line break used to divide
# S5 by S6 element-wise, so the stack held their quotient instead of both.
mfcc_spoof_eva = np.vstack([mfcc_eva_S1, mfcc_eva_S2, mfcc_eva_S3, mfcc_eva_S4, mfcc_eva_S5,
                            mfcc_eva_S6, mfcc_eva_S7, mfcc_eva_S8, mfcc_eva_S9, mfcc_eva_S10])

In [49]:
# Persist the pooled spoof matrices as text, one file per partition.
np.savetxt(fname='./mfcc_all/mfcc_spoof_train.txt', X=mfcc_spoof_train)
np.savetxt(fname='./mfcc_all/mfcc_spoof_dev.txt', X=mfcc_spoof_dev)
np.savetxt(fname='./mfcc_all/mfcc_spoof_eva.txt', X=mfcc_spoof_eva)

# F0

In [None]:
hop_length = 80  # value passed as `hopsize` to pysptk.swipe


def extractF0( dataframe, directory_in, directory_out):
    """Extract an F0 contour for every utterance listed in a frame.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Protocol rows. The first column is used as the sub-folder name and
        the second column as the wav file name without extension; both are
        taken by position, not by label.
    directory_in : str
        Root folder that contains the per-speaker sub-folders.
    directory_out : str
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    numpy.ndarray
        One contour per frame row, as returned by ``pysptk.swipe`` with
        ``min=60``, ``max=240`` and ``otype="f0"``. When all contours have
        the same length this is the regular array ``np.array`` builds from
        them; otherwise it is a 1-D object array whose elements are the
        individual contours.
    """
    import os  # local import keeps this cell independent of the import cell

    f0_all = []

    # Iterate rows positionally: `DataFrame.ix` was removed in pandas 1.0.
    for row in dataframe.itertuples(index=False, name=None):
        # os.path.join yields the same string as the former '\\' concatenation
        # on Windows when directory_in ends with a separator, and a valid
        # path on other platforms.
        fileName = os.path.join(directory_in, row[0], row[1] + '.wav')

        rate, audio = wav.read(fileName)
        f0 = pysptk.swipe(audio.astype(np.float64), fs=rate, hopsize=hop_length, min=60, max=240, otype="f0")
        f0_all.append(f0)

    # Contours of different lengths cannot form a rectangular array, and
    # recent NumPy refuses to build a ragged one implicitly; use an explicit
    # object array in that case.
    if len({len(f0) for f0 in f0_all}) > 1:
        ragged = np.empty(len(f0_all), dtype=object)
        for position, f0 in enumerate(f0_all):
            ragged[position] = f0
        return ragged

    return np.array(f0_all)

In [None]:
# train data

# Parse the protocol file: one whitespace-separated record per line.
with open("./protocol/cm_train.trn", "r") as myfile:
    # Blank lines (e.g. the one produced by a trailing newline) are skipped;
    # otherwise they become an all-None record and add a `None` label.
    data = [line.split() for line in myfile if line.strip()]

dataframe = pd.DataFrame(data, columns=["dictor", "name", "algorithm", "sp_hu"])
uniquelabels = dataframe.algorithm.unique()

# One F0 file per value of the `algorithm` column.
for label in uniquelabels:
    f0_output = extractF0(dataframe[dataframe.algorithm == label], directory, output_directory)
    # Write one contour per line. For equal-length contours the file is the
    # same as a single np.savetxt call; contours of different lengths, which
    # a single call cannot write, are stored as lines of different length.
    with open('./f0/f0_train_' + str(label) + '.txt', 'w') as f0_file:
        for f0 in f0_output:
            np.savetxt(f0_file, np.atleast_2d(np.asarray(f0, dtype=np.float64)))

In [None]:
# dev data

# Parse the protocol file: one whitespace-separated record per line.
with open("./protocol/cm_develop.ndx", "r") as myfile:
    # Blank lines (e.g. the one produced by a trailing newline) are skipped;
    # otherwise they become an all-None record and add a `None` label.
    data = [line.split() for line in myfile if line.strip()]

dataframe = pd.DataFrame(data, columns=["dictor", "name", "algorithm", "sp_hu"])
uniquelabels = dataframe.algorithm.unique()

# One F0 file per value of the `algorithm` column.
for label in uniquelabels:
    f0_output = extractF0(dataframe[dataframe.algorithm == label], directory, output_directory)
    # Write one contour per line. For equal-length contours the file is the
    # same as a single np.savetxt call; contours of different lengths, which
    # a single call cannot write, are stored as lines of different length.
    with open('./f0/f0_dev_' + str(label) + '.txt', 'w') as f0_file:
        for f0 in f0_output:
            np.savetxt(f0_file, np.atleast_2d(np.asarray(f0, dtype=np.float64)))

In [None]:
# eva data

# Parse the protocol file: one whitespace-separated record per line.
with open("./protocol/cm_evaluation.ndx", "r") as myfile:
    # Blank lines (e.g. the one produced by a trailing newline) are skipped;
    # otherwise they become an all-None record and add a `None` label.
    data = [line.split() for line in myfile if line.strip()]

dataframe = pd.DataFrame(data, columns=["dictor", "name", "algorithm", "sp_hu"])
uniquelabels = dataframe.algorithm.unique()

# One F0 file per value of the `algorithm` column.
for label in uniquelabels:
    f0_output = extractF0(dataframe[dataframe.algorithm == label], directory, output_directory)
    # Write one contour per line. For equal-length contours the file is the
    # same as a single np.savetxt call; contours of different lengths, which
    # a single call cannot write, are stored as lines of different length.
    with open('./f0/f0_eva_' + str(label) + '.txt', 'w') as f0_file:
        for f0 in f0_output:
            np.savetxt(f0_file, np.atleast_2d(np.asarray(f0, dtype=np.float64)))


In [None]:
# Partial re-run of the evaluation MFCC extraction.
# NOTE(review): only labels from position 3 onwards are processed --
# presumably to resume an interrupted run; confirm before relying on it.

# Parse the protocol file: one whitespace-separated record per line.
with open("./protocol/cm_evaluation.ndx", "r") as myfile:
    # Blank lines (e.g. the one produced by a trailing newline) are skipped;
    # otherwise they become an all-None record, add a `None` label and cause
    # an empty 'mfcc_eva_None.txt' to be written.
    data = [line.split() for line in myfile if line.strip()]

dataframe = pd.DataFrame(data, columns=["dictor", "name", "algorithm", "sp_hu"])
uniquelabels = dataframe.algorithm.unique()

# Skip the first three labels (in order of first appearance in the protocol).
for label in uniquelabels[3:]:
    mfcc_output = model_dictor(dataframe[dataframe.algorithm == label], directory, output_directory)
    np.savetxt('./mfcc_all/mfcc_eva_' + str(label) + '.txt', mfcc_output)