In [1]:
from features import mfcc
import scipy.io.wavfile as wav
import numpy as np

import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
%matplotlib inline

from __future__ import division
from sklearn.cross_validation import StratifiedKFold
from sklearn.externals.six.moves import xrange
from sklearn.mixture import GMM
from sklearn.metrics import accuracy_score, classification_report
from scipy import interp
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import confusion_matrix

import os

# Функция генерации MFCC

In [2]:
nc = 20  # default number of cepstral coefficients requested from mfcc()

def generateMFCC(filename, fileOutput, numcep=None):
    """Compute the per-coefficient mean of a WAV file's MFCC matrix.

    The file is read with ``wav.read``, passed to ``mfcc`` and the resulting
    2-D array is averaged over axis 0 (rows; presumably one row per frame --
    confirm against the ``features`` package). The averaged vector is
    appended to ``fileOutput`` as a single line in which every value is
    followed by a comma (the trailing comma is kept so that existing
    consumers of the CSV files are not affected).

    Parameters
    ----------
    filename : str
        Path of the WAV file to read.
    fileOutput : file-like
        Object opened for text writing; receives one line per call.
    numcep : int, optional
        Number of cepstral coefficients passed to ``mfcc``. Defaults to the
        module-level ``nc`` when omitted.

    Returns
    -------
    numpy.ndarray
        1-D array with the mean of each MFCC column.
    """
    if numcep is None:
        numcep = nc

    (rate, sig) = wav.read(filename)
    mfcc_feat = mfcc(sig, rate, numcep=numcep)

    # Mean over rows; replaces the manual sum / row-count and avoids
    # shadowing the built-in ``sum``.
    mean_feat = np.mean(mfcc_feat, axis=0)

    fileOutput.write(''.join(str(value) + ',' for value in mean_feat))
    fileOutput.write('\n')
    return mean_feat

In [3]:
# Speaker counts named after the original and spoofing training sets;
# both are set to 35.
n_dictors_original_train = n_dictors_spoofing_train = 35

# Строим вектор признаков для набора дикторов

In [11]:
# Takes the first and last speaker number plus the input and output
# directories, and builds the MFCC feature vectors for those speakers.

def model_dictor(start_id, end_id, directory_in, directory_out):
    """Extract one averaged MFCC vector per audio file for a range of speakers.

    For every speaker id ``j`` in ``range(start_id, end_id)`` the files found
    in ``directory_in + str(j)`` are handed to ``generateMFCC`` together with
    an open handle on ``directory_out + str(j) + '.csv'``.

    Parameters
    ----------
    start_id : int
        First speaker id (inclusive).
    end_id : int
        Last speaker id (exclusive, as in ``range``).
    directory_in : str
        Prefix that is concatenated with the speaker id to obtain the
        speaker's directory, so it must already end with a path separator.
    directory_out : str
        Prefix that is concatenated with ``'<id>.csv'`` for the per-speaker
        output file; it must also end with a path separator.

    Returns
    -------
    numpy.ndarray
        Array built from the values returned by ``generateMFCC``, one entry
        per file that was processed without an exception.

    Notes
    -----
    A file that fails is reported (path and error) and skipped, so one bad
    file does not abort the run. Only ``Exception`` subclasses are caught,
    which lets ``KeyboardInterrupt`` stop a long extraction.
    """
    features = []

    for speaker_id in range(start_id, end_id):
        speaker_dir = directory_in + str(speaker_id)
        # os.listdir returns names in arbitrary order; sorting makes the
        # row order of the CSV file and of the result reproducible.
        file_names = sorted(os.listdir(speaker_dir))

        # Single pre-formatted string: prints identically under the Python 2
        # print statement and the Python 3 print function.
        print('Generating speaker %s, %s samples' % (speaker_id, len(file_names)))

        # ``with`` guarantees the CSV handle is closed even if an
        # unexpected error propagates.
        with open(directory_out + str(speaker_id) + '.csv', 'w') as fo:
            for name in file_names:
                # os.path.join uses the platform separator instead of a
                # hard-coded backslash.
                file_path = os.path.join(speaker_dir, name)
                try:
                    features.append(generateMFCC(file_path, fo))
                except Exception as exc:
                    print('Error processing %s: %s' % (file_path, exc))

    return np.array(features)



#  Извлекаем MFCC для оригинальных дикторов. Train set

In [12]:
# Original-speaker recordings for the train set: speaker ids 0..34
# (model_dictor iterates over range(start_dictor, end_dictor)).
start_dictor, end_dictor = 0, 35

directory = r'F:\Science\Antispoofing\AntispoofingDataset\ASVSpoof2015\human\human\\'
output_directory = r'F:\Science\Notebook\01.10.16. Next_step\FeatureOriginal_Train\\'

mfcc_original_train = model_dictor(
    start_id=start_dictor,
    end_id=end_dictor,
    directory_in=directory,
    directory_out=output_directory,
)


Generating speaker 0, 188 samples
Generating speaker 1, 312 samples
Generating speaker 2, 347 samples
Generating speaker 3, 323 samples
Generating speaker 4, 335 samples
Generating speaker 5, 359 samples
Generating speaker 6, 414 samples
Generating speaker 7, 368 samples
Generating speaker 8, 329 samples
Generating speaker 9, 314 samples
Generating speaker 10, 451 samples
Generating speaker 11, 298 samples
Generating speaker 12, 410 samples
Generating speaker 13, 459 samples
Generating speaker 14, 334 samples
Generating speaker 15, 312 samples
Generating speaker 16, 350 samples
Generating speaker 17, 377 samples
Generating speaker 18, 311 samples
Generating speaker 19, 316 samples
Generating speaker 20, 474 samples
Generating speaker 21, 332 samples
Generating speaker 22, 293 samples
Generating speaker 23, 437 samples
Generating speaker 24, 320 samples
Generating speaker 25, 353 samples
Generating speaker 26, 335 samples
Generating speaker 27, 353 samples
Generating speaker 28, 335 sam

In [13]:
# Write the train-set original-speaker features to a text file in the
# current working directory.
np.savetxt(fname='mfcc_original_train.txt', X=mfcc_original_train)

In [14]:
# Shuffle the speech samples of the original speakers (np.random.permutation
# returns a shuffled copy; mfcc_original_train itself is left untouched).
# NOTE(review): no random seed is set in the visible cells, so the resulting
# order will presumably differ from run to run -- confirm whether that matters.

mfcc_original_train_permutation = np.random.permutation(mfcc_original_train)

# Извлекаем MFCC для голосовых подделок. Train  set


In [15]:
# Spoofed recordings for the train set: speaker ids 0..34
# (model_dictor iterates over range(start_dictor, end_dictor)).
start_dictor, end_dictor = 0, 35

directory = r'F:\Science\Antispoofing\AntispoofingDataset\ASVSpoof2015\wav\Train\\'
output_directory = r'F:\Science\Notebook\01.10.16. Next_step\FeatureSpoof_Train\\'

mfcc_spoof_train = model_dictor(
    start_id=start_dictor,
    end_id=end_dictor,
    directory_in=directory,
    directory_out=output_directory,
)


Generating speaker 0, 1815 samples
Generating speaker 1, 1815 samples
Generating speaker 2, 1815 samples
Generating speaker 3, 1815 samples
Generating speaker 4, 1815 samples
Generating speaker 5, 1815 samples
Generating speaker 6, 1815 samples
Generating speaker 7, 1815 samples
Generating speaker 8, 1815 samples
Generating speaker 9, 1815 samples
Generating speaker 10, 1815 samples
Generating speaker 11, 1815 samples
Generating speaker 12, 1815 samples
Generating speaker 13, 1815 samples
Generating speaker 14, 1815 samples
Generating speaker 15, 1814 samples
Generating speaker 16, 1815 samples
Generating speaker 17, 1815 samples
Generating speaker 18, 1815 samples
Generating speaker 19, 1815 samples
Generating speaker 20, 1815 samples
Generating speaker 21, 1815 samples
Generating speaker 22, 1815 samples
Generating speaker 23, 1815 samples
Generating speaker 24, 1813 samples
Generating speaker 25, 1815 samples
Generating speaker 26, 1815 samples
Generating speaker 27, 1815 samples
Ge

In [16]:
# Shuffle the spoofed samples (np.random.permutation returns a shuffled copy).
# NOTE(review): no random seed is set in the visible cells, so the resulting
# order will presumably differ from run to run -- confirm whether that matters.
mfcc_spoof_train_permutation = np.random.permutation(mfcc_spoof_train)

In [20]:
# Write the train-set spoof features to a text file in the current
# working directory.
np.savetxt(fname='mfcc_spoof_train.txt', X=mfcc_spoof_train)

# Формируем общий вектор с набором MFCC голосовых подделок и оригинальных образцов. Train set

In [18]:
# Stack the original samples first, then the spoofed ones.
X_train = np.concatenate(
    (mfcc_original_train_permutation, mfcc_spoof_train_permutation),
    axis=0,
)
# Class labels: 0 for every original sample, 1 for every spoofed sample,
# in the same order as the rows of X_train.
y_train = np.concatenate((
    np.zeros(len(mfcc_original_train_permutation)),
    np.ones(len(mfcc_spoof_train_permutation)),
)).astype('int')


# Извлекаем MFCC для оригинальных дикторов. Development Set


In [19]:
# Original-speaker recordings for the development set: speaker ids 35..80
# (model_dictor iterates over range(start_dictor, end_dictor)).
start_dictor, end_dictor = 35, 81

directory = r'F:\Science\Antispoofing\AntispoofingDataset\ASVSpoof2015\human\human\\'
output_directory = r'F:\Science\Notebook\01.10.16. Next_step\FeatureOriginal_Develop\\'

mfcc_original_develop = model_dictor(
    start_id=start_dictor,
    end_id=end_dictor,
    directory_in=directory,
    directory_out=output_directory,
)

# Shuffled copy of the original-speaker samples.
mfcc_original_develop_permutation = np.random.permutation(mfcc_original_develop)

Generating speaker 35, 350 samples
Generating speaker 36, 427 samples
Generating speaker 37, 482 samples
Generating speaker 38, 306 samples
Generating speaker 39, 378 samples
Generating speaker 40, 374 samples
Generating speaker 41, 364 samples
Generating speaker 42, 354 samples
Generating speaker 43, 419 samples
Generating speaker 44, 411 samples
Generating speaker 45, 368 samples
Generating speaker 46, 391 samples
Generating speaker 47, 465 samples
Generating speaker 48, 377 samples
Generating speaker 49, 419 samples
Generating speaker 50, 417 samples
Generating speaker 51, 366 samples
Generating speaker 52, 362 samples
Generating speaker 53, 416 samples
Generating speaker 54, 327 samples
Generating speaker 55, 425 samples
Generating speaker 56, 382 samples
Generating speaker 57, 359 samples
Generating speaker 58, 424 samples
Generating speaker 59, 380 samples
Generating speaker 60, 368 samples
Generating speaker 61, 354 samples
Generating speaker 62, 380 samples
Generating speaker 6

In [21]:
# Write the development-set original-speaker features to a text file in
# the current working directory.
np.savetxt(fname='mfcc_original_develop.txt', X=mfcc_original_develop)

# Извлекаем MFCC для голосовых подделок. Development Set


In [22]:
# Spoofed recordings for the development set: speaker ids 0..45
# (model_dictor iterates over range(start_dictor, end_dictor)).
start_dictor, end_dictor = 0, 46

directory = r'F:\Science\Antispoofing\AntispoofingDataset\ASVSpoof2015\wav\Develop\\'
output_directory = r'F:\Science\Notebook\01.10.16. Next_step\FeatureSpoof_Develop\\'

mfcc_spoof_develop = model_dictor(
    start_id=start_dictor,
    end_id=end_dictor,
    directory_in=directory,
    directory_out=output_directory,
)

# Shuffled copy of the spoofed samples.
mfcc_spoof_develop_permutation = np.random.permutation(mfcc_spoof_develop)

Generating speaker 0, 4590 samples
Generating speaker 1, 4636 samples
Generating speaker 2, 4616 samples
Generating speaker 3, 4501 samples
Generating speaker 4, 4631 samples
Generating speaker 5, 4635 samples
Generating speaker 6, 4637 samples
Generating speaker 7, 4635 samples
Generating speaker 8, 4640 samples
Generating speaker 9, 4634 samples
Generating speaker 10, 4635 samples
Generating speaker 11, 4589 samples
Generating speaker 12, 4672 samples
Generating speaker 13, 4630 samples
Generating speaker 14, 4665 samples
Generating speaker 15, 4692 samples
Generating speaker 16, 4623 samples
Generating speaker 17, 4666 samples
Generating speaker 18, 4530 samples
Generating speaker 19, 4575 samples
Generating speaker 20, 4616 samples
Generating speaker 21, 4608 samples
Generating speaker 22, 4629 samples
Generating speaker 23, 4609 samples
Generating speaker 24, 4637 samples
Generating speaker 25, 4633 samples
Generating speaker 26, 4623 samples
Generating speaker 27, 4610 samples
Ge

In [23]:
# Write the development-set spoof features to a text file in the current
# working directory.
np.savetxt(fname='mfcc_spoof_develop.txt', X=mfcc_spoof_develop)

# Общий вектор с голосовыми подделками и оригинальными. Метки классов. Develop set


In [24]:
# Stack the original samples first, then the spoofed ones.
X_develop = np.concatenate(
    (mfcc_original_develop_permutation, mfcc_spoof_develop_permutation),
    axis=0,
)
# Class labels: 0 for every original sample, 1 for every spoofed sample,
# in the same order as the rows of X_develop.
y_develop = np.concatenate((
    np.zeros(len(mfcc_original_develop_permutation)),
    np.ones(len(mfcc_spoof_develop_permutation)),
)).astype('int')


# Извлекаем MFCC для оригинальных дикторов. Test Set


In [26]:
# Original-speaker recordings for the test set: speaker ids 81..105
# (model_dictor iterates over range(start_dictor, end_dictor)).
start_dictor, end_dictor = 81, 106

directory = r'F:\Science\Antispoofing\AntispoofingDataset\ASVSpoof2015\human\human\\'
output_directory = r'F:\Science\Notebook\01.10.16. Next_step\FeatureOriginal_Test\\'

mfcc_original_test = model_dictor(
    start_id=start_dictor,
    end_id=end_dictor,
    directory_in=directory,
    directory_out=output_directory,
)

# Shuffled copy of the original-speaker samples.
mfcc_original_test_permutation = np.random.permutation(mfcc_original_test)

Generating speaker 81, 383 samples
Generating speaker 82, 379 samples
Generating speaker 83, 382 samples
Generating speaker 84, 379 samples
Generating speaker 85, 357 samples
Generating speaker 86, 380 samples
Generating speaker 87, 380 samples
Generating speaker 88, 379 samples
Generating speaker 89, 380 samples
Generating speaker 90, 380 samples
Generating speaker 91, 375 samples
Generating speaker 92, 382 samples
Generating speaker 93, 382 samples
Generating speaker 94, 366 samples
Generating speaker 95, 348 samples
Generating speaker 96, 355 samples
Generating speaker 97, 379 samples
Generating speaker 98, 379 samples
Generating speaker 99, 380 samples
Generating speaker 100, 379 samples
Generating speaker 101, 379 samples
Generating speaker 102, 375 samples
Generating speaker 103, 257 samples
Generating speaker 104, 380 samples
Generating speaker 105, 290 samples


In [27]:
# Write the test-set original-speaker features to a text file in the
# current working directory.
np.savetxt(fname='mfcc_original_test.txt', X=mfcc_original_test)

# Извлекаем MFCC для голосовых подделок. Test Set

In [28]:
# Spoofed recordings for the test set: speaker ids 0..24
# (model_dictor iterates over range(start_dictor, end_dictor)).
start_dictor, end_dictor = 0, 25

directory = r'F:\Science\Antispoofing\AntispoofingDataset\ASVSpoof2015\wav\Test\\'
output_directory = r'F:\Science\Notebook\01.10.16. Next_step\FeatureSpoof_Test\\'

mfcc_spoof_test = model_dictor(
    start_id=start_dictor,
    end_id=end_dictor,
    directory_in=directory,
    directory_out=output_directory,
)

# Shuffled copy of the spoofed samples.
mfcc_spoof_test_permutation = np.random.permutation(mfcc_spoof_test)

Generating speaker 0, 655 samples
Generating speaker 1, 655 samples
Generating speaker 2, 655 samples
Generating speaker 3, 655 samples
Generating speaker 4, 655 samples
Generating speaker 5, 655 samples
Generating speaker 6, 655 samples
Generating speaker 7, 655 samples
Generating speaker 8, 655 samples
Generating speaker 9, 655 samples
Generating speaker 10, 655 samples
Generating speaker 11, 655 samples
Generating speaker 12, 655 samples
Generating speaker 13, 655 samples
Generating speaker 14, 655 samples
Generating speaker 15, 655 samples
Generating speaker 16, 655 samples
Generating speaker 17, 655 samples
Generating speaker 18, 655 samples
Generating speaker 19, 655 samples
Generating speaker 20, 655 samples
Generating speaker 21, 655 samples
Generating speaker 22, 655 samples
Generating speaker 23, 655 samples
Generating speaker 24, 655 samples


In [29]:
# Write the test-set spoof features to a text file in the current
# working directory.
np.savetxt(fname='mfcc_spoof_test.txt', X=mfcc_spoof_test)

In [30]:
# Stack the original samples first, then the spoofed ones.
X_test = np.concatenate(
    (mfcc_original_test_permutation, mfcc_spoof_test_permutation),
    axis=0,
)
# Class labels: 0 for every original sample, 1 for every spoofed sample,
# in the same order as the rows of X_test.
y_test = np.concatenate((
    np.zeros(len(mfcc_original_test_permutation)),
    np.ones(len(mfcc_spoof_test_permutation)),
)).astype('int')
