In [1]:
from features import mfcc
import scipy.io.wavfile as wav
import numpy as np
import time 
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
%matplotlib inline

from __future__ import division
from sklearn.cross_validation import StratifiedKFold
from sklearn.externals.six.moves import xrange
from sklearn.mixture import GMM
from sklearn.metrics import accuracy_score, classification_report
from scipy import interp
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import confusion_matrix

import os

In [2]:
def _load_and_shuffle(path):
    """Read a whitespace-delimited text matrix and return it with a row-shuffled copy.

    Returns a ``(loaded, shuffled)`` pair; ``shuffled`` is produced by
    ``np.random.permutation`` and therefore depends on NumPy's global RNG state.
    """
    loaded = np.loadtxt(path)
    return loaded, np.random.permutation(loaded)


# Each line yields the matrix as stored on disk plus a shuffled copy.
# The files are processed in a fixed order (train, develop, test; original
# before spoof) so the sequence of draws from the global RNG stays the same.
mfcc_original_train, mfcc_original_train_permutation = _load_and_shuffle('mfcc_original_train.txt')
mfcc_spoof_train, mfcc_spoof_train_permutation = _load_and_shuffle('mfcc_spoof_train.txt')

mfcc_original_develop, mfcc_original_develop_permutation = _load_and_shuffle('mfcc_original_develop.txt')
mfcc_spoof_develop, mfcc_spoof_develop_permutation = _load_and_shuffle('mfcc_spoof_develop.txt')

mfcc_original_test, mfcc_original_test_permutation = _load_and_shuffle('mfcc_original_test.txt')
mfcc_spoof_test, mfcc_spoof_test_permutation = _load_and_shuffle('mfcc_spoof_test.txt')

# Train set

In [3]:
# Training matrix: original rows first, spoofed rows after them.
X_train = np.concatenate(
    (mfcc_original_train_permutation, mfcc_spoof_train_permutation), axis=0
)
# Matching labels: 0 for every original row, 1 for every spoofed row.
y_train = np.hstack((
    np.zeros(mfcc_original_train_permutation.shape[0]),
    np.ones(mfcc_spoof_train_permutation.shape[0]),
)).astype('int')

# Development set 

In [4]:
# Development matrix: original rows first, spoofed rows after them.
X_develop = np.concatenate(
    (mfcc_original_develop_permutation, mfcc_spoof_develop_permutation), axis=0
)
# Matching labels: 0 for every original row, 1 for every spoofed row.
y_develop = np.hstack((
    np.zeros(mfcc_original_develop_permutation.shape[0]),
    np.ones(mfcc_spoof_develop_permutation.shape[0]),
)).astype('int')


# Test set

In [5]:
# Test matrix: original rows first, spoofed rows after them.
X_test = np.concatenate(
    (mfcc_original_test_permutation, mfcc_spoof_test_permutation), axis=0
)
# Matching labels: 0 for every original row, 1 for every spoofed row.
y_test = np.hstack((
    np.zeros(mfcc_original_test_permutation.shape[0]),
    np.ones(mfcc_spoof_test_permutation.shape[0]),
)).astype('int')


# Learn

In [None]:
# Sweep the number of Gaussian components for every covariance type.
# For each setting, one GMM is fitted on original speech and one on spoofed
# speech; test rows are labelled by whichever model scores them higher.
# Accuracy, confusion matrix and timings are appended to per-covariance-type
# log files, and an accuracy-vs-components curve is plotted per covariance type.

n_g = 1024  # exclusive upper bound of the component sweep (1, 3, 5, ..., n_g - 1)

# Directory receiving the log files.
# NOTE(review): absolute, machine-specific path kept from the original run;
# consider making it configurable.
log_dir = r'F:\Science\Notebook\01.10.16. Next_step\Experiment'

# X_train / X_develop hold the original rows first and the spoofed rows after
# them, so the row count of the original part is the split point of each set.
# These do not change inside the loops, so they are computed once.
original_train_shape = mfcc_original_train_permutation.shape[0]
original_develop_shape = mfcc_original_develop_permutation.shape[0]

for covar_type in ['spherical', 'diag', 'tied', 'full']:
    number_gaussian = []
    acc = []
    confusion_matrixLog = []

    tic = time.time()  # start of the timing for this covariance type

    for i in range(1, n_g, 2):
        timeGausStart = time.time()

        number_gaussian.append(i)  # number of Gaussian components tried

        # The models are built with `i` components so that the sweep, the
        # logs and the plot all describe the model that was actually trained.
        # fit() is called without labels: a mixture model is unsupervised.

        # Model of original speakers, fitted on the training set.
        g1 = GMM(n_components=i, covariance_type=covar_type, init_params='wc', n_iter=20)
        g1.fit(X_train[:original_train_shape])

        # Model of spoofed speech, fitted on the training set.
        g2 = GMM(n_components=i, covariance_type=covar_type, init_params='wc', n_iter=20)
        g2.fit(X_train[original_train_shape:])

        # Further training on the development set, split at the development
        # set's own original/spoof boundary.
        # NOTE(review): presumably relies on init_params='wc' keeping the means
        # learned above when fit() is called again -- confirm against the
        # installed scikit-learn version.
        g1.fit(X_develop[:original_develop_shape])
        g2.fit(X_develop[original_develop_shape:])

        # A row is labelled spoof (1) when the spoof model scores it higher.
        prediction = np.array(g1.score(X_test) < g2.score(X_test)).astype('int')

        accuracy = np.mean(prediction == y_test) * 100
        acc.append(accuracy)

        cm = confusion_matrix(y_test, prediction)
        confusion_matrixLog.append(cm)

        # ---- Confusion-matrix log ----
        # NOTE(review): the label written below says 'Covar_matrix' although
        # the value is the confusion matrix; text kept to preserve log format.
        with open(log_dir + '\\ConfusionLog-' + covar_type + '.txt', 'a') as c:
            c.write('\n--------------------\n')
            text = ('Covar_type: ' + covar_type + ', number gaussian = ', i)
            c.write(str(text))
            c.write('\nCovar_matrix: \n')
            c.write(" ".join(map(str, cm)))

        # ---- Accuracy log ----
        # NOTE(review): the value is computed on X_test / y_test even though
        # the label says 'Train accuracy'; text kept to preserve log format.
        with open(log_dir + '\\AccuracyLog-' + covar_type + '.txt', 'a') as al:
            al.write('\n--------------------\n')
            text = ('Covar_type: ', covar_type, ', number gaussian', i)
            al.write(str(text))
            text2 = (' Train accuracy: %.1f' % accuracy)
            al.write(str(text2))

        timeGausEnd = time.time()
        timeGausWork = timeGausEnd - timeGausStart

        # ---- Per-component-count timing log ----
        with open(log_dir + '\\LogCoreTime-' + covar_type + '.txt', 'a') as t:
            text = ('Covar_type: ', covar_type, ', number gaussian', i, 'time = ', timeGausWork)
            t.write('\n--------------------\n')
            t.write(str(text))

    toc = time.time()  # end of the timing for this covariance type
    time_covartype = toc - tic

    # ---- Per-covariance-type timing log ----
    # Opened with a context manager so the handle is closed (and flushed)
    # before the next covariance type starts.
    with open(log_dir + '\\TimeLog-' + covar_type + '.txt', 'a') as f:
        text = ('Covar_type: ', covar_type, ', time = ', time_covartype, '\n')
        f.write('\n--------------------\n')
        f.write(str(text))

    # Accuracy as a function of the number of components for this covariance type.
    plt.figure()
    plt.title('Covar_type: ' + covar_type)
    tr_a = plt.plot(number_gaussian, acc, color='b')

    plt.xlabel('Gaussian Core')
    plt.ylabel('Accuracy')
    plt.grid(True)
    plt.show()