In [14]:
import numpy as np
from sklearn.mixture import GMM
from sklearn.cross_validation import StratifiedKFold
import itertools
from scipy import linalg

In [2]:
mfcc_original_train = np.loadtxt('mfcc_original_train.txt')

mfcc_spoof_train = np.loadtxt('mfcc_spoof_train.txt')

mfcc_original_develop = np.loadtxt('mfcc_original_dev.txt')

mfcc_spoof_develop = np.loadtxt('mfcc_spoof_dev.txt')

mfcc_original_test = np.loadtxt('mfcc_original_eva.txt')

mfcc_spoof_test = np.loadtxt('mfcc_spoof_eva.txt')

In [15]:
mfcc_original_train = np.loadtxt('N:\Science\Antispoofing Research (ipython notebook)\gmm t-sne\mfcc_all\mfcc_train_human.txt')

mfcc_spoof_train = np.loadtxt('N:\Science\Antispoofing Research (ipython notebook)\gmm t-sne\mfcc_all\mfcc_spoof_train.txt')

mfcc_original_develop = np.loadtxt('N:\Science\Antispoofing Research (ipython notebook)\gmm t-sne\mfcc_all\mfcc_dev_human.txt')

mfcc_spoof_develop = np.loadtxt('N:\Science\Antispoofing Research (ipython notebook)\gmm t-sne\mfcc_all\mfcc_spoof_dev.txt')

mfcc_original_test = np.loadtxt('N:\Science\Antispoofing Research (ipython notebook)\gmm t-sne\mfcc_all\mfcc_eva_human.txt')

mfcc_spoof_test = np.loadtxt('N:\Science\Antispoofing Research (ipython notebook)\gmm t-sne\mfcc_all\mfcc_spoof_eva.txt')

# Train set

In [16]:
X_train = np.concatenate([mfcc_original_train, mfcc_spoof_train]) 
y_train = np.concatenate([np.zeros((mfcc_original_train.shape[0])), np.ones((mfcc_spoof_train.shape[0]))]).astype('int')

# Development set 

In [17]:
X_develop = np.concatenate([mfcc_original_develop, mfcc_spoof_develop]) 
y_develop = np.concatenate([np.zeros((mfcc_original_develop.shape[0])), np.ones((mfcc_spoof_develop.shape[0]))]).astype('int')


# Test set

In [18]:
X_test = np.concatenate([mfcc_original_test, mfcc_spoof_test]) 
y_test = np.concatenate([np.zeros((mfcc_original_test.shape[0])), np.ones((mfcc_spoof_test.shape[0]))]).astype('int')


In [6]:
def computeUBM(ubm_model, data):
    ###########################################
    # ubm_model - gmm-represent distribution of our model
    # data - samples, which will correct ubm-model
    ###############################################

    xdim = data.shape[1]
    M = ubm_model.n_components

    ###############################################################
    #    ubm_means: means of the ubm <number array>               #
    #    ubm_covars: covariances of the ubm <number array>        #
    #    ubm_weights: weights of the ubm <number array>           #
    #    new_means: means adapted from the ubm <number array>     #
    #    new_weights: weights adapted from the ubm <number array> #
    ###############################################################

    # Copy parameters GMM-model
    ubm_weights = ubm_model.weights_
    ubm_means = ubm_model.means_
    ubm_covars = ubm_model.covars_

    ###################################################################
    # for X = {x_1, ..., x_T}                                         #
    # P(i|x_t) = w_i * p_i(x_t) / sum_j=1_M(w_j * P_j(x_t))           #
    ###################################################################

    posterior_prob = ubm_model.predict_proba(data)
    pr_i_xt = (ubm_weights * posterior_prob) / np.asmatrix(np.sum(ubm_weights \
                                                                  * posterior_prob, axis=1)).T

    n_i = np.asarray(np.sum(pr_i_xt, axis=0)).flatten()  # [M, ]

    # Then we can compute E(x) and E(x2) and calculate new parameters of
    # our model

    E_x = np.asarray([(np.asarray(pr_i_xt[:, i]) * data).sum(axis=0) / n_i[i] if not n_i[i] == 0. else np.zeros(xdim) for i in range(M)])  # [M x xdim]
    E_x2 = np.asarray([(np.asarray(pr_i_xt[:, i]) * (data ** 2)).sum(axis=0) / n_i[i]  if not n_i[i] == 0. else np.zeros(xdim)for i in range(M)])  # [M x xdim]

    ################################################################
    #    T: scaling factor, number of samples                      #
    #    relevance_factor: factor for scaling the adapted means    #
    #    scaleparam - scale parameter for weights matrix estimation#
    ################################################################

    T = data.shape[0]
    relevance_factor = 16
    scaleparam = 1

    ################################################################
    # compute alpha_i: data-depentend apaptation coefficient       #
    # alpha_w = alpha_m = alpha_v                                  #
    # alpha_i = n_i/ (n_i + relevance factor)                      #
    ################################################################

    alpha_i = n_i / (n_i + relevance_factor)

    ###############################
    # Parqameter`s adaptation
    ##############################
    new_weights = (alpha_i * n_i / T + (1.0 - alpha_i) * ubm_weights) * scaleparam

    alpha_i = np.asarray(np.asmatrix(alpha_i).T)
    new_means = (alpha_i * E_x + (1. - alpha_i) * ubm_means)
    new_covars = alpha_i * E_x2 + (1. - alpha_i) * (ubm_covars + (ubm_means ** 2)) - (new_means ** 2)

    #############################################
    # if we want compute `full` covariance matrix - comment code here
    # new_covars = np.zeros([M, xdim, xdim])
    # for j in range(M):
    #    new_covars[j] = alpha_i[j]*E_x2[j] +(1. - alpha_i[j]).flatten()*(ubm_covars[j] + (new_means[j]**2))- (ubm_means[j]**2)
    #    new_covars[i] = np.where(new_covars[i] < MIN_VARIANCE, MIN_VARIANCE, new_covars[i])
    ####################################################################
    #   `covars_` : array
    #    Covariance parameters for each mixture component.  The shape
    #    depends on `covariance_type`::
    #        (n_components, n_features)             if 'spherical',
    #        (n_features, n_features)               if 'tied',
    #        (n_components, n_features)             if 'diag',
    #        (n_components, n_features, n_features) if 'full'
    #####################################################################

    ubm_model.means_ = new_means
    ubm_model.weights_ = new_weights
    ubm_model.covars_ = new_covars

    return ubm_model

# Experiment

In [19]:
n_g = 1024
g1 =  GMM(n_components = n_g, covariance_type='diag',init_params='wmc', n_iter=20)
g1.fit(mfcc_original_train)

g2 =  GMM(n_components = n_g, covariance_type='diag',init_params='wmc', n_iter=20)
g2.fit(mfcc_spoof_train)

prediction  = np.array(np.log(mfcc_original_train.shape[0])+ g1.score(X_develop)  < np.log(mfcc_spoof_train.shape[0])+g2.score(X_develop)).astype('int')

accuracy = np.mean(prediction == y_develop) * 100
print 'accuracy - prediction on development set', accuracy


accuracy - prediction on development set 77.3064528217


In [20]:
prediction  = np.array(np.log(mfcc_original_train.shape[0])+ g1.score(X_test)  < np.log(mfcc_spoof_train.shape[0])+g2.score(X_test)).astype('int')

accuracy = np.mean(prediction == y_test) * 100
print 'accuracy - prediction on evaluation set', accuracy

accuracy - prediction on evaluation set 73.2154693607


In [21]:
g1_adapt = computeUBM(g1, mfcc_original_develop)
g2_adapt = computeUBM(g2, mfcc_spoof_develop)

In [22]:
prediction  = np.array(np.log(mfcc_original_train.shape[0])+ g1_adapt.score(X_test)  < np.log(mfcc_spoof_train.shape[0])+g2_adapt.score(X_test)).astype('int')
accuracy = np.mean(prediction == y_test) * 100
print 'accuracy - prediction on evaluation set. adapted on development', accuracy


accuracy - prediction on evaluation set. adapted on development 83.0072455487


In [13]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(prediction, y_test)
print cm

[[  5311  18163]
 [  4093 165837]]


In [11]:
acc = []
for i in range(1,10):
    g1_adapt = computeUBM(g1_adapt, mfcc_original_develop)
    g2_adapt = computeUBM(g2_adapt, mfcc_spoof_develop)
    
    prediction  = np.array(np.log(mfcc_original_train.shape[0])+ g1_adapt.score(X_test)  < np.log(mfcc_spoof_train.shape[0])+g2_adapt.score(X_test)).astype('int')
    accuracy = np.mean(prediction == y_test) * 100
    print accuracy
    acc.append(accuracy)
    
print np.max(acc)

85.5835453248
84.4046658807
84.2438625882
84.0804740336
84.0220471138
84.0065355422
84.3788132614
84.8426092532
85.1073400757
85.5835453248


# experiment 

In [13]:
def devideData(X,y):
    kf = StratifiedKFold( y, n_folds=5 )
    first_indices, second_indices = next(iter(kf))
    X_first, y_first = X[first_indices], y[first_indices]
    X_second, y_second = X[second_indices], y[second_indices]
    return X_first, y_first, X_second, y_second

In [17]:
model = GMM(n_components = 200, covariance_type='diag',init_params='wmc', n_iter=20)
model.fit(X_train, y_train)

GMM(covariance_type='diag', init_params='wmc', min_covar=0.001,
  n_components=200, n_init=1, n_iter=20, params='wmc', random_state=None,
  thresh=None, tol=0.001, verbose=0)

In [18]:
adapt_original = computeUBM(model, mfcc_original_develop)
adapt_spoof = computeUBM(model, mfcc_spoof_develop)

In [25]:
prediction  = np.array(adapt_original.score(X_test)  < adapt_spoof.score(X_test)).astype('int')

accuracy = np.mean(prediction == y_test) * 100
print 'accuracy - prediction on evaluation set', accuracy

accuracy - prediction on evaluation set 4.86236065438
