In [1]:
#!/usr/bin/python

from tensorflow.contrib.learn.python.learn.datasets import base
import tensorflow as tf
import numpy as np
import os,sys
sys.path.insert(0, './scripts')
import ivector_dataset
import siamese_model
import ivector_tools as it

In [2]:
# Load the i-vector features of the MGB dataset.
#
# Every feature file is a whitespace-separated text table with one utterance
# per row: column 0 is the utterance name and columns 1..IVECTOR_DIM hold the
# i-vector coefficients.
dataDir ='./mgbdata'
languages = ['EGY','GLF','LAV','MSA','NOR']
IVECTOR_DIM = 400  # number of coefficients read from each row


def load_ivector_file(path, dim=IVECTOR_DIM):
    """Read one i-vector text file.

    Parameters
    ----------
    path : str
        File to read.
    dim : int
        Number of coefficient columns following the name column.

    Returns
    -------
    names : 1-D array of str
        Utterance names (column 0).
    ivectors : 2-D float32 array, shape (n_rows, dim)
        Coefficients (columns 1..dim).
    """
    # `dtype=str` instead of the 'string' alias, which Python 3 NumPy rejects.
    names = np.atleast_1d(np.loadtxt(path, usecols=[0], dtype=str))
    # ndmin=2 keeps a single-row file two-dimensional so it can be stacked.
    ivectors = np.loadtxt(path, usecols=range(1, dim + 1), dtype='float32', ndmin=2)
    return names, ivectors


def load_labelled_split(split_dir):
    """Load every language file of one split and label rows 1..len(languages).

    Returns (labels, names, ivectors); labels and ivectors are float64, which
    is what the downstream cells received before.
    """
    names = []
    # The float64 seeds fix the dtype of the concatenated result and make the
    # concatenation valid even for an empty language list.
    ivectors = [np.empty((0, IVECTOR_DIM))]
    labels = [np.empty(0)]
    for i, lang in enumerate(languages):
        name, ivector = load_ivector_file('%s/%s/%s.ivec' % (dataDir, split_dir, lang))
        names.append(name)
        ivectors.append(ivector)
        labels.append(np.full(np.size(name), i + 1, dtype=np.float64))
    all_names = np.concatenate(names) if names else np.array([], dtype=str)
    # One concatenation per array instead of growing with np.append per file.
    return np.concatenate(labels), all_names, np.concatenate(ivectors, axis=0)


# load train.vardial2017 / dev.vardial2017
trn_labels, trn_names, trn_ivectors = load_labelled_split('train.vardial2017')
dev_labels, dev_names, dev_ivectors = load_labelled_split('dev.vardial2017')

# load test.MGB3
tst_name, tst_ivectors = load_ivector_file(dataDir+'/test.MGB3/ivec_features')

# merge trn+dev
trndev_ivectors = np.concatenate((trn_ivectors, dev_ivectors), axis=0)
trndev_labels = np.concatenate((trn_labels, dev_labels))
trndev_name = np.concatenate((trn_names, dev_names))

# load tst.MGB3 labels
filename = dataDir+'/test.MGB3/reference'
tst_ref_name = np.atleast_1d(np.loadtxt(filename,usecols=[0],dtype=str))
tst_ref_label = np.atleast_1d(np.loadtxt(filename,usecols=[1],dtype='int'))

# Map each reference name to its row once (first occurrence wins) rather than
# scanning the whole reference list for every test utterance.
ref_row_by_name = {}
for j, name_ref in enumerate(tst_ref_name):
    ref_row_by_name.setdefault(name_ref, j)

# A test utterance without a reference label would leave tst_labels shorter
# than, and misaligned with, tst_ivectors; fail loudly instead.
missing = [name for name in tst_name if name not in ref_row_by_name]
if missing:
    raise ValueError('%d test utterance(s) have no entry in %s (first: %s)'
                     % (len(missing), filename, missing[0]))

# Row i of tst_labels is the reference label of row i of tst_ivectors.
tst_labels_index = np.array([ref_row_by_name[name] for name in tst_name], dtype=int)
tst_labels = tst_ref_label[tst_labels_index].astype(np.float64)

In [3]:
# Centering, whitening and length normalisation (pass no.1).
# The mean vector and the whitening transform are estimated from the training
# i-vectors only and then applied to every split.
m = np.mean(trn_ivectors, axis=0)
A = np.cov(trn_ivectors.T)
a, D, V = np.linalg.svd(A)
V = V.T
# Scale each singular direction by 1/sqrt(singular value); the tiny constant
# guards against division by zero.
W = V.dot(np.diag(1. / (np.sqrt(D) + 0.0000000001)))


def _center_and_whiten(ivectors):
    """Subtract the training mean `m` and project with `W`."""
    return np.dot(ivectors - m, W)


def _length_norm_rows(ivectors):
    """Apply it.length_norm to the transposed matrix and transpose back."""
    return it.length_norm(ivectors.T).T


# First project all four splits ...
trn_ivectors = _center_and_whiten(trn_ivectors)
trndev_ivectors = _center_and_whiten(trndev_ivectors)
dev_ivectors = _center_and_whiten(dev_ivectors)
tst_ivectors = _center_and_whiten(tst_ivectors)

# ... then length-normalise them.
trn_ivectors = _length_norm_rows(trn_ivectors)
trndev_ivectors = _length_norm_rows(trndev_ivectors)
dev_ivectors = _length_norm_rows(dev_ivectors)
tst_ivectors = _length_norm_rows(tst_ivectors)


# Language modeling: one mean vector per language.
# The mean pools the language's train+dev vectors with its dev vectors
# multiplied by 8, i.e. the dev data is given extra weight in the average.
lang_mean = []
for i, lang in enumerate(languages):
    trndev_rows = trndev_ivectors[trndev_labels == i+1]
    weighted_dev_rows = 8*dev_ivectors[dev_labels == i+1]
    pooled = np.concatenate((trndev_rows, weighted_dev_rows), axis=0)
    lang_mean.append(pooled.mean(axis=0))

lang_mean = _length_norm_rows(np.array(lang_mean))

print( np.shape(trn_ivectors), np.shape(dev_ivectors), np.shape(lang_mean),np.shape(tst_ivectors) )


((13825, 400), (1524, 400), (5, 400), (1492, 400))


In [4]:
# Pair every language mean with every training i-vector.
#
# Example with 3 languages and 3 training i-vectors (one per language):
#   language   i-vector   sim
#      1          1        1
#      1          2        0
#      1          3        0
#      2          1        0
#      2          2        1
#      ...       ...      ...
#      3          3        1
#
# sim is 1 when the i-vector's label equals the language id, else 0.

# Pair indices are built with array operations instead of nested Python loops.
# The order is language-major: all i-vectors for language 1, then language 2, ...
trn_label_arr = np.asarray(trn_labels)
n_trn = np.size(trn_label_arr)
pair_a_idx = np.repeat(np.arange(1, len(languages) + 1), n_trn)  # 1-based language id
pair_b_idx = np.tile(np.arange(n_trn), len(languages))           # row in trn_ivectors
sim = (pair_a_idx == trn_label_arr[pair_b_idx]).astype(int)
print(np.shape(pair_a_idx),np.shape(pair_b_idx), np.shape(sim))

# Shuffle the three arrays with one shared permutation so pairs stay aligned.
# No random seed is set in this cell, so the order differs between runs.
shuffleidx = np.arange(0,np.size(pair_a_idx))
np.random.shuffle(shuffleidx)
pair_a_idx = pair_a_idx[shuffleidx]
pair_b_idx = pair_b_idx[shuffleidx]
sim = sim[shuffleidx]

# Gather both sides of each pair by fancy indexing. This replaces the
# per-pair append loop, whose loop variable shadowed the builtin `iter`.
data = lang_mean[pair_a_idx - 1]
data_p = trn_ivectors[pair_b_idx]

# TRN dataset loading for feeding: split into target (sim == 1) and
# non-target (sim == 0) pairs; row k of *_data pairs with row k of *_data_p.
is_target = sim == 1
is_nontarget = sim == 0
tar_data = data[is_target]
tar_data_p = data_p[is_target]
tar_sim = sim[is_target]
non_data = data[is_nontarget]
non_data_p = data_p[is_nontarget]
non_sim = sim[is_nontarget]
print(tar_data.shape, tar_data_p.shape,tar_sim.shape,non_data.shape,non_data_p.shape,non_sim.shape)

trn_tar = ivector_dataset.DataSet(tar_data,tar_sim)
trn_tar_p = ivector_dataset.DataSet(tar_data_p,tar_sim)

trn_non = ivector_dataset.DataSet(non_data,non_sim)
trn_non_p = ivector_dataset.DataSet(non_data_p,non_sim)


((69125,), (69125,), (69125,))
((13825, 400), (13825, 400), (13825,), (55300, 400), (55300, 400), (55300,))


In [5]:
# Pair every language mean with every DEV i-vector.
# sim is 1 when the i-vector's label equals the language id, else 0.

# Pair indices are built with array operations instead of nested Python loops.
# The order is language-major: all i-vectors for language 1, then language 2, ...
dev_label_arr = np.asarray(dev_labels)
n_dev = np.size(dev_label_arr)
pair_a_idx = np.repeat(np.arange(1, len(languages) + 1), n_dev)  # 1-based language id
pair_b_idx = np.tile(np.arange(n_dev), len(languages))           # row in dev_ivectors
sim = (pair_a_idx == dev_label_arr[pair_b_idx]).astype(int)
print(np.shape(pair_a_idx),np.shape(pair_b_idx), np.shape(sim))

# Shuffle the three arrays with one shared permutation so pairs stay aligned.
# No random seed is set in this cell, so the order differs between runs.
shuffleidx = np.arange(0,np.size(pair_a_idx))
np.random.shuffle(shuffleidx)
pair_a_idx = pair_a_idx[shuffleidx]
pair_b_idx = pair_b_idx[shuffleidx]
sim = sim[shuffleidx]

# Gather both sides of each pair by fancy indexing. This replaces the
# per-pair append loop, whose loop variable shadowed the builtin `iter`.
data = lang_mean[pair_a_idx - 1]
data_p = dev_ivectors[pair_b_idx]

# DEV dataset loading for feeding: split into target (sim == 1) and
# non-target (sim == 0) pairs; row k of *_data pairs with row k of *_data_p.
is_target = sim == 1
is_nontarget = sim == 0
tar_data = data[is_target]
tar_data_p = data_p[is_target]
tar_sim = sim[is_target]
non_data = data[is_nontarget]
non_data_p = data_p[is_nontarget]
non_sim = sim[is_nontarget]
print(tar_data.shape, tar_data_p.shape,tar_sim.shape,non_data.shape,non_data_p.shape,non_sim.shape)

dev_tar = ivector_dataset.DataSet(tar_data,tar_sim)
dev_tar_p = ivector_dataset.DataSet(tar_data_p,tar_sim)

dev_non = ivector_dataset.DataSet(non_data,non_sim)
dev_non_p = ivector_dataset.DataSet(non_data_p,non_sim)


((7620,), (7620,), (7620,))
((1524, 400), (1524, 400), (1524,), (6096, 400), (6096, 400), (6096,))


In [None]:
# Create the session, the siamese network and the training op.
sess = tf.InteractiveSession()
siamese = siamese_model.siamese()

# Step counter, incremented by the optimizer on every update (not trainable).
global_step = tf.Variable(0, trainable=False)

# Learning rate starts at 0.005 and is multiplied by 0.99 every 5000 steps;
# staircase=True applies the decay in discrete jumps.
learning_rate = tf.train.exponential_decay(
    learning_rate=0.005,
    global_step=global_step,
    decay_steps=5000,
    decay_rate=0.99,
    staircase=True)

# Plain SGD on the siamese loss.
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_step = optimizer.minimize(siamese.loss, global_step=global_step)

# The saver is created after all variables exist, then everything is initialised.
saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())

(?, 16000)
(?, 1500)
(?, 600)
(?, 16000)
(?, 1500)
(?, 600)


In [None]:
#start training
batch_size = 50
max_acc = 0.40   # a checkpoint is written only once test accuracy exceeds this
max_step=0       # step of the best checkpoint written so far
saver_folder='snnmodel_ivector'
if not os.path.exists(saver_folder):
    os.mkdir(saver_folder)


def _next_pair_batch(tar, tar_p, non, non_p, size):
    """Draw `size` target pairs followed by `size` non-target pairs.

    `tar`/`tar_p` and `non`/`non_p` are the two sides of the target and
    non-target pair datasets. They are read with shuffle=False and in a fixed
    order so both sides of each pair advance in lockstep.

    Returns (x1, x2, y): both pair sides and the 0/1 similarity labels.
    """
    x1_tar, y_tar = tar.next_batch(size, shuffle=False)
    x2_tar, _unused = tar_p.next_batch(size, shuffle=False)
    x1_non, y_non = non.next_batch(size, shuffle=False)
    x2_non, _unused = non_p.next_batch(size, shuffle=False)
    return (np.append(x1_tar, x1_non, axis=0),
            np.append(x2_tar, x2_non, axis=0),
            np.append(y_tar, y_non, axis=0))


for step in range(100000):

    # One step in five trains on TRN pairs, the other four on DEV pairs.
    if step %5 ==0:
        batch_x1, batch_x2, batch_y1 = _next_pair_batch(
            trn_tar, trn_tar_p, trn_non, trn_non_p, batch_size)
    else:
        batch_x1, batch_x2, batch_y1 = _next_pair_batch(
            dev_tar, dev_tar_p, dev_non, dev_non_p, batch_size)

    # Map similarity labels {0, 1} to {-1, +1} before feeding them.
    batch_y = batch_y1*2-1

    _, loss_v = sess.run([train_step, siamese.loss], feed_dict={
        siamese.x1: batch_x1,
        siamese.x2: batch_x2,
        siamese.y_: batch_y
    })

    if np.isnan(loss_v):
        print ('Model diverged with loss = NAN')
        # Raise instead of quit(): quit() shuts the notebook kernel down and
        # discards the session, whereas an exception only stops this cell.
        raise RuntimeError('Model diverged with loss = NAN at step %d' % step)

    # Every 10 steps: embed dev/test i-vectors and language means, then score
    # each utterance against each language by dot product.
    if step % 10 ==0:
        dev_ivectors_siam = siamese.o1.eval({siamese.x1:dev_ivectors})
        lang_mean_siam = siamese.o1.eval({siamese.x1:lang_mean})
        tst_ivectors_siam = siamese.o1.eval({siamese.x1:tst_ivectors})

        dev_scores = lang_mean_siam.dot(dev_ivectors_siam.transpose() )
        hypo_lang = np.argmax(dev_scores,axis = 0)
        # Labels are 1-based and argmax is 0-based; a non-zero difference is an error.
        temp = ((dev_labels-1) - hypo_lang)
        acc =1- np.count_nonzero(temp) / float( np.size(dev_labels) )

        tst_scores = lang_mean_siam.dot(tst_ivectors_siam.transpose() )
        hypo_lang = np.argmax(tst_scores,axis = 0)
        temp = ((tst_labels-1) - hypo_lang)
        acc_tst =1- np.count_nonzero(temp) / float(np.size(tst_labels))

        # NOTE(review): the checkpoint is selected on *test* accuracy, so the
        # test figure reported afterwards is optimistic. Confirm this is intended.
        if max_acc < acc_tst:
            max_acc = acc_tst
            max_step=step
            print ('Step %d: loss %.3f, Acc.: (DEV)%.3f (TST)%.3f, lr : %.5f' % (step,loss_v,acc,acc_tst,sess.run(learning_rate)))
            saver.save(sess, saver_folder+'/model'+str(step)+'.ckpt')
        # Early stop; only checked on evaluation steps.
        if loss_v<0.3:
            break


Step 6360: loss 0.958, Acc.: (DEV)0.438 (TST)0.408, lr : 0.00495
Step 6440: loss 0.989, Acc.: (DEV)0.443 (TST)0.411, lr : 0.00495
Step 6520: loss 0.965, Acc.: (DEV)0.446 (TST)0.421, lr : 0.00495
Step 6620: loss 0.934, Acc.: (DEV)0.449 (TST)0.424, lr : 0.00495
Step 6660: loss 0.931, Acc.: (DEV)0.432 (TST)0.426, lr : 0.00495
Step 6740: loss 0.926, Acc.: (DEV)0.458 (TST)0.432, lr : 0.00495
Step 6780: loss 0.964, Acc.: (DEV)0.500 (TST)0.438, lr : 0.00495
Step 6860: loss 0.932, Acc.: (DEV)0.480 (TST)0.440, lr : 0.00495
Step 6870: loss 0.946, Acc.: (DEV)0.495 (TST)0.443, lr : 0.00495
Step 6910: loss 0.955, Acc.: (DEV)0.498 (TST)0.448, lr : 0.00495
Step 7000: loss 0.922, Acc.: (DEV)0.503 (TST)0.448, lr : 0.00495
Step 7010: loss 0.980, Acc.: (DEV)0.510 (TST)0.449, lr : 0.00495
Step 7020: loss 1.008, Acc.: (DEV)0.488 (TST)0.456, lr : 0.00495
Step 7040: loss 0.928, Acc.: (DEV)0.516 (TST)0.458, lr : 0.00495
Step 7070: loss 0.923, Acc.: (DEV)0.518 (TST)0.459, lr : 0.00495
Step 7090: loss 0.944, Ac

In [None]:
# Restore the checkpoint written at the best step of the training loop and
# report the accuracy on the test set.
# print(...) with a single argument produces the same output on Python 2 and 3
# and matches the print calls used in the other cells.
print(max_step)
RESTORE_STEP=max_step
saver.restore(sess, saver_folder+'/model'+str(RESTORE_STEP)+'.ckpt')


# Embed every split and the language means with the restored network.
trn_ivectors_siam = siamese.o1.eval({siamese.x1:trn_ivectors})
dev_ivectors_siam = siamese.o1.eval({siamese.x1:dev_ivectors})
tst_ivectors_siam = siamese.o1.eval({siamese.x1:tst_ivectors})
lang_mean_siam = siamese.o1.eval({siamese.x1:lang_mean})

# Score each test utterance against each language mean by dot product;
# the hypothesis is the best-scoring language (0-based index).
tst_scores = lang_mean_siam.dot(tst_ivectors_siam.transpose() )
hypo_lang = np.argmax(tst_scores,axis = 0)
# Labels are 1-based; a non-zero difference marks a misclassified utterance.
temp = ((tst_labels-1) - hypo_lang)
acc =1- np.size(np.nonzero(temp)) / float(np.size(tst_labels))
print('Final accurary on test dataset : %0.3f' %(acc))

In [None]:
# Confusion matrix on the test set: rows are hypothesised languages and
# columns are reference languages, both in the order of `languages`.
n_lang = len(languages)
confusionmat = np.zeros((n_lang, n_lang))
for i,lang in enumerate(languages):
    hypo_bylang = hypo_lang[ tst_labels == i+1]
    # Count hypotheses per class index 0..n_lang-1. np.histogram(x, 5) would
    # place its 5 bins between min(x) and max(x), putting counts in the wrong
    # rows whenever a language's hypotheses do not cover every class.
    confusionmat[:,i] = np.bincount(hypo_bylang, minlength=n_lang)

# Row sums = utterances hypothesised as each language; column sums =
# utterances that truly belong to it. A class that is never hypothesised
# yields a 0/0 (nan) precision.
precision = np.diag(confusionmat) / np.sum(confusionmat,axis=1) #precision
recall = np.diag(confusionmat) / np.sum(confusionmat,axis=0) # recall

# Single-argument print(...) gives the same output on Python 2 and 3.
print('Confusion matrix')
print(confusionmat)
print('Precision')
print(precision)
print('Recall')
print(recall)

print('\n\n<Performance evaluation on Test dataset>')
print('Accurary  : %0.3f' %(acc))
print('Precision : %0.3f' %(np.mean(precision)))
print('Recall    : %0.3f' %(np.mean(recall)))