Comparison of GMM performances
Specifically, how GMMs adapted from UBMs perform compared to unadapted GMMs

In [1]:
# general imports
import os
import pickle
import numpy as np
import copy

In [2]:
# import scikit-learn GMM library
from sklearn import mixture

In [3]:
# import custom functions
import sys

# The helper library lives outside the notebook directory (../scripts-lib/),
# so its folder has to be on sys.path before the module can be imported.
tool_path = os.path.abspath('../scripts-lib')

# the membership test keeps sys.path free of duplicates when the cell is re-run
if tool_path not in sys.path:
    sys.path.append(tool_path)
import lib_phones as lph

# print the loaded functions: keep only names without a leading underscore.
# Slicing off a fixed number of entries (dir(lph)[5:]) silently breaks when the
# interpreter adds or removes module dunder attributes.
print([name for name in dir(lph) if not name.startswith('_')])

['find_phone_index', 'load_phone_file']


In [4]:
# Read the TIMIT phone inventory from disk, then show how many phones it holds
# followed by the phones themselves.
phone_path = os.path.abspath(
    os.path.join('..', 'datasets', 'TIMIT-MFCCs', 'TIMIT_phone_list.txt'))
phone_list = lph.load_phone_file(phone_path)
print("%d %s" % (len(phone_list), phone_list))

61 ['aa', 'ae', 'ah', 'ao', 'aw', 'ax', 'ax-h', 'axr', 'ay', 'b', 'bcl', 'ch', 'd', 'dcl', 'dh', 'dx', 'eh', 'el', 'em', 'en', 'eng', 'epi', 'er', 'ey', 'f', 'g', 'gcl', 'h#', 'hh', 'hv', 'ih', 'ix', 'iy', 'jh', 'k', 'kcl', 'l', 'm', 'n', 'ng', 'nx', 'ow', 'oy', 'p', 'pau', 'pcl', 'q', 'r', 's', 'sh', 't', 'tcl', 'th', 'uh', 'uw', 'ux', 'v', 'w', 'y', 'z', 'zh']


In [5]:
# load adapted GMMs pickled file

pickle_dir = os.path.abspath('../datasets/TIMIT Pickled Data')

pickle_name = 'TIMIT_gmm_adapted_dict.pckl'
agmm_path = os.path.join(pickle_dir, pickle_name)

# NOTE: pickle.load can execute arbitrary code, so only load files produced by
# this project. The with-block closes the file handle as soon as the object
# has been read instead of leaving it open for the life of the kernel.
with open(agmm_path, "rb") as pickle_file:
    agmm_dict = pickle.load(pickle_file)
print("loaded adapted gmm from  " + agmm_path)

loaded adapted gmm from  C:\Users\FG\Desktop\PhD\Research\Reservoirs\datasets\TIMIT Pickled Data\TIMIT_gmm_adapted_dict.pckl


In [6]:
# load un-adapted GMMs pickled file (same directory as the adapted models)

pickle_name = 'TIMIT_gmm_unadapted_dict.pckl'
ugmm_path = os.path.join(pickle_dir, pickle_name)

# NOTE: pickle.load can execute arbitrary code, so only load files produced by
# this project. The with-block closes the file handle as soon as the object
# has been read instead of leaving it open for the life of the kernel.
with open(ugmm_path, "rb") as pickle_file:
    ugmm_dict = pickle.load(pickle_file)
print("loaded unadapted gmm from  " + ugmm_path)

loaded unadapted gmm from  C:\Users\FG\Desktop\PhD\Research\Reservoirs\datasets\TIMIT Pickled Data\TIMIT_gmm_unadapted_dict.pckl


In [7]:
#load test mfccs into sklearn observations, each frame is an obs
#
# Results of this cell:
#   test_phone_obs_dict : phone label -> list of frames labelled with that phone
#   test_obs            : every frame, in the order it was read
#   test_obs_labels     : phone label of each frame in test_obs (same order)

test_TIMIT_dir = os.path.abspath('../datasets/TIMIT-MFCCs/test')

# MFCC files are named <utterance>.mfcc.csv; the matching phone transcription
# is expected next to it as <utterance>.phn
mfcc_suffix = ".mfcc.csv"

#create individual obs list for each phone type
test_phone_obs_dict = {}
for phone in phone_list:
    test_phone_obs_dict[phone] = []

# complete obs
test_obs = []
test_obs_labels = []

# walk the directories
for (path, dirs, files) in os.walk(test_TIMIT_dir):
    print("working in path : " + path)

    for fname in files:
        # only MFCC files, and skip the SA files
        if not fname.endswith(mfcc_suffix) or "sa" in fname:
            continue

        mfcc_path = os.path.join(path, fname)
        phn_path = os.path.join(path, fname[:-len(mfcc_suffix)] + ".phn")

        #check if corresponding .phn file exists
        if not os.path.exists(phn_path):
            print(phn_path)
            print("corresponding .phn file does not exist!")
            continue

        print("working on: " + fname)

        # extract phone times; blank lines are dropped because an empty row
        # would make the zip-transpose below collapse to nothing
        with open(phn_path) as phn_file:
            phone_times = [phn_line.split() for phn_line in phn_file
                           if phn_line.strip()]
        # transpose (one list per column) for easier use
        phone_times = [list(column) for column in zip(*phone_times)]

        # reset frame count
        frame_cnt = 0

        with open(mfcc_path) as mfcc_file:
            # skip mfcc_file header (tolerate a completely empty file)
            next(mfcc_file, None)

            # for each line of mfcc_file
            for mfcc_line in mfcc_file:
                # ignore blank lines, e.g. a trailing newline at end of file
                if not mfcc_line.strip():
                    continue

                # increment frame count
                frame_cnt += 1

                # split once: fields[0] is the frame index, fields[1] the
                # frame time, the remaining fields are the frame values
                fields = mfcc_line.split(";")

                # frame start time, passed on as the raw string from the file
                start_t = fields[1]

                # create frame (skipping first 2 values, frame_index and frame_time)
                frame = [float(value) for value in fields[2:]]

                # find corresponding phoneme and index in the list
                phn_index = lph.find_phone_index(start_t, phone_times, phone_list)
                phone_label = phone_list[phn_index]

                # add to instances for corresponding phone
                test_phone_obs_dict[phone_label].append(frame)
                # add to instances
                test_obs.append(frame)
                test_obs_labels.append(phone_label)

working in path : C:\Users\FG\Desktop\PhD\Research\Reservoirs\datasets\TIMIT-MFCCs\test
working in path : C:\Users\FG\Desktop\PhD\Research\Reservoirs\datasets\TIMIT-MFCCs\test\dr1
working in path : C:\Users\FG\Desktop\PhD\Research\Reservoirs\datasets\TIMIT-MFCCs\test\dr1\faks0
working on: si1573.mfcc.csv
working on: si2203.mfcc.csv
working on: si943.mfcc.csv
working on: sx133.mfcc.csv
working on: sx223.mfcc.csv
working on: sx313.mfcc.csv
working on: sx403.mfcc.csv
working on: sx43.mfcc.csv
working in path : C:\Users\FG\Desktop\PhD\Research\Reservoirs\datasets\TIMIT-MFCCs\test\dr1\fdac1
working on: si1474.mfcc.csv
working on: si2104.mfcc.csv
working on: si844.mfcc.csv
working on: sx124.mfcc.csv
working on: sx214.mfcc.csv
working on: sx304.mfcc.csv
working on: sx34.mfcc.csv
working on: sx394.mfcc.csv
working in path : C:\Users\FG\Desktop\PhD\Research\Reservoirs\datasets\TIMIT-MFCCs\test\dr1\felc0
working on: si1386.mfcc.csv
working on: si2016.mfcc.csv
working on: si756.mfcc.csv
working o

In [8]:
# Mean number of test frames collected per phone label.
obs_per_phone = [len(frames) for frames in test_phone_obs_dict.values()]
np.mean(obs_per_phone)

6736.3934426229507

In [10]:
# For every phone that has both an adapted and an unadapted model, print the
# mean of each model's score() output on that phone's test frames.
model_sets = (
    ('adapted gmm average log prob:', agmm_dict),
    ('unadapted gmm average log prob:', ugmm_dict),
)
for phone in phone_list:
    # guard clause: report phones that lack a model in either dictionary
    if phone not in agmm_dict or phone not in ugmm_dict:
        print('%s %s' % ('no model for', phone))
        continue

    print('%s %s' % ('Phone:', phone))
    frames = test_phone_obs_dict[phone]
    for caption, models in model_sets:
        print('%s %s' % (caption, np.mean(models[phone].score(frames))))
        

Phone: aa
adapted gmm average log prob: -161.241028206
unadapted gmm average log prob: -161.460239068
Phone: ae
adapted gmm average log prob: -252.172169177
unadapted gmm average log prob: -235.312718818
Phone: ah
adapted gmm average log prob: -145.605917653
unadapted gmm average log prob: -136.22610333
Phone: ao
adapted gmm average log prob: -213.500598779
unadapted gmm average log prob: -214.847719368
Phone: aw
adapted gmm average log prob: -205.681566773
unadapted gmm average log prob: -205.184926423
Phone: ax
adapted gmm average log prob: -162.184327552
unadapted gmm average log prob: -148.28232218
Phone: ax-h
adapted gmm average log prob: -1568342.92987
unadapted gmm average log prob: -1568342.92987
Phone: axr
adapted gmm average log prob: -197.904475941
unadapted gmm average log prob: -187.046014422
Phone: ay
adapted gmm average log prob: -325.915058203
unadapted gmm average log prob: -325.138702611
Phone: b
adapted gmm average log prob: -2376.40560904
unadapted gmm average log p

In [13]:
# average difference
#
# Both lists are built from ONE shared, ordered list of phones so that entry i
# of agmm_log_prob_avgs and entry i of ugmm_log_prob_avgs always describe the
# same phone. Iterating each dict's own keys() only lines up if the two dicts
# happen to have identical key sets and identical iteration order, and any
# mismatch would silently corrupt a positional zip() comparison.
# Phones without test frames are skipped because there is nothing to score.
scored_phones = [phone for phone in phone_list
                 if phone in agmm_dict and phone in ugmm_dict
                 and len(test_phone_obs_dict[phone]) > 0]

# np.mean averages whatever score() returns for the phone's frames
# (presumably per-frame log-likelihoods; confirm for the sklearn version used)
agmm_log_prob_avgs = [np.mean(agmm_dict[phone].score(test_phone_obs_dict[phone]))
                      for phone in scored_phones]
ugmm_log_prob_avgs = [np.mean(ugmm_dict[phone].score(test_phone_obs_dict[phone]))
                      for phone in scored_phones]

In [27]:
# Tally, pair by pair, how often the adapted average is strictly greater than
# the unadapted one; every other pair (including ties) goes in the second count.
adapted_higher = 0
adapted_not_higher = 0
for adapted_avg, unadapted_avg in zip(agmm_log_prob_avgs, ugmm_log_prob_avgs):
    if adapted_avg > unadapted_avg:
        adapted_higher += 1
    else:
        adapted_not_higher += 1
print(adapted_higher)
print(adapted_not_higher)

20
34
