# Calculate likelihood per frame

In [1]:
%matplotlib notebook
import pdb
import matplotlib.pyplot as plt
import os
import re
import numpy as np
import pickle
import IPython
import mplcursors
import sys 
sys.path.append('..')
from extract_hmm_gmm_params.get_hmm_gmm import GetParam

In [2]:
# Relative paths to the files of the utterance analysed in this notebook:
# the audio (played below), its transcript (read below) and the MFCC text
# file that load_mfcc() parses.
lib_wav = 'data/lib-0001-fc-008-part.wav'
lib_txt = 'data/lib-0001-fc-008-part.txt'
lib_mfcc = 'data/lib-0001-fc-008-part.mfcc'

In [3]:
with open(lib_txt, 'r') as f:
    line = f.readlines()
    line = line[0]

!cat data/lib-0001-fc-008-part.txt

THE RAIN CONTINUED FAST THOUGH NOT HEAVY

In [4]:
# Embed an audio player for the utterance's wav file in the cell output.
IPython.display.Audio(lib_wav)

## Load MFCC

In [5]:
%%bash
# Disabled cell: the Kaldi command below is commented out and nothing is executed.
# NOTE(review): it references lib-0001-fc-001 files, not the lib-0001-fc-008-part
# files used by the rest of this notebook -- confirm the paths before re-enabling.
# /home/kaldi/src/featbin/compute-mfcc-feats --use-energy=false --sample-frequency=16000 \
# scp,p:data/lib-0001-fc-001.scp ark,t:data/lib-0001-fc-001.mfcc

In [6]:
def load_mfcc(file):
    '''Load mfcc (txt) file for a single wav file

    The first line of the file is treated as a header and skipped. Every
    following line is one row of whitespace-separated numbers; a closing
    ']' (as found on the last row) is ignored, and so are empty lines.

    Args:
        file: path of the text file to read.

    Returns:
        2-D float numpy array with one row per parsed line.
    '''
    with open(file, 'r') as f:
        lines = f.readlines()
    rows = []
    for raw in lines[1:]:
        # split() without an argument tolerates repeated spaces and the newline
        fields = raw.replace(']', ' ').split()
        if fields:
            rows.append(fields)
    # np.float was only an alias of the builtin float and was removed in NumPy 1.24
    return np.array(rows).astype(float)
    
# Parse the utterance's MFCC file, then show the matrix shape and its values.
mfcc = load_mfcc(lib_mfcc)
print(mfcc.shape)
mfcc # (300x13)

(300, 13)


array([[  5.50168600e+01,  -2.61352000e+01,   1.11575300e+01, ...,
          1.18221500e+01,   5.88053200e+00,   3.71159900e+00],
       [  5.18446500e+01,  -2.14232300e+01,   5.29893200e+00, ...,
          7.43690100e+00,   1.95482600e+01,   1.31463700e+01],
       [  5.00931800e+01,  -1.66251900e+01,   3.69649900e+00, ...,
          9.49944200e+00,   1.00154100e+01,   1.33833800e+01],
       ..., 
       [  5.41820100e+01,  -1.15077600e+01,   5.37628700e-01, ...,
          7.74043800e+00,   1.94237400e+01,   1.80245800e+01],
       [  5.40556900e+01,  -9.53825900e+00,   1.21408900e-02, ...,
          1.83922600e+01,   2.94135400e+01,   1.59047900e+01],
       [  5.44750400e+01,  -8.54205700e+00,   4.51162300e+00, ...,
          1.41274000e+01,   7.18378500e+00,   5.97816000e+00]])

In [7]:
# Show the MFCC matrix as an image; mfcc is transposed so that frames run along
# the x-axis and coefficients along the y-axis.
fig, ax = plt.subplots(figsize=(10,5), facecolor='white')
ax.imshow(mfcc.T, origin='lower', aspect='auto')
# Label the axes the same way the later likelihood plots do.
ax.set_xlabel('Frames')
ax.set_ylabel('MFCC coeffs');

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x7f0140fd55f8>

## Extract MFCC (39-d)

### scp (wav) -> ark (39-d)

In [8]:
%%bash
# Compute MFCCs for the wav listed in the .scp file and pipe them through
# add-deltas, which writes data/lib-0001-fc-008-part-39.ark
# (the "-39" suffix presumably means 39-d features after deltas -- TODO confirm).
# NOTE(review): the Kaldi binaries are referenced by absolute path under
# /home/kaldi, so this cell only runs on a machine with that layout.
/home/kaldi/src/featbin/compute-mfcc-feats --use-energy=false --sample-frequency=16000 \
scp,p:data/lib-0001-fc-008-part.scp ark:- | \
/home/kaldi/src/featbin/add-deltas ark:- ark:data/lib-0001-fc-008-part-39.ark

/home/kaldi/src/featbin/add-deltas ark:- ark:data/lib-0001-fc-008-part-39.ark 
/home/kaldi/src/featbin/compute-mfcc-feats --use-energy=false --sample-frequency=16000 scp,p:data/lib-0001-fc-008-part.scp ark:- 
LOG (compute-mfcc-feats[5.2.38~1-b82b8]:main():compute-mfcc-feats.cc:181)  Done 1 out of 1 utterances.


In [9]:
%%bash
# Run gmm-compute-likes with the model ../../data/mono/40.mdl on the feature
# archive written above; the output is a text archive (ark,t) that a later cell
# parses into the `likes` matrix.
# NOTE(review): the parameters loaded later come from model/final.txt -- confirm
# that file describes the same model as 40.mdl.
/home/kaldi/src/gmmbin/gmm-compute-likes ../../data/mono/40.mdl \
ark:data/lib-0001-fc-008-part-39.ark ark,t:data/lib-0001-fc-008-part-likes.txt

/home/kaldi/src/gmmbin/gmm-compute-likes ../../data/mono/40.mdl ark:data/lib-0001-fc-008-part-39.ark ark,t:data/lib-0001-fc-008-part-likes.txt 
LOG (gmm-compute-likes[5.2.38~1-b82b8]:main():gmm-compute-likes.cc:82) gmm-compute-likes: computed likelihoods for 1 utterances.


## Load likelihood

In [10]:
# Parse the likelihood text file for a single wav: the first line is a header
# and is skipped; every other line is one row of whitespace-separated values,
# with the closing ']' on the last row ignored.
lib_likes = 'data/lib-0001-fc-008-part-likes.txt'
# lib_likes = 'data/lib-0001-fc-001-likes.txt'
with open(lib_likes, 'r') as f:
    lines = f.readlines()
likes = [row.replace(']', ' ').split() for row in lines[1:]]
# Drop empty rows; np.float was removed in NumPy 1.24, the builtin float is equivalent.
likes = np.array([row for row in likes if row]).astype(float) # 300x232

In [11]:
# Sanity check: one row per frame (same count as the MFCC matrix) and one
# column per pdf; (300, 232) for this utterance.
likes.shape

(300, 232)

In [12]:
# Text files describing the model, passed to GetParam together with the
# directory given as save_dir.
mdl_txt = 'model/final.txt'
trans_txt = 'model/trans_prob.txt'
phones_txt = 'model/phones.txt'
sets_txt = 'model/sets.txt'
save_dir = 'result'

# NOTE(review): the object is bound to K here, while the usage hint printed by
# GetParam calls it H; later cells read K.pdf[i][0] as the label of pdf i.
K = GetParam(mdl_txt, trans_txt, phones_txt, sets_txt, save_dir)

model/phones.txt is loaded
model/trans_prob.txt is loaded
  nonsilence phones were succefully loaded
  silence phones were succefully loaded
model/final.txt is loaded

        Now try following:

        >> H.hmm['AA_B'].keys() # get each phone info
           dict_keys(['states', 'trans_prob', 'self_loop_prob', 'gmm_id'])
        >> H.hmm['AA_B']['gmm_id'] # get gmm id (=pdf id)
           [10, 11, 12]
        >> H.gmm[10].keys() # access gmm info
           dict_keys(['num_gmm', 'weight', 'mean', 'var'])
        
model/sets.txt is loaded


## Plot MFCC with phone likelihood

In [13]:
def plot_mfcc_likes(mfcc, likes, ph_seq=None):
    '''Plot MFCCs as a greyscale image with one likelihood curve per pdf on top.

    Relies on the notebook-level object K (K.pdf[i][0] is used as the text
    label of curve i when ph_seq is given).

    Args:
        mfcc: 2-D array with one row per frame (transposed for display).
        likes: 2-D array with one row per frame and one column per pdf.
        ph_seq: optional sequence with one item per frame; the first element
            of each item becomes the x tick label. When omitted or empty,
            frame indices are used for the ticks and curve indices for the
            curve labels.

    Returns:
        (fig, ax1, ax2): the figure, the MFCC axes and the twin axes that
        hold the likelihood curves.
    '''
    fig, ax1 = plt.subplots(figsize=(10,5), facecolor='white')
    ax1.imshow(mfcc.T, origin='lower', aspect='auto', cmap='Greys')
    ax1.autoscale(False)

    n_frames = mfcc.shape[0]
    ax1.set_xticks(range(n_frames))
    if ph_seq:
        tick_labels = [ph[0] for ph in ph_seq]
    else:
        tick_labels = [str(i) for i in range(n_frames)]
    ax1.set_xticklabels(tick_labels, rotation=45)

    ax2 = ax1.twinx()
    ax2.plot(likes)

    # Curve i is tagged at frame i, so at most min(frames, pdfs) curves can be
    # labelled; iterating over all pdfs raised IndexError for short utterances.
    for i in range(min(likes.shape)):
        label = K.pdf[i][0] if ph_seq else str(i)
        ax2.text(i, likes[i, i], label)
    return fig, ax1, ax2

In [14]:
# all phones: for every frame pick the K.pdf entry of the highest-scoring column
argmax = likes.argmax(axis=1)
ph_seq = [K.pdf[m] for m in argmax]

fig, ax1, ax2 = plot_mfcc_likes(mfcc, likes, ph_seq)

<IPython.core.display.Javascript object>

In [17]:
def plot_mfcc_likes_avg(mfcc, likes, title=line, skip=None):
    '''Plot MFCCs with one likelihood curve per phone, averaged over its pdfs.

    Relies on the notebook-level object K: K.pdf[i][0] is the name of pdf i.
    Each name is reduced to a phone label by dropping a trailing digit and
    everything from the first underscore on; the columns of `likes` sharing
    a label are averaged into a single curve.

    Args:
        mfcc: 2-D array with one row per frame (transposed for display).
        likes: 2-D array with one row per frame and one column per pdf.
        title: text shown in the figure title; defaults to the value the
            notebook variable `line` had when this function was defined.
        skip: container of phone labels to leave out; None skips nothing.

    Returns:
        (fig, ax1, ax2, lines): the figure, the MFCC axes, the twin axes with
        the likelihood curves, and the list of plotted line artists.
    '''
    # The default skip=None used to fail on the membership test below.
    skip = () if skip is None else skip

    fig, ax1 = plt.subplots(figsize=(10,5), facecolor='white')
    # plot MFCCs
    ax1.imshow(mfcc.T, origin='lower', aspect='auto', cmap='Greys')
    ax1.autoscale(False)
    ax1.set_ylabel('MFCC coeffs')
    # twinx() hides the x-axis of the twin, so the frame label must go on ax1
    ax1.set_xlabel('Frames')

    # phone label of every pdf column
    new_lab = np.array([
        re.sub('_.*','',re.sub('([A-Z]+)([0-9])_.*','\\1',K.pdf[i][0]))
        for i in range(likes.shape[1])
    ])
    # unique phones, minus the ones to skip
    uq_lab = [l for l in np.unique(new_lab) if l not in skip]

    # averaged line for phone
    new_likes = np.zeros((likes.shape[0], len(uq_lab)))
    for ith, lab in enumerate(uq_lab):
        # boolean mask selects every pdf column that belongs to this phone
        new_likes[:, ith] = np.mean(likes[:, new_lab == lab], axis=1)

    ax2 = ax1.twinx()
    lines = ax2.plot(new_likes)
    ax2.set_ylabel('-loglikelihood')
    print('ylim for likes:', ax2.get_ylim())
    for curve, lab in zip(lines, uq_lab):
        curve.set_label('{}'.format(lab))

    # hovering/clicking a curve shows its phone label
    cursor = mplcursors.cursor(lines)
    cursor.connect(
        'add', lambda sel: sel.annotation.set_text(sel.artist.get_label()))

    colors = [l.get_color() for l in lines]

    # spread the annotation anchors over the first third of the frames;
    # np.int was removed in NumPy 1.24, the builtin int is equivalent
    xtick = np.linspace(0, new_likes.shape[0]/3, len(uq_lab)).astype(int)

    # draw labels
    for i in range(new_likes.shape[1]):
        x, y = xtick[i], new_likes[xtick[i],i]
        ax2.annotate(uq_lab[i], xy=(x, y), xytext=(x*1.2, y+70), color=colors[i],
                    arrowprops=dict(facecolor=colors[i], shrink=0.05,
                                    width=0.5, headwidth=2, headlength=1))
    plt.suptitle('Text: {}'.format(title))
    return fig, ax1, ax2, lines

# Plot the per-phone averaged likelihoods over the MFCCs, leaving out the 'AW' label.
fig, ax1, ax2, lines = plot_mfcc_likes_avg(mfcc, likes, skip=['AW'])

<IPython.core.display.Javascript object>

ylim for likes: (-616.10519800000009, -76.233161999999993)


In [16]:
# Full pdf names, one per likelihood column
old_lab = [K.pdf[col][0] for col in range(likes.shape[1])]
# Same names reduced to the bare phone label (digit and '_...' suffix removed)
new_lab = [
    re.sub('_.*', '', re.sub('([A-Z]+)([0-9])_.*', '\\1', name))
    for name in old_lab
]
uq_lab_old = np.unique(old_lab)
uq_lab = np.unique(new_lab)