In [1]:
import numpy as np
import pandas as pd
from scipy.stats import multivariate_normal as mvn
from matplotlib import pyplot as plt

In [2]:
FEA_WP = pd.read_pickle('./work_shared/fea_word_person.pkl') # speaker dependent features
FEA_PW = pd.read_pickle('./work_shared/fea_person_word.pkl') # speaker independent features

In [3]:
def forward(alpha_prev, A, b):
    alpha_next = np.zeros((5))
    for j in range(5):
        for i in range(5):
            alpha_next[j] += alpha_prev[i] * A[i][j] * b[j]
    g_t = np.sum(alpha_prev)
#     print('unnormalized alpha: ', alpha_next)
    alpha_next = np.divide(alpha_next, g_t)
#     print('normalized alpha: ', alpha_next)
#     print('g', g_a)
    return alpha_next, g_t
        
def backward(beta_next, A, b, scale):
    beta_prev = np.zeros((5))
    for i in range(5):
        for j in range(5):
            beta_prev[i] += beta_next[j] * A[i][j] * b[j]
#     beta_prev = beta_next * b
#     beta_prev = np.dot(beta_next,A.T)
#     print('unnormalized beta: ', beta_prev)
#     g_b = np.sum(beta_prev)
#     beta_prev = np.divide(beta_prev, scale)
#     print('g normalized beta: ', np.divide(beta_prev, scale))
    beta_prev = np.divide(beta_prev, scale)
#     print('normalized beta: ', beta_prev)
    return beta_prev

def find_alpha(alpha_0, A, x, mu, sigma):
    T = x.shape[0]
    alpha = np.zeros((T, 5))
    scale = np.ones(T)
    alpha[0] = alpha_0
    for t in range(1, T):
        b = []
        for i in range(5):
            b.append(mvn.pdf(x[t], mu[i], sigma[i], allow_singular = True))
        alpha[t], scale[t] = forward(alpha[t-1], A, b)
#     print('normalized alpha: ', alpha[-1])
    return alpha, scale

def find_beta(beta_T, A, x, mu, sigma, scale):
    T = x.shape[0]
    beta = np.zeros((T, 5))
    beta[-1] = beta_T
    for t in range(T-2, -1, -1):
        b = []
        for i in range(5):
            b.append(mvn.pdf(x[t+1], mu[i], sigma[i], allow_singular = True))
#         print('unnormalized: ', backward(beta[t+1], A, b, 1))
        beta[t] = backward(beta[t+1], A, b, scale[t+1])
#     print('normalized beta ', beta[0])
    return beta

In [4]:
def xi_t(alpha, A, b, beta):
    xi_t = np.zeros((5, 5))
    for i in range(5):
        for j in range(5):
            xi_t[i][j] = alpha[i] * A[i][j] * b[j] * beta[j]
    norm = np.sum(xi_t)
    xi_t = np.divide(xi_t, norm)
    return xi_t
    
def find_xi(alpha, A, x, mu, sigma, beta):
    T = x.shape[0]
    xi = np.zeros((T, 5, 5))
    for t in range(T):       
        if t < T-1:
            b = []
            for i in range(5):
                b.append(mvn.pdf(x[t+1], mu[i], sigma[i], allow_singular = True))
            xi[t] = xi_t(alpha[t], A, b, beta[t+1])           
        else:
            xi[t] = xi_t(alpha[t], A, np.ones(5), np.ones(5))
#     print('xi: ', xi[0])
    return xi

def find_gamma(alpha, beta):
#     T = xi.shape[0]
#     gamma = np.zeros((T, 5))
#     for t in range(T):
#         for i in range(5):
#             gamma[t][i] = np.sum(xi[t][i])
    gamma = alpha * beta
    norm = np.sum(gamma, axis = 1).reshape(-1, 1)
    gamma = np.divide(gamma, norm) 
#     print('gamma: ', gamma[1])
    return gamma

In [5]:
def update_pi(alpha_set, beta_set):
    new_pi = np.zeros(5)
    for l in range(len(alpha_set)):
        new_pi += np.sum(alpha_set[l], axis = 0) * np.sum(beta_set[l], axis = 0)
    new_pi = new_pi/np.sum(pi)
    return new_pi

def update_A(xi_set, gamma_set):
    num = np.zeros((5,5))
    denom = np.zeros(5)
    for l in range(len(xi_set)):
        num += np.sum(xi_set[l], axis = 0)
        denom += np.sum(gamma_set[l], axis = 0)
    new_A = np.divide(num, denom)
    return A

def update_mu(x_set, gamma_set):
    new_mu = np.zeros((5, 14))
    for i in range(5):
        num = np.zeros((1, 14))
        denom = 0
        for l in range(len(x_set)):
            x = x_set[l]
            gamma = gamma_set[l]
            T = x.shape[0]        
            for t in range(T):
                num += np.multiply(x[t], gamma[t,i])
            denom += np.sum(gamma[:,i])
        new_mu[i] = np.divide(num, denom)
    return new_mu

def update_sigma(x_set, gamma_set, mu):
    new_sigma = np.zeros((5, 14, 14))
    for i in range(5):
        num = np.zeros((14,14))
        denom = 0
        for l in range(len(x_set)):
            x = x_set[l]
            gamma = gamma_set[l]
            T = x.shape[0] 
            for t in range(T):
                std = np.subtract(x[t], mu[i])
#                 print(std.shape)
#                 print(np.multiply(gamma[t,i], np.transpose(std)).shape)
                num += np.multiply(np.multiply(gamma[t,i], np.transpose(std)), std)
            denom += np.sum(gamma[:,i])
        new_sigma[i] = np.divide(num, denom)
    return new_sigma

In [6]:
# speaker dependent tests

num_words = 5
num_speakers = 4
num_utterances = 5
words = ['cnn', 'dnn', 'asr', 'tts', 'hmm']
speakers = ['mh', 'ls', 'dg', 'yx']
word_pi = []
word_A = []
word_mu = []
word_sigma = []

for w in range(num_words):
    # initialize parameters

    pi = np.array([1/5] * 5)

    A = np.array([[.8, .2,   0,   0,   0],
                  [0,  .8,  .2,   0,   0],
                  [0,   0,  .8,  .2,   0],
                  [0,   0,   0,  .8,  .2],
                  [0,   0,   0,   0,   1]])
    
    word = words[w]
    alpha_set = []
    beta_set = []
    x_set = []
    xi_set = []
    gamma_set = []
    
    first_file = FEA_WP[word]['mh'][0]
    mu_0 = np.mean(first_file, axis = 0)
    mu = np.array([mu_0]*5)
    sigma_0 = np.cov(first_file, rowvar = False)
    sigma = np.array([sigma_0]*5)
    
    X = []
    for s in range(num_speakers):
        speaker = speakers[s]

        for u in range(num_utterances-1):
#             x += (list(FEA_WP[word][speaker][u].flatten()))
            X.append(FEA_WP[word][speaker][u])

#     x = np.array(x).reshape((-1, 14))
#     print(x.shape)
    for epo in range(40):
        print('iteration: ', epo)
        for l in range(len(X)):
            x = X[l]
            alpha_0 = np.zeros(5)
            for i in range(5):
                sigma[i] = np.diag(np.diag(sigma[i]))
                alpha_0[i] = np.multiply(pi[i], mvn.pdf(x[0], mu[i], sigma[i], allow_singular = True))
            alpha_0 = alpha_0/np.sum(alpha_0)
#             print('alpha initialized as ', alpha_0)
#             print('current word: ', word)
            alpha, scale = find_alpha(alpha_0, A, x, mu, sigma)
            beta_T = np.ones(5)
            beta = find_beta(beta_T, A, x, mu, sigma, scale)

            xi = find_xi(alpha, A, x, mu, sigma, beta)
            gamma = find_gamma(alpha, beta)

            alpha_set.append(alpha)
            beta_set.append(beta)
            x_set.append(x)
            xi_set.append(xi)
            gamma_set.append(gamma)
                
        pi = update_pi(alpha_set, beta_set)
        A = update_A(xi_set, gamma_set)
        mu = update_mu(x_set, gamma_set)
        sigma = update_sigma(x_set, gamma_set, mu)
    
    word_pi.append(pi)
    word_A.append(A)
    word_mu.append(mu)
    word_sigma.append(sigma)
    
word_pi = np.array(word_pi)   
word_A = np.array(word_A)
word_mu = np.array(word_mu)
word_sigma = np.array(word_sigma)

current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word:  cnn
current word

current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word:  dnn
current word

current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word:  asr
current word

current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word:  hmm
current word

In [7]:
confusion_matrix = np.zeros((5, 5))
alpha_debug = []
for w in range(num_words):
    word = words[w]
    print(word)
    for s in range(num_speakers):
        speaker = speakers[s]
        x = FEA_WP[word][speaker][-1]
        alpha_sum = []
        for i in range(5):
            pi = word_pi[i]
            A = word_A[i]
            mu = word_mu[i]
            sigma = word_sigma[i]
#             print(mu)
#             print(sigma)
            alpha_0 = np.zeros(5)
            for k in range(5):
                sigma[k] = np.diag(np.diag(sigma[i]))
                alpha_0[k] = np.multiply(pi[k], mvn.pdf(x[0], mu[k], sigma[k]))
#             print(pi)
            alpha_0 = alpha_0/np.sum(alpha_0)
            alpha, scale = find_alpha(alpha_0, A, x, mu, sigma)
            alpha_debug.append(alpha)
#             alpha_T = alpha[-1]
#             print(np.sum(np.log(scale)))
#             print('%s '%words[i], np.sum(np.log(scale[-1])))
#             g_T = g_a[-1]
#             print(alpha_T)
            alpha_sum.append(np.sum(np.log(scale)))
        confusion_matrix[w, np.argmax(alpha_sum)]+=1

confusion_matrix /= 4

cnn
dnn
asr
tts
hmm


In [8]:
print(confusion_matrix)

[[0.   0.   0.25 0.   0.75]
 [0.   0.25 0.   0.   0.75]
 [0.   0.   0.5  0.   0.5 ]
 [0.   0.   0.   0.75 0.25]
 [0.   0.   0.   0.   1.  ]]
