# Machine Problem #2

## Imports Here

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import scipy
from numpy import linalg
import math
import wave
from scipy.io import wavfile
from scipy import signal
from collections import defaultdict
from functools import partial
import scipy.stats as stats
from sklearn.decomposition import PCA

## Data Extraction

In [2]:
def pca_reduce(signal, n_components, block_size=1024):
    
    # First, zero-pad the signal so that it is divisible by the block_size
    samples = len(signal)
    hanging = block_size - np.mod(samples, block_size)
    padded = np.lib.pad(signal, (0, hanging), 'constant', constant_values=0)
    
    # Reshape the signal to have 1024 dimensions
    reshaped = padded.reshape((len(padded) // block_size, block_size))
    
    # Second, do the actual PCA process
    pca = PCA(n_components=n_components)
    pca.fit(reshaped)
    
    comps = pca.components_
    transformed = pca.transform(reshaped)
    reconstructed = pca.inverse_transform(transformed).reshape((len(padded)))
    return pca, comps, reconstructed

In [3]:
data= {}
fs={}
peeps=['dg', 'ls', 'mh', 'yx']
words=['asr', 'cnn', 'dnn', 'hmm', 'tts']
features= defaultdict(lambda: defaultdict(lambda: defaultdict(partial(np.ndarray, 0))))

for x in range(len(peeps)):
    p=peeps[x]
    for y in range(len(words)):
        w=words[y]
        for i in range(1, 6):
            wav_r=wave.open('data/'+p+'/'+p+'_'+w+str(i)+'.wav', 'rb')
            fs[p, w, i], x = wavfile.read('data/'+p+'/'+p+'_'+w+str(i)+'.wav')
            if wav_r.getnchannels()==2:
                data[p, w, i]=x[:, 0]
                #print(x[:,0],'\nasasas\n')
                _, comps, reconstructed = pca_reduce(x[:, 0], 14, 1024)
            else:
                data[p, w, i]=x
                _, comps, reconstructed = pca_reduce(x, 14, 1024)
            features[p][w][i]=comps
        



## Feature Extraction

In [4]:
# features= defaultdict(lambda: defaultdict(lambda: defaultdict(partial(np.ndarray, 0))))
# peeps=['dg', 'ls', 'mh', 'yx']
# words=['asr', 'cnn', 'dnn', 'hmm', 'tts']

# for p in range(len(peeps)):
#     for w in range(len(words)):
#         for i in range(1, 6):
#             file_mat=[]
#             file_data=open(('feature/'+peeps[p]+'/'+peeps[p]+'_'+words[w]+str(i)+'.fea'), encoding = "ISO-8859-1").read().strip().split('\n')
#             file_mat = np.array([np.float_(line.split(',')) for line in file_data])
#             features[peeps[p]][words[w]][i]=file_mat

In [5]:
features['dg']['cnn'][1].shape

(14, 1024)

## Splitting Testing-Training Data (Speaker Independent)

In [6]:
test_sp_independent=defaultdict(lambda: defaultdict(lambda: defaultdict(partial(np.ndarray, 0))))
train_sp_independent=defaultdict(lambda: defaultdict(lambda: defaultdict(partial(np.ndarray, 0))))
data_per_word_id=[]

peeps_id=['dg', 'ls', 'yx']
for x in range(len(words)):
    w=words[x]
    per_word=[]
    for y in range(len(peeps_id)):
        p=peeps_id[y]
        for i in range(1, 6):
            train_sp_independent[p][w][i]=features[p][w][i]
            per_word.append(features[p][w][i])
    data_per_word_id.append(per_word)
    
test_word_id=[]
true_labels_id = []
for x in range(len(words)):
    w=words[x]
    per_word=[]
    for p in ['mh']:
        for i in range(1, 6):
            test_sp_independent[p][w][i]=features[p][w][i] 
            per_word.append(features[p][w][i])
            true_labels_id.append(w)
        test_word_id.append(per_word)

In [7]:
print(train_sp_independent['dg']['asr'][1])

[[ 0.00040939 -0.00301437 -0.00775597 ... -0.00372121 -0.00549609
  -0.00788802]
 [-0.0170444  -0.01566191 -0.01460274 ...  0.02594256  0.02252512
   0.01776588]
 [ 0.01360221  0.00614428 -0.00132555 ...  0.00389542  0.00192771
  -0.00021314]
 ...
 [ 0.03185443  0.02793878  0.02274978 ... -0.03198364 -0.0234382
  -0.01449342]
 [-0.02182696 -0.02249822 -0.02328165 ... -0.00737874 -0.01098175
  -0.01533739]
 [-0.01733734 -0.01173732 -0.00627662 ...  0.01767267  0.02134773
   0.02001401]]


## Splitting Testing-Training Data (Speaker Dependent)

In [8]:
train_sp_dependent=defaultdict(lambda: defaultdict(lambda: defaultdict(partial(np.ndarray, 0))))
test_sp_dependent=defaultdict(lambda: defaultdict(lambda: defaultdict(partial(np.ndarray, 0))))
data_per_word_d=[]

for x in range(len(words)):
    w=words[x]
    per_word=[]
    for y in range(len(peeps)):
        p=peeps[y]
        for i in range(1, 5):
            train_sp_dependent[p][w][i]=features[p][w][i]
            per_word.append(features[p][w][i])
    data_per_word_d.append(per_word)
        
test_word_d=[]
true_labels_d = []
for x in range(len(words)):
    w=words[x]
    per_word=[]
    for j in range(len(peeps)):
        p=peeps[j]
        for i in range(5, 6):
            test_sp_dependent[p][w][i]=features[p][w][i]
            per_word.append(np.array(features[p][w][i]))
            print(w)
            true_labels_d.append(w)
        test_word_d.append(per_word)
        

asr
asr
asr
asr
cnn
cnn
cnn
cnn
dnn
dnn
dnn
dnn
hmm
hmm
hmm
hmm
tts
tts
tts
tts


In [9]:
true_labels_d

['asr',
 'asr',
 'asr',
 'asr',
 'cnn',
 'cnn',
 'cnn',
 'cnn',
 'dnn',
 'dnn',
 'dnn',
 'dnn',
 'hmm',
 'hmm',
 'hmm',
 'hmm',
 'tts',
 'tts',
 'tts',
 'tts']

## Self Recorded Samples: Test Feature Extraction 

In [10]:
words=['asr', 'cnn', 'dnn', 'hmm', 'tts']
self_features=defaultdict(lambda: defaultdict(partial(np.ndarray, 0)))
test_self=[]=defaultdict(lambda: defaultdict(partial(np.ndarray, 0)))

for w in words:
    for i in range(1, 6):
        self_features[w][i] = open(('Self-Recorded/Self Features/ak_'+w+str(i)+'.txt'), encoding = "ISO-8859-1").read().split('\n')


# for x, y in self_features.items():
#     for z in y:
#         l=z.replace('\t', ',')[:-1].split()
#         test_self.append(l)

## HMM Parameters: $\pi$, A, B

In [11]:
#Create B below:
means_per_word_id=[]
for x in range(len(words)):
    w=words[x]
    for y in range(len(peeps_id)):
        p=peeps_id[y]
        for i in range(1, 6):
            if len(train_sp_independent[p][w][i]) != 0:
                temp_mean=np.mean(train_sp_independent[p][w][i], axis=0)
                #print(temp_mean)
    means_per_word_id.append(temp_mean)
    
cov_per_word_id=[]
for x in range(len(words)):
    w=words[x]
    for y in range(len(peeps_id)):
        p=peeps_id[y]
        for i in range(1, 6):
            if len(train_sp_independent[p][w][i]) != 0:
                temp_cov=np.cov(train_sp_independent[p][w][i].T)
    cov_per_word_id.append(temp_cov)
# # #Initializing b_j(x):
# for i in range(0, 5):
#     for j in range (0, 15):
#         gauss[i][j] = stats.multivariate_normal(mean=means_per_word_id[i], cov=cov_per_word_id[i]).pdf(data_per_word_id[i][j])

In [12]:
len(data_per_word_d[0][0])

14

In [13]:
#Create B below:
means_per_word_d=[]
for x in range(len(words)):
    w=words[x]
    for y in range(len(peeps)):
        p=peeps[y]
        for i in range(1, 5):
            if len(train_sp_dependent[p][w][i]) != 0:
                temp_mean=np.mean(train_sp_dependent[p][w][i], axis=0)
                #print(temp_mean)
    means_per_word_d.append(temp_mean)
    
cov_per_word_d=[]
for x in range(len(words)):
    w=words[x]
    for y in range(len(peeps)):
        p=peeps[y]
        for i in range(1, 6):
            if len(train_sp_dependent[p][w][i]) != 0:
                temp_cov=np.cov(train_sp_dependent[p][w][i].T)
    cov_per_word_d.append(temp_cov)
# # #Initializing b_j(x):
# for i in range(0, 5):
#     for j in range (0, 15):
#         gauss[i][j] = stats.multivariate_normal(mean=means_per_word_id[i], cov=cov_per_word_id[i]).pdf(data_per_word_id[i][j])

In [14]:
len(data_per_word_id)

5

## Defining GaussianHMM Model

In [15]:
class GHMM:
    def __init__(self, mean, cov):
        states = [0,1,2,3,4]
        self.N = len(states)-1
        self.Sigma=[cov+0.5*np.identity(len(cov))]*self.N
        self.mu=[mean]*self.N
        
        self.pi= [1/5, 1/5, 1/5, 1/5, 1/5] #Initial Probabilty Vector
        self.A = [[0.8, 0.2, 0, 0, 0],
             [0, 0.8, 0.2, 0, 0],
             [0, 0, 0.8, 0.2, 0],   #Transition Probabilty Matrix
             [0, 0, 0, 0.8, 0.2]]
             #[0, 0, 0, 0, 1]]
    
    def train(self, X):
        gamma, xi=self.E_step(X)
        self.M_step(X, gamma, xi)
    
    def get_B(self, file): 
#         print("file: \n")
#         print(file)
#         print('----------')
        T=len(file)
        B=np.zeros((self.N,T))
        for i in range(0,self.N):
            for t in range(0,T):
#                 print('m: \n')
#                 print(self.mu[i])
#                 print("\nS: \n")
#                 print(self.Sigma[i])
#                 print('\n----------------\n')
#                 print('X: \n')
#                 print(file[t])
#                 print('\n-----------------------------------------\n')
                B[i,t]=stats.multivariate_normal(self.mu[i], self.Sigma[i]).pdf(file[t])
#                 print("B: \n")
#                 print(B[i][t])
#                 print('----------')
                #B[i,t]=stats.multivariate_normal(np.mean(file, axis=0)[i], np.cov(file.T)[i]).pdf(file[t])
#         print("Bin: \n")
#         print(B)
#         print('----------')
        return B
    
    def E_step(self, X):
        N=self.N
        L=len(X)
        maxTsteps=max(len(xdata) for xdata in X)
        Amat = np.array(self.A)
        
        gamma=np.zeros((N, maxTsteps, L))
        xi=np.zeros((N, N, maxTsteps, L))
        
        for inum, f in enumerate(X):
#             print('f: \n')
#             print(f)
            B=self.get_B(f)
#             print("B: \n")
#             print(B)
            T=len(f)
            tildealpha=np.zeros((N,T))
            tildebeta=np.zeros((N,T))
            log_g = np.zeros((T))
            baralpha = np.zeros((N,T))


            for i in range(0,N):
                baralpha[i,0]=self.pi[i]*B[i,0]
            log_g[0] = np.log(np.sum(baralpha[:,0]))
            tildealpha[:,0]=baralpha[:,0]/np.exp(log_g[0])

            for t in range(1,T):
                for i in range(0,N):
#                     print('B:\n')
#                     print(B[i,t])
#                     print('\n tAlpha:\n')
#                     print(tildealpha[:,t-1])
#                     print('\n A:\n')
#                     print(Amat[:,i])
#                     print('\n t-B: \n')
#                     print(tildebeta[:,t+1]*B[:,t+1])
#                     print('\n inner: \n')
#                     print(np.inner(tildealpha[:,t-1],Amat[:,i]))
#                     print('\n balpha: \n')
#                     print(B[i,t]*np.inner(tildealpha[:,t-1],Amat[:,i]))
#                     print('\n----------------------------------------------\n')
                    baralpha[i,t]=B[i,t]*np.inner(tildealpha[:,t-1],Amat[:,i])
                log_g[t] = np.log(np.sum(baralpha[:,t]))
                tildealpha[:,t]=baralpha[:,t]/np.exp(log_g[t])

            for i in range(0,N):
                tildebeta[i,T-1] = 1/np.exp(log_g[T-1])

            for t in range(T-2,-1,-1):
                for i in range(0,N):
                    tildebeta[i,t]=np.inner(Amat[i,0:N],tildebeta[:,t+1]*B[:,t+1])/np.exp(log_g[t+1])

            for t in range(0,T):
                gamma[:,t, inum] = tildealpha[:,t]*tildebeta[:,t]
                gamma[:,t, inum] = gamma[:,t, inum]/np.sum(gamma[:,t, inum])
    
            for t in range(0,T):
                for i in range(0,N):
                    for j in range(0,N):
                        xi[i,j,t,inum]=tildealpha[i,t]*Amat[i,j]
                        if (t<T-1):
                            if j==N:
                                xi[i,j,t,inum]=0
                            else:
                                xi[i,j,t,inum] = xi[i,j,t,inum]*B[j,t+1]*tildebeta[j,t+1]
                xi[:,:,t,inum]=xi[:,:,t,inum]/np.sum(xi[:,:,t,inum])
                
        return gamma, xi
    
    def M_step(self, X, gamma, xi):
        N=self.N
        L=len(X)
        
        for i in range(0,N):
            for j in range(0,N):
                self.A[i][j]=np.sum(xi[i,j,:])/np.sum(gamma[i,:])
                
        for i in range(0,N):
            self.mu[i]=0
            for l in range(L):
                for t in range(len(X[l])):
                    self.mu[i]+=X[l][t]*gamma[i, t, l]
            self.mu[i]/=np.sum(gamma[i])
            
        for i in range(0,N):
            self.Sigma[i]=0.5*np.identity(len(self.Sigma[i]))
            for l in range(L):
                for t in range(len(X[l])):
                    self.Sigma[i] += gamma[i,t,l]*np.outer(X[l][t]-self.mu[i], X[l][t]-self.mu[i])
            self.Sigma[i]/=np.sum(gamma[i])    
                
    def test(self, file):
        B=self.get_B(file)
#         print("B: \n")
#         print(B)
        T=len(file)
        N=self.N
        tildealpha=np.zeros((N,T))
        log_g = np.zeros((T))
        baralpha = np.zeros((N,T))
        Amat = np.array(self.A)

        for i in range(0,N):
#             print(B[0,0])
#             print('--------------------------')
             baralpha[i,0]=self.pi[i]*B[i,0]
        log_g[0] = np.log(np.sum(baralpha[:,0]))
        tildealpha[:,0]=baralpha[:,0]/np.exp(log_g[0])

        for t in range(1,T):
            for i in range(0,N):
                baralpha[i,t]=B[i,t]*np.inner(tildealpha[:,t-1],Amat[:,i])
            log_g[t] = np.log(np.sum(baralpha[:,t]))
            tildealpha[:,t]=baralpha[:,t]/np.exp(log_g[t])
                
        return sum(log_g)
#         alpha = np.zeros((N,T))
#         beta = np.zeros((N,T))
#         gamma = np.zeros((N,T))
#         xi = np.zeros((2*N,T))
#         Amat = np.array(A)  # Convert to an np matrix so we can compute inner products
#         for i in range(0,N):
#             alpha[i,0]=pi[i]*B[i,0]
#         for t in range(1,T):
#             for i in range(0,N):
#                 alpha[i,t]=B[i,t]*np.inner(alpha[:,t-1],Amat[:,i])
#         for i in range(0,N):
#             beta[i,T-1]=1
#         for t in range(T-2,-1,-1):
#             for i in range(0,N):
#                 beta[i,t]=np.inner(Amat[i,0:N],beta[:,t+1]*B[:,t+1])
#         for t in range(0,T):
#             gamma[:,t]=alpha[:,t]*beta[:,t]
#             gamma[:,t]=gamma[:,t]/np.sum(gamma[:,t])
#         for t in range(0,T):
#             for i in range(0,N):
#                 for j in range(i,i+2):
#                     xi[i+j,t]=alpha[i,t]*Amat[i,j]
#                     if (t<T-1):
#                         if j==N:
#                             xi[i+j,t]=0
#                         else:
#                             xi[i+j,t] = xi[i+j,t]*B[j,t+1]*beta[j,t+1]
#             xi[:,t]=xi[:,t]/np.sum(xi[:,t])

## Training GaussianHMMs (Speaker Independent)

In [16]:
# models_id=np.empty(len(words), dtype=np.object)
# for i in range(len(words)):
#     models_id[i]=GHMM(means_per_word_id[i], cov_per_word_id[i])
    
# for i in range(len(words)):
#     models_id[i].train(data_per_word_id[i])

In [17]:
# #predictions_id
# predictions_id = [0] * len(words)*len(test_word_id[0])

# for j in range(len(words)*len(test_word_id[0])):
#     log_gs_id = [0] * len(words)
#     for i in range(len(words)):
# #         print('jnum: '+str(j))
# #         print('file-in: \n')
# #         print('i: '+str(math.floor(j/len(test_word_id[0]))))
# #         print('j: '+str(j%len(test_word_id[0])))
# #         print(test_word_id[math.floor(j/len(test_word_id[0]))][j%len(test_word_id[0])])
#         log_gs_id[i]=models_id[i].test(test_word_id[math.floor(j/len(test_word_id[0]))][j%len(test_word_id[0])])
#     predictions_id[j]=words[np.argmax(log_gs_id)]
    
# same=0
# for j in range(len(predictions_id)):
#     print(predictions_id[j],', ',true_labels_id[j])
#     if predictions_id[j]==true_labels_id[j]:
#         same+=1
# print(same)

## Training GaussianHMMs (Speaker Dependent)

In [27]:
models_d=np.empty(len(words), dtype=np.object)
for i in range(len(words)):
    models_d[i]=GHMM(means_per_word_d[i], cov_per_word_d[i])
    
for i in range(len(words)):
#     print('data: \n')
#     print(len(data_per_word_d[i][16]))
    models_d[i].train(data_per_word_d[i])

KeyboardInterrupt: 

In [19]:
# #predictions_d
# predictions_d = [0] * len(words)*len(test_word_d[0])

# for j in range(len(words)*len(test_word_d[0])):
#     log_gs_d = [0] * len(words)
#     for i in range(len(words)):
# #         print('jnum: '+str(j))
# #         print('file-in: \n')
# #         print('i: '+str(math.floor(j/len(test_word_d[0])))+'| j: '+str(j%len(test_word_d[0])))
# #         print(test_word_d[math.floor(j/len(test_word_d[0]))][j%len(test_word_d[0])])
#         log_gs_d[i]=models_d[i].test(test_word_d[math.floor(j/len(test_word_d[0]))][j%len(test_word_d[0])])
#     predictions_d[j]=words[np.argmax(log_gs_d)]
    
# same=0
# for j in range(len(predictions_d)):
#     print(predictions_d[j],', ',true_labels_d[j])
#     if predictions_d[j]==true_labels_d[j]:
#         same+=1
# print(same)

In [20]:
test_word_d[4][0]

array([[-0.02682359, -0.02825837, -0.02909743, ...,  0.03330388,
         0.03787388,  0.04118939],
       [ 0.03095887,  0.03722286,  0.04382465, ..., -0.03011884,
        -0.03203723, -0.03473858],
       [ 0.02311902,  0.02759561,  0.02553632, ...,  0.00431988,
         0.00119618, -0.00083926],
       ...,
       [ 0.01390739,  0.00145613,  0.01033041, ..., -0.02221057,
        -0.01963782, -0.0224256 ],
       [ 0.04677068,  0.04196013,  0.03599097, ...,  0.06350328,
         0.05369636,  0.04061373],
       [-0.01077211, -0.00550978, -0.01394606, ...,  0.0114735 ,
         0.02031742,  0.0275134 ]])