In [40]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
import os
import math
import hmmlearn.hmm
from sklearn.cluster import KMeans
from pomegranate import *

path_to_data = "./Data"

In [41]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix

def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

def clustering(X, n_clusters=12):
    kmeans = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans  

In [42]:

class_names = ["Nha", "Me", "YTe", "ThanhPho", "Hoc",]
test_class_names = ["test_Nha", "test_Me", "test_YTe", "test_ThanhPho", "test_Hoc"]
datas = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    datas[cname] = get_class_data(os.path.join(path_to_data, cname))
print('Done!!!')

Load Nha dataset
Load Me dataset
Load YTe dataset
Load ThanhPho dataset
Load Hoc dataset
Done!!!


In [43]:

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in datas.items()], axis=0)
print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
kmeans = clustering(all_vectors)
kmeans = np.nan_to_num(kmeans)
print("centers", kmeans.cluster_centers_.shape)
print("Done")

vectors (55545, 36)
centers (12, 36)
centers (12, 36)
Done


# Khởi tạo cho t và e của từng từ

In [44]:
dict_components = {
    #  ɲa̤ː˨˩ -> 2 âm vị -> 6 states
    "Nha": 6,
    #  mɛ̰ʔ˨˩ -> 2 âm vị -> 6 states
    "Me": 6,
    #  i˧˧ te˧˥ -> 3 âm vị -> 9 states
    "YTe": 9,
    #   tʰa̤jŋ˨˩ fo˧˥ -> 5 âm vị -> 15 states
    "ThanhPho": 15,
    #  ha̰ʔwk˨ -> 3 âm vị -> 9 states
    "Hoc": 9,
}

dict_startprob = {
    #  ɲa̤ː˨˩ -> 2 âm vị -> 6 states
    "Nha": [0.3, 0.3, 0.4, 0.0, 0.0, 0.0,], 
    
    #  mɛ̰ʔ˨˩ -> 2 âm vị -> 6 states
    "Me": [0.2, 0.6, 0.2, 0.0, 0.0, 0.0,],
    
    #  i˧˧ te˧˥ -> 3 âm vị -> 9 states
    "YTe": [0.2, 0.6, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
    
    #   tʰa̤jŋ˨˩ fo˧˥ -> 5 âm vị -> 15 states
    "ThanhPho": [0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
    
    #  ha̰ʔwk˨ -> 3 âm vị -> 9 states
    "Hoc": [0.3, 0.4, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
}

dict_transmat = {
    #  ɲa̤ː˨˩ 
    "Nha": [[0.3, 0.3, 0.4, 0.0, 0.0, 0.0,], 
            [0.0, 0.3, 0.3, 0.4, 0.0, 0.0,], 
            [0.0, 0.0, 0.3, 0.3, 0.4, 0.0,], 
            [0.0, 0.0, 0.0, 0.3, 0.3, 0.4,], 
            [0.0, 0.0, 0.0, 0.0, 0.3, 0.7,], 
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.4,],], 
    
    #  mɛ̰ʔ˨˩ 
    "Me":  [[0.3, 0.3, 0.4, 0.0, 0.0, 0.0,], 
            [0.0, 0.2, 0.4, 0.4, 0.0, 0.0,], 
            [0.0, 0.0, 0.3, 0.4, 0.3, 0.0,], 
            [0.0, 0.0, 0.0, 0.3, 0.3, 0.4,], 
            [0.0, 0.0, 0.0, 0.0, 0.3, 0.7,], 
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.4,],], 
    
    #  i˧˧ te˧˥ 
    "YTe": [[0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
            [0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0,],
            [0.0, 0.0, 0.6, 0.3, 0.1, 0.0, 0.0, 0.0, 0.0,],
            [0.0, 0.0, 0.0, 0.4, 0.4, 0.2, 0.0, 0.0, 0.0,],
            [0.0, 0.0, 0.0, 0.0, 0.3, 0.4, 0.3, 0.0, 0.0,],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.4, 0.3, 0.0,],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.7, 0.1,],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.8,],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5,],],
    
    #   tʰa̤jŋ˨˩ fo˧˥ 
    "ThanhPho": [[0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.7,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0,],],
                 
#                  [0.0, 0.2, 0.6, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
#                  [0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
#                  [0.0, 0.0, 0.0, 0.2, 0.5, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
#                  [0.0, 0.0, 0.0, 0.0, 0.3, 0.5, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
#                  [0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.6, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
#                  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.3, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
#                  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.8, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0,],
#                  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.5, 0.2, 0.0, 0.0, 0.0, 0.0,],
#                  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.15, 0.7, 0.15, 0.0, 0.0, 0.0,],
#                  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.6, 0.2, 0.0, 0.0,],
#                  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.7, 0.1, 0.0,],
#                  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.6, 0.1,],
#                  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.7,],
#                  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4,],],
    
    #  ha̰ʔwk˨ 
    "Hoc": [[0.3, 0.6, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
            [0.0, 0.4, 0.5, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0,],
            [0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0,],
            [0.0, 0.0, 0.0, 0.4, 0.4, 0.2, 0.0, 0.0, 0.0,],
            [0.0, 0.0, 0.0, 0.0, 0.3, 0.4, 0.3, 0.0, 0.0,],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.4, 0.3, 0.0,],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.2, 0.1,],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.3,],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2,],],
}

In [54]:
dataset = {}
models = {}
for cname in class_names:
        class_vectors = datas[cname]
        # convert all vectors to the cluster index
        # dataset['one'] = [O^1, ... O^R]
        # O^r = (c1, c2, ... ct, ... cT)
        # O^r size T x 1
        dataset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in datas[cname]])
        
        trans_mat = np.array(dict_transmat[cname])
        dists = np.array([NormalDistribution(5,2) for i in range(dict_components[cname])])
        starts = np.array(dict_startprob[cname])
        ends = starts[::-1]
        hmm = HiddenMarkovModel.from_matrix(trans_mat, dists, starts, ends)

        X = np.concatenate(dataset[cname])
        lengths = list([len(x) for x in dataset[cname]])
        print("training class", cname)
        print(X.shape, lengths, len(lengths))
        hmm.fit(X, algorithm='viterbi', verbose=True)
        models[cname] = hmm
print("Training done")

training class Nha
(8849, 1) [92, 141, 143, 78, 95, 92, 45, 74, 102, 72, 92, 92, 100, 74, 85, 95, 90, 78, 74, 69, 97, 71, 97, 53, 111, 75, 75, 93, 61, 100, 98, 102, 67, 107, 80, 114, 97, 68, 141, 117, 95, 88, 124, 124, 82, 85, 96, 65, 62, 77, 105, 86, 91, 110, 77, 111, 78, 81, 88, 83, 80, 93, 101, 88, 87, 87, 100, 73, 68, 67, 86, 97, 75, 77, 114, 71, 76, 120, 78, 142, 76, 97, 55, 83, 106, 46, 83, 87, 49, 92, 85, 61, 168, 71, 85, 90, 110, 98, 112] 99
[1] Improvement: nan	Time (s): 0.3839
Total Training Improvement: nan
Total Training Time (s): 0.7350
training class Me
(10468, 1) [101, 103, 66, 106, 101, 106, 115, 100, 96, 104, 98, 100, 98, 95, 98, 106, 106, 105, 108, 117, 101, 105, 99, 99, 101, 102, 100, 103, 102, 100, 126, 106, 100, 95, 111, 102, 116, 106, 106, 95, 100, 98, 96, 139, 98, 108, 110, 124, 105, 94, 95, 118, 93, 96, 98, 100, 111, 101, 95, 103, 98, 131, 101, 135, 109, 146, 117, 105, 96, 95, 112, 100, 109, 122, 107, 125, 101, 115, 112, 124, 106, 132, 121, 116, 119, 113, 107, 1

In [None]:

# models = {}
# for cname in class_names:
#     class_vectors = dataset[cname]
#     # convert all vectors to the cluster index
#     # dataset['one'] = [O^1, ... O^R]
#     # O^r = (c1, c2, ... ct, ... cT)
#     # O^r size T x 1
#     dataset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in dataset[cname]])
#     hmm = hmmlearn.hmm.MultinomialHMM(
#         n_components=dict_components[cname], random_state=0, n_iter=1000, verbose=True,
#         init_params='e', params = 'ste',
#         startprob_prior=np.array(dict_startprob[cname]),
#         transmat_prior=np.array(dict_transmat[cname]),
#     )
#     if cname[:4] != 'test':
#         X = np.concatenate(dataset[cname])
#         lengths = list([len(x) for x in dataset[cname]])
#         print("training class", cname)
#         print(X.shape, lengths, len(lengths))
#         hmm.fit(X, lengths=lengths)
#         models[cname] = hmm
# print("Training done")


In [56]:
for cname in test_class_names:
    print(f"Load {cname} test dataset")
    datas[cname] = get_class_data(os.path.join(path_to_data, cname))
print("Testing")
for true_cname in test_class_names:
    for O in datas[true_cname]:
        score = {cname : model.log_probability(list(O)) for cname, model in models.items() if cname[:4] != 'test' }
        print(true_cname, score)



Load test_Nha test dataset
Load test_Me test dataset
Load test_YTe test dataset
Load test_ThanhPho test dataset
Load test_Hoc test dataset
Testing
test_Nha {'Nha': -16263.055653672833, 'Me': -16250.200321216465, 'YTe': -16239.2850684032, 'ThanhPho': -16224.009427672541, 'Hoc': -16265.144992063035}
test_Nha {'Nha': -11541.69306455922, 'Me': -11520.976047431655, 'YTe': -11506.18918627508, 'ThanhPho': -11478.967252544235, 'Hoc': -11550.001706363964}
test_Nha {'Nha': -11627.481406362322, 'Me': -11614.626073905963, 'YTe': -11603.710821092718, 'ThanhPho': -11588.435180362034, 'Hoc': -11629.570744752522}
test_Nha {'Nha': -7654.459469590903, 'Me': -7644.378849371443, 'YTe': -7634.9414518974545, 'ThanhPho': -7623.7707445369815, 'Hoc': -7654.478368777608}
test_Nha {'Nha': -14857.221367260589, 'Me': -14842.824528005944, 'YTe': -14831.121330434673, 'ThanhPho': -14813.53208517898, 'Hoc': -14860.499470095569}
test_Nha {'Nha': -7596.607772769656, 'Me': -7586.835453909851, 'YTe': -7577.568422406987, '