In [34]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
import os
import math
import hmmlearn.hmm
from sklearn.cluster import KMeans
from pomegranate import *

path_to_data = "./Data"

In [35]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix

def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

def clustering(X, n_clusters=10):
    kmeans = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans  

In [48]:

class_names = ["Nha", "Me", "YTe", "ThanhPho", "Hoc",]
test_class_names = ["test_Nha", "test_Me", "test_YTe", "test_ThanhPho", "test_Hoc"]
datas = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    datas[cname] = get_class_data(os.path.join(path_to_data, cname))
print('Done!!!')

Load Nha dataset
Load Me dataset
Load YTe dataset
Load ThanhPho dataset
Load Hoc dataset
Done!!!


In [49]:

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in datas.items()], axis=0)
print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)
print("Done")

vectors (55545, 36)
centers (10, 36)
centers (10, 36)
Done


# Khởi tạo cho t và e của từng từ

In [50]:
dict_components = {
    #  ɲa̤ː˨˩ -> 2 âm vị -> 6 states
    "Nha": 6,
    #  mɛ̰ʔ˨˩ -> 2 âm vị -> 6 states
    "Me": 6,
    #  i˧˧ te˧˥ -> 3 âm vị -> 9 states
    "YTe": 9,
    #   tʰa̤jŋ˨˩ fo˧˥ -> 5 âm vị -> 15 states
    "ThanhPho": 15,
    #  ha̰ʔwk˨ -> 3 âm vị -> 9 states
    "Hoc": 9,
}

dict_startprob = {
    #  ɲa̤ː˨˩ -> 2 âm vị -> 6 states
    "Nha": [0.3, 0.3, 0.4, 0.0, 0.0, 0.0,], 
    
    #  mɛ̰ʔ˨˩ -> 2 âm vị -> 6 states
    "Me": [0.2, 0.6, 0.2, 0.0, 0.0, 0.0,],
    
    #  i˧˧ te˧˥ -> 3 âm vị -> 9 states
    "YTe": [0.2, 0.6, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
    
    #   tʰa̤jŋ˨˩ fo˧˥ -> 5 âm vị -> 15 states
    "ThanhPho": [0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
    
    #  ha̰ʔwk˨ -> 3 âm vị -> 9 states
    "Hoc": [0.3, 0.4, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
}

dict_transmat = {
    #  ɲa̤ː˨˩ 
    "Nha": [[0.3, 0.3, 0.4, 0.0, 0.0, 0.0,], 
            [0.0, 0.3, 0.3, 0.4, 0.0, 0.0,], 
            [0.0, 0.0, 0.3, 0.3, 0.4, 0.0,], 
            [0.0, 0.0, 0.0, 0.3, 0.3, 0.4,], 
            [0.0, 0.0, 0.0, 0.0, 0.3, 0.7,], 
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.4,],], 
    
    #  mɛ̰ʔ˨˩ 
    "Me":  [[0.3, 0.3, 0.4, 0.0, 0.0, 0.0,], 
            [0.0, 0.2, 0.4, 0.4, 0.0, 0.0,], 
            [0.0, 0.0, 0.3, 0.4, 0.3, 0.0,], 
            [0.0, 0.0, 0.0, 0.3, 0.3, 0.4,], 
            [0.0, 0.0, 0.0, 0.0, 0.3, 0.7,], 
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.4,],], 
    
    #  i˧˧ te˧˥ 
    "YTe": [[0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
            [0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0,],
            [0.0, 0.0, 0.6, 0.3, 0.1, 0.0, 0.0, 0.0, 0.0,],
            [0.0, 0.0, 0.0, 0.4, 0.4, 0.2, 0.0, 0.0, 0.0,],
            [0.0, 0.0, 0.0, 0.0, 0.3, 0.4, 0.3, 0.0, 0.0,],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.4, 0.3, 0.0,],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.7, 0.1,],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.8,],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5,],],
    
    #   tʰa̤jŋ˨˩ fo˧˥ 
    "ThanhPho": [[0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.7,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0,],],
                 
#                  [0.0, 0.2, 0.6, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
#                  [0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
#                  [0.0, 0.0, 0.0, 0.2, 0.5, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
#                  [0.0, 0.0, 0.0, 0.0, 0.3, 0.5, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
#                  [0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.6, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
#                  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.3, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
#                  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.8, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0,],
#                  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.5, 0.2, 0.0, 0.0, 0.0, 0.0,],
#                  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.15, 0.7, 0.15, 0.0, 0.0, 0.0,],
#                  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.6, 0.2, 0.0, 0.0,],
#                  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.7, 0.1, 0.0,],
#                  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.6, 0.1,],
#                  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.7,],
#                  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4,],],
    
    #  ha̰ʔwk˨ 
    "Hoc": [[0.3, 0.6, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
            [0.0, 0.4, 0.5, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0,],
            [0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0,],
            [0.0, 0.0, 0.0, 0.4, 0.4, 0.2, 0.0, 0.0, 0.0,],
            [0.0, 0.0, 0.0, 0.0, 0.3, 0.4, 0.3, 0.0, 0.0,],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.4, 0.3, 0.0,],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.2, 0.1,],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.3,],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2,],],
}

In [54]:
dataset = {}
models = {}
for cname in class_names:
    class_vectors = datas[cname]
    # convert all vectors to the cluster index
    # dataset['one'] = [O^1, ... O^R]
    # O^r = (c1, c2, ... ct, ... cT)
    # O^r size T x 1
    dataset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in datas[cname]])
    hmm = HiddenMarkovModel.from_matrix(matrix=np.array(dict_transmat[cname]),
                                        dists=NormalDistribution.from_samples(),
                                        starts=np.array(dict_startprob[cname]),
                                        #ends=np.array(dict_startprob[cname][::-1])
                                       )
    if cname[:4] != 'test':
        X = np.concatenate(dataset[cname])
        lengths = list([len(x) for x in dataset[cname]])
        print("training class", cname)
        print(X.shape, lengths, len(lengths))
        hmm.fit(X, algorithm='viterbi')
        models[cname] = hmm
print("Training done")

TypeError: from_matrix() takes at least 3 positional arguments (0 given)

In [33]:

models = {}
for cname in class_names:
    class_vectors = dataset[cname]
    # convert all vectors to the cluster index
    # dataset['one'] = [O^1, ... O^R]
    # O^r = (c1, c2, ... ct, ... cT)
    # O^r size T x 1
    dataset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in dataset[cname]])
    hmm = hmmlearn.hmm.MultinomialHMM(
        n_components=dict_components[cname], random_state=0, n_iter=1000, verbose=True,
        init_params='e', params = 'ste',
        startprob_prior=np.array(dict_startprob[cname]),
        transmat_prior=np.array(dict_transmat[cname]),
    )
    if cname[:4] != 'test':
        X = np.concatenate(dataset[cname])
        lengths = list([len(x) for x in dataset[cname]])
        print("training class", cname)
        print(X.shape, lengths, len(lengths))
        hmm.fit(X, lengths=lengths)
        models[cname] = hmm
print("Training done")


training class Nha
(8849, 1) [92, 141, 143, 78, 95, 92, 45, 74, 102, 72, 92, 92, 100, 74, 85, 95, 90, 78, 74, 69, 97, 71, 97, 53, 111, 75, 75, 93, 61, 100, 98, 102, 67, 107, 80, 114, 97, 68, 141, 117, 95, 88, 124, 124, 82, 85, 96, 65, 62, 77, 105, 86, 91, 110, 77, 111, 78, 81, 88, 83, 80, 93, 101, 88, 87, 87, 100, 73, 68, 67, 86, 97, 75, 77, 114, 71, 76, 120, 78, 142, 76, 97, 55, 83, 106, 46, 83, 87, 49, 92, 85, 61, 168, 71, 85, 90, 110, 98, 112] 99


         1      -22005.7373             +nan
         2      -15700.7579       +6304.9795
         3      -14196.3071       +1504.4507
         4      -11949.7460       +2246.5612
         5       -9332.2183       +2617.5276
         6       -7643.6078       +1688.6106
         7       -6906.6890        +736.9188
         8       -6433.5350        +473.1540
         9       -5873.9384        +559.5967
        10       -5467.8806        +406.0577
        11       -5387.4754         +80.4053
        12       -5366.0497         +21.4256
        13       -5349.9005         +16.1492
        14       -5325.1899         +24.7106
        15       -5248.0573         +77.1326
        16       -5146.0374        +102.0199
        17       -5059.5372         +86.5002
        18       -5031.5024         +28.0348
        19       -5009.3484         +22.1540
        20       -4958.8454         +50.5030
        21       -4927.5410         +31.3044
        22       -4918.9142          +8.6268
        23

training class Me
(10468, 1) [101, 103, 66, 106, 101, 106, 115, 100, 96, 104, 98, 100, 98, 95, 98, 106, 106, 105, 108, 117, 101, 105, 99, 99, 101, 102, 100, 103, 102, 100, 126, 106, 100, 95, 111, 102, 116, 106, 106, 95, 100, 98, 96, 139, 98, 108, 110, 124, 105, 94, 95, 118, 93, 96, 98, 100, 111, 101, 95, 103, 98, 131, 101, 135, 109, 146, 117, 105, 96, 95, 112, 100, 109, 122, 107, 125, 101, 115, 112, 124, 106, 132, 121, 116, 119, 113, 107, 111, 93, 105, 129, 131, 80, 79, 82, 74, 76, 79, 81, 88] 100


         1      -23763.7620             +nan
         2      -17578.9472       +6184.8148
         3      -17087.2041        +491.7430
         4      -15928.7692       +1158.4349
         5      -13878.4040       +2050.3652
         6      -10247.2918       +3631.1123
         7       -8186.1921       +2061.0996
         8       -7811.8055        +374.3866
         9       -7567.4967        +244.3089
        10       -7409.3141        +158.1826
        11       -7097.8451        +311.4690
        12       -6804.9347        +292.9104
        13       -6729.2076         +75.7271
        14       -6693.5930         +35.6146
        15       -6657.8696         +35.7234
        16       -6594.7166         +63.1530
        17       -6503.5440         +91.1725
        18       -6376.5776        +126.9665
        19       -6026.9874        +349.5901
        20       -5923.6464        +103.3411
        21       -5907.5081         +16.1383
        22       -5901.4405          +6.0676
        23

training class YTe
(12847, 1) [92, 129, 131, 127, 127, 125, 134, 121, 126, 146, 128, 119, 120, 139, 127, 136, 158, 164, 136, 154, 119, 156, 139, 125, 111, 129, 130, 117, 115, 124, 128, 117, 124, 130, 119, 126, 109, 150, 128, 153, 147, 120, 136, 126, 136, 119, 118, 123, 159, 129, 128, 184, 133, 125, 142, 135, 150, 131, 122, 127, 132, 118, 114, 125, 120, 120, 124, 128, 130, 120, 119, 119, 147, 128, 123, 123, 111, 151, 118, 129, 115, 113, 149, 121, 118, 114, 114, 116, 133, 120, 120, 120, 120, 114, 116, 118, 165, 120, 115, 149] 100


         1      -31197.5801             +nan
         2      -21639.1897       +9558.3904
         3      -20062.7073       +1576.4824
         4      -16465.8619       +3596.8454
         5      -12423.8772       +4041.9847
         6      -10665.2111       +1758.6661
         7       -8952.5093       +1712.7018
         8       -8089.5248        +862.9846
         9       -7659.3374        +430.1873
        10       -7346.6269        +312.7106
        11       -7195.8756        +150.7513
        12       -7122.0012         +73.8743
        13       -7111.4144         +10.5868
        14       -7106.1524          +5.2621
        15       -7101.5916          +4.5607
        16       -7099.1070          +2.4847
        17       -7098.0450          +1.0619
        18       -7097.3483          +0.6967
        19       -7096.8336          +0.5148
        20       -7096.4891          +0.3444
        21       -7096.2535          +0.2356
        22       -7096.0610          +0.1925
        23

training class ThanhPho
(12660, 1) [141, 129, 141, 136, 137, 113, 175, 144, 132, 102, 123, 150, 192, 191, 124, 169, 134, 110, 144, 90, 161, 110, 116, 130, 104, 80, 133, 125, 115, 103, 104, 102, 98, 116, 127, 131, 152, 123, 121, 142, 242, 84, 126, 132, 89, 126, 91, 131, 135, 133, 133, 130, 141, 137, 145, 101, 128, 104, 136, 130, 130, 95, 145, 159, 150, 97, 90, 124, 86, 120, 100, 151, 149, 105, 95, 95, 100, 121, 279, 126, 121, 124, 101, 124, 82, 133, 108, 197, 111, 124, 121, 143, 113, 112, 91, 111, 132, 148, 103, 100] 100


         1      -29640.4008             +nan
         2      -24816.3743       +4824.0265
         3      -24491.5112        +324.8630
         4      -23599.6993        +891.8119
         5      -20630.0635       +2969.6358
         6      -15578.6987       +5051.3648
         7      -12573.2358       +3005.4629
         8      -10037.7677       +2535.4681
         9       -8967.3767       +1070.3910
        10       -8848.5805        +118.7963
        11       -8814.0363         +34.5442
        12       -8800.4932         +13.5431
        13       -8792.2357          +8.2575
        14       -8785.4116          +6.8241
        15       -8779.0696          +6.3421
        16       -8773.6073          +5.4623
        17       -8769.7386          +3.8687
        18       -8767.6621          +2.0765
        19       -8766.5867          +1.0754
        20       -8765.5390          +1.0477
        21       -8764.3755          +1.1635
        22       -8763.3728          +1.0027
        23

KeyboardInterrupt: 

In [None]:

print("Testing")
for true_cname in test_class_names:
    for O in dataset[true_cname[5:]]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }
        print(true_cname, score)

