In [8]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

In [9]:
def get_mfcc(file_path):
    """Load an audio file and return its MFCC + delta feature matrix.

    Parameters
    ----------
    file_path : str
        Path of the audio file handed to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(T, 36)``: for each of the ``T`` frames, 12
        mean-normalised MFCCs followed by their first- and second-order
        deltas.  Rows are frames because hmmlearn expects ``T x N`` input.
    """
    y, sr = librosa.load(file_path)  # waveform and the sampling rate librosa used
    hop_length = math.floor(sr * 0.010)  # 10 ms hop
    win_length = math.floor(sr * 0.025)  # 25 ms analysis window
    # `y` and `sr` must be passed by keyword: recent librosa releases made the
    # audio arguments of the feature extractors keyword-only.
    mfcc = librosa.feature.mfcc(
        y=y, sr=sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)  # 12 x T
    # Subtract the per-coefficient mean over time (cepstral mean normalisation).
    mfcc = mfcc - np.mean(mfcc, axis=1, keepdims=True)
    # First- and second-order time derivatives of the normalised MFCCs.
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # Stack to 36 x T, then transpose to T x 36.
    X = np.concatenate([mfcc, delta1, delta2], axis=0)
    return X.T

In [10]:
def get_class_data(data_dir):
    """Extract features for every ``.wav`` file directly inside ``data_dir``.

    Parameters
    ----------
    data_dir : str
        Directory to scan (not recursive).

    Returns
    -------
    list
        One ``get_mfcc`` result per ``.wav`` file, ordered by file name.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the result
    # (and any positional train/test split built on it) reproducible.
    wav_names = sorted(f for f in os.listdir(data_dir) if f.endswith(".wav"))
    return [get_mfcc(os.path.join(data_dir, name)) for name in wav_names]

In [11]:
def clustering(X, n_clusters=14):
    """Fit a k-means model on the rows of ``X`` and return the fitted estimator.

    Prints the shape of the learned cluster centres as a sanity check.
    """
    codebook = KMeans(
        n_clusters=n_clusters,
        n_init=100,
        random_state=0,
        verbose=0,
    ).fit(X)
    print("centers", codebook.cluster_centers_.shape)
    return codebook

In [13]:
# Word classes to recognise; each one has its own sub-directory of recordings.
class_names = ["nguoi", "duoc", "co_the", "khong", "benh_nhan"]
dataset = dict()

# dataset[cname] -> list of per-file feature matrices returned by get_class_data.
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(
        os.path.join("hmm_data_3/hmm_data", cname)
    )

Load nguoi dataset
Load duoc dataset
Load co_the dataset
Load khong dataset
Load benh_nhan dataset


In [14]:
# Pool the frames of every file of every class into one (n_frames, n_features) array.
# NOTE(review): this pools all of `dataset`, i.e. also the files that are later
# held out as the test split, so the codebook sees test data.
all_vector = np.concatenate(
    [np.concatenate(utterances, axis=0) for utterances in dataset.values()],
    axis=0,
)
print("all_vector", all_vector.shape)
# Fit the k-means codebook used to quantise frames into discrete symbols.
kmeans = clustering(all_vector)


all_vector (15157, 36)
centers (14, 36)


In [15]:
# Per-class description of the initial left-to-right HMM:
#   class name -> (number of states, forward weights of a regular row,
#                  leading entries of the start distribution)
_START_CONFIG_SPECS = {
    'va':        (6,  (0.7, 0.2, 0.1),      (0.7, 0.2, 0.1)),
    'duoc':      (9,  (0.5, 0.4, 0.1),      (0.5, 0.4, 0.1)),
    'nguoi':     (12, (0.5, 0.4, 0.1),      (0.5, 0.4, 0.1)),
    'co_the':    (12, (0.7, 0.2, 0.1),      (0.7, 0.2, 0.1)),
    'khong':     (9,  (0.5, 0.4, 0.1),      (0.5, 0.4, 0.1)),
    'benh_nhan': (18, (0.6, 0.2, 0.1, 0.1), (0.7, 0.2, 0.1)),
}

# The last three rows are the same for every class: the final states can only
# reach the few states that remain, and the last state is absorbing.
_TAIL_ROWS = ((0.7, 0.2, 0.1), (0.7, 0.3), (1.0,))


def _left_to_right_config(n_states, body, start):
    """Build (startprob, transmat) for a left-to-right HMM with ``n_states`` states.

    Row ``i`` of the transition matrix holds ``body`` starting at the diagonal
    (stay, then forward jumps); the last ``len(_TAIL_ROWS)`` rows use
    ``_TAIL_ROWS`` instead.  ``start`` fills the first entries of the start
    distribution; everything else is zero.
    """
    startprob = np.zeros(n_states)
    startprob[:len(start)] = start

    transmat = np.zeros((n_states, n_states))
    n_body_rows = n_states - len(_TAIL_ROWS)
    for row in range(n_states):
        weights = body if row < n_body_rows else _TAIL_ROWS[row - n_body_rows]
        transmat[row, row:row + len(weights)] = weights
    return startprob, transmat


def get_start_config(cname):
    """Return the initial HMM parameters for the word class ``cname``.

    Parameters
    ----------
    cname : str
        Class name; one of the keys of ``_START_CONFIG_SPECS``.

    Returns
    -------
    tuple of numpy.ndarray or None
        ``(startprob, transmat)`` with shapes ``(n,)`` and ``(n, n)``, freshly
        allocated on every call, or ``None`` when ``cname`` has no
        configuration (callers that unpack the result must handle that case).
    """
    spec = _START_CONFIG_SPECS.get(cname)
    if spec is None:
        return None
    n_states, body, start = spec
    return _left_to_right_config(n_states, body, start)

In [16]:
# Positional 70/30 split of each class: the first 70% of the files go to
# training, the remainder to testing (no shuffling).
trainset, testset = {}, {}

for cname in class_names:
    n = len(dataset[cname])
    ntrain = math.floor(n * 0.7)
    samples = dataset[cname]
    trainset[cname], testset[cname] = samples[:ntrain], samples[ntrain:]

In [17]:
def get_ncomponent(cname):
    """Return the base component count configured for the word class ``cname``.

    The training cell multiplies this value by 3 to obtain the number of HMM
    states.  Unknown class names yield ``None``.
    """
    base_counts = {
        'va': 2,
        'cua': 3,
        'duoc': 3,
        'nguoi': 4,
        'khong': 3,
        'benh_nhan': 6,
        'co_the': 4,
    }
    return base_counts.get(cname)

In [18]:
# Train one discrete-observation HMM per word class on the k-means symbol
# sequences of that class.
#
# Newer hmmlearn releases provide CategoricalHMM for single-symbol
# observations and changed what MultinomialHMM means; prefer CategoricalHMM
# when it exists and fall back to MultinomialHMM on older releases.
_DiscreteHMM = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)

models = {}
for cname in class_names:
    # Vector-quantise every utterance: (T, 36) features -> (T, 1) cluster ids.
    # NOTE: this overwrites trainset/testset in place (later cells rely on the
    # quantised testset), so this cell must run exactly once after the split.
    trainset[cname] = [kmeans.predict(v).reshape(-1, 1) for v in trainset[cname]]
    testset[cname] = [kmeans.predict(v).reshape(-1, 1) for v in testset[cname]]

    startprob_, transmat_ = get_start_config(cname)
    n_components = get_ncomponent(cname)
    print(n_components)
    print(transmat_.shape)

    # init_params='e': only the emission matrix is initialised by hmmlearn;
    # start and transition probabilities come from get_start_config.
    # params='ste': all three are re-estimated during fitting.
    hmm = _DiscreteHMM(
        n_components=n_components*3, random_state=0, n_iter=1000, verbose=True,
        init_params='e', params='ste')
    hmm.startprob_ = startprob_
    hmm.transmat_ = transmat_

    # hmmlearn takes all sequences stacked into one array plus their lengths.
    X = np.concatenate(trainset[cname])
    lengths = [len(x) for x in trainset[cname]]
    print("training class", cname)
    print(X.shape, lengths, len(lengths))
    hmm.fit(X, lengths=lengths)
    models[cname] = hmm

print("Training done")

4
(12, 12)
training class nguoi
(1752, 1) [18, 22, 41, 43, 32, 26, 26, 16, 18, 24, 21, 27, 22, 20, 16, 19, 18, 18, 21, 21, 15, 28, 20, 49, 21, 20, 23, 29, 34, 40, 41, 30, 23, 17, 17, 13, 27, 32, 20, 28, 26, 36, 19, 42, 19, 19, 14, 20, 28, 28, 28, 27, 36, 30, 23, 22, 28, 20, 27, 26, 24, 29, 25, 24, 20, 24, 25, 22, 27, 18] 70


         1       -4715.4312             +nan
         2       -3364.1986       +1351.2326
         3       -3006.7939        +357.4047
         4       -2732.8035        +273.9904
         5       -2590.4938        +142.3097
         6       -2516.9037         +73.5901
         7       -2464.1083         +52.7954
         8       -2425.7388         +38.3695
         9       -2403.8067         +21.9321
        10       -2392.4755         +11.3312
        11       -2386.1336          +6.3419
        12       -2382.0225          +4.1111
        13       -2378.9296          +3.0930
        14       -2376.3586          +2.5709
        15       -2374.1478          +2.2109
        16       -2372.5349          +1.6129
        17       -2371.4557          +1.0792
        18       -2370.7250          +0.7307
        19       -2370.2582          +0.4668
        20       -2369.9592          +0.2989
        21       -2369.7507          +0.2085
        22       -2369.5913          +0.1595
        23

3
(9, 9)
training class duoc
(1115, 1) [15, 17, 16, 21, 11, 18, 18, 10, 13, 13, 19, 20, 22, 17, 21, 20, 17, 16, 21, 20, 24, 23, 41, 41, 15, 18, 13, 14, 14, 15, 10, 15, 13, 17, 18, 18, 15, 11, 13, 22, 15, 15, 15, 14, 12, 10, 12, 14, 14, 12, 17, 23, 19, 20, 19, 21, 19, 19, 19, 18, 16, 14, 14, 15, 14] 65


         3       -2112.9763        +258.4471
         4       -2016.4106         +96.5658
         5       -1967.1851         +49.2255
         6       -1938.3206         +28.8645
         7       -1919.9565         +18.3641
         8       -1906.9221         +13.0344
         9       -1896.5253         +10.3969
        10       -1886.9904          +9.5348
        11       -1877.3827          +9.6077
        12       -1868.8451          +8.5377
        13       -1862.7922          +6.0528
        14       -1858.1872          +4.6050
        15       -1853.5958          +4.5914
        16       -1849.2600          +4.3358
        17       -1846.2560          +3.0040
        18       -1844.5657          +1.6903
        19       -1843.6311          +0.9346
        20       -1843.0701          +0.5609
        21       -1842.7011          +0.3690
        22       -1842.4411          +0.2600
        23       -1842.2481          +0.1930
        24       -1842.0982          +0.1499
        25

4
(12, 12)
training class co_the
(2961, 1) [34, 31, 56, 39, 44, 40, 46, 35, 45, 47, 37, 56, 36, 31, 39, 35, 42, 30, 41, 41, 52, 52, 43, 43, 33, 49, 59, 35, 42, 40, 37, 40, 35, 36, 28, 45, 44, 23, 30, 48, 35, 32, 35, 35, 26, 36, 43, 45, 62, 20, 22, 36, 53, 44, 44, 28, 57, 53, 36, 37, 30, 34, 38, 51, 43, 40, 47, 38, 44, 102, 44, 52] 72


         1       -7513.6157             +nan
         2       -5671.8081       +1841.8075
         3       -4965.8485        +705.9596
         4       -4692.6178        +273.2307
         5       -4516.2022        +176.4157
         6       -4425.1684         +91.0337
         7       -4383.2512         +41.9172
         8       -4356.7469         +26.5043
         9       -4336.9552         +19.7917
        10       -4320.1794         +16.7758
        11       -4308.8994         +11.2800
        12       -4302.8903          +6.0091
        13       -4299.8567          +3.0336
        14       -4298.1061          +1.7506
        15       -4296.6755          +1.4306
        16       -4295.4039          +1.2716
        17       -4294.2914          +1.1125
        18       -4293.3445          +0.9468
        19       -4292.5455          +0.7991
        20       -4291.8571          +0.6884
        21       -4291.2384          +0.6187
        22       -4290.6520          +0.5863
        23

3
(9, 9)
training class khong
(2207, 1) [28, 32, 37, 32, 35, 26, 42, 37, 23, 30, 27, 36, 25, 33, 27, 49, 23, 26, 33, 26, 28, 23, 35, 24, 20, 28, 19, 25, 32, 26, 21, 20, 32, 32, 25, 22, 27, 27, 21, 49, 31, 24, 24, 24, 17, 43, 33, 40, 43, 45, 38, 42, 28, 44, 42, 31, 35, 38, 47, 30, 64, 35, 21, 30, 30, 28, 30, 31, 34, 42] 70


         2       -4668.3893       +1268.4555
         3       -4080.2075        +588.1818
         4       -3904.6085        +175.5990
         5       -3835.5127         +69.0958
         6       -3778.6619         +56.8508
         7       -3705.6450         +73.0168
         8       -3652.9912         +52.6539
         9       -3625.9309         +27.0603
        10       -3606.5051         +19.4258
        11       -3593.6808         +12.8243
        12       -3575.2907         +18.3901
        13       -3560.7289         +14.5618
        14       -3556.1481          +4.5808
        15       -3550.4558          +5.6923
        16       -3548.9122          +1.5436
        17       -3544.4926          +4.4196
        18       -3540.7891          +3.7035
        19       -3539.7111          +1.0780
        20       -3539.0930          +0.6181
        21       -3538.5954          +0.4976
        22       -3537.8209          +0.7746
        23       -3536.5341          +1.2868
        24

6
(18, 18)
training class benh_nhan
(2863, 1) [42, 37, 37, 40, 38, 47, 35, 41, 41, 44, 32, 35, 28, 32, 44, 47, 36, 38, 46, 46, 34, 33, 37, 28, 30, 45, 36, 44, 34, 35, 63, 46, 46, 48, 44, 34, 48, 38, 43, 44, 55, 45, 26, 29, 43, 33, 30, 37, 30, 45, 42, 42, 45, 41, 44, 52, 41, 43, 34, 45, 51, 41, 62, 40, 44, 47, 52, 39, 54, 35] 70


         1       -7529.1397             +nan
         2       -5778.8899       +1750.2498
         3       -5058.4557        +720.4342
         4       -4775.6639        +282.7919
         5       -4651.7992        +123.8646
         6       -4570.5631         +81.2362
         7       -4505.1522         +65.4109
         8       -4457.7777         +47.3745
         9       -4418.9828         +38.7949
        10       -4384.8529         +34.1299
        11       -4361.0185         +23.8345
        12       -4346.6021         +14.4164
        13       -4337.4553          +9.1468
        14       -4331.1434          +6.3119
        15       -4326.1484          +4.9951
        16       -4321.9917          +4.1567
        17       -4318.7688          +3.2229
        18       -4316.4391          +2.3297
        19       -4313.9115          +2.5276
        20       -4309.9331          +3.9784
        21       -4307.9205          +2.0126
        22       -4306.3888          +1.5318
        23

Training done


        65       -4266.3734          +0.0119
        66       -4266.3638          +0.0096


In [20]:
print("Testing")
true_names=["duoc", "nguoi", "co_the", "benh_nhan", "khong"]
# Number of correctly classified test utterances per class.
prid_correct = dict.fromkeys(true_names, 0)

for true_cname in true_names:
    for O in testset[true_cname]:
        # Log-likelihood of the utterance under every class model; the class
        # with the highest score is the prediction.
        score = {cname : model.score(O) for cname, model in models.items()}
        predicted = max(score, key=score.get)
        if predicted == true_cname:
            prid_correct[predicted] += 1
        print(true_cname, ' predict ', predicted)

Testing
duoc  predict  benh_nhan
duoc  predict  benh_nhan
duoc  predict  benh_nhan
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  nguoi
duoc  predict  duoc
duoc  predict  benh_nhan
duoc  predict  benh_nhan
duoc  predict  duoc
duoc  predict  benh_nhan
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  benh_nhan
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  nguoi
duoc  predict  duoc
duoc  predict  duoc
nguoi  predict  benh_nhan
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  duoc
nguoi  predict  nguoi
nguoi  predict  duoc
nguoi  predict  nguoi
nguoi  predict  benh_nhan
nguoi  predict  nguoi
nguoi  predict 

In [21]:
# Per-class accuracy on the test split and their unweighted mean.
avg = 0
for true_cname in true_names:
    class_accuracy = prid_correct[true_cname] / len(testset[true_cname])
    avg += class_accuracy
    print (true_cname, " : ", class_accuracy)
# Divide by the actual number of classes instead of a hard-coded 5 so the
# average stays correct when `true_names` changes.
print('avg = ', avg / len(true_names))

duoc  :  0.6785714285714286
nguoi  :  0.7666666666666667
co_the  :  0.967741935483871
benh_nhan  :  0.9333333333333333
khong  :  0.8
avg =  0.8292626728110599


In [167]:
# Classify separately recorded samples with the trained models.
record_class_name = ['benh_nhan_record', 'duoc_record','khong_record', 'co_the_record']
recordtestset = {}

for cname in record_class_name:
    print(f"Load {cname} dataset")
    features = get_class_data(os.path.join("hmm_data", cname))
    # Quantise each utterance to a (T, 1) column of k-means cluster ids.
    recordtestset[cname] = [kmeans.predict(v).reshape(-1, 1) for v in features]

print ('------------------ predict --------------------')
for true_cname in record_class_name:
    for O in recordtestset[true_cname]:
        # Highest log-likelihood among the class models wins.
        score = {cname : model.score(O) for cname, model in models.items()}
        print(true_cname, ' predict ', max(score, key=score.get))

Load benh_nhan_record dataset
Load duoc_record dataset
Load khong_record dataset
Load co_the_record dataset
------------------ predict --------------------
benh_nhan_record  predict  benh_nhan
benh_nhan_record  predict  benh_nhan
benh_nhan_record  predict  benh_nhan
benh_nhan_record  predict  benh_nhan
benh_nhan_record  predict  benh_nhan
benh_nhan_record  predict  benh_nhan
benh_nhan_record  predict  benh_nhan
benh_nhan_record  predict  benh_nhan
duoc_record  predict  duoc
duoc_record  predict  duoc
duoc_record  predict  duoc
duoc_record  predict  duoc
duoc_record  predict  duoc
duoc_record  predict  duoc
duoc_record  predict  duoc
duoc_record  predict  duoc
duoc_record  predict  duoc
khong_record  predict  co_the
khong_record  predict  benh_nhan
khong_record  predict  khong
khong_record  predict  khong
khong_record  predict  nguoi
khong_record  predict  khong
khong_record  predict  benh_nhan
khong_record  predict  khong
khong_record  predict  benh_nhan
co_the_record  predict  co_the


In [22]:
import pickle
# Persist every trained model as <class name>.pkl in the working directory.
for cname in class_names:
    trained = models[cname]
    with open(f'{cname}.pkl', 'wb') as file:
        pickle.dump(trained, file)


In [28]:
# Reload one saved model and inspect its learned transition matrix.
# NOTE: pickle.load executes arbitrary code from the file; only load .pkl
# files produced by this notebook.
with open('benh_nhan.pkl', 'rb') as file:
    load_model = pickle.load(file)
# np.round replaces the alias np.round_, which NumPy 2.0 removed.
print(np.round(load_model.transmat_, decimals = 1))

[[0.7 0.3 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.7 0.3 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.5 0.4 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.5 0.2 0.  0.3 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.8 0.1 0.1 0.1 0.  0.  0.  0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.9 0.  0.  0.1 0.  0.  0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.6 0.3 0.  0.1 0.  0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.7 0.  0.  0.3 0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.  0.7 0.2 0.  0.1 0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.7 0.2 0.1 0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.7 0.3 0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.4 0.  0.6 0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.9 0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.  0. 

In [56]:
import threading
import pyaudio
import wave
import tkinter as tk

class App():
    """Two-button Tk recorder.

    'rec' opens the microphone and captures audio on a background thread;
    'stop' ends the capture, writes it to ``hmm_data/test/test.wav`` and
    closes the window.
    """
    chunk = 1024                     # frames read from the stream per call
    sample_format = pyaudio.paInt16
    channels = 2
    fs = 44100                       # sampling rate in Hz

    def __init__(self, master):
        # Use the window that was passed in rather than a module-level global.
        self.master = master
        self.isrecording = False
        # Per-instance buffer: a class-level list would be shared by all instances.
        self.frames = []
        self.p = None
        self.stream = None
        self._worker = None
        self.button1 = tk.Button(master, text='rec',command=self.startrecording)
        self.button2 = tk.Button(master, text='stop',command=self.stoprecording)

        self.button1.pack()
        self.button2.pack()

    def startrecording(self):
        """Open the input stream and start the capture thread."""
        if self.isrecording:
            return  # already recording; a second stream would interleave chunks
        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(format=self.sample_format,channels=self.channels,rate=self.fs,frames_per_buffer=self.chunk,input=True)
        self.isrecording = True

        # Daemon thread so a crashed or closed UI never keeps the kernel alive.
        self._worker = threading.Thread(target=self.record, daemon=True)
        self._worker.start()

    def stoprecording(self):
        """Stop capturing, save the WAV file and close the window."""
        if self.p is None:
            return  # 'stop' pressed before 'rec': nothing to save
        self.isrecording = False
        # Wait for the capture loop to finish its current read before touching
        # the stream or the frame buffer.
        if self._worker is not None:
            self._worker.join()
            self._worker = None
        self.stream.stop_stream()
        self.stream.close()
        self.stream = None
        sample_width = self.p.get_sample_size(self.sample_format)
        self.p.terminate()
        self.p = None

        self.filename = 'hmm_data/test/test.wav'
        os.makedirs(os.path.dirname(self.filename), exist_ok=True)
        # Context manager guarantees the file is closed even if writing fails.
        with wave.open(self.filename, 'wb') as wf:
            wf.setnchannels(self.channels)
            wf.setsampwidth(sample_width)
            wf.setframerate(self.fs)
            wf.writeframes(b''.join(self.frames))
        self.master.destroy()

    def record(self):
        """Capture loop run on the worker thread until ``isrecording`` is cleared."""
        while self.isrecording:
            data = self.stream.read(self.chunk)
            self.frames.append(data)

            
# Show the recorder window; mainloop() returns once the window is destroyed.
main = tk.Tk()
main.title('recorder')
main.geometry('500x500')
app = App(main)
main.mainloop()

# Classify whatever .wav files are in hmm_data/test (the recorder's output).
record_class_name = ['test']
recordtestset = {}

for cname in record_class_name:
    features = get_class_data(os.path.join("hmm_data", cname))
    # Quantise each utterance to a (T, 1) column of k-means cluster ids.
    recordtestset[cname] = [kmeans.predict(v).reshape(-1, 1) for v in features]

print ('------------------ predict --------------------')
for true_cname in record_class_name:
    for O in recordtestset[true_cname]:
        # Highest log-likelihood among the class models wins.
        score = {cname : model.score(O) for cname, model in models.items()}
        print('predict ', max(score, key=score.get))





------------------ predict --------------------
predict  nguoi
