In [3]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

In [327]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix

In [328]:
def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

In [329]:
def clustering(X, n_clusters=20):
    kmeans = KMeans(n_clusters=n_clusters, n_init=20, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans  

In [330]:
class_names = ["nguoi", "cua", "va", "khong", "benh_nhan"]
dataset = {}

for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("hmm_data", cname))

Load nguoi dataset
Load cua dataset
Load va dataset
Load khong dataset
Load benh_nhan dataset


In [331]:
all_vector = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("all_vector", all_vector.shape)
# Run K-Means algorithm to get clusters
kmeans = clustering(all_vector)


all_vector (9974, 36)
centers (20, 36)


In [332]:
def get_start_config(cname):
    if cname == 'va' :
        startprob=np.array([0.7,0.2,0.1,0.0,0.0,0.0])
        transmat=np.array([
            [0.7,0.2,0.1,0.0,0.0,0.0],
            [0.0,0.7,0.2,0.1,0.0,0.0,],
            [0.0,0.0,0.7,0.2,0.1,0.0,],
            [0.0,0.0,0.0,0.7,0.2,0.1,],
            [0.0,0.0,0.0,0.0,0.8,0.2,],
            [0.0,0.0,0.0,0.0,0.0,1.0,],
        ])
        return startprob, transmat
    if cname == 'cua':
        startprob=np.array([0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0])
        transmat=np.array([
            [0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
        ])
        return startprob, transmat
    if cname == 'nguoi':
        startprob=np.array([0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0])
        transmat=np.array([
            [0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
        ])
        return startprob, transmat
    if cname == 'khong':
        startprob=np.array([0.7,0.2,0.1,0.0,0.0,0.0, 0.0,0.0,0.0])
        transmat=np.array([
            [0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
        ])
        return startprob, transmat
    if cname == 'benh_nhan':
        startprob=np.array([0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0])
        transmat=np.array([
            [0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3],
        ])
        return startprob, transmat
    return None

In [333]:
trainset={}
testset={}

for cname in class_names :
    n = len(dataset[cname])
    ntrain = math.floor(n*0.7)
    trainset[cname] = dataset[cname][:ntrain]
    testset[cname] = dataset[cname][ntrain:]

In [334]:
def get_ncomponent(cname) :
    if cname == 'va' :
        return 2
    if cname == 'cua':
        return 3
    if cname == 'nguoi':
        return 4
    if cname == 'khong':
        return 3
    if cname == 'benh_nhan':
        return 6

In [335]:
models = {}
for cname in class_names:
    class_vectors = trainset[cname]
    startprob_, transmat_ = get_start_config(cname)
#     n_components = get_ncomponent(cname)
#     print(n_components)
    print(type(transmat_))

print("Training done")

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
Training done


In [336]:
def func() :
    return np.array([1])
b = func()
print (type(b))

<class 'numpy.ndarray'>


In [337]:
models = {}
for cname in class_names:
    class_vectors = trainset[cname]
    
    trainset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in trainset[cname]])
    testset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in testset[cname]])
    startprob_, transmat_ = get_start_config(cname)
    n_components = get_ncomponent(cname)
    print(n_components)
    print(transmat_.shape)
    hmm = hmmlearn.hmm.MultinomialHMM(n_components=n_components*3,startprob_prior=1.0, transmat_prior=1.0, random_state=0,n_iter=100, verbose=True)
    hmm.startprob_ = startprob_
    hmm.transmat_ = transmat_
    hmm.init_params = 'e'
    hmm.params = 'ste'
#         if cname[:4] != 'test':
    X = np.concatenate(trainset[cname])
    lengths = list([len(x) for x in trainset[cname]])
    print("training class", cname)
    print(X.shape, lengths, len(lengths))
    hmm.fit(X, lengths=lengths)
    models[cname] = hmm
    
print("Training done")

4
(12, 12)
training class nguoi
(1245, 1) [17, 39, 29, 24, 30, 17, 30, 20, 13, 27, 20, 19, 26, 20, 23, 30, 21, 17, 19, 40, 20, 49, 41, 53, 28, 27, 28, 28, 14, 20, 42, 19, 19, 19, 21, 21, 22, 30, 23, 29, 23, 27, 20, 21, 26, 23, 28, 25, 18] 49


         1       -3634.3570             +nan
         2       -2742.6405        +891.7166
         3       -2370.6733        +371.9672
         4       -2225.6333        +145.0399
         5       -2162.8612         +62.7721
         6       -2132.1238         +30.7374
         7       -2109.1104         +23.0135
         8       -2087.4756         +21.6347
         9       -2068.8022         +18.6735
        10       -2055.1835         +13.6187
        11       -2046.5203          +8.6632
        12       -2040.8119          +5.7083
        13       -2036.7647          +4.0472
        14       -2033.5250          +3.2397
        15       -2030.3205          +3.2045
        16       -2025.5281          +4.7924
        17       -2016.2744          +9.2537
        18       -2004.1207         +12.1537
        19       -1993.1969         +10.9238
        20       -1988.1408          +5.0561
        21       -1985.8154          +2.3254
        22       -1984.1020          +1.7134
        23

3
(9, 9)
training class cua
(1490, 1) [25, 38, 34, 22, 24, 26, 22, 24, 49, 28, 24, 28, 39, 26, 32, 30, 24, 33, 40, 36, 51, 29, 28, 33, 26, 28, 24, 31, 37, 22, 28, 27, 31, 37, 39, 26, 26, 28, 40, 29, 32, 33, 21, 26, 24, 47, 22, 35, 26] 49


         1       -4401.2852             +nan
         2       -3296.0771       +1105.2081
         3       -2935.5026        +360.5746
         4       -2779.4335        +156.0690
         5       -2734.9295         +44.5040
         6       -2714.5357         +20.3939
         7       -2699.4677         +15.0680
         8       -2686.5872         +12.8805
         9       -2676.6943          +9.8929
        10       -2659.7229         +16.9714
        11       -2650.6672          +9.0556
        12       -2644.1059          +6.5614
        13       -2640.8372          +3.2687
        14       -2639.1364          +1.7008
        15       -2637.5922          +1.5442
        16       -2635.4304          +2.1618
        17       -2629.7354          +5.6950
        18       -2623.1444          +6.5910
        19       -2615.5703          +7.5740
        20       -2611.7707          +3.7996
        21       -2609.0753          +2.6954
        22       -2607.5712          +1.5041
        23

2
(6, 6)
training class va
(879, 1) [18, 17, 20, 18, 24, 20, 19, 17, 19, 16, 18, 17, 18, 24, 19, 18, 17, 22, 13, 23, 28, 13, 20, 22, 16, 17, 23, 13, 15, 23, 17, 23, 17, 19, 14, 13, 21, 21, 17, 17, 13, 14, 17, 17, 18, 19, 19, 16] 48


         2       -2139.6914        +497.2937
         3       -1988.9754        +150.7160
         4       -1900.3317         +88.6437
         5       -1826.4913         +73.8403
         6       -1791.9825         +34.5089
         7       -1782.7154          +9.2671
         8       -1779.5424          +3.1730
         9       -1777.5974          +1.9450
        10       -1776.1445          +1.4529
        11       -1775.2614          +0.8830
        12       -1774.6526          +0.6088
        13       -1774.0915          +0.5612
        14       -1773.5677          +0.5238
        15       -1773.1455          +0.4222
        16       -1772.8007          +0.3448
        17       -1772.4846          +0.3161
        18       -1772.1883          +0.2963
        19       -1771.9256          +0.2628
        20       -1771.7101          +0.2155
        21       -1771.5446          +0.1655
        22       -1771.4218          +0.1228
        23       -1771.3314          +0.0904
        24

3
(9, 9)
training class khong
(1492, 1) [27, 36, 30, 20, 24, 23, 25, 32, 26, 22, 37, 42, 32, 28, 20, 46, 49, 31, 42, 17, 24, 47, 26, 24, 43, 24, 37, 27, 28, 27, 43, 33, 24, 40, 21, 31, 55, 31, 28, 27, 26, 23, 30, 21, 29, 37, 25, 28, 24] 49


         1       -4449.1002             +nan
         2       -3610.5004        +838.5998
         3       -3192.4692        +418.0312
         4       -2940.6106        +251.8586
         5       -2830.5698        +110.0408
         6       -2763.5567         +67.0131
         7       -2725.9863         +37.5704
         8       -2706.9525         +19.0338
         9       -2698.2146          +8.7379
        10       -2694.1028          +4.1118
        11       -2692.0818          +2.0210
        12       -2690.6567          +1.4252
        13       -2689.5649          +1.0918
        14       -2688.7189          +0.8460
        15       -2688.0340          +0.6848
        16       -2687.4615          +0.5726
        17       -2686.9563          +0.5051
        18       -2686.4593          +0.4970
        19       -2685.9012          +0.5582
        20       -2685.1970          +0.7041
        21       -2684.2198          +0.9772
        22       -2682.7311          +1.4887
        23

6
(18, 18)
training class benh_nhan
(1913, 1) [29, 41, 52, 55, 44, 44, 45, 32, 45, 34, 38, 45, 28, 30, 41, 43, 32, 32, 31, 30, 34, 26, 45, 30, 45, 44, 30, 33, 40, 35, 30, 42, 29, 43, 42, 43, 31, 41, 43, 41, 47, 54, 35, 39, 52, 51, 39, 32, 46] 49


         3       -3872.8720        +311.3716
         4       -3647.2848        +225.5872
         5       -3504.4273        +142.8575
         6       -3436.5533         +67.8740
         7       -3394.8947         +41.6587
         8       -3360.7473         +34.1474
         9       -3329.3374         +31.4099
        10       -3299.9725         +29.3649
        11       -3278.5281         +21.4444
        12       -3266.1386         +12.3895
        13       -3260.8687          +5.2699
        14       -3255.7112          +5.1575
        15       -3244.2979         +11.4133
        16       -3236.6000          +7.6980
        17       -3231.8059          +4.7940
        18       -3228.4687          +3.3372
        19       -3226.0363          +2.4324
        20       -3223.9135          +2.1228
        21       -3222.1565          +1.7570
        22       -3220.5752          +1.5814
        23       -3218.6915          +1.8837
        24       -3214.6991          +3.9924
        25

Training done


        68       -3122.1066          +0.0134
        69       -3122.0980          +0.0086


In [338]:

#print(len(testset['khong']))
print("Testing")
true_names=["cua", "nguoi", "va", "benh_nhan", "khong"]
prid_correct={'cua' : 0, 'nguoi':0, 'va':0, 'benh_nhan':0, 'khong':0}
# #true_label=[1, 2, 3, 4]
for true_cname in true_names:
    for O in testset[true_cname]:
        score = {cname : model.score(O) for cname, model in models.items()}
        if (prid_correct == max(score, key=score.get)) : prid_correct[true_cname]+=1
        print(true_cname, ' predict ', max(score, key=score.get))

Testing
cua  predict  va
cua  predict  cua
cua  predict  benh_nhan
cua  predict  cua
cua  predict  cua
cua  predict  cua
cua  predict  cua
cua  predict  khong
cua  predict  cua
cua  predict  cua
cua  predict  cua
cua  predict  nguoi
cua  predict  cua
cua  predict  va
cua  predict  cua
cua  predict  khong
cua  predict  cua
cua  predict  khong
cua  predict  nguoi
cua  predict  cua
cua  predict  cua
nguoi  predict  va
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  khong
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  benh_nhan
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  va
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
va  predict  va
va  predict  va
va  predict  cua
va  predict  va
va  predict  va
va  predict  nguoi
va  predict  va
va  predict  va
va  pred

In [266]:
prid_correct

{'cua': 0, 'nguoi': 0, 'va': 0, 'benh_nhan': 0, 'khong': 0}