In [82]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

In [160]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix

In [161]:
def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

In [162]:
def clustering(X, n_clusters=10):
    kmeans = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans  

In [210]:
class_names = ["nguoi", "test_nguoi", "cua", "test_cua", "va", "test_va", "khong", "test_khong", "benh_nhan", "test_benh_nhan"]
trainset = {}
testset= {}

for cname in class_names:
    print(f"Load {cname} dataset")
    if (cname[:4] =='test') : 
        testset[cname] = get_class_data(os.path.join("hmm_data", cname))
    else:
        trainset[cname] = get_class_data(os.path.join("hmm_data", cname))

Load nguoi dataset
Load test_nguoi dataset
Load cua dataset
Load test_cua dataset
Load va dataset
Load test_va dataset
Load khong dataset
Load test_khong dataset
Load benh_nhan dataset
Load test_benh_nhan dataset


In [211]:
train_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in trainset.items()], axis=0)
test_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in testset.items()], axis=0)
print("train_vectors", train_vectors.shape)
print("test_vectors", test_vectors.shape)
# Run K-Means algorithm to get clusters
kmeans = clustering(train_vectors)
kmeans = clustering(test_vectors)
print("centers", kmeans.cluster_centers_.shape)

train_vectors (9974, 36)
test_vectors (4362, 36)
centers (10, 36)
centers (10, 36)
centers (10, 36)


In [212]:
def get_start_config(cname):
    if cname == 'va' :
        startprob=np.array([0.7,0.2,0.1,0.0,0.0,0.0]),
        transmat=np.array([
            [0.1,0.5,0.1,0.1,0.1,0.1],
            [0.1,0.1,0.5,0.1,0.1,0.1,],
            [0.1,0.1,0.1,0.5,0.1,0.1,],
            [0.1,0.1,0.1,0.1,0.5,0.1,],
            [0.1,0.1,0.1,0.5,0.1,0.1,],
            [0.1,0.1,0.1,0.1,0.5,0.1,],
        ]),
        return startprob, transmat
    if cname == 'cua':
        startprob=np.array([0.7,0.2,0.1,0.0,0.0,0.0]),
        transmat=np.array([
            [0.1,0.5,0.1,0.1,0.1,0.1,],
            [0.1,0.1,0.5,0.1,0.1,0.1,],
            [0.1,0.1,0.1,0.5,0.1,0.1,],
            [0.1,0.1,0.1,0.1,0.5,0.1,],
            [0.1,0.1,0.1,0.1,0.1,0.5,],
            [0.1,0.1,0.1,0.1,0.1,0.5,],
        ]),
        return startprob, transmat
    if cname == 'nguoi':
        startprob=np.array([0.7,0.2,0.1,0.0,0.0,0.0]),
        transmat=np.array([
            [0.1,0.5,0.1,0.1,0.1,0.1,],
            [0.1,0.1,0.5,0.1,0.1,0.1,],
            [0.1,0.1,0.1,0.5,0.1,0.1,],
            [0.1,0.1,0.1,0.1,0.5,0.1,],
            [0.1,0.1,0.1,0.1,0.1,0.5,],
            [0.1,0.1,0.1,0.1,0.1,0.5,],
        ]),
        return startprob, transmat
    if cname == 'khong':
        startprob=np.array([0.7,0.2,0.1,0.0,0.0,0.0]),
        transmat=np.array([
            [0.1,0.5,0.1,0.1,0.1,0.1,],
            [0.1,0.1,0.5,0.1,0.1,0.1,],
            [0.1,0.1,0.1,0.5,0.1,0.1,],
            [0.1,0.1,0.1,0.1,0.5,0.1,],
            [0.1,0.1,0.1,0.1,0.1,0.5,],
            [0.1,0.1,0.1,0.1,0.1,0.5,],
        ]),
        return startprob, transmat
    if cname == 'benh_nhan':
        startprob=np.array([0.1,0.5,0.1,0.1,0.1,0.1,0.1,0.5,0.1,0.1,0.1,0.1]),
        transmat=np.array([
            [0.1,0.5,0.1,0.1,0.1,0.1,0.1,0.5,0.1,0.1,0.1,0.1],
            [0.1,0.5,0.1,0.1,0.1,0.1,0.1,0.5,0.1,0.1,0.1,0.1],
            [0.1,0.5,0.1,0.1,0.1,0.1,0.1,0.5,0.1,0.1,0.1,0.1],
            [0.1,0.5,0.1,0.1,0.1,0.1,0.1,0.5,0.1,0.1,0.1,0.1],
            [0.1,0.5,0.1,0.1,0.1,0.1,0.1,0.5,0.1,0.1,0.1,0.1],
            [0.1,0.5,0.1,0.1,0.1,0.1,0.1,0.5,0.1,0.1,0.1,0.1],
            [0.1,0.5,0.1,0.1,0.1,0.1,0.1,0.5,0.1,0.1,0.1,0.1],
            [0.1,0.5,0.1,0.1,0.1,0.1,0.1,0.5,0.1,0.1,0.1,0.1],
            [0.1,0.5,0.1,0.1,0.1,0.1,0.1,0.5,0.1,0.1,0.1,0.1],
            [0.1,0.5,0.1,0.1,0.1,0.1,0.1,0.5,0.1,0.1,0.1,0.1],
            [0.1,0.5,0.1,0.1,0.1,0.1,0.1,0.5,0.1,0.1,0.1,0.1],
            [0.1,0.5,0.1,0.1,0.1,0.1,0.1,0.5,0.1,0.1,0.1,0.1],
        ]),
        return startprob, transmat
    return None

In [213]:
models = {}
for cname in class_names:
    if cname[:4] != "test":
        class_vectors = trainset[cname]
        # convert all vectors to the cluster index
        # dataset['one'] = [O^1, ... O^R]
        # O^r = (c1, c2, ... ct, ... cT)
        # O^r size T x 1
        trainset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in trainset[cname]])
        
        startprob_, transmat_ = get_start_config(cname)
        
        hmm = hmmlearn.hmm.MultinomialHMM(n_components=6, random_state=0, n_iter=1000, verbose=True)
        hmm.startprob_ = startprob_
        hmm.transmat_ = transmat_
#         if cname[:4] != 'test':
        X = np.concatenate(trainset[cname])
        lengths = list([len(x) for x in trainset[cname]])
        print("training class", cname)
        print(X.shape, lengths, len(lengths))
        hmm.fit(X, lengths=lengths)
        models[cname] = hmm
print("Training done")

training class nguoi
(1708, 1) [17, 39, 29, 24, 30, 17, 30, 20, 13, 27, 20, 19, 26, 20, 23, 30, 21, 17, 19, 40, 20, 49, 41, 53, 28, 27, 28, 28, 14, 20, 42, 19, 19, 19, 21, 21, 22, 30, 23, 29, 23, 27, 20, 21, 26, 23, 28, 25, 18, 22, 15, 22, 30, 19, 21, 24, 25, 26, 16, 16, 17, 23, 20, 20, 27, 20, 24, 32, 18, 26] 70


         1       -3867.2574             +nan
         2       -3523.1029        +344.1545
         3       -3499.2446         +23.8583
         4       -3456.0332         +43.2114
         5       -3383.7822         +72.2510
         6       -3284.0121         +99.7702
         7       -3126.3673        +157.6448
         8       -2908.8060        +217.5613
         9       -2709.2677        +199.5384
        10       -2574.1413        +135.1263
        11       -2512.6351         +61.5062
        12       -2478.2487         +34.3864
        13       -2446.8676         +31.3812
        14       -2413.7185         +33.1490
        15       -2381.5067         +32.2118
        16       -2346.8571         +34.6496
        17       -2312.4229         +34.4342
        18       -2286.6018         +25.8210
        19       -2270.3052         +16.2966
        20       -2260.3175          +9.9877
        21       -2254.4911          +5.8265
        22       -2251.2880          +3.2031
        23

training class cua
(2139, 1) [25, 38, 34, 22, 24, 26, 22, 24, 49, 28, 24, 28, 39, 26, 32, 30, 24, 33, 40, 36, 51, 29, 28, 33, 26, 28, 24, 31, 37, 22, 28, 27, 31, 37, 39, 26, 26, 28, 40, 29, 32, 33, 21, 26, 24, 47, 22, 35, 26, 38, 24, 26, 27, 30, 24, 30, 40, 34, 29, 26, 42, 21, 36, 25, 24, 43, 40, 26, 42, 22] 70


         3       -4415.4370         +21.9710
         4       -4380.7052         +34.7318
         5       -4323.6816         +57.0236
         6       -4237.6436         +86.0380
         7       -4121.7229        +115.9208
         8       -3941.6541        +180.0687
         9       -3681.4704        +260.1837
        10       -3483.9961        +197.4743
        11       -3386.2891         +97.7070
        12       -3296.1077         +90.1814
        13       -3211.6910         +84.4167
        14       -3157.6230         +54.0680
        15       -3120.3323         +37.2907
        16       -3088.8503         +31.4820
        17       -3064.0767         +24.7736
        18       -3046.7539         +17.3228
        19       -3034.7161         +12.0378
        20       -3026.2679          +8.4483
        21       -3020.2370          +6.0309
        22       -3015.5483          +4.6887
        23       -3011.2376          +4.3107
        24       -3006.5221          +4.7156
        25

training class va
(1292, 1) [18, 17, 20, 18, 24, 20, 19, 17, 19, 16, 18, 17, 18, 24, 19, 18, 17, 22, 13, 23, 28, 13, 20, 22, 16, 17, 23, 13, 15, 23, 17, 23, 17, 19, 14, 13, 21, 21, 17, 17, 13, 14, 17, 17, 18, 19, 19, 16, 16, 24, 21, 18, 12, 22, 20, 18, 21, 29, 29, 19, 24, 19, 17, 18, 17, 22, 13, 16, 18] 69


         2       -2752.1838        +210.7310
         3       -2733.0805         +19.1033
         4       -2704.9170         +28.1636
         5       -2659.8132         +45.1038
         6       -2596.8362         +62.9770
         7       -2531.1381         +65.6980
         8       -2470.7096         +60.4285
         9       -2405.4413         +65.2683
        10       -2333.6369         +71.8044
        11       -2265.8424         +67.7945
        12       -2213.8370         +52.0054
        13       -2175.7162         +38.1207
        14       -2136.2301         +39.4861
        15       -2096.9313         +39.2988
        16       -2062.6370         +34.2944
        17       -2037.4981         +25.1389
        18       -2024.1965         +13.3016
        19       -2016.4988          +7.6977
        20       -2009.6915          +6.8073
        21       -2001.3403          +8.3512
        22       -1986.9577         +14.3825
        23       -1945.0562         +41.9015
        24

training class khong
(2083, 1) [27, 36, 30, 20, 24, 23, 25, 32, 26, 22, 37, 42, 32, 28, 20, 46, 49, 31, 42, 17, 24, 47, 26, 24, 43, 24, 37, 27, 28, 27, 43, 33, 24, 40, 21, 31, 55, 31, 28, 27, 26, 23, 30, 21, 29, 37, 25, 28, 24, 27, 29, 27, 33, 32, 32, 26, 21, 49, 23, 20, 33, 25, 28, 26, 19, 25, 23, 32, 26, 35] 70


         1       -4832.4107             +nan
         2       -4552.6577        +279.7530
         3       -4494.3908         +58.2669
         4       -4387.4323        +106.9585
         5       -4216.1477        +171.2846
         6       -3982.6045        +233.5432
         7       -3742.7168        +239.8878
         8       -3503.7576        +238.9591
         9       -3197.9338        +305.8238
        10       -2976.6578        +221.2760
        11       -2914.4237         +62.2342
        12       -2891.6348         +22.7889
        13       -2863.4375         +28.1973
        14       -2828.6081         +34.8295
        15       -2801.5302         +27.0778
        16       -2784.8335         +16.6967
        17       -2774.8636          +9.9699
        18       -2768.8519          +6.0117
        19       -2764.7332          +4.1187
        20       -2761.6731          +3.0601
        21       -2759.4401          +2.2330
        22       -2757.9298          +1.5104
        23

training class benh_nhan
(2752, 1) [29, 41, 52, 55, 44, 44, 45, 32, 45, 34, 38, 45, 28, 30, 41, 43, 32, 32, 31, 30, 34, 26, 45, 30, 45, 44, 30, 33, 40, 35, 30, 42, 29, 43, 42, 43, 31, 41, 43, 41, 47, 54, 35, 39, 52, 51, 39, 32, 46, 46, 46, 48, 40, 38, 46, 30, 33, 63, 36, 41, 34, 36, 44, 33, 43, 41, 35, 37, 34, 35] 70


         3       -5410.4786         +41.6148
         4       -5339.1477         +71.3309
         5       -5222.2061        +116.9416
         6       -5066.8970        +155.3091
         7       -4910.3786        +156.5184
         8       -4753.5073        +156.8712
         9       -4560.5928        +192.9145
        10       -4349.3121        +211.2807
        11       -4165.9063        +183.4058
        12       -4044.3188        +121.5875
        13       -3953.3988         +90.9200
        14       -3880.6666         +72.7323
        15       -3831.7493         +48.9173
        16       -3802.6158         +29.1335
        17       -3785.4065         +17.2093
        18       -3773.7240         +11.6824
        19       -3764.2809          +9.4431
        20       -3755.5252          +8.7557
        21       -3747.0250          +8.5002
        22       -3738.9222          +8.1029
        23       -3731.5812          +7.3410
        24       -3725.5926          +5.9886
        25

Training done


        89       -3678.3123          +0.0251
        90       -3678.2989          +0.0134
        91       -3678.2919          +0.0070


In [209]:
dataset[1]

KeyError: 1

In [227]:

print("Testing")
true_names=["cua", "nguoi", "va", "benh_nhan", "khong"]
true_test=['test_cua', 'test_nguoi', 'test_va', 'test_benh_nhan', 'test_khong']
prid_correct={'cua' : 0, 'nguoi':0, 'va':0, 'benh_nhan':0, 'khong':0}
test_prid_correct = {'test_cua':0, 'test_nguoi':0, 'test_va':0, 'test_benh_nhan':0, 'test_khong':0}
#true_label=[1, 2, 3, 4]
for true_cname in true_test:
    for O in testset[true_cname]:
        score = {cname : model.score(O) for true_cname, model in models.items()}
        if (test_prid_correct == max(score, key=score.get)) : test_prid_correct[true_cname]+=1
        print(true_cname, ' predict ', max(score, key=score.get))

Testing


IndexError: arrays used as indices must be of integer (or boolean) type