In [27]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

In [40]:
def get_mfcc(file_path):
    try:
        y, sr = librosa.load(file_path) # read .wav file
        hop_length = math.floor(sr*0.010) # 10ms hop
        win_length = math.floor(sr*0.025) # 25ms frame
        # mfcc is 12 x T matrix
        mfcc = librosa.feature.mfcc(
            y, sr, n_mfcc=12, n_fft=1024,
            hop_length=hop_length, win_length=win_length)
        # substract mean from mfcc --> normalize mfcc
        mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
        # delta feature 1st order and 2nd order
        delta1 = librosa.feature.delta(mfcc, order=1)
        delta2 = librosa.feature.delta(mfcc, order=2)
        # X is 36 x T
        X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
        # return T x 36 (transpose of X)
        return X.T # hmmlearn use T x N matrix
    except Exception as e:
        print(file_path)
        print(e)
        pass

In [41]:
def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

In [42]:
def clustering(X, n_clusters=10):
    kmeans = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans  

In [65]:
# class_names = ['cua', 'va', 'khong', 'nguoi']
class_names=['benh_nhan','cua', 'test_benh_nhan', 'test_cua']
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("hmm_data", cname))

Load benh_nhan dataset
Load cua dataset
Load test_benh_nhan dataset
Load test_cua dataset


In [51]:
len(dataset['va'])

100

In [66]:
# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)

models = {}
for cname in class_names:
#     print(cname[:4])
    class_vectors = dataset[cname]
    # convert all vectors to the cluster index
    # dataset['one'] = [O^1, ... O^R]
    # O^r = (c1, c2, ... ct, ... cT)
    # O^r size T x 1
    dataset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in dataset[cname]])
    hmm = hmmlearn.hmm.MultinomialHMM(
        n_components=6, random_state=0, n_iter=1000, verbose=True,
        startprob_prior=np.array([0.7,0.2,0.1,0.0,0.0,0.0]),
        transmat_prior=np.array([
            [0.1,0.5,0.1,0.1,0.1,0.1,],
            [0.1,0.1,0.5,0.1,0.1,0.1,],
            [0.1,0.1,0.1,0.5,0.1,0.1,],
            [0.1,0.1,0.1,0.1,0.5,0.1,],
            [0.1,0.1,0.1,0.1,0.1,0.5,],
            [0.1,0.1,0.1,0.1,0.1,0.5,],
        ]),
    )
    if cname[:4] != 'test':
        X = np.concatenate(dataset[cname])
        lengths = list([len(x) for x in dataset[cname]])
        print("training class", cname)
        print(X.shape, lengths, len(lengths))
        hmm.fit(X, lengths=lengths)
        models[cname] = hmm
print("Training done")



vectors (6952, 36)
centers (10, 36)
centers (10, 36)
training class benh_nhan
(2831, 1) [42, 37, 37, 38, 47, 35, 41, 41, 44, 32, 35, 28, 32, 44, 47, 36, 38, 46, 46, 34, 33, 35, 37, 28, 30, 45, 48, 29, 38, 43, 44, 55, 45, 26, 29, 43, 33, 30, 37, 30, 45, 42, 42, 45, 41, 44, 52, 41, 43, 34, 45, 51, 41, 62, 40, 44, 47, 52, 39, 54, 35, 41, 43, 47, 41, 51, 39, 33, 38, 41] 70


         1       -6543.9842             +nan
         2       -5828.1158        +715.8684
         3       -5786.5703         +41.5455
         4       -5702.7214         +83.8489
         5       -5533.7434        +168.9780
         6       -5310.1706        +223.5728
         7       -5133.4543        +176.7163
         8       -4962.4651        +170.9893
         9       -4745.9989        +216.4662
        10       -4483.9734        +262.0255
        11       -4202.1922        +281.7812
        12       -4035.4404        +166.7518
        13       -3978.1972         +57.2432
        14       -3956.3797         +21.8175
        15       -3943.0162         +13.3635
        16       -3932.7169         +10.2993
        17       -3922.8139          +9.9030
        18       -3912.2018         +10.6121
        19       -3900.0731         +12.1287
        20       -3884.6884         +15.3847
        21       -3862.2032         +22.4852
        22       -3823.8700         +38.3332
        23

training class cua
(2113, 1) [24, 26, 51, 47, 24, 49, 28, 24, 26, 34, 25, 38, 43, 42, 38, 34, 29, 30, 27, 25, 36, 40, 22, 22, 35, 24, 21, 26, 42, 40, 26, 24, 30, 22, 28, 40, 31, 37, 28, 26, 26, 22, 21, 32, 26, 39, 24, 26, 33, 24, 32, 27, 29, 26, 28, 24, 37, 30, 39, 24, 33, 33, 28, 36, 22, 28, 31, 29, 40] 69


         4       -4172.7786        +104.8339
         5       -4006.5377        +166.2409
         6       -3752.2591        +254.2786
         7       -3476.9556        +275.3035
         8       -3342.6318        +134.3238
         9       -3267.8802         +74.7516
        10       -3215.8380         +52.0422
        11       -3180.4888         +35.3492
        12       -3150.5750         +29.9138
        13       -3106.6972         +43.8778
        14       -3043.1113         +63.5859
        15       -2997.5331         +45.5781
        16       -2974.0707         +23.4624
        17       -2953.5000         +20.5707
        18       -2925.0056         +28.4944
        19       -2882.8940         +42.1116
        20       -2846.4485         +36.4455
        21       -2823.8546         +22.5939
        22       -2809.5818         +14.2728
        23       -2800.9849          +8.5969
        24       -2795.8405          +5.1444
        25       -2792.9489          +2.8916
        26

Training done


        45       -2788.9815          +0.0101
        46       -2788.9730          +0.0085


In [83]:
preds = []
print("Testing")
for true_cname in class_names:
    if 'test' in true_cname:
        for O in dataset[true_cname]:
    #         print(models)
            score = [model.score(O, [len(O)]) for _, model in models.items()]
            label_pred = np.argmax(score, axis=0)
            preds.append(label_pred)
            print(true_cname, score)
preds

Testing
test_benh_nhan [-55.50121571407498, -71.86725685101258]
test_benh_nhan [-52.850132041216696, -71.49468113522997]
test_benh_nhan [-75.83487380589428, -84.07965246549998]
test_benh_nhan [-51.02531272513158, -73.64481637304077]
test_benh_nhan [-48.36299363162022, -61.997729023910324]
test_benh_nhan [-79.37224532991533, -99.59438540872662]
test_benh_nhan [-61.32924759389266, -100.8081654233571]
test_benh_nhan [-75.32779306325995, -93.57163317647137]
test_benh_nhan [-69.82584421466375, -92.4855154054572]
test_benh_nhan [-53.28885965167403, -86.57640395690997]
test_benh_nhan [-46.90879158882474, -70.47669035644194]
test_benh_nhan [-59.54492207365401, -78.33888215792419]
test_benh_nhan [-65.69798611152194, -93.11992738041428]
test_benh_nhan [-56.36702419458729, -75.0073717736923]
test_benh_nhan [-75.62420882348349, -96.78162632666681]
test_benh_nhan [-79.45170528988274, -74.26640932844815]
test_benh_nhan [-60.29035275865507, -59.81535404635203]
test_benh_nhan [-70.49959397530755, -71.

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1]

In [78]:
print("Testing")
for true_cname in class_names:
    if 'test' in true_cname:
        for O in dataset[true_cname]:
            print(len(O))
            score = {cname : model.score(O, [len(O)]) for cname, model in models.items()}
    #         score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if 'test' in cname}
    #         score = sigmoid(score)
            print(true_cname, score)

Testing
40
test_benh_nhan {'benh_nhan': -55.50121571407498, 'cua': -71.86725685101258}
36
test_benh_nhan {'benh_nhan': -52.850132041216696, 'cua': -71.49468113522997}
44
test_benh_nhan {'benh_nhan': -75.83487380589428, 'cua': -84.07965246549998}
34
test_benh_nhan {'benh_nhan': -51.02531272513158, 'cua': -73.64481637304077}
35
test_benh_nhan {'benh_nhan': -48.36299363162022, 'cua': -61.997729023910324}
63
test_benh_nhan {'benh_nhan': -79.37224532991533, 'cua': -99.59438540872662}
46
test_benh_nhan {'benh_nhan': -61.32924759389266, 'cua': -100.8081654233571}
46
test_benh_nhan {'benh_nhan': -75.32779306325995, 'cua': -93.57163317647137}
48
test_benh_nhan {'benh_nhan': -69.82584421466375, 'cua': -92.4855154054572}
44
test_benh_nhan {'benh_nhan': -53.28885965167403, 'cua': -86.57640395690997}
34
test_benh_nhan {'benh_nhan': -46.90879158882474, 'cua': -70.47669035644194}
41
test_benh_nhan {'benh_nhan': -59.54492207365401, 'cua': -78.33888215792419}
45
test_benh_nhan {'benh_nhan': -65.6979861