In [27]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm
import pickle

In [28]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # get power
    power = librosa.feature.rms(y=y, frame_length=win_length, hop_length=hop_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1))
    # mfcc is 13 x T matrix now
    mfcc = np.concatenate([mfcc, power], axis=0)
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 39 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 39 (transpose of X)
    return X.T # hmmlearn use T x N matrix

In [29]:
def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

In [30]:
def clustering(X, n_clusters=30):
    kmeans = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans

In [31]:
class_names = ["benhnhan", "vietnam", "cach", "nguoi", "phai",
                   "test_benhnhan", "test_vietnam", "test_cach", "test_nguoi", "test_phai"]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("data", cname))

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)

Load benhnhan dataset
Load vietnam dataset
Load cach dataset
Load nguoi dataset
Load phai dataset
Load test_benhnhan dataset
Load test_vietnam dataset
Load test_cach dataset
Load test_nguoi dataset
Load test_phai dataset
vectors (50279, 39)
centers (30, 39)
centers (30, 39)


In [32]:
models = {}
for cname in class_names:
    class_vectors = dataset[cname]
    # convert all vectors to the cluster index
    # dataset['one'] = [O^1, ... O^R]
    # O^r = (c1, c2, ... ct, ... cT)
    # O^r size T x 1
    dataset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in dataset[cname]])
    hmm = hmmlearn.hmm.MultinomialHMM(
        n_components=18, random_state=0, n_iter=1000, verbose=True,
#         startprob_prior=np.array([0.7,0.2,0.1,0.0,0.0,0.0]),
#         transmat_prior=np.array([
#             [0.1,0.5,0.1,0.1,0.1,0.1,],
#             [0.1,0.1,0.5,0.1,0.1,0.1,],
#             [0.1,0.1,0.1,0.5,0.1,0.1,],
#             [0.1,0.1,0.1,0.1,0.5,0.1,],
#             [0.1,0.1,0.1,0.1,0.1,0.5,],
#             [0.1,0.1,0.1,0.1,0.1,0.5,],
#         ]),
    )
    if cname[:4] != 'test':
        X = np.concatenate(dataset[cname])
        lengths = list([len(x) for x in dataset[cname]])
        print("training class", cname)
        print(X.shape, lengths, len(lengths))
        hmm.fit(X, lengths=lengths)
        models[cname] = hmm
print("Training done")

training class benhnhan
(6250, 1) [40, 45, 155, 41, 27, 58, 42, 56, 130, 39, 130, 122, 42, 32, 37, 33, 58, 45, 153, 35, 61, 52, 40, 50, 27, 59, 52, 50, 102, 39, 51, 48, 44, 44, 37, 50, 37, 35, 31, 65, 49, 55, 49, 41, 114, 50, 32, 47, 140, 65, 41, 34, 62, 130, 54, 39, 47, 50, 163, 48, 49, 35, 38, 40, 49, 40, 56, 54, 32, 119, 40, 39, 37, 47, 41, 38, 42, 43, 45, 40, 56, 36, 269, 45, 52, 46, 66, 58, 50, 68, 33, 38, 189, 42, 53, 44, 145, 37, 47, 51, 39, 145, 42, 36, 35] 105


         1      -21128.3691             +nan
         2      -17722.8224       +3405.5468
         3      -17661.5813         +61.2410
         4      -17540.7974        +120.7839
         5      -17227.0759        +313.7216
         6      -16464.0457        +763.0302
         7      -15286.2429       +1177.8028
         8      -14239.6656       +1046.5773
         9      -13711.3601        +528.3055
        10      -13323.2964        +388.0637
        11      -12983.8015        +339.4949
        12      -12591.5467        +392.2548
        13      -12250.5763        +340.9704
        14      -11968.7789        +281.7974
        15      -11689.9482        +278.8307
        16      -11423.0774        +266.8708
        17      -11210.0647        +213.0127
        18      -11037.8447        +172.2200
        19      -10948.3461         +89.4986
        20      -10904.1496         +44.1966
        21      -10871.1257         +33.0239
        22      -10844.2444         +26.8813
        23

       184      -10037.4196          +0.1327
       185      -10037.2103          +0.2093
       186      -10036.7107          +0.4996
       187      -10035.6691          +1.0416
       188      -10034.5525          +1.1166
       189      -10033.8761          +0.6764
       190      -10033.5012          +0.3749
       191      -10033.2185          +0.2827
       192      -10032.7744          +0.4441
       193      -10031.7395          +1.0349
       194      -10030.1105          +1.6290
       195      -10028.3807          +1.7298
       196      -10026.4316          +1.9491
       197      -10024.1077          +2.3239
       198      -10021.3794          +2.7283
       199      -10017.8608          +3.5187
       200      -10013.5238          +4.3370
       201      -10009.1995          +4.3242
       202      -10005.8328          +3.3667
       203      -10003.5879          +2.2449
       204      -10002.1890          +1.3990
       205      -10001.3427          +0.8462
       206

training class vietnam
(11620, 1) [61, 65, 21, 115, 41, 56, 61, 49, 41, 42, 44, 42, 11, 360, 108, 77, 59, 43, 35, 49, 55, 53, 41, 51, 51, 31, 49, 55, 151, 56, 38, 45, 97, 47, 52, 36, 40, 63, 122, 81, 118, 61, 49, 51, 62, 107, 76, 61, 66, 91, 222, 51, 35, 36, 47, 29, 41, 101, 41, 39, 47, 42, 106, 122, 105, 44, 28, 37, 71, 81, 51, 51, 114, 128, 47, 45, 26, 43, 161, 46, 76, 53, 51, 56, 123, 90, 95, 46, 37, 57, 45, 44, 51, 66, 72, 115, 46, 101, 65, 44, 133, 33, 91, 36, 44, 40, 61, 43, 61, 62, 36, 93, 40, 165, 61, 75, 47, 137, 41, 36, 34, 51, 33, 54, 53, 48, 48, 37, 41, 65, 101, 46, 59, 45, 46, 41, 66, 66, 38, 49, 36, 126, 55, 121, 53, 50, 42, 168, 54, 51, 41, 67, 59, 49, 97, 49, 40, 39, 39, 55, 52, 38, 43, 49, 51, 111, 38, 51, 71, 72, 61, 71, 49, 35, 51, 37, 53, 52, 60, 38, 61, 39, 42, 42] 184


         1      -39504.7608             +nan
         2      -36627.7573       +2877.0035
         3      -36528.4040         +99.3533
         4      -36342.8854        +185.5186
         5      -35815.9017        +526.9837
         6      -34307.4016       +1508.5001
         7      -32157.2667       +2150.1349
         8      -29640.8081       +2516.4586
         9      -27179.5226       +2461.2855
        10      -25127.4105       +2052.1121
        11      -23668.2552       +1459.1553
        12      -22921.0289        +747.2263
        13      -22391.1451        +529.8839
        14      -21996.4949        +394.6501
        15      -21700.4147        +296.0803
        16      -21493.6545        +206.7601
        17      -21368.8830        +124.7716
        18      -21271.6123         +97.2707
        19      -21158.0901        +113.5222
        20      -20937.4027        +220.6874
        21      -20603.4043        +333.9985
        22      -20456.2673        +147.1370
        23

       184      -19685.1767          +0.1368
       185      -19685.0376          +0.1391
       186      -19684.8948          +0.1428
       187      -19684.7488          +0.1461
       188      -19684.6017          +0.1471
       189      -19684.4573          +0.1444
       190      -19684.3198          +0.1375
       191      -19684.1930          +0.1268
       192      -19684.0794          +0.1136
       193      -19683.9803          +0.0992
       194      -19683.8953          +0.0849
       195      -19683.8236          +0.0718
       196      -19683.7634          +0.0602
       197      -19683.7129          +0.0504
       198      -19683.6705          +0.0424
       199      -19683.6345          +0.0360
       200      -19683.6035          +0.0311
       201      -19683.5760          +0.0274
       202      -19683.5507          +0.0253
       203      -19683.5253          +0.0254
       204      -19683.4960          +0.0294
       205      -19683.4545          +0.0415
       206

training class cach
(3474, 1) [16, 14, 15, 19, 21, 20, 13, 15, 13, 14, 16, 25, 22, 16, 14, 19, 16, 28, 15, 31, 12, 17, 17, 14, 13, 15, 16, 16, 16, 17, 15, 16, 20, 13, 16, 17, 18, 14, 15, 15, 17, 25, 19, 17, 16, 16, 17, 22, 15, 12, 13, 15, 13, 14, 15, 13, 22, 21, 12, 14, 18, 13, 18, 23, 16, 18, 14, 20, 20, 20, 17, 26, 19, 13, 14, 32, 15, 12, 15, 14, 18, 23, 25, 17, 12, 18, 22, 16, 17, 13, 14, 13, 16, 27, 17, 32, 11, 17, 13, 12, 19, 24, 21, 17, 26, 15, 16, 22, 15, 16, 20, 18, 15, 12, 23, 11, 12, 27, 12, 18, 14, 18, 29, 17, 14, 14, 19, 14, 15, 11, 15, 16, 17, 14, 15, 18, 19, 25, 13, 18, 15, 26, 12, 16, 15, 12, 12, 15, 15, 10, 19, 16, 14, 16, 13, 18, 16, 16, 16, 23, 12, 18, 28, 15, 19, 16, 10, 22, 12, 22, 21, 17, 14, 14, 19, 33, 16, 19, 21, 18, 13, 17, 15, 22, 15, 17, 18, 14, 15, 26, 16, 13, 13, 18, 17, 21, 14, 30, 15, 22, 17, 19] 202


         1      -11873.8458             +nan
         2      -11201.1344        +672.7114
         3      -11179.8906         +21.2438
         4      -11145.9010         +33.9896
         5      -11083.3305         +62.5704
         6      -10954.5277        +128.8028
         7      -10670.7554        +283.7723
         8      -10159.1984        +511.5570
         9       -9478.4691        +680.7293
        10       -8685.7271        +792.7419
        11       -8155.2219        +530.5053
        12       -7892.3481        +262.8737
        13       -7699.9848        +192.3634
        14       -7514.2954        +185.6894
        15       -7354.2506        +160.0448
        16       -7243.6055        +110.6451
        17       -7171.0604         +72.5451
        18       -7123.0723         +47.9880
        19       -7089.5018         +33.5706
        20       -7063.1600         +26.3418
        21       -7041.5310         +21.6290
        22       -7024.1270         +17.4040
        23

       184       -6764.9700          +0.1899
       185       -6764.8417          +0.1283
       186       -6764.7540          +0.0877
       187       -6764.6936          +0.0604
       188       -6764.6512          +0.0425
       189       -6764.6201          +0.0311
       190       -6764.5959          +0.0242
       191       -6764.5755          +0.0204
       192       -6764.5570          +0.0185
       193       -6764.5391          +0.0179
       194       -6764.5209          +0.0182
       195       -6764.5017          +0.0192
       196       -6764.4806          +0.0211
       197       -6764.4546          +0.0260
       198       -6764.4114          +0.0432
       199       -6764.2994          +0.1121
       200       -6763.9349          +0.3645
       201       -6762.9977          +0.9372
       202       -6761.7713          +1.2264
       203       -6761.0447          +0.7266
       204       -6760.7550          +0.2897
       205       -6760.6024          +0.1526
       206

training class nguoi
(16044, 1) [29, 21, 24, 21, 18, 21, 28, 18, 33, 27, 36, 15, 18, 18, 25, 18, 30, 17, 18, 51, 16, 16, 25, 21, 19, 19, 27, 43, 19, 22, 16, 23, 36, 26, 20, 26, 22, 23, 58, 19, 24, 25, 12, 35, 17, 38, 34, 17, 59, 29, 31, 20, 20, 15, 22, 16, 20, 19, 31, 17, 16, 19, 19, 30, 14, 31, 21, 18, 47, 22, 20, 19, 32, 29, 19, 23, 23, 19, 27, 17, 27, 50, 40, 26, 18, 24, 22, 22, 16, 16, 23, 18, 16, 63, 26, 20, 28, 20, 19, 38, 31, 26, 21, 33, 28, 31, 21, 20, 28, 20, 25, 27, 14, 21, 20, 23, 23, 25, 26, 30, 36, 19, 15, 16, 14, 25, 25, 32, 31, 20, 40, 25, 16, 23, 22, 59, 62, 19, 23, 49, 21, 14, 16, 36, 16, 18, 17, 23, 22, 31, 20, 28, 24, 22, 19, 23, 17, 19, 29, 19, 54, 21, 27, 23, 22, 33, 28, 43, 23, 13, 21, 27, 30, 19, 45, 22, 36, 21, 18, 21, 19, 14, 27, 19, 42, 29, 20, 40, 21, 48, 35, 19, 42, 24, 19, 19, 58, 26, 20, 27, 17, 14, 17, 23, 17, 15, 23, 19, 29, 45, 31, 20, 45, 56, 16, 20, 21, 28, 23, 23, 21, 27, 30, 19, 22, 19, 21, 23, 22, 17, 18, 25, 18, 27, 55, 64, 21, 26, 19, 16, 24, 18,

         1      -54345.5974             +nan
         2      -48786.7304       +5558.8670
         3      -48696.5470         +90.1834
         4      -48561.3375        +135.2095
         5      -48322.3782        +238.9593
         6      -47802.2610        +520.1171
         7      -46560.2632       +1241.9978
         8      -44131.9963       +2428.2669
         9      -40674.4628       +3457.5335
        10      -37402.8260       +3271.6368
        11      -35182.3637       +2220.4623
        12      -33693.1995       +1489.1641
        13      -32560.9978       +1132.2017
        14      -31542.1310       +1018.8668
        15      -30669.1230        +873.0080
        16      -30122.8327        +546.2903
        17      -29808.6691        +314.1636
        18      -29563.1555        +245.5136
        19      -29301.8124        +261.3431
        20      -28974.5489        +327.2635
        21      -28614.9690        +359.5799
        22      -28283.7169        +331.2521
        23

training class phai
(4258, 1) [14, 13, 17, 15, 18, 19, 10, 29, 18, 22, 17, 20, 19, 20, 17, 12, 28, 19, 27, 20, 12, 34, 20, 45, 20, 15, 20, 22, 20, 19, 20, 16, 24, 17, 17, 15, 16, 19, 15, 17, 26, 25, 22, 31, 19, 14, 19, 14, 28, 20, 25, 13, 16, 11, 20, 14, 24, 17, 21, 20, 21, 20, 18, 19, 13, 13, 16, 19, 18, 20, 23, 11, 19, 26, 21, 15, 22, 26, 11, 21, 21, 14, 28, 19, 20, 10, 19, 13, 20, 17, 32, 24, 20, 14, 16, 22, 22, 10, 25, 17, 18, 18, 16, 22, 22, 17, 11, 16, 18, 19, 20, 21, 20, 21, 20, 16, 14, 19, 21, 15, 22, 24, 21, 16, 20, 27, 10, 17, 15, 10, 24, 24, 19, 14, 22, 51, 20, 17, 21, 17, 13, 24, 16, 22, 13, 17, 14, 18, 23, 19, 32, 17, 22, 29, 26, 24, 21, 12, 25, 21, 18, 15, 14, 21, 13, 19, 28, 18, 15, 21, 20, 17, 13, 13, 17, 20, 16, 17, 17, 17, 19, 20, 29, 18, 20, 14, 14, 19, 20, 17, 17, 18, 25, 11, 13, 21, 18, 27, 17, 22, 19, 35, 16, 17, 26, 25, 17, 23, 19, 18, 26, 12, 17, 17, 20, 14, 17, 22, 11, 21, 24] 221


         1      -14535.6407             +nan
         2      -13442.7578       +1092.8828
         3      -13416.2091         +26.5488
         4      -13373.1466         +43.0625
         5      -13290.5422         +82.6044
         6      -13106.2543        +184.2879
         7      -12636.1720        +470.0824
         8      -11691.8598        +944.3122
         9      -10727.2438        +964.6160
        10      -10006.0668        +721.1770
        11       -9460.5523        +545.5145
        12       -9037.5505        +423.0018
        13       -8715.3540        +322.1964
        14       -8477.7387        +237.6153
        15       -8316.7054        +161.0333
        16       -8196.0410        +120.6644
        17       -8111.1818         +84.8592
        18       -8055.5179         +55.6640
        19       -8007.9867         +47.5311
        20       -7961.5805         +46.4063
        21       -7913.7220         +47.8585
        22       -7861.2066         +52.5153
        23

Training done


        86       -7613.6153          +0.0100


In [33]:
# Export model weight
for cname in class_names:
    if cname[:4] != 'test':
        model = models[cname]
        outfile = open(cname + 'MultinomialHMM.pkl', 'wb')
        pickle.dump(model, outfile)
        outfile.close()
outfile = open('kmeans.pkl', 'wb')
pickle.dump(kmeans, outfile)
outfile.close()

In [34]:
print("Testing")
to_test = ['test_phai', 'test_nguoi', 'test_cach', 'test_vietnam', 'test_benhnhan']
for true_cname in to_test:
    correct = 0
    failed = 0
    real_name = true_cname.split('_')[-1]

    for O in dataset[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }

        match = True
        for key in score:
            if score[key] > score[real_name]:
                match = False
        if match:
            correct += 1
        else:
            failed += 1
            # print(real_name, score)

    acc = correct/(correct+failed)
    print(real_name + " : " + str(acc))

Testing
phai : 0.631578947368421
nguoi : 0.7985074626865671
cach : 0.71875
vietnam : 0.868421052631579
benhnhan : 0.75
