In [42]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm
import pickle

In [36]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # get power
    power = librosa.feature.rms(y=y, frame_length=win_length, hop_length=hop_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1))
    # mfcc is 13 x T matrix now
    mfcc = np.concatenate([mfcc, power], axis=0)
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 39 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 39 (transpose of X)
    return X.T # hmmlearn use T x N matrix

In [37]:
def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

In [38]:
class_names = ["benhnhan", "vietnam", "cach", "nguoi", "phai",
                   "test_benhnhan", "test_vietnam", "test_cach", "test_nguoi", "test_phai"]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("data", cname))

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)

Load benhnhan dataset
Load vietnam dataset
Load cach dataset
Load nguoi dataset
Load phai dataset
Load test_benhnhan dataset
Load test_vietnam dataset
Load test_cach dataset
Load test_nguoi dataset
Load test_phai dataset
vectors (50279, 39)


In [39]:
models = {}

state_map = {"benhnhan" : 18, "vietnam" : 18, "cach" : 9, "nguoi" : 9, "phai" : 9,
                   "test_benhnhan" : 18, "test_vietnam" : 18, "test_cach" : 9, "test_nguoi" : 9, "test_phai" : 9}
for cname in class_names:
    # convert all vectors to the cluster index
    # dataset['one'] = [O^1, ... O^R]
    # O^r = (c1, c2, ... ct, ... cT)
    # O^r size T x 1
    hmm = hmmlearn.hmm.GMMHMM(
        n_components=state_map[cname], n_mix = 2, random_state=42, n_iter=1000, verbose=True,
        params='mct',
        init_params='mct',
    )

    if cname[:4] != 'test':
        X = np.concatenate(dataset[cname])
        lengths = list([len(x) for x in dataset[cname]])
        print("training class", cname)
        print(X.shape, lengths, len(lengths))
        hmm.fit(X)
        models[cname] = hmm
print("Training done")

training class benhnhan
(6250, 39) [40, 45, 155, 41, 27, 58, 42, 56, 130, 39, 130, 122, 42, 32, 37, 33, 58, 45, 153, 35, 61, 52, 40, 50, 27, 59, 52, 50, 102, 39, 51, 48, 44, 44, 37, 50, 37, 35, 31, 65, 49, 55, 49, 41, 114, 50, 32, 47, 140, 65, 41, 34, 62, 130, 54, 39, 47, 50, 163, 48, 49, 35, 38, 40, 49, 40, 56, 54, 32, 119, 40, 39, 37, 47, 41, 38, 42, 43, 45, 40, 56, 36, 269, 45, 52, 46, 66, 58, 50, 68, 33, 38, 189, 42, 53, 44, 145, 37, 47, 51, 39, 145, 42, 36, 35] 105


         1     -579489.5887             +nan
         2     -505664.9492      +73824.6394
         3     -489604.9981      +16059.9511
         4     -483795.9709       +5809.0272
         5     -481064.0059       +2731.9650
         6     -479967.8097       +1096.1962
         7     -479275.1674        +692.6423
         8     -478886.1109        +389.0565
         9     -478554.8270        +331.2840
        10     -478355.7159        +199.1111
        11     -478238.1237        +117.5922
        12     -478116.7787        +121.3450
        13     -478048.2578         +68.5209
        14     -477986.0452         +62.2126
        15     -477888.8475         +97.1978
        16     -477855.5658         +33.2816
        17     -477831.9466         +23.6192
        18     -477813.3072         +18.6395
        19     -477792.7673         +20.5399
        20     -477773.8849         +18.8824
        21     -477755.5484         +18.3364
        22     -477734.2836         +21.2649
        23

training class vietnam
(11620, 39) [61, 65, 21, 115, 41, 56, 61, 49, 41, 42, 44, 42, 11, 360, 108, 77, 59, 43, 35, 49, 55, 53, 41, 51, 51, 31, 49, 55, 151, 56, 38, 45, 97, 47, 52, 36, 40, 63, 122, 81, 118, 61, 49, 51, 62, 107, 76, 61, 66, 91, 222, 51, 35, 36, 47, 29, 41, 101, 41, 39, 47, 42, 106, 122, 105, 44, 28, 37, 71, 81, 51, 51, 114, 128, 47, 45, 26, 43, 161, 46, 76, 53, 51, 56, 123, 90, 95, 46, 37, 57, 45, 44, 51, 66, 72, 115, 46, 101, 65, 44, 133, 33, 91, 36, 44, 40, 61, 43, 61, 62, 36, 93, 40, 165, 61, 75, 47, 137, 41, 36, 34, 51, 33, 54, 53, 48, 48, 37, 41, 65, 101, 46, 59, 45, 46, 41, 66, 66, 38, 49, 36, 126, 55, 121, 53, 50, 42, 168, 54, 51, 41, 67, 59, 49, 97, 49, 40, 39, 39, 55, 52, 38, 43, 49, 51, 111, 38, 51, 71, 72, 61, 71, 49, 35, 51, 37, 53, 52, 60, 38, 61, 39, 42, 42] 184


         1    -1101070.1586             +nan
         2     -982629.8255     +118440.3331
         3     -944598.7650      +38031.0605
         4     -930842.5288      +13756.2362
         5     -924035.8143       +6806.7145
         6     -920123.0357       +3912.7786
         7     -917507.7457       +2615.2901
         8     -915668.9823       +1838.7634
         9     -914652.1505       +1016.8317
        10     -913818.8536        +833.2969
        11     -913139.7039        +679.1498
        12     -912609.5795        +530.1243
        13     -912111.8881        +497.6914
        14     -911579.3393        +532.5488
        15     -911005.7829        +573.5564
        16     -910384.0548        +621.7281
        17     -909954.6085        +429.4463
        18     -909665.8019        +288.8066
        19     -909470.3124        +195.4895
        20     -909281.0881        +189.2243
        21     -909122.5856        +158.5025
        22     -908946.9310        +175.6547
        23

training class cach
(3474, 39) [16, 14, 15, 19, 21, 20, 13, 15, 13, 14, 16, 25, 22, 16, 14, 19, 16, 28, 15, 31, 12, 17, 17, 14, 13, 15, 16, 16, 16, 17, 15, 16, 20, 13, 16, 17, 18, 14, 15, 15, 17, 25, 19, 17, 16, 16, 17, 22, 15, 12, 13, 15, 13, 14, 15, 13, 22, 21, 12, 14, 18, 13, 18, 23, 16, 18, 14, 20, 20, 20, 17, 26, 19, 13, 14, 32, 15, 12, 15, 14, 18, 23, 25, 17, 12, 18, 22, 16, 17, 13, 14, 13, 16, 27, 17, 32, 11, 17, 13, 12, 19, 24, 21, 17, 26, 15, 16, 22, 15, 16, 20, 18, 15, 12, 23, 11, 12, 27, 12, 18, 14, 18, 29, 17, 14, 14, 19, 14, 15, 11, 15, 16, 17, 14, 15, 18, 19, 25, 13, 18, 15, 26, 12, 16, 15, 12, 12, 15, 15, 10, 19, 16, 14, 16, 13, 18, 16, 16, 16, 23, 12, 18, 28, 15, 19, 16, 10, 22, 12, 22, 21, 17, 14, 14, 19, 33, 16, 19, 21, 18, 13, 17, 15, 22, 15, 17, 18, 14, 15, 26, 16, 13, 13, 18, 17, 21, 14, 30, 15, 22, 17, 19] 202


         1     -350068.5015             +nan
         2     -326929.1273      +23139.3742
         3     -322107.2022       +4821.9251
         4     -319410.8474       +2696.3548
         5     -318410.2050       +1000.6425
         6     -317724.1755        +686.0295
         7     -317161.9773        +562.1982
         8     -316607.7027        +554.2746
         9     -316332.1074        +275.5953
        10     -316159.7198        +172.3875
        11     -316043.3559        +116.3639
        12     -315965.9850         +77.3709
        13     -315872.5888         +93.3962
        14     -315793.9158         +78.6729
        15     -315722.5593         +71.3565
        16     -315666.0159         +56.5434
        17     -315622.2828         +43.7331
        18     -315565.2008         +57.0820
        19     -315528.6188         +36.5820
        20     -315494.6986         +33.9202
        21     -315473.7122         +20.9864
        22     -315451.2280         +22.4842
        23

training class nguoi
(16044, 39) [29, 21, 24, 21, 18, 21, 28, 18, 33, 27, 36, 15, 18, 18, 25, 18, 30, 17, 18, 51, 16, 16, 25, 21, 19, 19, 27, 43, 19, 22, 16, 23, 36, 26, 20, 26, 22, 23, 58, 19, 24, 25, 12, 35, 17, 38, 34, 17, 59, 29, 31, 20, 20, 15, 22, 16, 20, 19, 31, 17, 16, 19, 19, 30, 14, 31, 21, 18, 47, 22, 20, 19, 32, 29, 19, 23, 23, 19, 27, 17, 27, 50, 40, 26, 18, 24, 22, 22, 16, 16, 23, 18, 16, 63, 26, 20, 28, 20, 19, 38, 31, 26, 21, 33, 28, 31, 21, 20, 28, 20, 25, 27, 14, 21, 20, 23, 23, 25, 26, 30, 36, 19, 15, 16, 14, 25, 25, 32, 31, 20, 40, 25, 16, 23, 22, 59, 62, 19, 23, 49, 21, 14, 16, 36, 16, 18, 17, 23, 22, 31, 20, 28, 24, 22, 19, 23, 17, 19, 29, 19, 54, 21, 27, 23, 22, 33, 28, 43, 23, 13, 21, 27, 30, 19, 45, 22, 36, 21, 18, 21, 19, 14, 27, 19, 42, 29, 20, 40, 21, 48, 35, 19, 42, 24, 19, 19, 58, 26, 20, 27, 17, 14, 17, 23, 17, 15, 23, 19, 29, 45, 31, 20, 45, 56, 16, 20, 21, 28, 23, 23, 21, 27, 30, 19, 22, 19, 21, 23, 22, 17, 18, 25, 18, 27, 55, 64, 21, 26, 19, 16, 24, 18

         1    -1539108.0713             +nan
         2    -1405221.6479     +133886.4234
         3    -1373974.5417      +31247.1063
         4    -1365708.0200       +8266.5217
         5    -1362958.4969       +2749.5231
         6    -1361703.3676       +1255.1293
         7    -1360896.3423        +807.0253
         8    -1360267.1621        +629.1802
         9    -1359746.1743        +520.9878
        10    -1359396.3741        +349.8002
        11    -1359138.7877        +257.5863
        12    -1358929.6860        +209.1017
        13    -1358754.7122        +174.9738
        14    -1358639.5597        +115.1525
        15    -1358559.5713         +79.9884
        16    -1358502.5241         +57.0472
        17    -1358460.8833         +41.6409
        18    -1358427.4553         +33.4280
        19    -1358400.5707         +26.8845
        20    -1358378.6605         +21.9102
        21    -1358358.3515         +20.3090
        22    -1358336.1058         +22.2457
        23

training class phai
(4258, 39) [14, 13, 17, 15, 18, 19, 10, 29, 18, 22, 17, 20, 19, 20, 17, 12, 28, 19, 27, 20, 12, 34, 20, 45, 20, 15, 20, 22, 20, 19, 20, 16, 24, 17, 17, 15, 16, 19, 15, 17, 26, 25, 22, 31, 19, 14, 19, 14, 28, 20, 25, 13, 16, 11, 20, 14, 24, 17, 21, 20, 21, 20, 18, 19, 13, 13, 16, 19, 18, 20, 23, 11, 19, 26, 21, 15, 22, 26, 11, 21, 21, 14, 28, 19, 20, 10, 19, 13, 20, 17, 32, 24, 20, 14, 16, 22, 22, 10, 25, 17, 18, 18, 16, 22, 22, 17, 11, 16, 18, 19, 20, 21, 20, 21, 20, 16, 14, 19, 21, 15, 22, 24, 21, 16, 20, 27, 10, 17, 15, 10, 24, 24, 19, 14, 22, 51, 20, 17, 21, 17, 13, 24, 16, 22, 13, 17, 14, 18, 23, 19, 32, 17, 22, 29, 26, 24, 21, 12, 25, 21, 18, 15, 14, 21, 13, 19, 28, 18, 15, 21, 20, 17, 13, 13, 17, 20, 16, 17, 17, 17, 19, 20, 29, 18, 20, 14, 14, 19, 20, 17, 17, 18, 25, 11, 13, 21, 18, 27, 17, 22, 19, 35, 16, 17, 26, 25, 17, 23, 19, 18, 26, 12, 17, 17, 20, 14, 17, 22, 11, 21, 24] 221


         1     -430271.4641             +nan
         2     -391290.2606      +38981.2035
         3     -386003.6724       +5286.5882
         4     -383994.4640       +2009.2084
         5     -382545.4413       +1449.0227
         6     -381522.6978       +1022.7435
         7     -381149.7588        +372.9390
         8     -380939.9246        +209.8342
         9     -380779.5697        +160.3549
        10     -380642.1344        +137.4354
        11     -380540.3899        +101.7445
        12     -380475.0975         +65.2924
        13     -380429.8919         +45.2056
        14     -380373.6155         +56.2763
        15     -380285.3138         +88.3018
        16     -380195.8710         +89.4428
        17     -380157.0986         +38.7723
        18     -380129.0422         +28.0565
        19     -380105.7783         +23.2639
        20     -380090.9238         +14.8545
        21     -380078.7618         +12.1620
        22     -380069.6229          +9.1389
        23

Training done


       114     -378392.0651          +0.0067


In [43]:
# Export model weight
for cname in class_names:
    if cname[:4] != 'test':
        model = models[cname]
        outfile = open(cname + 'GMMHMM.pkl', 'wb')
        pickle.dump(model, outfile)
        outfile.close()

In [40]:
print("Testing")
to_test = ['test_phai', 'test_nguoi', 'test_cach', 'test_vietnam', 'test_benhnhan']
for true_cname in to_test:
    correct = 0
    failed = 0
    real_name = true_cname.split('_')[-1]

    for O in dataset[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }

        match = True
        for key in score:
            if score[key] > score[real_name]:
                match = False
        if match:
            correct += 1
        else:
            failed += 1
            # print(real_name, score)

    acc = correct/(correct+failed)
    print(real_name + " : " + str(acc))

Testing
phai : 0.8026315789473685
nguoi : 0.8731343283582089
cach : 0.5729166666666666
vietnam : 1.0
benhnhan : 0.8125
