In [1]:
import librosa
import numpy as np
import os
import math
import pickle
from sklearn.cluster import KMeans
import hmmlearn.hmm
import random
from collections import defaultdict

result = defaultdict(list)


In [2]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix

def get_class_data(data_dir):
    ls = os.listdir(data_dir)
    files = [f for f in ls if f.endswith(".wav")]
    random.shuffle(files)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files]
    return mfcc

def clustering(X, n_clusters=20):
    kmeans = KMeans(n_clusters=n_clusters, n_init=100, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans

In [3]:
class_names = [ "toi", "dich",  "benh_nhan", "nguoi", "theo"]
datas = {}
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset", end=' - ')
    datas[cname] = get_class_data(os.path.join("train", cname))
#     dataset[cname] = get_class_data(os.path.join("train", cname))
    print(len(datas[cname]))
    datas[f"test_{cname}"] = datas[cname][-50:]
    datas[cname] = datas[cname][:-50]
    #datas[f"test_{cname}"] = get_class_data(os.path.join(path_to_data, f"test_{cname}"))
    
print("Done!!!")

Load toi dataset - 

  b = a[a_slice]
  b = a[a_slice]


113
Load dich dataset - 127
Load benh_nhan dataset - 196
Load nguoi dataset - 134
Load theo dataset - 116
Done!!!


In [4]:
# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in datas.items() if k[:4]!='test'], axis=0)
print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)
print("Done")

vectors (19114, 36)
centers (10, 36)
centers (10, 36)
Done


In [5]:
dict_components = {
    # toi |t|~|o|~|i|
    "toi": 9,
    # dich |d|~|i|~|ch|
    "dich": 9,
    # nguoi |ng|~|uo|~|i|
    "nguoi": 9,
    # theo |th|~|e|~|o|
    "theo": 9,
    # benh_nhan |b|~|e|~|nh|~|silent|~|nh|~|a|~|n| 
    "benh_nhan": 18,
}

In [6]:
models = {}
for cname in class_names:
    class_vectors = datas[cname]
    # convert all vectors to the cluster index
    # dataset['one'] = [O^1, ... O^R]
    # O^r = (c1, c2, ... ct, ... cT)
    # O^r size T x 1
    km = kmeans
    dataset[cname] = list([km.predict(v).reshape((-1,1)) for v in datas[cname]])
    dataset[f"test_{cname}"] = list([km.predict(v).reshape((-1,1)) for v in datas[f"test_{cname}"]])

    if cname[:4] != 'test':
        n = dict_components[cname]
        startprob = np.zeros(n)
        startprob[0] = 1.0
        transmat=np.diag(np.full(n,1))
        #transmat = np.array(dict_transmat[cname])
        
        hmm = hmmlearn.hmm.GMMHMM(
            n_components=n, 
            n_mix = 4, random_state=10, n_iter=500, verbose=True,
            params='mctw', init_params='mct',
            startprob_prior=startprob,
            transmat_prior=transmat,
        )
    
        X = np.concatenate(datas[cname])
        lengths = list([len(x) for x in datas[cname]])
        print("training class", cname)
        print(X.shape, lengths, len(lengths))
        # FOR GMMHMM: NO NEED lengths parameter
        hmm.fit(X)
        models[cname] = hmm
        with open(os.path.join("models", cname + ".pkl"), "wb") as file: 
            pickle.dump(models[cname], file)
            print("Training done. Model has been dump to ", os.path.join("models", cname + ".pkl"))
print("Training done")

training class toi
(2122, 36) [26, 17, 26, 52, 32, 21, 38, 24, 62, 28, 52, 56, 12, 23, 51, 17, 18, 14, 18, 40, 32, 25, 49, 14, 36, 131, 15, 26, 27, 27, 20, 29, 131, 17, 19, 37, 24, 23, 39, 21, 38, 53, 21, 52, 31, 42, 32, 20, 23, 28, 37, 20, 33, 19, 30, 41, 23, 28, 16, 43, 48, 39, 36] 63


         1     -226165.1282             +nan
         2     -209475.2480      +16689.8802
         3     -204438.1693       +5037.0787
         4     -191003.3269      +13434.8424
         5     -190596.7645        +406.5624
         6     -190373.5770        +223.1875
         7     -190190.3750        +183.2020
         8     -190023.3077        +167.0673
         9     -189907.0705        +116.2371
        10     -189810.9967         +96.0739
        11     -189772.4698         +38.5268
        12     -189734.1312         +38.3386
        13     -189685.7709         +48.3603
        14     -189659.0319         +26.7390
        15     -189641.6012         +17.4307
        16     -189610.4432         +31.1580
        17     -189579.3271         +31.1161
        18     -189508.1741         +71.1531
        19     -189420.1604         +88.0137
        20     -189334.1474         +86.0130
        21     -189312.5350         +21.6124
        22     -189288.2698         +24.2652
        23

Training done. Model has been dump to  models/toi.pkl
training class dich
(3233, 36) [24, 40, 43, 26, 22, 24, 131, 25, 19, 21, 27, 18, 33, 28, 39, 27, 45, 24, 20, 42, 17, 27, 18, 118, 24, 18, 33, 110, 23, 114, 12, 32, 47, 19, 43, 36, 28, 44, 41, 118, 21, 31, 35, 26, 28, 29, 21, 20, 39, 127, 21, 16, 19, 22, 36, 25, 19, 23, 24, 42, 48, 33, 24, 19, 26, 22, 28, 19, 131, 105, 27, 20, 123, 33, 123, 114, 114] 77


         1     -327067.9740             +nan
         2     -298672.2465      +28395.7275
         3     -291730.2984       +6941.9481
         4     -290185.0786       +1545.2198
         5     -289493.8203        +691.2583
         6     -289145.3802        +348.4400
         7     -288928.8514        +216.5289
         8     -288741.6589        +187.1925
         9     -288601.4352        +140.2237
        10     -288460.8220        +140.6132
        11     -288350.2996        +110.5224
        12     -288252.2096         +98.0900
        13     -288167.1679         +85.0418
        14     -288086.0450         +81.1229
        15     -288012.0903         +73.9547
        16     -287957.1663         +54.9240
        17     -287910.1639         +47.0025
        18     -287873.9618         +36.2020
        19     -287828.8326         +45.1293
        20     -287783.5218         +45.3107
        21     -287765.0200         +18.5018
        22     -287740.0878         +24.9322
        23

Training done. Model has been dump to  models/dich.pkl
training class benh_nhan
(7260, 36) [32, 35, 40, 140, 41, 44, 52, 127, 41, 44, 43, 43, 45, 123, 33, 51, 41, 39, 32, 52, 48, 35, 44, 34, 27, 41, 52, 42, 46, 41, 48, 39, 54, 47, 64, 57, 32, 32, 41, 140, 29, 48, 110, 44, 44, 57, 40, 47, 59, 46, 33, 52, 67, 38, 38, 26, 28, 122, 37, 48, 46, 140, 33, 74, 35, 127, 36, 35, 47, 54, 31, 45, 37, 42, 58, 47, 55, 62, 44, 48, 25, 37, 44, 41, 42, 33, 32, 33, 71, 41, 46, 44, 51, 72, 41, 53, 30, 44, 48, 41, 28, 34, 46, 41, 63, 57, 29, 63, 127, 29, 43, 56, 30, 114, 51, 37, 30, 46, 38, 45, 51, 35, 45, 51, 43, 40, 34, 45, 29, 28, 40, 39, 37, 32, 36, 135, 39, 69, 35, 37, 47, 110, 44, 35, 43, 34] 146


         1     -745922.9359             +nan
         2     -689813.8620      +56109.0739
         3     -676915.5201      +12898.3418
         4     -672752.3366       +4163.1835
         5     -671018.9581       +1733.3785
         6     -670109.4439        +909.5142
         7     -669653.9484        +455.4955
         8     -669344.3628        +309.5856
         9     -669072.5191        +271.8437
        10     -668833.3787        +239.1404
        11     -668587.2487        +246.1300
        12     -668434.7400        +152.5086
        13     -668319.7002        +115.0398
        14     -668208.9119        +110.7883
        15     -668111.4362         +97.4757
        16     -668060.3677         +51.0685
        17     -667983.9062         +76.4615
        18     -667907.5064         +76.3998
        19     -667838.4731         +69.0333
        20     -667769.2094         +69.2637
        21     -667723.2001         +46.0093
        22     -667681.5258         +41.6743
        23

Training done. Model has been dump to  models/benh_nhan.pkl
training class nguoi
(3840, 36) [19, 22, 144, 29, 51, 24, 33, 30, 48, 14, 34, 44, 33, 15, 20, 122, 105, 54, 47, 118, 127, 22, 30, 26, 35, 38, 25, 36, 35, 25, 36, 127, 20, 16, 30, 135, 47, 40, 36, 118, 22, 25, 118, 22, 22, 33, 24, 17, 14, 21, 43, 20, 29, 18, 22, 38, 123, 23, 118, 135, 23, 25, 44, 53, 16, 32, 20, 135, 20, 135, 22, 135, 19, 36, 26, 18, 15, 22, 28, 25, 20, 20, 32, 22] 84


         1     -360107.9667             +nan
         2     -326459.8320      +33648.1347
         3     -319301.9720       +7157.8600
         4     -317388.6650       +1913.3070
         5     -316666.2444        +722.4206
         6     -316338.9721        +327.2723
         7     -316158.8924        +180.0797
         8     -316065.3681         +93.5244
         9     -316010.0625         +55.3055
        10     -315942.1012         +67.9613
        11     -315914.2659         +27.8353
        12     -315889.1636         +25.1023
        13     -315856.7957         +32.3680
        14     -315829.2636         +27.5321
        15     -315794.8152         +34.4483
        16     -315758.9295         +35.8858
        17     -315724.8695         +34.0600
        18     -315700.9964         +23.8731
        19     -315621.9041         +79.0923
        20     -315580.7672         +41.1370
        21     -315564.8349         +15.9323
        22     -315541.3560         +23.4788
        23

Training done. Model has been dump to  models/nguoi.pkl
training class theo
(2659, 36) [49, 27, 46, 34, 53, 37, 45, 30, 14, 22, 64, 57, 24, 118, 22, 29, 28, 19, 14, 32, 26, 11, 33, 34, 31, 17, 29, 48, 46, 25, 47, 21, 14, 17, 42, 42, 39, 57, 56, 13, 25, 114, 33, 11, 28, 32, 39, 114, 110, 38, 51, 53, 114, 79, 33, 50, 29, 30, 25, 21, 39, 23, 63, 23, 39, 31] 66


         1     -279394.6231             +nan
         2     -259276.0898      +20118.5334
         3     -253462.7126       +5813.3772
         4     -251572.9454       +1889.7672
         5     -250934.4862        +638.4592
         6     -250606.5617        +327.9245
         7     -250411.7411        +194.8206
         8     -250262.2720        +149.4691
         9     -250171.1370         +91.1349
        10     -250083.7692         +87.3679
        11     -250040.8814         +42.8877
        12     -249991.3678         +49.5136
        13     -249944.8659         +46.5020


Training done. Model has been dump to  models/theo.pkl
Training done


        14     -249929.5568         +15.3091
        15     -249930.2214          -0.6646


In [39]:
print("Testing")
result = {}
for cname in class_names:
    true_cname = f"test_{cname}"
    true_predict = 0
#     for O in dataset[true_cname]:
    for O in datas[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items()}
        predict = max(score, key=score.get)
        if predict == cname:
            true_predict += 1
#         print(true_cname, score, predict)
    result[true_cname] = f"QUANTITY: {true_predict}/{len(datas[true_cname])}\nACCURACY: {100*true_predict/len(datas[true_cname])}"

Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance


Testing


Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate

In [40]:
for k, v in result.items():
    print(k,'\n',v,'\n')

test_toi 
 QUANTITY: 39/50
ACCURACY: 78.0 

test_dich 
 QUANTITY: 50/50
ACCURACY: 100.0 

test_benh_nhan 
 QUANTITY: 50/50
ACCURACY: 100.0 

test_nguoi 
 QUANTITY: 49/50
ACCURACY: 98.0 

test_theo 
 QUANTITY: 49/50
ACCURACY: 98.0 

