In [14]:
import librosa
import numpy as np
import os
import math
import pickle
from sklearn.cluster import KMeans
import hmmlearn.hmm
import random
from collections import defaultdict

result = defaultdict(list)


In [20]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix

def get_class_data(data_dir):
    ls = os.listdir(data_dir)
    files = [f for f in ls if f.endswith(".wav")]
    random.shuffle(files)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files]
    return mfcc

def clustering(X, n_clusters=20):
    kmeans = KMeans(n_clusters=n_clusters, n_init=100, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans

In [21]:
class_names = [ "toi", "dich",  "benh_nhan", "nguoi", "theo",]# "test_ThanhPho", "test_Me", "test_Nha", "test_YTe", "test_Hoc",]

datas = {}
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset", end=' - ')
    datas[cname] = get_class_data(os.path.join("train", cname))
#     dataset[cname] = get_class_data(os.path.join("train", cname))
    print(len(datas[cname]))
    datas[f"test_{cname}"] = datas[cname][-20:]
    datas[cname] = datas[cname][:-20]
    #datas[f"test_{cname}"] = get_class_data(os.path.join(path_to_data, f"test_{cname}"))
    
print("Done!!!")

Load toi dataset - 

  b = a[a_slice]


113
Load dich dataset - 127
Load benh_nhan dataset - 196
Load nguoi dataset - 134
Load theo dataset - 116
Done!!!


In [22]:
# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in datas.items() if k[:4]!='test'], axis=0)
print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)
print("Done")

vectors (25433, 36)
centers (20, 36)
centers (20, 36)
Done


In [23]:
dict_components = {
    # toi |t|~|o|~|i|
    "toi": 9,
    # dich |d|~|i|~|ch|
    "dich": 9,
    # nguoi |ng|~|uo|~|i|
    "nguoi": 9,
    # theo |th|~|e|~|o|
    "theo": 9,
    # benh_nhan |b|~|e|~|nh|~|silent|~|nh|~|a|~|n| 
    "benh_nhan": 18,
}

In [24]:
models = {}
for cname in class_names:
    class_vectors = datas[cname]
    # convert all vectors to the cluster index
    # dataset['one'] = [O^1, ... O^R]
    # O^r = (c1, c2, ... ct, ... cT)
    # O^r size T x 1
    km = kmeans
    dataset[cname] = list([km.predict(v).reshape((-1,1)) for v in datas[cname]])
    dataset[f"test_{cname}"] = list([km.predict(v).reshape((-1,1)) for v in datas[f"test_{cname}"]])

    if cname[:4] != 'test':
        n = dict_components[cname]
        startprob = np.zeros(n)
        startprob[0] = 1.0
        transmat=np.diag(np.full(n,1))
        #transmat = np.array(dict_transmat[cname])
        
        hmm = hmmlearn.hmm.GMMHMM(
            n_components=n, 
            n_mix=4, random_state=10, n_iter=500, verbose=True,
            params='mctw', init_params='mct',
            startprob_prior=startprob,
            transmat_prior=transmat,
        )
    
        X = np.concatenate(datas[cname])
#         lengths = list([len(x) for x in datas[cname]])
        print("training class", cname)
#         print(X.shape, lengths, len(lengths))
        # FOR GMMHMM: NO NEED lengths parameter
        hmm.fit(X)
        models[cname] = hmm
        with open(os.path.join("models", cname + ".pkl"), "wb") as file: 
            pickle.dump(models[cname], file)
            print("Training done. Model has been dump to ", os.path.join("models", cname + ".pkl"))
print("Training done")

training class toi


         1     -367742.3158             +nan
         2     -337445.1928      +30297.1230
         3     -329196.1287       +8249.0641
         4     -327185.4218       +2010.7068
         5     -326314.5813        +870.8405
         6     -325892.9649        +421.6164
         7     -325577.0951        +315.8698
         8     -325346.4981        +230.5971
         9     -325205.5990        +140.8991
        10     -325142.7601         +62.8389
        11     -325040.2495        +102.5106
        12     -324992.5098         +47.7397
        13     -324981.3435         +11.1663
        14     -324957.6095         +23.7340
        15     -324928.5361         +29.0734
        16     -324878.6600         +49.8762
        17     -324859.4377         +19.2223
        18     -324839.3367         +20.1010
        19     -324816.0665         +23.2701
        20     -324806.2257          +9.8408
        21     -324800.5219          +5.7038
        22     -324787.6847         +12.8373
        23

Training done. Model has been dump to  models/toi.pkl
training class dich


         1     -456320.7583             +nan
         2     -418158.7573      +38162.0010
         3     -407563.6518      +10595.1054
         4     -404628.2744       +2935.3774
         5     -403737.8934        +890.3810
         6     -403317.1652        +420.7282
         7     -402954.3177        +362.8475
         8     -402727.1143        +227.2034
         9     -402603.5734        +123.5409
        10     -402504.7294         +98.8441
        11     -402431.7807         +72.9487
        12     -402375.5179         +56.2628
        13     -402328.9396         +46.5783
        14     -402271.4255         +57.5140
        15     -402194.5833         +76.8422
        16     -402161.8264         +32.7569
        17     -402135.6455         +26.1809
        18     -402113.2248         +22.4207
        19     -402090.6247         +22.6001
        20     -402066.8708         +23.7539
        21     -402036.9321         +29.9387
        22     -402015.5070         +21.4251
        23

Training done. Model has been dump to  models/dich.pkl
training class benh_nhan


         1     -906063.8828             +nan
         2     -837547.9132      +68515.9696
         3     -820800.1513      +16747.7619
         4     -815981.6493       +4818.5020
         5     -814124.5497       +1857.0996
         6     -813225.2655        +899.2842
         7     -812614.1005        +611.1650
         8     -812173.7436        +440.3568
         9     -811857.7261        +316.0175
        10     -811578.9479        +278.7782
        11     -811320.2960        +258.6519
        12     -811102.9542        +217.3417
        13     -810952.1272        +150.8271
        14     -810821.1432        +130.9839
        15     -810690.4529        +130.6903
        16     -810590.2919        +100.1611
        17     -810510.4393         +79.8526
        18     -810435.7171         +74.7222
        19     -810383.8700         +51.8471
        20     -810319.6126         +64.2574
        21     -810212.7822        +106.8303
        22     -810164.6779         +48.1043
        23

Training done. Model has been dump to  models/benh_nhan.pkl
training class nguoi


         1     -458046.8193             +nan
         2     -415687.9189      +42358.9005
         3     -407365.2792       +8322.6397
         4     -405794.0738       +1571.2054
         5     -405194.6857        +599.3881
         6     -404872.3371        +322.3486
         7     -404594.6255        +277.7116
         8     -404398.0150        +196.6105
         9     -404223.3740        +174.6410
        10     -404134.4501         +88.9239
        11     -404096.8419         +37.6081
        12     -404067.1294         +29.7125
        13     -404050.5592         +16.5702
        14     -404027.8382         +22.7210
        15     -404008.5309         +19.3073
        16     -403990.0209         +18.5100
        17     -403966.9233         +23.0976
        18     -403961.1971          +5.7262
        19     -403950.7045         +10.4926
        20     -403926.5731         +24.1313
        21     -403898.0348         +28.5384
        22     -403850.1562         +47.8786
        23

Training done. Model has been dump to  models/nguoi.pkl
training class theo


         1     -400158.7514             +nan
         2     -369934.1331      +30224.6183
         3     -360274.5961       +9659.5370
         4     -357770.1656       +2504.4305
         5     -356756.0412       +1014.1244
         6     -356270.6506        +485.3907
         7     -356018.0608        +252.5898
         8     -355851.5647        +166.4961
         9     -355754.6695         +96.8952
        10     -355614.0794        +140.5902
        11     -355475.5499        +138.5295
        12     -355306.4153        +169.1345
        13     -355211.3086         +95.1068
        14     -355117.1536         +94.1550
        15     -355008.0428        +109.1108
        16     -354916.1664         +91.8764
        17     -354819.7175         +96.4490
        18     -354701.1485        +118.5690
        19     -354500.4624        +200.6862
        20     -354279.2350        +221.2273
        21     -353780.1884        +499.0466
        22     -352405.2450       +1374.9434
        23

Training done. Model has been dump to  models/theo.pkl
Training done


       125      181702.9712          +0.0099


In [16]:
print("Testing")
result = {}
for cname in class_names:
    true_cname = f"test_{cname}"
    true_predict = 0
#     for O in dataset[true_cname]:
    for O in datas[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items()}
        predict = max(score, key=score.get)
        if predict == cname:
            true_predict += 1
#         print(true_cname, score, predict)
    result[true_cname] = f"QUANTITY: {true_predict}/{len(datas[true_cname])}\nACCURACY: {100*true_predict/len(datas[true_cname])}"

Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance


Testing


Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate

In [17]:
for k, v in result.items():
    print(k,'\n',v,'\n')

test_toi 
 QUANTITY: 20/20
ACCURACY: 100.0 

test_dich 
 QUANTITY: 20/20
ACCURACY: 100.0 

test_benh_nhan 
 QUANTITY: 19/20
ACCURACY: 95.0 

test_nguoi 
 QUANTITY: 18/20
ACCURACY: 90.0 

test_theo 
 QUANTITY: 20/20
ACCURACY: 100.0 

