In [1]:
import os
class_names = ['cothe', 'khong', 'nguoi', 'toi', 'nhung']
states = [12, 9, 9, 9, 6]

length = 0
for d in class_names:
    length += len(os.listdir("data/" + d))
print(length)

607


In [2]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix


In [3]:
all_data = {}
all_labels = {}
for cname in class_names:
    file_paths = [os.path.join("data", cname, i) for i in os.listdir(os.path.join('data', cname)) if i.endswith('.wav')]
    data = [get_mfcc(file_path) for file_path in file_paths]
    all_data[cname] = data
    all_labels[cname] = [class_names.index(cname) for i in range(len(file_paths))]

In [4]:
from sklearn.model_selection import train_test_split

X = {'train': {}, 'test': {}}
y = {'train': {}, 'test': {}}
for cname in class_names:
    x_train, x_test, _, y_test = train_test_split(
        all_data[cname], all_labels[cname], 
        test_size = 0.33, 
        random_state=42
    )
    X['train'][cname] = x_train
    X['test'][cname] = x_test
    y['test'][cname] = y_test

In [5]:
for cname in class_names:
    print(cname,len(X['train'][cname]), len(X['test'][cname]), len(y['test'][cname]))

cothe 97 48 48
khong 66 33 33
nguoi 94 47 47
toi 79 40 40
nhung 66 33 33


In [6]:
import hmmlearn.hmm as hmm

model = {}
for idx, cname in enumerate(class_names):
    start_prob = np.full(states[idx], 0.0)
    start_prob[0] = 1.0
    trans_matrix = np.full((states[idx], states[idx]), 0.0)
    p = 0.5
    np.fill_diagonal(trans_matrix, p)
    np.fill_diagonal(trans_matrix[0:, 1:], 1 - p)
    trans_matrix[-1, -1] = 1.0
    
    #trans matrix
    print(cname)
    print(trans_matrix) 

    model[cname] = hmm.GaussianHMM(
        n_components=states[idx], 
        verbose=True, 
        n_iter=300, 
        startprob_prior=start_prob, 
        transmat_prior=trans_matrix,
        params='stmc',
        init_params='mc',
        random_state=42
    )

    model[cname].fit(X=np.vstack(X['train'][cname]), lengths=[x.shape[0] for x in X['train'][cname]])

cothe
[[0.5 0.5 0.  0.  0.  0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.5 0.5 0.  0.  0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.5 0.5 0.  0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.5 0.5 0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.5 0.5 0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.5 0.5 0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.5 0.5 0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5]
 [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1. ]]


         1     -365604.4107             +nan
         2     -351596.5127      +14007.8979
         3     -349606.7960       +1989.7168
         4     -348815.7816        +791.0143
         5     -348360.1695        +455.6121
         6     -348132.8255        +227.3440
         7     -348004.3328        +128.4928
         8     -347900.1507        +104.1820
         9     -347806.4892         +93.6616
        10     -347733.0379         +73.4513
        11     -347623.8563        +109.1816
        12     -347490.6582        +133.1981
        13     -347381.6377        +109.0205
        14     -347299.5187         +82.1191
        15     -347238.8864         +60.6322
        16     -347175.0873         +63.7991
        17     -347114.5085         +60.5788
        18     -347023.9278         +90.5807
        19     -346940.1157         +83.8121
        20     -346851.9844         +88.1314
        21     -346780.7531         +71.2312
        22     -346704.2809         +76.4723
        23

khong
[[0.5 0.5 0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.5 0.5 0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.5 0.5 0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.5 0.5 0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.5 0.5 0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.5 0.5 0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.5 0.5 0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.5 0.5]
 [0.  0.  0.  0.  0.  0.  0.  0.  1. ]]


         1     -413182.0115             +nan
         2     -317089.2698      +96092.7417
         3     -206739.8700     +110349.3997
         4      -34281.9153     +172457.9547
         5       15945.7051      +50227.6204
         6       25186.2714       +9240.5664
         7       25780.5670        +594.2955
         8       25916.6651        +136.0982
         9       26068.1444        +151.4793
        10       26256.6360        +188.4915
        11       26395.1817        +138.5457
        12       26664.4637        +269.2820
        13       26966.0902        +301.6265
        14       26937.5574         -28.5327


nguoi
[[0.5 0.5 0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.5 0.5 0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.5 0.5 0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.5 0.5 0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.5 0.5 0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.5 0.5 0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.5 0.5 0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.5 0.5]
 [0.  0.  0.  0.  0.  0.  0.  0.  1. ]]


         1     -217813.1284             +nan
         2     -209157.3117       +8655.8167
         3     -208147.9770       +1009.3347
         4     -207806.5222        +341.4548
         5     -207424.0510        +382.4712
         6     -207303.1292        +120.9218
         7     -207200.8852        +102.2440
         8     -207142.2968         +58.5884
         9     -207096.5276         +45.7692
        10     -207041.4010         +55.1265
        11     -206978.3082         +63.0929
        12     -206894.1615         +84.1467
        13     -206840.6801         +53.4814
        14     -206822.4565         +18.2236
        15     -206808.8577         +13.5988
        16     -206803.2153          +5.6424
        17     -206800.1104          +3.1049
        18     -206798.5416          +1.5688
        19     -206797.6843          +0.8573
        20     -206797.4150          +0.2694
        21     -206778.8231         +18.5918
        22     -206776.1488          +2.6743
        23

toi
[[0.5 0.5 0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.5 0.5 0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.5 0.5 0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.5 0.5 0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.5 0.5 0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.5 0.5 0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.5 0.5 0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.5 0.5]
 [0.  0.  0.  0.  0.  0.  0.  0.  1. ]]


         1     -188477.8743             +nan
         2     -179580.5109       +8897.3633
         3     -177568.6987       +2011.8122
         4     -176840.8949        +727.8039
         5     -176475.8851        +365.0098
         6     -176201.0194        +274.8657
         7     -176014.1079        +186.9115
         8     -175933.5739         +80.5340
         9     -175918.2700         +15.3040
        10     -175913.1807          +5.0892
        11     -175927.8876         -14.7068


nhung
[[0.5 0.5 0.  0.  0.  0. ]
 [0.  0.5 0.5 0.  0.  0. ]
 [0.  0.  0.5 0.5 0.  0. ]
 [0.  0.  0.  0.5 0.5 0. ]
 [0.  0.  0.  0.  0.5 0.5]
 [0.  0.  0.  0.  0.  1. ]]


         1     -139957.5606             +nan
         2     -134871.9913       +5085.5692
         3     -133760.6616       +1111.3297
         4     -133557.4086        +203.2531
         5     -133486.4107         +70.9979
         6     -133408.5543         +77.8564
         7     -133244.3397        +164.2146
         8     -133023.7229        +220.6168
         9     -132829.5320        +194.1908
        10     -132734.4166         +95.1154
        11     -132709.8468         +24.5699
        12     -132699.5407         +10.3061
        13     -132692.6589          +6.8818
        14     -132685.0396          +7.6193
        15     -132674.4174         +10.6221
        16     -132664.9179          +9.4995
        17     -132652.8582         +12.0597
        18     -132611.8375         +41.0207
        19     -132514.8572         +96.9803
        20     -132419.3892         +95.4680
        21     -132319.7027         +99.6865
        22     -132299.3319         +20.3708
        23

In [7]:
import pickle

# save model
for cname in class_names:
    name = f'models_train\model_{cname}.pkl'
    with open(name, 'wb') as file: 
        pickle.dump(model[cname], file)

In [8]:
import pickle, os
import numpy as np

from sklearn.metrics import classification_report

In [9]:
y_true = []
y_pred = []
for cname in class_names:
    for mfcc, target in zip(X['test'][cname], y['test'][cname]):
        scores = [model[cname].score(mfcc) for cname in class_names]
        pred = np.argmax(scores)
        y_pred.append(pred)
        y_true.append(target)
    print((np.array(y_true) == np.array(y_pred)).sum()/len(y_true))
print(y_true)
print(y_pred)


0.9791666666666666
0.9876543209876543
0.96875
0.9523809523809523
0.9552238805970149
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

In [10]:
report = classification_report(y_true, y_pred, target_names=class_names)
print(report)

              precision    recall  f1-score   support

       cothe       0.96      0.98      0.97        48
       khong       0.97      1.00      0.99        33
       nguoi       0.98      0.94      0.96        47
         toi       0.97      0.90      0.94        40
       nhung       0.89      0.97      0.93        33

    accuracy                           0.96       201
   macro avg       0.95      0.96      0.95       201
weighted avg       0.96      0.96      0.96       201



In [17]:
#loadmodels
import pickle

model_train = {}
for key in class_names:
    name = f"models\model_{key}.pkl"
    with open(name, 'rb') as file:
        model_train[key] = pickle.load(file)

In [18]:
print(model_train)

{'cothe': GaussianHMM(algorithm='viterbi', covariance_type='diag', covars_prior=0.01,
            covars_weight=1, init_params='mc', means_prior=0, means_weight=0,
            min_covar=0.001, n_components=12, n_iter=300, params='stmc',
            random_state=42,
            startprob_prior=array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
            tol=0.01,
            transmat_prior=array([[0.5, 0.5, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0.5, 0....
       [0. , 0. , 0. , 0.5, 0.5, 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0.5, 0.5, 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0.5, 0.5, 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0.5, 0.5, 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.5, 0.5, 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.5, 0.5, 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.5, 0.5, 0. ],
       [0. , 0. , 0. , 0