In [2]:
!pip install hmmlearn

Collecting hmmlearn
  Downloading hmmlearn-0.2.3-cp37-cp37m-macosx_10_13_x86_64.whl (120 kB)
[K     |████████████████████████████████| 120 kB 598 kB/s eta 0:00:01
Installing collected packages: hmmlearn
Successfully installed hmmlearn-0.2.3


In [1]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import pickle as pk
import hmmlearn.hmm

In [2]:
def get_mfcc(file_path):
    """Load an audio file and return mean-normalised MFCC + delta features.

    Parameters
    ----------
    file_path : str
        Path to an audio file readable by ``librosa.load``. The sample rate
        is whatever ``librosa.load`` returns with its default settings.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(T, 36)`` (one row per frame): 12 MFCCs with the
        per-coefficient mean removed, followed by their first-order and
        second-order deltas. hmmlearn expects this ``T x N`` layout.
    """
    y, sr = librosa.load(file_path)  # read .wav file
    hop_length = math.floor(sr * 0.010)  # 10 ms hop
    win_length = math.floor(sr * 0.025)  # 25 ms frame
    # mfcc is a 12 x T matrix. The audio and sample rate are passed by keyword:
    # newer librosa releases reject them as positional arguments, while the
    # keyword form works on old and new releases alike.
    mfcc = librosa.feature.mfcc(
        y=y, sr=sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # subtract the per-coefficient mean over time --> normalise mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1, keepdims=True)
    # delta features, 1st and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0)  # O^r
    # return T x 36 (transpose of X)
    return X.T


In [3]:
def get_class_data(data_dir):
    """Extract features for every ``.wav`` file directly inside ``data_dir``.

    Parameters
    ----------
    data_dir : str
        Directory to scan (not recursive). Entries whose name does not end
        with ``".wav"`` are ignored.

    Returns
    -------
    list
        One ``get_mfcc`` result per ``.wav`` file, in sorted file-name order.
        ``os.listdir`` returns entries in arbitrary order, so sorting keeps
        the order of utterances (and everything fitted on them downstream)
        the same from run to run and machine to machine.
    """
    wav_names = sorted(f for f in os.listdir(data_dir) if f.endswith(".wav"))
    return [get_mfcc(os.path.join(data_dir, name)) for name in wav_names]

def clustering(X, n_clusters=10):
    """Fit a k-means codebook on ``X`` and return the fitted estimator.

    Parameters
    ----------
    X : array-like
        Samples to cluster, passed straight to ``KMeans.fit``.
    n_clusters : int, default 10
        Number of cluster centres (codebook size).

    Returns
    -------
    KMeans
        The fitted estimator. The shape of its ``cluster_centers_`` is
        printed as a side effect.
    """
    # Fixed seed and 50 restarts, exactly as configured originally.
    model = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0)
    model.fit(X)
    centers_shape = model.cluster_centers_.shape
    print("centers", centers_shape)
    return model

In [8]:
# Word classes. Names starting with "test" are held-out folders; all folders
# (including the test_* ones) are read from under "train/".
class_names = ["toi", "dich", "nguoi", "benh_nhan", "theo", "test_toi", "test_nguoi", "test_dich", "test_theo", "test_benh_nhan"]
dataset = {}           # cname -> list of per-utterance arrays (features, then cluster indices)
train_dataset = {}     # training classes only; used to fit the k-means codebook
models = {}            # cname -> trained/loaded HMM, filled by later cells
original_dataset = {}
for cname in class_names:
    dataset[cname] = get_class_data(os.path.join("train", cname))
    if not cname.startswith("test"):
        # Reuse the features that were just extracted instead of reading and
        # processing every training file a second time. dataset[cname] is
        # rebound (not mutated) below, so this list keeps the raw features.
        train_dataset[cname] = dataset[cname]

# Stack every frame of every training utterance into one (n_frames, 36) matrix.
all_train_vectors = np.concatenate(
    [np.concatenate(utterances, axis=0) for utterances in train_dataset.values()],
    axis=0,
)
print("vectors", all_train_vectors.shape)
# Run K-Means on the training frames only to get the codebook
# (clustering() already prints the shape of the centres).
kmeans = clustering(all_train_vectors)

for cname in class_names:
    # convert all vectors to the cluster index
    # dataset[cname] = [O^1, ... O^R]
    # O^r = (c1, c2, ... ct, ... cT), size T x 1
    dataset[cname] = [kmeans.predict(v).reshape(-1, 1) for v in dataset[cname]]

  b = a[a_slice]


vectors (13768, 36)
centers (10, 36)
centers (10, 36)


In [9]:
# Restore previously trained HMMs from Models/<class>.pkl into `models`.
# NOTE: pickle can execute arbitrary code on load; only open files that this
# notebook (or another trusted source) wrote.
lable_names = ["toi", "dich", "nguoi", "benh_nhan", "theo"]
for cname in lable_names:
    model_path = os.path.join("Models", cname + ".pkl")
    with open(model_path, "rb") as file:
        models[cname] = pk.load(file)

In [35]:
# toi |t|~|o|~|i|
cname = 'toi'

# 9-state left-to-right HMM over the k-means cluster indices.
# init_params='e': only the emission matrix is randomly initialised; the start
# and transition probabilities set below are used as the starting point.
# params='ste': start, transition and emission probabilities are all re-estimated.
# random_state fixes the random emission initialisation so that re-running the
# cell gives the same model.
# NOTE(review): n_iter is left at the hmmlearn default; the recorded training
# log stops after 10 iterations with the log-likelihood still rising by more
# than 100 per step, so consider raising n_iter.
hmm = hmmlearn.hmm.MultinomialHMM(
    n_components=9, init_params='e', params='ste', verbose=True,
    random_state=0,
)
# Start in one of the first three states only.
hmm.startprob_ = np.array([0.6, 0.2, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
# Each state may stay, advance by one, or skip one state; the last state is absorbing.
hmm.transmat_ = np.array([
    [0.6, 0.2, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.6, 0.2, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.6, 0.2, 0.2, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.6, 0.2, 0.2, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.6, 0.2, 0.2, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.6, 0.2, 0.2, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6, 0.2, 0.2],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6, 0.4],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]
])

# hmmlearn takes all sequences stacked into one array plus their lengths.
X = np.concatenate(dataset[cname])
lengths = [len(x) for x in dataset[cname]]
print("training class", cname)
print(X.shape, lengths, len(lengths))
hmm.fit(X, lengths=lengths)
models[cname] = hmm
# Persist the trained model so it can be reloaded without retraining.
with open(os.path.join("Models", cname + ".pkl"), "wb") as file:
    pk.dump(models[cname], file)

training class toi
(2230, 1) [31, 41, 28, 23, 29, 17, 43, 39, 19, 20, 37, 36, 22, 26, 40, 28, 21, 32, 23, 19, 31, 38, 20, 33, 39, 23, 51, 53, 38, 27, 31, 32, 22, 45, 32, 55, 32, 42, 67, 18, 69, 53, 24, 46, 52, 24, 26, 40, 31, 35, 18, 12, 47, 21, 17, 27, 30, 33, 25, 20, 23, 18, 24, 38, 48, 26, 17, 21, 62] 69


         1       -5086.3350             +nan
         2       -3963.0501       +1123.2849
         3       -3399.2908        +563.7593
         4       -3272.8954        +126.3954
         5       -3207.1713         +65.7241
         6       -3166.6254         +40.5459
         7       -3112.4875         +54.1379
         8       -3018.8140         +93.6735
         9       -2896.4917        +122.3224
        10       -2786.0245        +110.4672


In [30]:
# Show the learned transition matrix of the 'toi' model, rounded to two
# decimals and without scientific notation.
np.set_printoptions(suppress=True, precision=2)
toi_transmat = models['toi'].transmat_
print(toi_transmat)

[[0.73 0.21 0.06 0.   0.   0.   0.   0.   0.  ]
 [0.   0.89 0.01 0.1  0.   0.   0.   0.   0.  ]
 [0.   0.   0.81 0.09 0.11 0.   0.   0.   0.  ]
 [0.   0.   0.   0.38 0.5  0.11 0.   0.   0.  ]
 [0.   0.   0.   0.   0.48 0.19 0.33 0.   0.  ]
 [0.   0.   0.   0.   0.   0.82 0.   0.18 0.  ]
 [0.   0.   0.   0.   0.   0.   0.9  0.01 0.09]
 [0.   0.   0.   0.   0.   0.   0.   0.91 0.09]
 [0.   0.   0.   0.   0.   0.   0.   0.   1.  ]]


In [31]:
# # nguoi |ng|~|uo|~|i| -- 15-state left-to-right initialisation, kept commented out as a reference template

# hmm = hmmlearn.hmm.MultinomialHMM(
#   n_components=15, init_params='e', params='ste', verbose=True
# )
# hmm.startprob_ = np.array([0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
# hmm.transmat_ = np.array([
#     [0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#     [0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#     [0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#     [0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#     [0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0],
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0],
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0],
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0],    
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0],    
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2],    
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5],
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]
#   ])

In [32]:
# # dich |d|~|i|~|ch| -- 9-state left-to-right initialisation, kept commented out as a reference template

# hmm = hmmlearn.hmm.MultinomialHMM(
#   n_components=9, init_params='e', params='ste', verbose=True
# )
# hmm.startprob_ = np.array([0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
# hmm.transmat_ = np.array([
#     [0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#     [0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0],
#     [0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0],
#     [0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0],
#     [0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0],
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0],
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2],
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5],
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]
#   ])

In [36]:
# theo |th|~|e|~|o|
cname = "theo"

# 9-state left-to-right HMM over the k-means cluster indices.
# init_params='e': only the emission matrix is randomly initialised; the start
# and transition probabilities set below are used as the starting point.
# random_state fixes that random initialisation so that re-running the cell
# gives the same model.
hmm = hmmlearn.hmm.MultinomialHMM(
    n_components=9, init_params='e', params='ste', verbose=True,
    random_state=0,
)
# Start in one of the first three states only.
hmm.startprob_ = np.array([0.6, 0.2, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
# Each state may stay, advance by one, or skip one state; the last state is absorbing.
hmm.transmat_ = np.array([
    [0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]
])

# hmmlearn takes all sequences stacked into one array plus their lengths.
X = np.concatenate(dataset[cname])
lengths = [len(x) for x in dataset[cname]]
print("training class", cname)
print(X.shape, lengths, len(lengths))
hmm.fit(X, lengths=lengths)
models[cname] = hmm
# Persist the trained model so it can be reloaded without retraining.
with open(os.path.join("Models", cname + ".pkl"), "wb") as file:
    pk.dump(models[cname], file)

training class theo
(2752, 1) [39, 30, 28, 46, 57, 26, 64, 27, 29, 27, 80, 19, 21, 25, 50, 18, 27, 27, 31, 21, 15, 42, 37, 32, 48, 39, 33, 39, 25, 23, 39, 30, 11, 33, 22, 34, 25, 42, 46, 33, 28, 25, 13, 14, 21, 40, 56, 57, 63, 30, 22, 32, 21, 35, 11, 98, 23, 79, 22, 31, 15, 39, 39, 38, 34, 14, 26, 28, 40, 39, 21, 29, 22, 51, 24, 14, 30, 23, 30, 21, 25, 42, 30, 17] 84


         1       -6515.6990             +nan
         2       -4759.6492       +1756.0498
         3       -3992.7344        +766.9148
         4       -3603.8381        +388.8964
         5       -3463.5388        +140.2992
         6       -3397.8179         +65.7210
         7       -3359.0511         +38.7668
         8       -3336.5148         +22.5362
         9       -3319.5827         +16.9322
        10       -3302.6019         +16.9807


In [None]:
# # benh_nhan |b|~|e|~|nh|~|silent|~|nh|~|a|~|n| -- 21-state left-to-right initialisation, kept commented out as a reference template

# hmm = hmmlearn.hmm.MultinomialHMM(
#     n_components=21, init_params='e', params='ste', verbose=True
# )
# hmm.startprob_ = np.array([0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
# hmm.transmat_ = np.array([
#     [0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#     [0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#     [0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#     [0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#     [0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],    
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],    
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],    
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0],
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0],
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0],
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0],
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0],
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2],
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5],
#     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0],
# ])

In [10]:
# for cname in class_names:
# class_vectors = dataset[cname]
# # convert all vectors to the cluster index
# # dataset['cname'] = [O^1, ... O^R]
# # O^r = (c1, c2, ... ct, ... cT)
# # O^r size T x 1
# dataset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in dataset[cname]])


# if cname[:4] != 'test':
#     X = np.concatenate(dataset[cname])
#     lengths = list([len(x) for x in dataset[cname]])
#     #       print("training class", cname)
#     #       print(X.shape, lengths, len(lengths))
#     hmm.fit(X, lengths=lengths)
#     models[cname] = hmm
#     with open(os.path.join("Models", cname + ".pkl"), "wb") as file: pk.dump(models[cname], file)

# #       print("Training done")

# Evaluate every "test_<word>" folder: an utterance counts as correct when the
# model with the highest log-likelihood is the one trained for <word>.
print(class_names)
print("Testing and Labeling")
for true_cname in class_names:
    if not true_cname.startswith("test"):
        continue
    print("==================================")
    print(true_cname)
    print("==================================")

    lname = true_cname[len("test_"):]  # expected label, e.g. "test_toi" -> "toi"
    totalWord = 0
    true = 0
    accuracy = 0

    for O in dataset[true_cname]:
        totalWord += 1
        # Log-likelihood of this observation sequence under each word model.
        scores = {
            cname: model.score(O, [len(O)])
            for cname, model in models.items()
            if not cname.startswith("test")
        }
        # With no model loaded nothing can be predicted; count it as a miss
        # instead of failing on an empty score table.
        if scores and max(scores, key=scores.get) == lname:
            true += 1
    # An empty test folder would otherwise raise ZeroDivisionError; accuracy
    # stays at 0 in that case and total_word: 0 makes the reason visible.
    if totalWord:
        accuracy = true / totalWord
    print("--------------------------------------------")
    print("!note: test_folder must contain wavs that it records exactly the word which be trained")
    print("accuracy: ", accuracy, ",true: ", true, ",total_word: ", totalWord)

['toi', 'dich', 'nguoi', 'benh_nhan', 'theo', 'test_toi', 'test_nguoi', 'test_dich', 'test_theo', 'test_benh_nhan']
Testing and Labeling
test_toi
--------------------------------------------
!note: test_folder must contain wavs that it records exactly the word which be trained
accuracy:  0.1485148514851485 ,true:  15 ,total_word:  101
test_nguoi
--------------------------------------------
!note: test_folder must contain wavs that it records exactly the word which be trained
accuracy:  0.8961038961038961 ,true:  69 ,total_word:  77
test_dich
--------------------------------------------
!note: test_folder must contain wavs that it records exactly the word which be trained
accuracy:  0.9714285714285714 ,true:  34 ,total_word:  35
test_theo
--------------------------------------------
!note: test_folder must contain wavs that it records exactly the word which be trained
accuracy:  0.8958333333333334 ,true:  43 ,total_word:  48
test_benh_nhan
--------------------------------------------
!n