In [85]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

In [504]:
def get_mfcc(file_path):
    """Extract a mean-normalised MFCC + delta feature matrix from an audio file.

    Parameters
    ----------
    file_path : str
        Path of the audio file; passed unchanged to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        ``T x 36`` array (one row per frame): 12 MFCCs followed by their
        first- and second-order deltas. Time-major because hmmlearn expects
        one observation per row.
    """
    y, sr = librosa.load(file_path)  # read .wav file
    hop_length = math.floor(sr * 0.010)  # 10 ms hop
    win_length = math.floor(sr * 0.025)  # 25 ms frame
    # mfcc is a 12 x T matrix. The signal and sample rate are passed by
    # keyword: recent librosa releases declare them keyword-only, so the
    # positional form ``mfcc(y, sr, ...)`` raises TypeError there.
    mfcc = librosa.feature.mfcc(
        y=y, sr=sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # subtract each coefficient's mean over time --> normalised mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1, keepdims=True)
    # delta features, 1st and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0)  # O^r
    # return T x 36 (transpose of X)
    return X.T

In [533]:
def get_class_data(data_dir):
    """Load the feature matrix of every ``.wav`` file directly inside ``data_dir``.

    Parameters
    ----------
    data_dir : str
        Directory holding the recordings of one class.

    Returns
    -------
    list
        One ``get_mfcc`` result per ``.wav`` file, ordered by file name.
    """
    # os.listdir returns entries in arbitrary order; sort so that anything
    # derived from list position (such as a positional train/test split) is
    # the same on every run and every machine.
    wav_files = sorted(f for f in os.listdir(data_dir) if f.endswith(".wav"))
    return [get_mfcc(os.path.join(data_dir, f)) for f in wav_files]

In [534]:
def clustering(X, n_clusters=14):
    """Fit a K-Means model on ``X`` and return the fitted estimator.

    Parameters
    ----------
    X : array-like
        Observations to cluster, one per row.
    n_clusters : int, optional
        Number of centroids to learn (default 14).

    Returns
    -------
    KMeans
        The estimator after ``fit``; the shape of its centres is printed.
    """
    settings = dict(
        n_clusters=n_clusters,
        n_init=100,       # 100 restarts, best run kept
        random_state=0,   # fixed seed for repeatable centroids
        verbose=0,
    )
    codebook = KMeans(**settings)
    codebook.fit(X)
    centers = codebook.cluster_centers_
    print("centers", centers.shape)
    return codebook

In [535]:
# Words to recognise; each one has its own sub-folder of recordings under hmm_data/.
class_names = ["nguoi", "duoc", "co_the", "khong", "benh_nhan"]
# word -> list of per-utterance feature matrices
dataset = {}

for cname in class_names:
    print(f"Load {cname} dataset")
    class_dir = os.path.join("hmm_data", cname)
    dataset[cname] = get_class_data(class_dir)

Load nguoi dataset
Load duoc dataset
Load co_the dataset
Load khong dataset
Load benh_nhan dataset


In [536]:
# Pool every frame of every utterance of every word into one (frames x features) matrix.
frames_per_class = [np.concatenate(utterances, axis=0) for utterances in dataset.values()]
all_vector = np.concatenate(frames_per_class, axis=0)
print("all_vector", all_vector.shape)
# Run K-Means on the pooled frames to get the clusters.
# NOTE(review): `dataset` still contains the utterances that are later held out
# for testing, so the codebook is fitted on test data as well - confirm intended.
kmeans = clustering(all_vector)


all_vector (15157, 36)
centers (14, 36)


In [537]:
# def get_start_config(cname):
# #     if cname == 'va' :
# #         startprob=np.array([0.7,0.2,0.1,0.0,0.0,0.0])
# #         transmat=np.array([
# #             [0.7,0.2,0.1,0.0,0.0,0.0],
# #             [0.0,0.7,0.2,0.1,0.0,0.0],
# #             [0.0,0.0,0.7,0.2,0.1,0.0],
# #             [0.0,0.0,0.0,0.7,0.2,0.1],
# #             [0.0,0.0,0.0,0.0,0.7,0.3],
# #             [0.0,0.0,0.0,0.0,0.0,1.0],

# #         ])
# #         return startprob, transmat
    
# #     if cname == 'cua' :
# #         startprob=np.array([0.7,0.2,0.1,0.0,0.0,0.0, 0.0,0.0,0.0])
# #         transmat=np.array([
# #             [0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0],
# #             [0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0],
# #             [0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0],
# #             [0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0],
# #             [0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0],
# #             [0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0],
# #             [0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1],
# #             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3],
# #             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
# #         ])
# #         return startprob, transmat
#     if cname == 'duoc':
#         startprob=np.array([0.6,0.2,0.1,0.1,0.0,0.0, 0.0,0.0,0.0])
#         transmat=np.array([
#             [0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
#         ])
#         return startprob, transmat
#     if cname == 'nguoi':
#         startprob=np.array([0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0])
#         transmat=np.array([
#             [0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
#         ])
#         return startprob, transmat
#     if cname == 'co_the':
#         startprob=np.array([0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0])
#         transmat=np.array([
#             [0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
#         ])
#         return startprob, transmat
#     if cname == 'khong':
#         startprob=np.array([0.6,0.2,0.1,0.1,0.0,0.0, 0.0,0.0,0.0])
#         transmat=np.array([
#             [0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.3,0.1],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
#         ])
#         return startprob, transmat
#     if cname == 'benh_nhan':
#         startprob=np.array([0.5,0.2,0.1,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0])
#         transmat=np.array([
#             [0.5,0.2,0.1,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.5,0.2,0.1,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.5,0.2,0.1,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.5,0.2,0.1,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.5,0.2,0.1,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.5,0.2,0.1,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.2,0.1,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.2,0.1,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.2,0.1,0.1,0.1,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.2,0.1,0.1,0.1,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.2,0.1,0.1,0.1,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.2,0.1,0.1,0.1,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.2,0.1,0.1,0.1,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.2,0.1,0.1,0.1],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
#         ])
#         return startprob, transmat
#     return None

In [538]:
# def get_start_config(cname):
# #     if cname == 'va' :
# #         startprob=np.array([0.7,0.2,0.1,0.0,0.0,0.0])
# #         transmat=np.array([
# #             [0.7,0.2,0.1,0.0,0.0,0.0],
# #             [0.0,0.7,0.2,0.1,0.0,0.0],
# #             [0.0,0.0,0.7,0.2,0.1,0.0],
# #             [0.0,0.0,0.0,0.7,0.2,0.1],
# #             [0.0,0.0,0.0,0.0,0.7,0.3],
# #             [0.0,0.0,0.0,0.0,0.0,1.0],

# #         ])
# #         return startprob, transmat
    
# #     if cname == 'cua' :
# #         startprob=np.array([0.7,0.2,0.1,0.0,0.0,0.0, 0.0,0.0,0.0])
# #         transmat=np.array([
# #             [0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0],
# #             [0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0],
# #             [0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0],
# #             [0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0],
# #             [0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0],
# #             [0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0],
# #             [0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1],
# #             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3],
# #             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
# #         ])
# #         return startprob, transmat
#     if cname == 'duoc':
#         startprob=np.array([0.6,0.2,0.1,0.1,0.0,0.0, 0.0,0.0,0.0])
#         transmat=np.array([
#             [0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
#         ])
#         return startprob, transmat
#     if cname == 'nguoi':
#         startprob=np.array([0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0])
#         transmat=np.array([
#             [0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
#         ])
#         return startprob, transmat
#     if cname == 'co_the':
#         startprob=np.array([0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0])
#         transmat=np.array([
#             [0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
#         ])
#         return startprob, transmat
#     if cname == 'khong':
#         startprob=np.array([0.6,0.2,0.1,0.1,0.0,0.0, 0.0,0.0,0.0])
#         transmat=np.array([
#             [0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.3,0.1],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
#         ])
#         return startprob, transmat
#     if cname == 'benh_nhan':
#         startprob=np.array([0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0])
#         transmat=np.array([
#             [0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.2,0.1,0.1],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3],
#             [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
#         ])
#         return startprob, transmat
#     return None

In [539]:
# Transition rows shared by every word model for its last three states:
# third-from-last, second-from-last and the absorbing final state.
_FINAL_ROWS = ((0.7, 0.2, 0.1), (0.7, 0.3), (1.0,))

# cname -> (number of states, leading start probabilities, transition band).
# The band holds the probabilities of staying in a state and of moving to the
# next one, two (or three) states; it is repeated along the diagonal for every
# state except the last three.
_START_CONFIGS = {
    'va':        (6,  (0.7, 0.2, 0.1), (0.7, 0.2, 0.1)),
    'duoc':      (9,  (0.5, 0.4, 0.1), (0.5, 0.4, 0.1)),
    'nguoi':     (12, (0.5, 0.4, 0.1), (0.5, 0.4, 0.1)),
    'co_the':    (12, (0.7, 0.2, 0.1), (0.7, 0.2, 0.1)),
    'khong':     (9,  (0.5, 0.4, 0.1), (0.5, 0.4, 0.1)),
    # NOTE(review): unlike the other words, the start probabilities here do
    # not match the transition band - kept as in the original; confirm intended.
    'benh_nhan': (18, (0.7, 0.2, 0.1), (0.6, 0.2, 0.1, 0.1)),
}


def _left_to_right_config(n_states, start, band):
    """Build the start vector and upper-triangular transition matrix of a left-to-right HMM."""
    startprob = np.zeros(n_states)
    startprob[:len(start)] = start

    transmat = np.zeros((n_states, n_states))
    n_band_rows = n_states - len(_FINAL_ROWS)
    for row in range(n_band_rows):
        transmat[row, row:row + len(band)] = band
    # The last three rows have too few columns left for the band, so they use
    # the fixed rows above (each fills exactly the remaining columns).
    for row, probs in enumerate(_FINAL_ROWS, start=n_band_rows):
        transmat[row, row:] = probs
    return startprob, transmat


def get_start_config(cname):
    """Return the initial HMM parameters for the word ``cname``.

    Parameters
    ----------
    cname : str
        Word label: 'va', 'duoc', 'nguoi', 'co_the', 'khong' or 'benh_nhan'.

    Returns
    -------
    tuple of numpy.ndarray, or None
        ``(startprob, transmat)`` with shapes ``(n,)`` and ``(n, n)``, freshly
        allocated on every call. ``None`` when ``cname`` has no configuration
        (this includes 'cua', whose configuration is disabled).
    """
    spec = _START_CONFIGS.get(cname)
    if spec is None:
        return None
    return _left_to_right_config(*spec)

In [540]:
# Positional hold-out split: the first 70% of each word's utterances are used
# for training, the remainder for testing (no shuffling).
trainset = {}
testset = {}

for cname in class_names:
    utterances = dataset[cname]
    n = len(utterances)
    ntrain = math.floor(n*0.7)
    trainset[cname], testset[cname] = utterances[:ntrain], utterances[ntrain:]

In [541]:
def get_ncomponent(cname) :
    """Return the base component count configured for the word ``cname``.

    The training cell multiplies this value by 3 to get the number of HMM
    states (presumably one component per phoneme-like unit - TODO confirm).
    Returns ``None`` for a word that is not listed.
    """
    components_by_word = {
        'va': 2,
        'cua': 3,
        'duoc': 3,
        'nguoi': 4,
        'khong': 3,
        'benh_nhan': 6,
        'co_the': 4,
    }
    return components_by_word.get(cname)

In [542]:
# Train one discrete-observation HMM per word on K-Means symbol sequences.

# Newer hmmlearn releases provide the one-symbol-per-frame model as
# CategoricalHMM and give MultinomialHMM a different meaning; older releases
# only have MultinomialHMM. Use whichever matches the installed version.
_DiscreteHMM = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)


def _to_symbols(sequences):
    """Map each feature matrix to a (T, 1) column of K-Means cluster indices.

    Sequences that already have an integer dtype are returned unchanged, so
    re-running this cell does not feed symbol columns back into
    ``kmeans.predict``.
    """
    return [
        seq if np.issubdtype(seq.dtype, np.integer)
        else kmeans.predict(seq).reshape(-1, 1)
        for seq in sequences
    ]


models = {}
for cname in class_names:
    # trainset/testset are overwritten with their quantised form because the
    # evaluation cells read the symbol sequences from these same dicts.
    trainset[cname] = _to_symbols(trainset[cname])
    testset[cname] = _to_symbols(testset[cname])
    startprob_, transmat_ = get_start_config(cname)
    n_components = get_ncomponent(cname)
    print(n_components)
    print(transmat_.shape)
    # init_params='e': only the emission matrix is initialised by hmmlearn, so
    # the hand-built start/transition values below are kept as the starting
    # point; params='ste': all three are re-estimated during fitting.
    model = _DiscreteHMM(
        n_components=n_components*3, random_state=0, n_iter=1000, verbose=True,
        init_params='e', params='ste')
    model.startprob_ = startprob_
    model.transmat_ = transmat_
    # hmmlearn takes all sequences stacked into one array plus their lengths
    X = np.concatenate(trainset[cname])
    lengths = [len(x) for x in trainset[cname]]
    print("training class", cname)
    print(X.shape, lengths, len(lengths))
    model.fit(X, lengths=lengths)
    models[cname] = model

print("Training done")

4
(12, 12)
training class nguoi
(1781, 1) [22, 17, 39, 29, 24, 30, 17, 36, 22, 30, 20, 13, 27, 20, 19, 18, 36, 26, 20, 23, 30, 21, 17, 25, 34, 19, 40, 20, 49, 41, 53, 33, 18, 24, 16, 21, 28, 26, 28, 41, 43, 27, 26, 32, 28, 18, 19, 28, 20, 14, 20, 16, 27, 21, 42, 19, 18, 21, 19, 21, 15, 19, 21, 21, 22, 30, 23, 29, 23, 27] 70


         1       -4595.8534             +nan
         2       -3460.9060       +1134.9474
         3       -3186.4809        +274.4251
         4       -3005.2014        +181.2795
         5       -2855.6302        +149.5712
         6       -2747.1311        +108.4991
         7       -2673.7763         +73.3548
         8       -2628.7302         +45.0461
         9       -2596.0412         +32.6890
        10       -2566.3031         +29.7381
        11       -2547.2984         +19.0047
        12       -2525.2574         +22.0410
        13       -2504.9230         +20.3344
        14       -2492.0413         +12.8817
        15       -2479.2782         +12.7632
        16       -2473.2732          +6.0050
        17       -2469.4080          +3.8652
        18       -2465.5448          +3.8632
        19       -2461.4595          +4.0854
        20       -2458.4051          +3.0544
        21       -2455.9166          +2.4885
        22       -2452.1813          +3.7353
        23

3
(9, 9)
training class duoc
(1104, 1) [13, 23, 15, 21, 17, 14, 24, 20, 13, 16, 10, 41, 15, 15, 21, 17, 18, 20, 21, 17, 13, 16, 17, 15, 13, 16, 16, 16, 10, 20, 19, 12, 17, 14, 14, 19, 12, 14, 15, 14, 23, 17, 17, 22, 15, 21, 14, 13, 13, 15, 19, 17, 15, 14, 12, 15, 17, 17, 32, 24, 19, 18, 16, 14, 12] 65


         3       -2004.0692        +265.4976
         4       -1913.4981         +90.5711
         5       -1880.5499         +32.9482
         6       -1863.3514         +17.1985
         7       -1852.5051         +10.8463
         8       -1845.2948          +7.2103
         9       -1840.4460          +4.8488
        10       -1836.9679          +3.4781
        11       -1834.2179          +2.7501
        12       -1831.8168          +2.4011
        13       -1829.5810          +2.2357
        14       -1827.4506          +2.1304
        15       -1825.3898          +2.0608
        16       -1823.3341          +2.0557
        17       -1821.2175          +2.1166
        18       -1819.0686          +2.1490
        19       -1817.0710          +1.9976
        20       -1815.4336          +1.6374
        21       -1814.2160          +1.2175
        22       -1813.3483          +0.8678
        23       -1812.7348          +0.6134
        24       -1812.3015          +0.4334
        25

4
(12, 12)
training class co_the
(2720, 1) [51, 27, 62, 36, 28, 25, 38, 43, 39, 27, 30, 44, 45, 34, 52, 25, 22, 53, 40, 48, 43, 45, 35, 37, 57, 20, 36, 36, 36, 30, 23, 26, 53, 37, 41, 34, 36, 31, 26, 56, 25, 47, 39, 35, 25, 30, 39, 28, 34, 30, 36, 41, 37, 56, 47, 30, 37, 47, 42, 34, 37, 26, 39, 46, 40, 36, 52, 35, 45, 35, 44, 39] 72


         1       -7142.8009             +nan
         2       -5651.0841       +1491.7168
         3       -5093.7874        +557.2967
         4       -4798.9196        +294.8678
         5       -4617.2175        +181.7021
         6       -4451.0212        +166.1962
         7       -4339.5251        +111.4962
         8       -4280.8553         +58.6697
         9       -4233.3763         +47.4790
        10       -4181.4469         +51.9295
        11       -4132.9070         +48.5399
        12       -4098.0313         +34.8757
        13       -4080.6969         +17.3344
        14       -4074.7624          +5.9345
        15       -4072.3751          +2.3873
        16       -4070.7563          +1.6188
        17       -4069.2726          +1.4838
        18       -4067.7856          +1.4869
        19       -4066.4496          +1.3360
        20       -4060.4814          +5.9683
        21       -4045.3105         +15.1708
        22       -4043.8034          +1.5072
        23

3
(9, 9)
training class khong
(2230, 1) [27, 36, 30, 20, 35, 24, 23, 25, 32, 26, 22, 37, 42, 32, 44, 28, 20, 28, 46, 42, 49, 34, 31, 31, 31, 42, 42, 17, 28, 35, 24, 38, 47, 26, 24, 43, 45, 24, 37, 27, 28, 27, 43, 21, 28, 26, 35, 33, 24, 40, 21, 31, 55, 31, 27, 30, 64, 24, 28, 27, 26, 23, 30, 30, 30, 30, 37, 21, 29, 37] 70


         1       -5986.0980             +nan
         2       -4701.4501       +1284.6479
         3       -4200.2617        +501.1884
         4       -3985.7669        +214.4948
         5       -3863.5236        +122.2434
         6       -3798.5872         +64.9363
         7       -3763.8547         +34.7325
         8       -3741.1545         +22.7002
         9       -3727.0857         +14.0688
        10       -3718.7361          +8.3496
        11       -3713.2603          +5.4757
        12       -3708.7776          +4.4827
        13       -3697.1920         +11.5857
        14       -3687.8297          +9.3623
        15       -3673.2012         +14.6285
        16       -3650.9420         +22.2592
        17       -3636.8320         +14.1100
        18       -3628.7454          +8.0866
        19       -3625.8433          +2.9021
        20       -3625.0200          +0.8233
        21       -3624.6159          +0.4041
        22       -3624.3472          +0.2687
        23

6
(18, 18)
training class benh_nhan
(2761, 1) [38, 29, 41, 52, 55, 44, 44, 45, 32, 37, 45, 45, 34, 38, 45, 28, 30, 41, 43, 32, 32, 41, 32, 31, 30, 34, 26, 45, 30, 45, 44, 30, 33, 40, 47, 37, 35, 30, 42, 29, 43, 42, 43, 31, 42, 41, 44, 47, 43, 34, 41, 47, 33, 41, 54, 35, 62, 28, 39, 51, 45, 52, 35, 41, 44, 40, 41, 44, 32, 35] 70


         1       -7257.4060             +nan
         2       -5652.2605       +1605.1455
         3       -4943.6552        +708.6053
         4       -4602.0984        +341.5569
         5       -4449.8874        +152.2109
         6       -4365.3119         +84.5755
         7       -4317.1722         +48.1397
         8       -4277.6139         +39.5584
         9       -4234.0139         +43.6000
        10       -4188.4775         +45.5364
        11       -4158.5484         +29.9291
        12       -4129.7704         +28.7780
        13       -4106.4409         +23.3295
        14       -4094.8016         +11.6393
        15       -4085.5723          +9.2292
        16       -4076.0060          +9.5663
        17       -4065.1617         +10.8444
        18       -4052.5489         +12.6127
        19       -4039.0293         +13.5196
        20       -4028.3700         +10.6594
        21       -4020.9042          +7.4657
        22       -4016.3531          +4.5511
        23

Training done


        69       -4003.1306          +0.0098


In [543]:

# Classify every held-out utterance with the model that scores it highest.
print("Testing")
true_names = ["duoc", "nguoi", "co_the", "benh_nhan", "khong"]
# word -> number of its test utterances that were recognised correctly
prid_correct = {name: 0 for name in true_names}
for true_cname in true_names:
    for O in testset[true_cname]:
        # score of this utterance under each word's model
        score = {cname: model.score(O) for cname, model in models.items()}
        predicted = max(score, key=score.get)
        if predicted == true_cname:
            prid_correct[true_cname] += 1
        print(true_cname, ' predict ', predicted)

Testing
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  benh_nhan
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  co_the
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  duoc
duoc  predict  co_the
duoc  predict  duoc
duoc  predict  duoc
nguoi  predict  co_the
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  co_the
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  benh_nhan
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi
nguoi  predict  nguoi

In [546]:
# Per-word accuracy and its unweighted mean over the evaluated words.
avg = 0
for true_cname in true_names:
    class_accuracy = prid_correct[true_cname]/len(testset[true_cname])
    avg += class_accuracy
    print (true_cname, " : ", class_accuracy)
# divide by the number of evaluated words rather than a hard-coded 5
print('avg = ', avg/len(true_names))

duoc  :  0.8928571428571429
nguoi  :  0.8
co_the  :  0.967741935483871
benh_nhan  :  0.9
khong  :  0.8
avg =  0.8721198156682028


In [545]:
# Unweighted mean of the per-word accuracies.
avg = 0
for true_cname in true_names:
    avg += (prid_correct[true_cname]/len(testset[true_cname]))
# divide by the number of evaluated words rather than a hard-coded 5
print('avg = ', avg/len(true_names))

avg =  0.8721198156682028


In [532]:
# import threading
# import pyaudio
# import wave

# class App():
#     chunk = 1024 
#     sample_format = pyaudio.paInt16 
#     channels = 2
#     fs = 44100  
    
#     frames = []  
#     def __init__(self, master):
#         self.isrecording = False
#         self.button1 = tk.Button(main, text='rec',command=self.startrecording)
#         self.button2 = tk.Button(main, text='stop',command=self.stoprecording)
      
#         self.button1.pack()
#         self.button2.pack()

#     def startrecording(self):
#         self.p = pyaudio.PyAudio()  
#         self.stream = self.p.open(format=self.sample_format,channels=self.channels,rate=self.fs,frames_per_buffer=self.chunk,input=True)
#         self.isrecording = True
        
#         print('Recording')
#         t = threading.Thread(target=self.record)
#         t.start()

#     def stoprecording(self):
#         self.isrecording = False
#         print('recording complete')
#         self.filename=input('the filename?')
#         self.filename = self.filename+".wav"
#         wf = wave.open(self.filename, 'wb')
#         wf.setnchannels(self.channels)
#         wf.setsampwidth(self.p.get_sample_size(self.sample_format))
#         wf.setframerate(self.fs)
#         wf.writeframes(b''.join(self.frames))
#         wf.close()
#         main.destroy()
#     def record(self):
       
#         while self.isrecording:
#             data = self.stream.read(self.chunk)
#             self.frames.append(data)

# main = tk.Tk()
# main.title('recorder')
# main.geometry('500x500')
# app = App(main)
# main.mainloop()