In [29]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm
import random

# Root folder of the audio data; the loading cell joins each class name onto
# this path and reads the .wav files found in that sub-directory.
path_to_data = "./Data_Filtered"

In [30]:
def get_mfcc(file_path):
    """Compute mean-normalized MFCCs plus first/second-order deltas for one file.

    Parameters
    ----------
    file_path : str
        Path of the audio file, loaded with ``librosa.load`` (its default
        resampling settings are used).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(T, 36)``: one row per frame, holding 12 MFCCs,
        12 deltas and 12 delta-deltas. Time is the first axis because
        hmmlearn expects samples as rows.
    """
    y, sr = librosa.load(file_path)  # read .wav file
    hop_length = math.floor(sr * 0.010)  # 10 ms hop
    win_length = math.floor(sr * 0.025)  # 25 ms analysis window
    # mfcc is a 12 x T matrix. The signal and sample rate are passed by
    # keyword: recent librosa releases no longer accept them positionally.
    mfcc = librosa.feature.mfcc(
        y=y, sr=sr, n_mfcc=12, n_fft=2048,
        hop_length=hop_length, win_length=win_length)
    # Subtract the per-coefficient mean over time (cepstral mean normalization).
    mfcc = mfcc - np.mean(mfcc, axis=1, keepdims=True)
    # First- and second-order delta features.
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0)  # O^r
    # Return T x 36 (transpose of X).
    return X.T

def get_class_data(data_dir, seed=None):
    """Load the features of every ``.wav`` file in ``data_dir`` in shuffled order.

    Parameters
    ----------
    data_dir : str
        Directory that holds the recordings of one class.
    seed : int or None, optional
        When given, the shuffle uses a private ``random.Random(seed)`` so the
        order (and therefore any split taken from the tail of the list) is
        reproducible. When ``None`` (default) the module-level ``random``
        generator is used, as before.

    Returns
    -------
    list
        One ``get_mfcc`` result per ``.wav`` file; empty if there are none.
    """
    # os.listdir returns entries in arbitrary order; sort first so the shuffled
    # order depends only on the random generator state, not on the filesystem.
    files = sorted(f for f in os.listdir(data_dir) if f.endswith(".wav"))
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(files)
    return [get_mfcc(os.path.join(data_dir, f)) for f in files]

def clustering(X, n_clusters=15):
    """Fit a K-Means codebook on ``X`` and return the fitted estimator.

    The estimator is built with 50 initializations and a fixed random state
    of 0; the shape of the resulting cluster centers is printed.
    """
    codebook = KMeans(
        n_clusters=n_clusters,
        n_init=50,
        random_state=0,
        verbose=0,
    ).fit(X)
    print("centers", codebook.cluster_centers_.shape)
    return codebook

In [31]:
class_names = ["ThanhPho", "Nha", "Me", "YTe", "Hoc"]

# Number of (shuffled) recordings per class that are held out for testing.
N_TEST_PER_CLASS = 20

# datas[<class>] holds the training feature matrices of a class and
# datas["test_<class>"] the held-out ones.
datas = {}
# dataset is filled later with the quantized (cluster-index) sequences.
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    features = get_class_data(os.path.join(path_to_data, cname))
    # Without this check a small class would end up with an empty training
    # list and only fail later, inside np.concatenate, with an unclear error.
    if len(features) <= N_TEST_PER_CLASS:
        raise ValueError(
            f"class {cname!r} has {len(features)} recordings; "
            f"more than {N_TEST_PER_CLASS} are needed to hold out a test set"
        )
    datas[f"test_{cname}"] = features[-N_TEST_PER_CLASS:]
    datas[cname] = features[:-N_TEST_PER_CLASS]

print("Done!!!")

Load ThanhPho dataset
143
Load Nha dataset
158
Load Me dataset
163
Load YTe dataset
163
Load Hoc dataset
150
Done!!!


In [32]:
# Stack the frames of every training recording (keys starting with "test"
# are the held-out sets and are left out of the codebook).
train_frames = [
    np.concatenate(sequences, axis=0)
    for key, sequences in datas.items()
    if not key.startswith("test")
]
all_vectors = np.concatenate(train_frames, axis=0)
print("vectors", all_vectors.shape)
# Fit the K-Means codebook on the pooled training frames.
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)
print("Done")

vectors (95233, 36)
centers (15, 36)
centers (15, 36)
Done


In [40]:
# Number of hidden states per word model (3 states per phoneme).
dict_components = {
    #   tʰa̤jŋ˨˩ fo˧˥ -> 5 phonemes -> 15 states
    "ThanhPho": 15,
    #  mɛ̰ʔ˨˩ -> 2 phonemes -> nominally 6 states.
    # NOTE(review): the "Me" matrix in dict_transmat is 7 x 7, so 7 is used
    # here to keep the two consistent (a 6/7 mismatch makes training fail).
    # Confirm whether 6 states with a 6 x 6 matrix was intended instead.
    "Me": 7,
    #  i˧˧ te˧˥ -> 3 phonemes -> 9 states
    "YTe": 9,
    #  ha̰ʔwk˨ -> 3 phonemes -> 9 states
    "Hoc": 9,
    #  ɲa̤ː˨˩ -> 3 phonemes -> 9 states
    "Nha": 9,
} 

# Per-class square matrices handed to the HMM as `transmat_prior`.
# Rows are not exactly normalized (several sum to 0.9 or 1.1).
dict_transmat = {
    "ThanhPho": [[0.,  0. , 0.,  0.7, 0.,  0. , 0.,  0.3, 0. , 0.,  0. , 0. , 0. , 0. , 0. ,],
                 [0.,  0.9, 0.,  0. , 0.,  0. , 0.,  0. , 0. , 0.,  0. , 0. , 0. , 0. , 0. ,],
                 [0.,  0. , 1.,  0. , 0.,  0. , 0.,  0. , 0. , 0.,  0. , 0. , 0. , 0. , 0. ,],
                 [0.,  0. , 0.,  1. , 0.,  0. , 0.,  0. , 0. , 0.,  0. , 0. , 0. , 0. , 0. ,],
                 [0.,  0. , 0.,  0. , 1.,  0. , 0.,  0. , 0. , 0.,  0. , 0. , 0. , 0. , 0. ,],
                 [0.,  0. , 0.,  0. , 0.,  0.9, 0.,  0. , 0. , 0.,  0. , 0.1, 0. , 0. , 0. ,],
                 [0.,  0. , 0.,  0. , 0.,  0. , 1.,  0. , 0. , 0.,  0. , 0. , 0. , 0. , 0. ,],
                 [0.,  0. , 0.,  0.4, 0.,  0. , 0.,  0.3, 0. , 0.,  0. , 0. , 0. , 0.2, 0. ,],
                 [0.,  0. , 0.,  0. , 0.,  0. , 0.,  0. , 0.2, 0.,  0.8, 0. , 0. , 0. , 0. ,],
                 [0.,  0. , 0.,  0. , 0.,  0. , 0.,  0. , 0. , 1.,  0. , 0. , 0. , 0. , 0. ,],
                 [0.,  0. , 0.,  0.2, 0.,  0. , 0.,  0. , 0. , 0.,  0.8, 0. , 0. , 0. , 0. ,],
                 [0.,  0. , 0.,  0. , 0.,  0. , 0.,  0. , 0. , 0.,  0.1, 0.9, 0. , 0. , 0. ,],
                 [0.,  0. , 0.,  0. , 0.,  0. , 0.,  0. , 0. , 0.,  0. , 0. , 0.9, 0. , 0. ,],
                 [0.,  0. , 0.,  0. , 0.,  0.1, 0.,  0. , 0. , 0.,  0. , 0. , 0. , 0.8, 0.1,],
                 [0.,  0. , 0.,  0. , 0.,  0.1, 0.,  0. , 0. , 0.,  0. , 0. , 0. , 0. , 0.8,],],
    # Disabled alternative 15 x 15 matrix for "ThanhPho", kept for reference:
#     [[0.84, 0.0, 0.057, 0.00084, 0.0, 0.0, 0.0, 0.029, 0.0, 0.0, 0.064, 0.0, 0.0, 0.013, 0.0],
#                  [0.0, 0.96, 0.0, 0.0, 0.0028, 0.0, 0.0064, 0.0, 0.0, 0.0, 0.012, 0.0, 0.022, 0.0, 0.0],
#                  [0.068, 0.27, 0.19, 0.0, 0.0, 0.0, 0.021, 0.15, 0.0, 0.0, 0.12, 0.0, 0.19, 0.0, 0.0],
#                  [0.035, 0.0, 0.0, 0.96, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0065, 0.0, 0.0, 0.0],
#                  [0.0, 0.0019, 0.010, 0.0, 0.99, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#                  [0.021, 0.0, 0.0, 0.0, 0.0, 0.95, 0.0, 0.0, 0.03, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#                  [0.0, 0.0015, 0.012, 0.0, 0.0, 0.0, 0.99, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#                  [0.034, 0.0, 0.012, 0.032, 0.0, 0.0, 0.0, 0.72, 0.0, 0.0, 0.0, 0.0, 0.0, 0.022, 0.18],
#                  [0.0, 0.0, 0.0, 0.0, 0.0, 0.065, 0.0, 0.0, 0.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.037],
#                  [0.0, 0.0, 0.0, 0.04, 0.072, 0.0, 0.0, 0.0, 0.0, 0.041, 0.0, 0.0, 0.0, 0.0, 0.85],
#                  [0.0, 0.032, 0.0041, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.84, 0.0, 0.12, 0.0, 0.0],
#                  [0.0, 0.0, 0.0, 0.0016, 0.0, 0.0, 0.0, 0.0, 0.016, 0.0074, 0.0, 0.087, 0.0, 0.0, 0.1],
#                  [0.0, 0.0, 0.07, 0.0, 0.002, 0.0049, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.81, 0.11, 0.0],
#                  [0.046, 0.0, 0.0093, 0.0, 0.0, 0.0, 0.0, 0.067, 0.0, 0.0, 0.0, 0.0, 0.0, 0.92, 0.0],
#                  [0.088, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.24, 0.0, 0.0, 0.58, 0.0, 0.0, 0.3],],

    "Nha":  [[0.,  0. , 0.6, 0.4, 0. , 0. , 0. , 0.,  0. ,],
             [0.,  0.7, 0. , 0. , 0.2, 0.1, 0. , 0.,  0. ,],
             [0.,  0. , 0.9, 0. , 0. , 0. , 0. , 0.,  0.1,],
             [0.,  0. , 0. , 1. , 0. , 0. , 0. , 0.,  0. ,],
             [0.,  0. , 0. , 0. , 1. , 0. , 0. , 0.,  0. ,],
             [0.,  0. , 0. , 0. , 0. , 0.9, 0. , 0.,  0. ,],
             [0.,  0. , 0. , 0. , 0. , 0. , 0.9, 0.,  0.1,],
             [0.,  0. , 0. , 0. , 0. , 0. , 0. , 1.,  0. ,],
             [0.,  0. , 0.1, 0. , 0. , 0. , 0. , 0.,  0.8,],],


    "Me":   [[0.4, 0.4,  0.3, 0.,  0.,  0.,  0., ],
             [0.,  0.7, 0.,  0.3, 0.,  0.,  0., ],
             [0.1, 0.4,  0.5, 0.,  0.,  0.,  0., ],
             [0.,  0.6, 0.,  0.4, 0.,  0.,  0., ],
             [0.,  0.,  0.,  0.,  0.9, 0.,  0.1,],
             [0.,  0.,  0.,  0.,  0.,  1.,  0., ],
             [0.,  0.,  0.,  0.,  0.,  0.1, 0.9,]],
 
    "YTe":  [[0.8, 0. , 0.,  0.1, 0. , 0. , 0. , 0. , 0. ,],
             [0. , 0.2, 0.,  0. , 0.6, 0. , 0.1, 0. , 0. ,],
             [0. , 0. , 0.,  0. , 0.2, 0.4, 0. , 0.3, 0. ,],
             [0.1, 0. , 0.,  0.9, 0. , 0. , 0. , 0. , 0. ,],
             [0. , 0.1, 0.,  0. , 0.7, 0. , 0.1, 0. , 0. ,],
             [0. , 0. , 0.,  0. , 0. , 0.9, 0. , 0. , 0. ,],
             [0. , 0.1, 0.,  0. , 0.6, 0. , 0.2, 0. , 0. ,],
             [0. , 0. , 0.,  0. , 0. , 0.1, 0. , 0.9, 0. ,],
             [0. , 0. , 0.,  0. , 0. , 0. , 0. , 0. , 0.9,],],
 
    "Hoc":  [[0.5, 0.,  0.3, 0.1, 0. , 0. , 0.1, 0.1, 0. ,],
             [0. , 1.,  0. , 0. , 0. , 0. , 0. , 0. , 0. ,],
             [0.3, 0.,  0.5, 0. , 0. , 0. , 0.2, 0.1, 0. ,],
             [0. , 0.,  0. , 0.9, 0. , 0. , 0. , 0.1, 0. ,],
             [0. , 0.,  0. , 0. , 0.9, 0. , 0. , 0. , 0.1,],
             [0. , 0.,  0. , 0. , 0.1, 0.8, 0.1, 0. , 0. ,],
             [0. , 0.,  0.1, 0. , 0. , 0. , 0.9, 0. , 0. ,],
             [0.3, 0.,  0.1, 0. , 0. , 0.1, 0. , 0.4, 0. ,],
             [0. , 0.,  0. , 0. , 0. , 0.2, 0. , 0. , 0.8,],],
}

# Fail fast if a matrix does not match the declared number of states.
for _cname, _n_states in dict_components.items():
    _prior = dict_transmat[_cname]
    assert len(_prior) == _n_states and all(len(_row) == _n_states for _row in _prior), (
        f"transition matrix for {_cname} must be {_n_states} x {_n_states}")

In [None]:
models = {}

# Discrete-symbol HMM class. Newer hmmlearn releases provide the
# one-symbol-per-frame model as CategoricalHMM and give MultinomialHMM a
# different meaning; fall back to MultinomialHMM on releases without it.
_DiscreteHMM = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)

km = kmeans
for cname in class_names:
    # Convert every feature matrix to a sequence of cluster indices.
    # dataset[cname] = [O^1, ... O^R]
    # O^r = (c1, c2, ... ct, ... cT), stored as a T x 1 column
    dataset[cname] = [km.predict(v).reshape((-1, 1)) for v in datas[cname]]
    dataset[f"test_{cname}"] = [
        km.predict(v).reshape((-1, 1)) for v in datas[f"test_{cname}"]
    ]

    # Held-out entries are only quantized, never trained on.
    if cname.startswith("test"):
        continue

    n = dict_components[cname]
    transmat = np.array(dict_transmat[cname])
    # A size mismatch would otherwise only surface as a broadcasting error
    # in the middle of training.
    if transmat.shape != (n, n):
        raise ValueError(
            f"dict_transmat[{cname!r}] has shape {transmat.shape}, "
            f"expected ({n}, {n}) from dict_components"
        )
    startprob = np.zeros(n)
    startprob[0] = 1
    #transmat=np.diag(np.full(n,1))

    # NOTE(review): hmmlearn documents startprob_prior / transmat_prior as
    # Dirichlet prior parameters, not as initial distributions. If the intent
    # is to start EM from these values, assign hmm.startprob_ / hmm.transmat_
    # and drop the matching letters from init_params -- confirm.
    hmm = _DiscreteHMM(
        n_components=n, random_state=0, n_iter=1000, tol=0.03, verbose=True,
        startprob_prior=startprob,
        transmat_prior=transmat,
        init_params='te', params='ste'
    )

    # hmmlearn takes all sequences stacked into one array plus their lengths.
    X = np.concatenate(dataset[cname])
    lengths = [len(x) for x in dataset[cname]]
    print("training class", cname)
    print(X.shape, lengths, len(lengths))
    hmm.fit(X, lengths=lengths)
    models[cname] = hmm
print("Training done")

training class ThanhPho
(19428, 1) [181, 133, 202, 192, 161, 140, 144, 163, 102, 159, 133, 110, 132, 97, 162, 175, 171, 167, 167, 103, 189, 204, 130, 176, 148, 233, 197, 192, 166, 207, 152, 170, 132, 171, 161, 176, 131, 137, 217, 151, 160, 152, 184, 161, 141, 154, 127, 74, 192, 108, 121, 145, 134, 133, 162, 186, 121, 141, 197, 81, 159, 77, 126, 191, 149, 121, 111, 173, 130, 159, 161, 228, 166, 100, 162, 217, 147, 137, 149, 90, 166, 156, 166, 168, 192, 156, 272, 136, 147, 135, 142, 142, 194, 148, 181, 128, 89, 159, 126, 132, 126, 158, 149, 238, 161, 166, 152, 202, 167, 220, 204, 220, 181, 145, 173, 97, 186, 175, 194, 242, 154, 194, 128] 123


         1      -53711.4937             +nan
         2      -45071.2090       +8640.2846
         3      -44481.3475        +589.8615
         4      -42309.6428       +2171.7047
         5      -36077.2398       +6232.4030
         6      -28328.3294       +7748.9104
         7      -23097.6053       +5230.7240
         8      -21655.9316       +1441.6738
         9      -20906.7964        +749.1351
        10      -20580.7442        +326.0523
        11      -20117.9024        +462.8418
        12      -19089.0953       +1028.8071
        13      -17484.8137       +1604.2816
        14      -17092.8985        +391.9153
        15      -16969.7490        +123.1494
        16      -16904.6194         +65.1297
        17      -16848.1731         +56.4463
        18      -16808.6345         +39.5386
        19      -16784.1359         +24.4985
        20      -16767.3786         +16.7573
        21      -16728.6462         +38.7324
        22      -16621.7156        +106.9306
        23

In [None]:
def max_score(score):
    """Return the key of ``score`` whose value is largest.

    Parameters
    ----------
    score : mapping
        Maps a label to a comparable score.

    Returns
    -------
    The key with the highest score. On ties the key that comes first in
    iteration order wins. For an empty mapping the string ``"None"`` is
    returned, as before.
    """
    # The earlier loop used the string "None" as its "nothing chosen yet"
    # marker, so a real key named "None" was mistaken for the marker and
    # replaced by the next key whatever its score. max() needs no marker.
    return max(score, key=lambda label: score[label], default="None")
    
print("Testing")
# percent["test_<class>"] -> "<correct>/<total>" for that held-out set
percent = {}
for cname in class_names:
    true_cname = f"test_{cname}"
    n_total = len(datas[true_cname])
    print(true_cname, n_total)

    n_correct = 0
    for obs in dataset[true_cname]:
        # Log-likelihood of the sequence under every trained word model.
        score = {
            label: round(candidate.score(obs, [len(obs)]), 3)
            for label, candidate in models.items()
        }
        # true_cname is "test_" + cname, so the expected label is cname.
        is_correct = max_score(score) == cname
        n_correct += int(is_correct)
        print(true_cname, score, is_correct)
    print()
    percent[true_cname] = f"{n_correct}/{n_total}"


for test_name, ratio in percent.items():
    print(test_name, ratio)

In [39]:
# Show one decimal place and no scientific notation so the matrices stay readable.
np.set_printoptions(precision=1, suppress=True)
# Print the learned transition matrix of every trained model.
for class_label, trained_model in models.items():
    print(class_label, trained_model.transmat_)

ThanhPho [[0.  0.  0.8 0.  0.  0.  0.2 0.  0.  0.  0.  0.  0.  0.  0. ]
 [0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.2 0.  0.  0.  0.  0.  0.  0.2 0.5 0.  0.  0.  0. ]
 [0.  0.  0.  0.9 0.  0.  0.  0.1 0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.9 0.  0.1 0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.3 0.  0.  0.  0.  0.7 0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.8 0.  0.  0.  0.1 0. ]
 [0.  0.  0.1 0.  0.  0.  0.1 0.  0.  0.  0.7 0.  0.1 0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.9 0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.1 0.  0.8 0.1]
 [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.9]]
Nha [[0.4 0.  0.1 0.1 0.  0.  0.4 0.  0. ]
 [