In [1]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

# Root folder of the audio data; each class name is joined onto this path to
# locate the directory holding that class's recordings.
path_to_data = "./Data_Filtered"

In [2]:
def get_mfcc(file_path):
    """Load an audio file and return its MFCC + delta feature sequence.

    The signal is framed with a 25 ms window and a 10 ms hop. Twelve MFCCs
    are computed per frame, mean-normalised per coefficient over the whole
    utterance, and extended with first- and second-order deltas.

    Parameters
    ----------
    file_path : str
        Path of the audio file to read with ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        Array of shape (T, 36): one row per frame, laid out as
        [12 MFCC | 12 delta | 12 delta-delta]. hmmlearn expects T x N input.
    """
    y, sr = librosa.load(file_path)  # read the audio file
    hop_length = math.floor(sr * 0.010)  # 10 ms hop
    win_length = math.floor(sr * 0.025)  # 25 ms frame
    # mfcc is a 12 x T matrix.
    # y and sr must be passed by keyword: librosa >= 0.10 no longer accepts
    # them positionally, while older releases accept both forms.
    mfcc = librosa.feature.mfcc(
        y=y, sr=sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # subtract the per-coefficient mean --> normalise mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1, keepdims=True)
    # delta features, 1st and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0)
    # return T x 36 (transpose of X)
    return X.T

def get_class_data(data_dir):
    """Extract features for every ``.wav`` file directly inside ``data_dir``.

    Parameters
    ----------
    data_dir : str
        Directory to scan (not recursive); entries not ending in ".wav"
        are ignored.

    Returns
    -------
    list
        One ``get_mfcc`` result per ``.wav`` file, ordered by file name.
    """
    # os.listdir returns entries in arbitrary, platform-dependent order.
    # Sorting keeps the sequence order (and everything seeded downstream)
    # reproducible between runs and machines.
    wav_files = sorted(f for f in os.listdir(data_dir) if f.endswith(".wav"))
    return [get_mfcc(os.path.join(data_dir, f)) for f in wav_files]

def clustering(X, n_clusters=10):
    """Fit a K-Means model on ``X`` and return the fitted estimator.

    Uses 100 restarts (``n_init=100``) with a fixed ``random_state`` and
    prints the shape of the resulting cluster-center array.
    """
    model = KMeans(
        n_clusters=n_clusters,
        n_init=100,
        random_state=2,
        verbose=0,
    )
    model.fit(X)
    centers = model.cluster_centers_
    print("centers", centers.shape)
    return model

In [3]:
# Training classes come first, followed by their "test_"-prefixed held-out
# counterparts; each name is also a sub-directory of path_to_data.
class_names = [
    "ThanhPho", "Nha", "Me", "YTe", "Hoc",
    "test_ThanhPho", "test_Nha", "test_Me", "test_YTe", "test_Hoc",
]

# dataset maps class name -> list of per-file feature matrices.
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    class_dir = os.path.join(path_to_data, cname)
    dataset[cname] = get_class_data(class_dir)
print("Done!!!")

Load ThanhPho dataset
Load Nha dataset
Load Me dataset
Load YTe dataset
Load Hoc dataset
Load test_ThanhPho dataset
Load test_Nha dataset
Load test_Me dataset
Load test_YTe dataset
Load test_Hoc dataset
Done!!!


In [4]:
# Stack every frame of every training sequence into one array; classes whose
# name starts with 'test' are held out of the codebook.
all_vectors = np.concatenate(
    [np.concatenate(v, axis=0) for k, v in dataset.items() if not k.startswith('test')],
    axis=0)
print("vectors", all_vectors.shape)
# Run the K-Means algorithm to get the clusters. clustering() already prints
# the shape of the fitted centers, so that line is not repeated here.
kmeans = clustering(all_vectors)
print("Done")

vectors (55545, 36)


KeyboardInterrupt: 

In [None]:
# Per-word HMM topology: number of hidden states (3 per phoneme), initial
# state distribution, and a left-to-right transition matrix.
dict_components = {
    #  ɲa̤ː˨˩ -> 2 phonemes -> 6 states
    "Nha": 6,
    #  mɛ̰ʔ˨˩ -> 2 phonemes -> 6 states
    "Me": 6,
    #  i˧˧ te˧˥ -> 3 phonemes -> 9 states
    "YTe": 9,
    #   tʰa̤jŋ˨˩ fo˧˥ -> 5 phonemes -> 15 states
    "ThanhPho": 15,
    #  ha̰ʔwk˨ -> 3 phonemes -> 9 states
    "Hoc": 9,
}

dict_startprob = {
    #  ɲa̤ː˨˩ -> 2 phonemes -> 6 states
    "Nha": [0.2, 0.8, 0.0, 0.0, 0.0, 0.0,], 
    
    #  mɛ̰ʔ˨˩ -> 2 phonemes -> 6 states
    "Me": [0.2, 0.6, 0.2, 0.0, 0.0, 0.0,],
    
    #  i˧˧ te˧˥ -> 3 phonemes -> 9 states
    "YTe": [0.2, 0.7, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
    
    #   tʰa̤jŋ˨˩ fo˧˥ -> 5 phonemes -> 15 states
    "ThanhPho": [0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
    
    #  ha̰ʔwk˨ -> 3 phonemes -> 9 states
    "Hoc": [0.1, 0.8, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
}

dict_transmat = {
    #  ɲa̤ː˨˩ 
    "Nha": [[0.2, 0.7, 0.1, 0.0, 0.0, 0.0,], 
            [0.0, 0.1, 0.8, 0.1, 0.0, 0.0,], 
            [0.0, 0.0, 0.1, 0.9, 0.0, 0.0,], 
            [0.0, 0.0, 0.0, 0.1, 0.6, 0.3,], 
            [0.0, 0.0, 0.0, 0.0, 0.4, 0.5,], 
            [0.0, 0.0, 0.0, 0.0, 0.0, 1.0,],], 
    
    #  mɛ̰ʔ˨˩ 
    "Me":  [[0.3, 0.4, 0.3, 0.0, 0.0, 0.0,], 
            [0.0, 0.2, 0.4, 0.4, 0.0, 0.0,], 
            [0.0, 0.0, 0.3, 0.4, 0.3, 0.0,], 
            [0.0, 0.0, 0.0, 0.3, 0.3, 0.4,], 
            [0.0, 0.0, 0.0, 0.0, 0.3, 0.7,], 
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.6,],], 
    
    #  i˧˧ te˧˥ 
    "YTe": [[0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
            [0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0,],
            [0.0, 0.0, 0.6, 0.3, 0.1, 0.0, 0.0, 0.0, 0.0,],
            [0.0, 0.0, 0.0, 0.4, 0.4, 0.2, 0.0, 0.0, 0.0,],
            [0.0, 0.0, 0.0, 0.0, 0.3, 0.4, 0.3, 0.0, 0.0,],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.4, 0.3, 0.0,],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.7, 0.1,],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.8,],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0,],],
    
    #   tʰa̤jŋ˨˩ fo˧˥ 
    "ThanhPho": [[0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.2, 0.6, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.2, 0.5, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.3, 0.5, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.6, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.3, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.8, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.5, 0.2, 0.0, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.15, 0.7, 0.15, 0.0, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.6, 0.2, 0.0, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.7, 0.1, 0.0,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.6, 0.1,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.7,],
                 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4,],],
    
    #  ha̰ʔwk˨ 
    "Hoc": [[0.3, 0.6, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,],
            [0.0, 0.4, 0.5, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0,],
            [0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0,],
            [0.0, 0.0, 0.0, 0.4, 0.4, 0.2, 0.0, 0.0, 0.0,],
            [0.0, 0.0, 0.0, 0.0, 0.3, 0.4, 0.3, 0.0, 0.0,],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.4, 0.3, 0.0,],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.2, 0.1,],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.3,],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2,],],
}

# Several hand-written rows above do not sum to 1 (Nha row 5; Me row 6;
# ThanhPho rows 2, 7 and 15; Hoc rows 7 and 9), which makes them invalid
# transition distributions. Rescale every row proportionally so each one sums
# to 1 while keeping the relative weights that were written down.
dict_transmat = {
    name: [[p / sum(row) for p in row] for row in rows]
    for name, rows in dict_transmat.items()
}

In [None]:
# hmmlearn >= 0.3 moved the discrete-symbol HMM to CategoricalHMM and changed
# what MultinomialHMM models; fall back to MultinomialHMM on older releases.
DiscreteHMM = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)

models = {}
for cname in class_names:
    # convert all vectors to the cluster index
    # dataset[cname] = [O^1, ... O^R]
    # O^r = (c1, c2, ... ct, ... cT)
    # O^r size T x 1
    # Sequences that are already T x 1 are left alone so that re-running this
    # cell does not feed cluster indices back into kmeans.predict.
    dataset[cname] = [
        v if v.shape[1] == 1 else kmeans.predict(v).reshape(-1, 1)
        for v in dataset[cname]
    ]

    if cname.startswith('test'):
        continue

    # Per-word left-to-right topology. Rows are normalised here so that
    # training does not depend on the hand-written tables summing to exactly 1.
    n_states = dict_components[cname]
    startprob = np.asarray(dict_startprob[cname], dtype=float)
    transmat = np.asarray(dict_transmat[cname], dtype=float)
    startprob = startprob / startprob.sum()
    transmat = transmat / transmat.sum(axis=1, keepdims=True)

    # init_params="e": only the emission probabilities are initialised by
    # hmmlearn. startprob_/transmat_ are set explicitly as the starting point
    # for EM instead of being passed as *_prior (those are Dirichlet prior
    # parameters, not initial values). Transitions initialised to zero stay
    # zero during Baum-Welch, which preserves the left-to-right structure.
    hmm = DiscreteHMM(
        n_components=n_states, random_state=0, n_iter=1000, verbose=True,
        init_params="e",
    )
    hmm.startprob_ = startprob
    hmm.transmat_ = transmat

    X = np.concatenate(dataset[cname])
    lengths = [len(x) for x in dataset[cname]]
    print("training class", cname)
    print(X.shape, lengths, len(lengths))
    hmm.fit(X, lengths=lengths)
    models[cname] = hmm
print("Training done")

In [None]:
def max_score(score):
    """Return the key of ``score`` that has the largest value.

    Parameters
    ----------
    score : dict
        Mapping from class name to its score.

    Returns
    -------
    The key with the highest value; on ties the first such key in iteration
    order wins. For an empty mapping the string "None" is returned.
    """
    if not score:
        return "None"
    # max() keeps the first maximal element, matching a strict ">" scan, and
    # needs no sentinel value that could collide with a real key.
    return max(score, key=lambda name: score[name])
    
print("Testing")
# percent maps each test class to a "correct/total" string.
percent = {}
for true_cname in class_names:
    if not true_cname.startswith('test'):
        continue

    test_sequences = dataset[true_cname]
    print(true_cname, len(test_sequences))
    # Drop the 5-character "test_" prefix to get the matching model name.
    expected = true_cname[5:]
    dc = 0
    for O in test_sequences:
        # Log-likelihood of the sequence under every trained word model.
        score = {
            cname: round(model.score(O, [len(O)]), 3)
            for cname, model in models.items()
            if cname[:4] != 'test'
        }
        if max_score(score) == expected:
            dc += 1
        print(true_cname, score)
    print()
    percent[true_cname] = f"{dc}/{len(test_sequences)}"


for k, v in percent.items():
    print(k, v)