In [2]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm
from hmmlearn.hmm import GaussianHMM
from hmmlearn.hmm import GMMHMM
from sklearn.model_selection import train_test_split

In [3]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix

In [4]:
def get_class_data(data_dir):
    files = os.listdir(data_dir)
#     for f in files:
#         print(f)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

In [5]:
def clustering(X, n_clusters=10):
    kmeans = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans  

In [8]:
import random

class_names = ['tôi', 'nhà', 'học', 'nhân viên', 'hà nội']
dataset = {}
dataset_train = {}
dataset_test = {}

for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("wav_file", cname))
#     uncomment to shuffle dataset
    random.shuffle(dataset[cname])
    train_size = int(0.8*len(dataset[cname]))
    dataset_train[cname] = dataset[cname][:train_size]
    dataset_test[cname] = dataset[cname][train_size:]

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
# print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
# Comment KMEANS for GMMHMM
# kmeans = clustering(all_vectors)
# print("centers", kmeans.cluster_centers_.shape)

Load tôi dataset
Load nhà dataset
Load học dataset
Load nhân viên dataset
Load hà nội dataset


In [9]:
models = {}

for cname in class_names:
    # convert all vectors to the cluster index
    # dataset['one'] = [O^1, ... O^R]
    # O^r = (c1, c2, ... ct, ... cT)
    # O^r size T x 1
    hmm = hmmlearn.hmm.GMMHMM(
        n_components=7, n_mix = 2, random_state=42, n_iter=1000, verbose=True,
        params='mctw',
        init_params='mst',
#         startprob_prior = np.array([1., 0., 0., 0., 0., 0., 0.]),
#         transmat_prior = transitionMatrix()
    )
    hmm.startprob_ = np.array([1.0,0.0,0.0,0.0,0.0, 0.0,0.0])
    hmm.transmat_ = np.array([
            [0.7,0.3,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.7,0.3,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.7,0.3,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.7,0.3,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.7,0.3,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.7,0.3],
            [0.0,0.0,0.0,0.0,0.0,0.0,1.0],
        ])

    X = np.concatenate(dataset_train[cname])
    lengths = list([len(x) for x in dataset_train[cname]])
#     FOR GMMHMM: NO NEED lengths parameter
    hmm.fit(X)
    models[cname] = hmm
print("Training done")

         1     -210652.5281             +nan
         2     -201616.8623       +9035.6658
         3     -198649.9633       +2966.8990
         4     -198048.6827        +601.2807
         5     -197860.5356        +188.1471
         6     -197748.0645        +112.4711
         7     -197661.1619         +86.9026
         8     -197543.1840        +117.9779
         9     -197503.4112         +39.7728
        10     -197444.4287         +58.9825
        11     -197407.6647         +36.7640
        12     -197352.6349         +55.0298
        13     -197273.5756         +79.0593
        14     -197194.1059         +79.4697
        15     -197142.8571         +51.2488
        16     -197109.8490         +33.0081
        17     -197083.7898         +26.0593
        18     -197060.8647         +22.9251
        19     -196945.0738        +115.7909
        20     -196825.8477        +119.2261
        21     -196762.4697         +63.3780
        22     -196727.9571         +34.5126
        23

Training done


        82     -353760.0119          +0.0086


In [10]:
print("Testing")
for true_cname in class_names:
    true_predict = 0
#     for O in dataset[true_cname]:
    for O in dataset_test[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items()}
        predict = max(score, key=score.get)
        if predict == true_cname:
            true_predict += 1
#         print(true_cname, score, predict)
    print(true_cname)
#     change dataset_test to dataset to test in full dataset
    print(f'TRUE PREDICT: {true_predict}/{len(dataset_test[true_cname])}')
    print('ACCURACY:', true_predict/len(dataset_test[true_cname]))                             

Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance


Testing


Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance


tôi
TRUE PREDICT: 20/20
ACCURACY: 1.0


Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance


nhà
TRUE PREDICT: 15/20
ACCURACY: 0.75


Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance


học
TRUE PREDICT: 20/21
ACCURACY: 0.9523809523809523


Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance


nhân viên
TRUE PREDICT: 18/21
ACCURACY: 0.8571428571428571


Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance


hà nội
TRUE PREDICT: 20/20
ACCURACY: 1.0


In [201]:
models

{'toi': GMMHMM(algorithm='viterbi', covariance_type='diag',
     covars_prior=array([[[-1.5, -1.5, ..., -1.5, -1.5],
         [-1.5, -1.5, ..., -1.5, -1.5]],
 
        [[-1.5, -1.5, ..., -1.5, -1.5],
         [-1.5, -1.5, ..., -1.5, -1.5]],
 
        ...,
 
        [[-1.5, -1.5, ..., -1.5, -1.5],
         [-1.5, -1.5, ..., -1.5, -1.5]],
 
        [[-1.5, -1.5, ..., -1.5, -1.5],
         [-1.5, -1.5, ..., -1.5, -1.5]]]),
     covars_weight=array([[[0., 0., ..., 0., 0.],
         [0., 0., ..., 0., 0.]],
 
        [[0., 0., ..., 0., 0.],
         [0., 0., ..., 0., 0.]],
 
        ...,
 
        [[0., 0., ..., 0., 0.],
         [0., 0., ..., 0., 0.]],
 
        [[0., 0., ..., 0., 0.],
         [0., 0., ..., 0., 0.]]]),
     init_params='mst',
     means_prior=array([[[0., 0., ..., 0., 0.],
         [0., 0., ..., 0., 0.]],
 
        [[0., 0., ..., 0., 0.],
         [0., 0., ..., 0., 0.]],
 
        ...,
 
        [[0., 0., ..., 0., 0.],
         [0., 0., ..., 0., 0.]],
 
        [[0., 0., .

In [202]:
print("Testing")
miss = {}
acc = {}
class_names = ["toi", "song", "truoc", "nhan_vien", "gia_dinh"]
for true_cname in class_names:
    kt = 0
    for O in dataset[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }
        inverse = [(value, key) for key, value in score.items()]
        pre = max(inverse)[1]
#         print(true_cname, score, pre)
        if pre == true_cname:
            kt +=1
    print(true_cname," ", kt)
    acc[true_cname] = kt * 100 / len(dataset[true_cname])
print(acc)      

Testing
toi   89
song   92
truoc   80
nhan_vien   81
gia_dinh   79
{'toi': 100.0, 'song': 100.0, 'truoc': 100.0, 'nhan_vien': 100.0, 'gia_dinh': 98.75}


# Demo


In [204]:
O = get_mfcc('data.wav')
score = {cname: model.score(O, [len(O)]) for cname, model in models.items()}
inverse = [(value, key) for key, value in score.items()]
predict = max(inverse)[1]
predict

  b = a[a_slice]


'gia_dinh'

In [205]:
class_names = ["test_toi"]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    
    dataset[cname] = get_class_data(os.path.join("data", cname))

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)

print("Testing")
acc = {}
test_name = { "test_toi"}
for true_cname in test_name:
    kt = 0
    for O in dataset[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }
        inverse = [(value, key) for key, value in score.items()]
        pre = max(inverse)[1]
        print(true_cname, score, pre)
        if pre == true_cname[5:]:
            kt +=1
    print(true_cname," ", kt)
    acc[true_cname] = kt * 100 / len(dataset[true_cname])
print(acc)                                    

Load test_toi dataset
vectors (895, 36)
Testing
test_toi {'toi': -7620.212225111643, 'song': -8230.521415545485, 'truoc': -8144.95941912607, 'nhan_vien': -8055.198681860034, 'gia_dinh': -8394.809761647619} toi
test_toi {'toi': -2149.219995999329, 'song': -2412.9368211004685, 'truoc': -2362.355993332991, 'nhan_vien': -2303.6970214219964, 'gia_dinh': -2363.120670330845} toi
test_toi {'toi': -4754.502610596342, 'song': -5116.315693293601, 'truoc': -5357.214125396858, 'nhan_vien': -5237.692756976712, 'gia_dinh': -4976.711196599017} toi
test_toi {'toi': -1351.7292054794661, 'song': -1544.0518867550961, 'truoc': -1376.3849420262004, 'nhan_vien': -1426.545629349316, 'gia_dinh': -1475.5824434061235} toi
test_toi {'toi': -2143.280006319747, 'song': -2328.4712382758908, 'truoc': -2223.3070208305335, 'nhan_vien': -2233.811970473114, 'gia_dinh': -2324.3715712918483} toi
test_toi {'toi': -8820.564295791386, 'song': -9605.533709422523, 'truoc': -10004.417025004443, 'nhan_vien': -9616.831028032006, '

# Save model

In [None]:
# with open('output1.txt', 'w') as f:
#     print(models, file=f)

In [None]:
# import pickle 
# with open("output1.pkl", "wb") as file:
#     pickle.dump(models, file)

In [211]:
models['toi']

GMMHMM(algorithm='viterbi', covariance_type='diag',
    covars_prior=array([[[-1.5, -1.5, ..., -1.5, -1.5],
        [-1.5, -1.5, ..., -1.5, -1.5]],

       [[-1.5, -1.5, ..., -1.5, -1.5],
        [-1.5, -1.5, ..., -1.5, -1.5]],

       ...,

       [[-1.5, -1.5, ..., -1.5, -1.5],
        [-1.5, -1.5, ..., -1.5, -1.5]],

       [[-1.5, -1.5, ..., -1.5, -1.5],
        [-1.5, -1.5, ..., -1.5, -1.5]]]),
    covars_weight=array([[[0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.]],

       [[0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.]],

       ...,

       [[0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.]],

       [[0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.]]]),
    init_params='mst',
    means_prior=array([[[0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.]],

       [[0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.]],

       ...,

       [[0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.]],

       [[0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.]]]),
