In [1]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm
from hmmlearn.hmm import GaussianHMM
from hmmlearn.hmm import GMMHMM
from sklearn.model_selection import train_test_split

In [2]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix

In [3]:
def get_class_data(data_dir):
    files = os.listdir(data_dir)
#     for f in files:
#         print(f)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

In [4]:
def clustering(X, n_clusters=10):
    kmeans = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans  

In [5]:
import random

class_names = ['tôi', 'nhà', 'học', 'nhân viên', 'hà nội']
dataset = {}
dataset_train = {}
dataset_test = {}

for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("data", cname))
#     uncomment to shuffle dataset
    random.shuffle(dataset[cname])
    train_size = int(0.8*len(dataset[cname]))
    dataset_train[cname] = dataset[cname][:train_size]
    dataset_test[cname] = dataset[cname][train_size:]

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
# print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
# Comment KMEANS for GMMHMM
# kmeans = clustering(all_vectors)
# print("centers", kmeans.cluster_centers_.shape)

Load tôi dataset
Load nhà dataset
Load học dataset
Load nhân viên dataset
Load hà nội dataset


In [6]:
models = {}

for cname in class_names:
    # convert all vectors to the cluster index
    # dataset['one'] = [O^1, ... O^R]
    # O^r = (c1, c2, ... ct, ... cT)
    # O^r size T x 1
    hmm = hmmlearn.hmm.GMMHMM(
        n_components=7, n_mix = 5, random_state=42, n_iter=1000, verbose=True,
        params='mctw',
        init_params='mst',
#         startprob_prior = np.array([1., 0., 0., 0., 0., 0., 0.]),
#         transmat_prior = transitionMatrix()
    )
    hmm.startprob_ = np.array([1.0,0.0,0.0,0.0,0.0, 0.0,0.0])
    hmm.transmat_ = np.array([
            [0.7,0.3,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.7,0.3,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.7,0.3,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.7,0.3,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.7,0.3,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.7,0.3],
            [0.0,0.0,0.0,0.0,0.0,0.0,1.0],
        ])

    X = np.concatenate(dataset_train[cname])
    lengths = list([len(x) for x in dataset_train[cname]])
#     FOR GMMHMM: NO NEED lengths parameter
    hmm.fit(X)
    models[cname] = hmm
print("Training done")

1     -235886.0595             +nan
         2     -218424.9113      +17461.1481
         3     -214968.8947       +3456.0166
         4     -213809.6217       +1159.2730
         5     -213019.8899        +789.7318
         6     -212669.9124        +349.9776
         7     -212485.4935        +184.4189
         8     -212363.9972        +121.4963
         9     -212282.0864         +81.9109
        10     -212214.1209         +67.9655
        11     -212067.1377        +146.9831
        12     -211892.5023        +174.6355
        13     -211824.2264         +68.2759
        14     -211788.5762         +35.6501
        15     -211710.0802         +78.4961
        16     -211644.7417         +65.3384
        17     -211610.8506         +33.8911
        18     -211550.5915         +60.2590
        19     -211435.9325        +114.6590
        20     -211364.2466         +71.6859
        21     -211336.5397         +27.7069
        22     -211312.2811         +24.2586
        23     -211

In [7]:
print("Testing")
for true_cname in class_names:
    true_predict = 0
#     for O in dataset[true_cname]:
    for O in dataset_test[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items()}
        predict = max(score, key=score.get)
        if predict == true_cname:
            true_predict += 1
#         print(true_cname, score, predict)
    print(true_cname)
#     change dataset_test to dataset to test in full dataset
    print(f'TRUE PREDICT: {true_predict}/{len(dataset_test[true_cname])}')
    print('ACCURACY:', true_predict/len(dataset_test[true_cname]))                             

Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Testing
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
De

In [8]:
print("Testing")
miss = {}
acc = {}
class_names = ["tôi", "nhà", "học", "nhân viên", "hà nội"]
for true_cname in class_names:
    kt = 0
    for O in dataset[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }
        inverse = [(value, key) for key, value in score.items()]
        pre = max(inverse)[1]
#         print(true_cname, score, pre)
        if pre == true_cname:
            kt +=1
    print(true_cname," ", kt)
    acc[true_cname] = kt * 100 / len(dataset[true_cname])
print(acc)      

ariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
De

# Demo


In [9]:
O = get_mfcc('test.wav')
score = {cname: model.score(O, [len(O)]) for cname, model in models.items()}
inverse = [(value, key) for key, value in score.items()]
predict = max(inverse)[1]
predict

FileNotFoundError: [Errno 2] No such file or directory: 'test.wav'

In [None]:
class_names = ["hà nội"]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    
    dataset[cname] = get_class_data(os.path.join("data/RECORD", cname))

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)

print("Testing")
acc = {}
test_name = { "hà nội"}
for true_cname in test_name:
    kt = 0
    for O in dataset[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }
        inverse = [(value, key) for key, value in score.items()]
        pre = max(inverse)[1]
        print(true_cname, score, pre)
        if pre == true_cname[0:]:
            kt +=1
    print(true_cname," ", kt)
    acc[true_cname] = kt * 100 / len(dataset[true_cname])
print(acc)                                    

# Save model

In [211]:
models['toi']

GMMHMM(algorithm='viterbi', covariance_type='diag',
    covars_prior=array([[[-1.5, -1.5, ..., -1.5, -1.5],
        [-1.5, -1.5, ..., -1.5, -1.5]],

       [[-1.5, -1.5, ..., -1.5, -1.5],
        [-1.5, -1.5, ..., -1.5, -1.5]],

       ...,

       [[-1.5, -1.5, ..., -1.5, -1.5],
        [-1.5, -1.5, ..., -1.5, -1.5]],

       [[-1.5, -1.5, ..., -1.5, -1.5],
        [-1.5, -1.5, ..., -1.5, -1.5]]]),
    covars_weight=array([[[0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.]],

       [[0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.]],

       ...,

       [[0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.]],

       [[0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.]]]),
    init_params='mst',
    means_prior=array([[[0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.]],

       [[0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.]],

       ...,

       [[0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.]],

       [[0., 0., ..., 0., 0.],
        [0., 0., ..., 0., 0.]]]),
