In [47]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm
from hmmlearn.hmm import GaussianHMM
from hmmlearn.hmm import GMMHMM
from sklearn.model_selection import train_test_split
import soundfile as sf
import scipy as sp

In [2]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1))
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix

In [3]:
def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

In [4]:
def clustering(X, n_clusters=10):
    kmeans = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans

In [50]:
import random

class_names = ['tôi', 'nhà', 'học', 'nhân viên', 'hà nội']
dataset = {}
dataset_train = {}
dataset_test = {}

for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("wav_file", cname))
#     uncomment to shuffle dataset
    random.shuffle(dataset[cname])
    train_size = int(0.8*len(dataset[cname]))
    dataset_train[cname] = dataset[cname][:train_size]
    dataset_test[cname] = dataset[cname][train_size:]

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
# print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
# Comment KMEANS for GMMHMM
# kmeans = clustering(all_vectors)
# print("centers", kmeans.cluster_centers_.shape)

Load tôi dataset
Load nhà dataset
Load học dataset
Load nhân viên dataset
Load hà nội dataset


In [53]:
toi_param = [
    9,
    np.array([1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]),
    np.array([
        [0.7,0.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.7,0.3,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.7,0.3,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.7,0.3,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.7,0.3,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.7,0.3,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
    ])
]

In [54]:
hoc_param = toi_param

In [55]:
nha_param = [
    6,
    np.array([1.0,0.0,0.0,0.0,0.0,0.0]),
    np.array([
        [0.7,0.3,0.0,0.0,0.0,0.0],
        [0.0,0.7,0.3,0.0,0.0,0.0],
        [0.0,0.0,0.7,0.3,0.0,0.0],
        [0.0,0.0,0.0,0.7,0.3,0.0],
        [0.0,0.0,0.0,0.0,0.7,0.3],
        [0.0,0.0,0.0,0.0,0.0,1.0],
    ])
]

In [56]:
hanoi_param = [
    10,
    np.array([1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]),
    np.array([
        [0.7,0.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.7,0.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.7,0.3,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.7,0.3,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.7,0.3,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.7,0.3,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
    ])
]

In [57]:
nhanvien_param = [
    12,
    np.array([1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]),
    np.array([
        [0.7,0.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.7,0.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.7,0.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.7,0.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.7,0.3,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.7,0.3,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
    ])
]

In [58]:
param_dict = {
    "tôi": toi_param,
    "nhà": nha_param,
    "học": hoc_param,
    "nhân viên": nhanvien_param,
    "hà nội": hanoi_param
}

In [59]:
param_dict["tôi"][0]

9

In [64]:
models = {}
for cname in class_names:
    class_vectors = dataset[cname]
#     use Multinominal HMM
#     dataset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in dataset[cname]])
#     hmm = hmmlearn.hmm.MultinomialHMM(
#         n_components=20, random_state=0, n_iter=1000, verbose=True
#     )


    hmm = GMMHMM(
    n_components=param_dict[cname][0], n_mix = 4, random_state=42, n_iter=1000, verbose=True,
        params='mctw',
        init_params='mct'
    )
#     hmm.startprob_ = np.array([1.0,0.0,0.0,0.0,0.0])
#     hmm.startprob_ = np.array([1.0,0.0,0.0,0.0,0.0, 0.0,0.0])
    hmm.startprob_ = param_dict[cname][1]
    hmm.transmat_ = param_dict[cname][2]
#     hmm.transmat_ = np.array([
#         [0.7,0.3,0.0,0.0,0.0],
#         [0.0,0.7,0.3,0.0,0.0],
#         [0.0,0.0,0.7,0.3,0.0],
#         [0.0,0.0,0.0,0.7,0.3],
#         [0.0,0.0,0.0,0.0,1.0],
#     ])
#     hmm.transmat_ = np.array([
#             [0.7,0.3,0.0,0.0,0.0,0.0,0.0],
#             [0.0,0.7,0.3,0.0,0.0,0.0,0.0],
#             [0.0,0.0,0.7,0.3,0.0,0.0,0.0],
#             [0.0,0.0,0.0,0.7,0.3,0.0,0.0],
#             [0.0,0.0,0.0,0.0,0.7,0.3,0.0],
#             [0.0,0.0,0.0,0.0,0.0,0.7,0.3],
#             [0.0,0.0,0.0,0.0,0.0,0.0,1.0],
#         ])

#     uncomment below line to train with full dataset
    X = np.concatenate(dataset[cname])
#     lengths = list([len(x) for x in dataset[cname]])
#     hmm.fit(X, lengths=lengths)

#     X = np.concatenate(dataset_train[cname])
    lengths = list([len(x) for x in dataset_train[cname]])
#     FOR GMMHMM: NO NEED lengths parameter
    hmm.fit(X)
    models[cname] = hmm
print("Training done")

print("Testing")

for true_cname in class_names:
    true_predict = 0
#     for O in dataset[true_cname]:
    for O in dataset_test[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items()}
        predict = max(score, key=score.get)
        if predict == true_cname:
            true_predict += 1
#         print(true_cname, score, predict)
    print(true_cname)
#     change dataset_test to dataset to test in full dataset
    print(f'TRUE PREDICT: {true_predict}/{len(dataset_test[true_cname])}')
    print('ACCURACY:', true_predict/len(dataset_test[true_cname]))

         1     -946992.1253             +nan
         2     -862518.8962      +84473.2291
         3     -838867.9319      +23650.9643
         4     -832475.9819       +6391.9500
         5     -830096.7652       +2379.2166
         6     -828826.4597       +1270.3056
         7     -827948.8608        +877.5988
         8     -827317.8666        +630.9943
         9     -826694.8037        +623.0628
        10     -826022.5367        +672.2671
        11     -825134.0207        +888.5159
        12     -823674.8794       +1459.1414
        13     -821149.4621       +2525.4173
        14     -819491.9517       +1657.5104
        15     -818226.4169       +1265.5348
        16     -814805.8533       +3420.5636
        17     -811807.7421       +2998.1112
        18     -809063.0600       +2744.6821
        19     -802994.3568       +6068.7031
        20     -794078.0851       +8916.2717
        21     -714087.1410      +79990.9441
        22     -714050.0764         +37.0646
        23

Training done
Testing
tôi
TRUE PREDICT: 32/32
ACCURACY: 1.0
nhà
TRUE PREDICT: 32/32
ACCURACY: 1.0
học
TRUE PREDICT: 31/32
ACCURACY: 0.96875
nhân viên
TRUE PREDICT: 32/32
ACCURACY: 1.0
hà nội
TRUE PREDICT: 32/32
ACCURACY: 1.0


In [6]:
import pickle
models = pickle.load(open("models_fixtransmat.pkl", "rb"))

In [61]:
# Test on Record files

class_names = ['tôi', 'nhà', 'học', 'nhân viên', 'hà nội']
dataset_record = {}

for cname in class_names:
    print(f"Load {cname} dataset")
    dataset_record[cname] = get_class_data(os.path.join("wav_file/RECORD", cname))

# Get all vectors in the datasets
all_vectors_record = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset_record.items()], axis=0)

Load tôi dataset
Load nhà dataset
Load học dataset
Load nhân viên dataset
Load hà nội dataset


In [8]:
len(dataset_record['tôi'])

60

In [48]:
import matplotlib.pyplot as plt

def remove_noise(file_path):
#     print(file_path)
    y,sr = librosa.load(file_path, duration=10)
    y_filted = sp.signal.medfilt(y,3)
    y_filted, index = librosa.effects.trim(y_filted, top_db=25)
    # y, index = librosa.effects.trim(y, top_db=10)

    # plot waveform
#     plt.figure()
#     plt.subplot(3,1,1)
    # librosa.display.waveplot(y_filted, sr = sr)
    new_path = file_path.split('.')[0]

    # write file
#     sf.write(new_path + '_removed.wav', y_filted, sr, 'PCM_24')
    sf.write(file_path, y_filted, sr, 'PCM_24')

In [62]:
for true_cname in class_names:
    true_predict = 0
#     for O in dataset[true_cname]:
    for O in dataset_record[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items()}
        predict = max(score, key=score.get)
        if predict == true_cname:
            true_predict += 1
#         print(true_cname, score, predict)
    print(true_cname)
#     change dataset_test to dataset to test in full dataset
    print(f'TRUE PREDICT: {true_predict}/{len(dataset_record[true_cname])}')
    print('ACCURACY:', true_predict/len(dataset_record[true_cname]))

tôi
TRUE PREDICT: 60/60
ACCURACY: 1.0
nhà
TRUE PREDICT: 60/60
ACCURACY: 1.0
học
TRUE PREDICT: 60/60
ACCURACY: 1.0
nhân viên
TRUE PREDICT: 60/60
ACCURACY: 1.0
hà nội
TRUE PREDICT: 61/61
ACCURACY: 1.0


In [17]:
models

{'tôi': GMMHMM(algorithm='viterbi', covariance_type='diag',
        covars_prior=array([[[-1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5,
          -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5,
          -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5,
          -1.5, -1.5, -1.5, -1.5, -1.5, -1.5],
         [-1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5,
          -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -...
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0.]]]),
        means_weight=array([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]]),
        min_covar=0.001, n_components=5, n_iter=1000, n_mix=4, params='mctw',
        random_state=42, startprob_prior=1.0, tol=0.01, transmat_prior=1.0,
        verbose=True,
        weights_prior=array([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1.

In [65]:
import pickle 
with open("models_noise.pkl", "wb") as file:
    pickle.dump(models, file)