In [1]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
import os
import math
import hmmlearn.hmm
from sklearn.cluster import KMeans

path_to_data = "./Data"

In [2]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix

def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

def clustering(X, n_clusters=10):
    kmeans = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans  

In [15]:

class_names = ["Nha", "Me", "YTe", "ThanhPho", "Hoc",]
test_class_names = ["test_Nha", "test_Me", "test_YTe", "test_ThanhPho", "test_Hoc"]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join(path_to_data, cname))
print('Done!!!')

Load Nha dataset
Load Me dataset
Load YTe dataset
Load ThanhPho dataset
Load Hoc dataset
Done!!!


In [16]:

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)
print("Done")

vectors (55545, 36)
centers (10, 36)
centers (10, 36)
Done


# 

In [None]:

models = {}
for cname in class_names:
    class_vectors = dataset[cname]
    # convert all vectors to the cluster index
    # dataset['one'] = [O^1, ... O^R]
    # O^r = (c1, c2, ... ct, ... cT)
    # O^r size T x 1
    dataset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in dataset[cname]])
    hmm = hmmlearn.hmm.MultinomialHMM(
        n_components=6, random_state=0, n_iter=1000, verbose=True,
        init_params='e', params = 'ste',
        #startprob_prior=np.array([0.7,0.2,0.1,0.0,0.0,0.0]),
        transmat_prior=np.array([
            [0.2,0.7,0.1,0.0,0.0,0.0,],
            [0.0,0.1,0.6,0.3,0.0,0.0,],
            [0.0,0.0,0.1,0.7,0.2,0.0,],
            [0.0,0.0,0.0,0.3,0.5,0.2,],
            [0.0,0.0,0.0,0.0,0.6,0.4,],
            [0.0,0.0,0.0,0.1,0.0,1.0,],
        ]),
    )
    if cname[:4] != 'test':
        X = np.concatenate(dataset[cname])
        lengths = list([len(x) for x in dataset[cname]])
        print("training class", cname)
        print(X.shape, lengths, len(lengths))
        hmm.fit(X, lengths=lengths)
        models[cname] = hmm
print("Training done")


training class Nha
(8849, 1) [92, 141, 143, 78, 95, 92, 45, 74, 102, 72, 92, 92, 100, 74, 85, 95, 90, 78, 74, 69, 97, 71, 97, 53, 111, 75, 75, 93, 61, 100, 98, 102, 67, 107, 80, 114, 97, 68, 141, 117, 95, 88, 124, 124, 82, 85, 96, 65, 62, 77, 105, 86, 91, 110, 77, 111, 78, 81, 88, 83, 80, 93, 101, 88, 87, 87, 100, 73, 68, 67, 86, 97, 75, 77, 114, 71, 76, 120, 78, 142, 76, 97, 55, 83, 106, 46, 83, 87, 49, 92, 85, 61, 168, 71, 85, 90, 110, 98, 112] 99


         1      -22005.7373             +nan
         2      -15701.1762       +6304.5612
         3      -14196.0752       +1505.1010
         4      -11945.5953       +2250.4798
         5       -9325.7296       +2619.8657
         6       -7651.2166       +1674.5130
         7       -6920.2666        +730.9501
         8       -6463.9613        +456.3052
         9       -5928.7893        +535.1720
        10       -5490.0634        +438.7259
        11       -5391.5881         +98.4753
        12       -5367.9132         +23.6748
        13       -5349.0681         +18.8451
        14       -5325.3166         +23.7515
        15       -5266.4271         +58.8895
        16       -5159.7525        +106.6746
        17       -5050.3479        +109.4047
        18       -5003.2378         +47.1101
        19       -4989.6515         +13.5863
        20       -4961.4378         +28.2138
        21       -4910.8823         +50.5555
        22       -4887.0968         +23.7854
        23

training class Me
(10468, 1) [101, 103, 66, 106, 101, 106, 115, 100, 96, 104, 98, 100, 98, 95, 98, 106, 106, 105, 108, 117, 101, 105, 99, 99, 101, 102, 100, 103, 102, 100, 126, 106, 100, 95, 111, 102, 116, 106, 106, 95, 100, 98, 96, 139, 98, 108, 110, 124, 105, 94, 95, 118, 93, 96, 98, 100, 111, 101, 95, 103, 98, 131, 101, 135, 109, 146, 117, 105, 96, 95, 112, 100, 109, 122, 107, 125, 101, 115, 112, 124, 106, 132, 121, 116, 119, 113, 107, 111, 93, 105, 129, 131, 80, 79, 82, 74, 76, 79, 81, 88] 100


         1      -23763.7620             +nan
         2      -17579.1412       +6184.6208
         3      -17087.3713        +491.7698
         4      -15928.0541       +1159.3173
         5      -13874.3682       +2053.6858
         6      -10238.9328       +3635.4355
         7       -8184.4259       +2054.5068
         8       -7807.9095        +376.5165
         9       -7560.5908        +247.3187
        10       -7392.2893        +168.3014
        11       -7063.8359        +328.4534
        12       -6780.8807        +282.9552
        13       -6700.8449         +80.0358
        14       -6655.2520         +45.5929
        15       -6590.3731         +64.8789
        16       -6500.3545         +90.0186
        17       -6424.4854         +75.8692
        18       -6354.4677         +70.0177
        19       -6271.5645         +82.9031
        20       -5977.0782        +294.4863
        21       -5883.4974         +93.5809
        22       -5871.0632         +12.4342
        23

training class YTe
(12847, 1) [92, 129, 131, 127, 127, 125, 134, 121, 126, 146, 128, 119, 120, 139, 127, 136, 158, 164, 136, 154, 119, 156, 139, 125, 111, 129, 130, 117, 115, 124, 128, 117, 124, 130, 119, 126, 109, 150, 128, 153, 147, 120, 136, 126, 136, 119, 118, 123, 159, 129, 128, 184, 133, 125, 142, 135, 150, 131, 122, 127, 132, 118, 114, 125, 120, 120, 124, 128, 130, 120, 119, 119, 147, 128, 123, 123, 111, 151, 118, 129, 115, 113, 149, 121, 118, 114, 114, 116, 133, 120, 120, 120, 120, 114, 116, 118, 165, 120, 115, 149] 100


         2      -21299.8773      +11147.5365
         3      -18755.6116       +2544.2657
         4      -15144.7991       +3610.8125
         5      -13051.9396       +2092.8595
         6      -11467.7910       +1584.1487
         7       -9626.9222       +1840.8688
         8       -8497.6316       +1129.2905
         9       -8114.2542        +383.3774
        10       -7887.4503        +226.8039
        11       -7643.3968        +244.0535
        12       -7350.9638        +292.4330
        13       -7244.4857        +106.4781
        14       -7206.2205         +38.2651
        15       -7183.8187         +22.4019
        16       -7164.0412         +19.7774
        17       -7151.6229         +12.4183
        18       -7143.5301          +8.0928
        19       -7137.6866          +5.8435
        20       -7136.2146          +1.4719
        21       -7136.0562          +0.1584
        22       -7135.9591          +0.0971
        23       -7135.8952          +0.0639
        24

training class ThanhPho
(12660, 1) [141, 129, 141, 136, 137, 113, 175, 144, 132, 102, 123, 150, 192, 191, 124, 169, 134, 110, 144, 90, 161, 110, 116, 130, 104, 80, 133, 125, 115, 103, 104, 102, 98, 116, 127, 131, 152, 123, 121, 142, 242, 84, 126, 132, 89, 126, 91, 131, 135, 133, 133, 130, 141, 137, 145, 101, 128, 104, 136, 130, 130, 95, 145, 159, 150, 97, 90, 124, 86, 120, 100, 151, 149, 105, 95, 95, 100, 121, 279, 126, 121, 124, 101, 124, 82, 133, 108, 197, 111, 124, 121, 143, 113, 112, 91, 111, 132, 148, 103, 100] 100


         2      -24337.7617       +6151.0796
         3      -22715.4079       +1622.3538
         4      -19560.9255       +3154.4824
         5      -16657.1059       +2903.8196
         6      -13642.8953       +3014.2105
         7      -11096.5175       +2546.3778
         8       -9790.6589       +1305.8586
         9       -9575.9722        +214.6867
        10       -9548.0394         +27.9328
        11       -9509.2379         +38.8014
        12       -9379.3894        +129.8486
        13       -9120.4740        +258.9154
        14       -8955.7527        +164.7212
        15       -8885.3316         +70.4211
        16       -8852.4745         +32.8571
        17       -8835.8760         +16.5985
        18       -8827.5485          +8.3275
        19       -8823.6308          +3.9177
        20       -8821.9185          +1.7122
        21       -8821.1130          +0.8055
        22       -8820.6935          +0.4195
        23       -8820.4476          +0.2459
        24

training class Hoc
(10721, 1) [104, 136, 101, 104, 97, 98, 101, 103, 102, 107, 93, 96, 154, 109, 96, 97, 100, 144, 141, 93, 154, 101, 130, 106, 105, 93, 110, 104, 107, 141, 71, 130, 97, 108, 96, 166, 105, 105, 103, 141, 100, 108, 99, 103, 96, 70, 97, 99, 96, 101, 98, 103, 99, 101, 101, 94, 145, 102, 107, 103, 103, 100, 99, 105, 172, 103, 101, 105, 93, 93, 101, 112, 95, 168, 95, 100, 112, 94, 125, 98, 147, 103, 97, 104, 97, 110, 101, 93, 101, 115, 99, 113, 97, 104, 96, 100, 103, 102, 103, 91] 100


In [None]:

print("Testing")
for true_cname in test_class_names:
    for O in dataset[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }
        print(true_cname, score)

