In [1]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

In [2]:
def get_mfcc(file_path):
  y, sr = librosa.load(file_path) # read .wav file
  hop_length = math.floor(sr*0.010) # 10ms hop
  win_length = math.floor(sr*0.025) # 25ms frame
  # mfcc is 12 x T matrix
  mfcc = librosa.feature.mfcc(
      y, sr, n_mfcc=12, n_fft=1024,
      hop_length=hop_length, win_length=win_length)
  # substract mean from mfcc --> normalize mfcc
  mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
  # delta feature 1st order and 2nd order
  delta1 = librosa.feature.delta(mfcc, order=1)
  delta2 = librosa.feature.delta(mfcc, order=2)
  # X is 36 x T
  X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
  # return T x 36 (transpose of X)
  return X.T # hmmlearn use T x N matrix

In [3]:
def get_class_data(data_dir):
  files = os.listdir(data_dir)
  mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
  return mfcc

In [4]:
def clustering(X, n_clusters=20):
  kmeans = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0)
  kmeans.fit(X)
  print("centers", kmeans.cluster_centers_.shape)
  return kmeans  

In [38]:
if __name__ == "__main__":
  class_names = ["dich", "toi", "benh_nhan", "nguoi", "theo", "test_toi"]
  dataset = {}
  for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("data", cname))

  # Get all vectors in the datasets
  all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
  print("vectors", all_vectors.shape)
  # Run K-Means algorithm to get clusters
  kmeans = clustering(all_vectors)
  print("centers", kmeans.cluster_centers_.shape)

  models = {}
  for cname in class_names:
    class_vectors = dataset[cname]
    # convert all vectors to the cluster index
    # dataset['one'] = [O^1, ... O^R]
    # O^r = (c1, c2, ... ct, ... cT)
    # O^r size T x 1
    dataset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in dataset[cname]])

    # =================================================================
    # toi
    hmm = hmmlearn.hmm.MultinomialHMM(
      n_components=9, init_params='e', params='ste', verbose=True
    )
    hmm.startprob_ = np.array([0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
    hmm.transmat_ = np.array([
        [0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]
      ])

    # =================================================================
    # nguoi
#     hmm = hmmlearn.hmm.MultinomialHMM(
#       n_components=15, init_params='e', params='ste', verbose=True
#     )
#     hmm.startprob_ = np.array([0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
#     hmm.transmat_ = np.array([
#         [0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#         [0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#         [0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#         [0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#         [0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0],
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0],
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0],
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0],    
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0],    
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2],    
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5],
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]
#       ])

    # =================================================================
    # dich
#     hmm = hmmlearn.hmm.MultinomialHMM(
#       n_components=9, init_params='e', params='ste', verbose=True
#     )
#     hmm.startprob_ = np.array([0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
#     hmm.transmat_ = np.array([
#         [0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#         [0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0],
#         [0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0],
#         [0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0],
#         [0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0],
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0],
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2],
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5],
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]
#       ])

    # =================================================================
    # theo
#     hmm = hmmlearn.hmm.MultinomialHMM(
#       n_components=9, init_params='e', params='ste', verbose=True
#     )
#     hmm.startprob_ = np.array([0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
#     hmm.transmat_ = np.array([
#         [0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#         [0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0],
#         [0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0],
#         [0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0],
#         [0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0],
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0],
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2],
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5],
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]
#       ])

    # =================================================================
    # benh_nhan
#     hmm = hmmlearn.hmm.MultinomialHMM(
#         n_components=18, init_params='e', params='ste', verbose=True
#     )
    
#     hmm.startprob_ = np.array([0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
#     hmm.transmat_ = np.array([
#         [0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#         [0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#         [0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#         [0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#         [0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0],    
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0],    
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0, 0.0],    
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0, 0.0],
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2, 0.0],
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.2],
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5],
#         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]   
#     ])

    if cname[:4] != 'test':
      X = np.concatenate(dataset[cname])
      lengths = list([len(x) for x in dataset[cname]])
      print("training class", cname)
      print(X.shape, lengths, len(lengths))
      hmm.fit(X, lengths=lengths)
      models[cname] = hmm
  print("Training done")

  print("Testing and Labeling")

  for true_cname in class_names:
    print("==================================")
    print(true_cname)
    print("==================================")
    for O in dataset[true_cname]:
      scores = {}
      for cname, model in models.items():
        if cname[:4] != 'test':
            score = model.score(O, [len(O)])
            scores[cname] = score
#       print(scores)
      srt = sorted(scores.items(), key=lambda x: x[1], reverse=True)
      print(srt[0])

  

Load dich dataset


  b = a[a_slice]


Load toi dataset
Load benh_nhan dataset
Load nguoi dataset
Load theo dataset
Load test_toi dataset
vectors (18454, 36)
centers (20, 36)
centers (20, 36)
training class dich
(2963, 1) [19, 27, 32, 29, 24, 12, 48, 24, 18, 19, 35, 34, 45, 28, 36, 32, 23, 21, 43, 28, 18, 21, 28, 26, 27, 24, 33, 32, 25, 20, 42, 40, 28, 33, 21, 40, 17, 33, 22, 46, 20, 18, 24, 56, 36, 42, 31, 44, 25, 19, 27, 19, 22, 41, 24, 26, 35, 23, 22, 15, 19, 43, 39, 40, 42, 23, 23, 20, 19, 29, 44, 26, 37, 31, 47, 29, 19, 33, 33, 16, 26, 32, 13, 25, 39, 16, 22, 19, 39, 22, 34, 24, 30, 28, 21, 27, 24, 20, 15, 19, 18, 27, 21, 24, 44] 105


         1       -8667.9717             +nan
         2       -6307.0284       +2360.9433
         3       -5601.8486        +705.1799
         4       -5230.5576        +371.2910
         5       -5048.8669        +181.6907
         6       -4961.1742         +87.6927
         7       -4927.6567         +33.5175
         8       -4907.2707         +20.3860
         9       -4890.2528         +17.0179
        10       -4873.0472         +17.2057


training class toi
(3035, 1) [14, 39, 35, 33, 45, 38, 41, 18, 29, 32, 23, 20, 33, 40, 28, 25, 32, 28, 31, 21, 39, 30, 32, 53, 26, 21, 39, 19, 20, 23, 20, 12, 38, 20, 26, 28, 35, 28, 69, 27, 32, 17, 28, 31, 53, 55, 32, 51, 62, 21, 23, 48, 21, 19, 24, 23, 26, 15, 49, 20, 26, 52, 47, 37, 17, 42, 24, 37, 24, 25, 36, 16, 27, 43, 14, 19, 32, 11, 17, 38, 23, 15, 52, 18, 26, 20, 24, 31, 23, 36, 15, 51, 32, 31, 22, 22, 12, 17, 18, 27, 46, 30] 102


         1       -9048.8818             +nan
         2       -7309.9667       +1738.9151
         3       -6397.9353        +912.0314
         4       -5996.7241        +401.2112
         5       -5804.6431        +192.0810
         6       -5689.3221        +115.3210
         7       -5609.2204         +80.1017
         8       -5516.8490         +92.3714
         9       -5414.6580        +102.1910
        10       -5330.2370         +84.4210


training class benh_nhan
(4805, 1) [71, 33, 30, 44, 56, 39, 38, 48, 36, 44, 49, 43, 51, 58, 57, 49, 35, 44, 42, 63, 37, 74, 55, 41, 48, 45, 59, 46, 39, 67, 37, 52, 34, 39, 33, 87, 34, 36, 70, 64, 45, 43, 41, 54, 35, 52, 54, 69, 39, 29, 40, 42, 46, 46, 39, 48, 57, 75, 39, 52, 50, 55, 27, 72, 51, 49, 41, 47, 48, 39, 32, 48, 46, 25, 42, 56, 53, 28, 57, 34, 37, 43, 51, 58, 42, 54, 38, 85, 55, 42, 67, 37, 44, 42, 48, 59, 56, 42, 47, 26, 60] 101


         1      -15373.7292             +nan
         2      -11564.0990       +3809.6302
         3      -10714.6957        +849.4033
         4      -10239.2097        +475.4860
         5       -9994.3132        +244.8966
         6       -9862.3209        +131.9923
         7       -9803.6141         +58.7067
         8       -9739.3046         +64.3095
         9       -9678.9965         +60.3081
        10       -9661.1594         +17.8371
         1       -8085.5248             +nan
         2       -5561.8930       +2523.6318


training class nguoi
(2729, 1) [43, 32, 22, 13, 20, 34, 25, 44, 29, 47, 27, 19, 18, 20, 33, 31, 25, 20, 25, 17, 21, 20, 22, 28, 36, 35, 26, 14, 33, 22, 20, 18, 28, 33, 18, 15, 38, 22, 27, 20, 17, 29, 22, 20, 22, 14, 28, 24, 22, 22, 22, 20, 41, 50, 48, 38, 23, 16, 36, 30, 23, 24, 23, 16, 19, 24, 35, 28, 15, 37, 23, 20, 19, 51, 55, 20, 20, 18, 25, 19, 32, 22, 19, 22, 44, 53, 19, 30, 23, 36, 22, 22, 21, 18, 22, 26, 21, 30, 15, 25, 19, 20, 30, 40] 104


         3       -4979.6575        +582.2355
         4       -4627.4094        +352.2481
         5       -4356.7066        +270.7028
         6       -4275.1036         +81.6029
         7       -4248.5092         +26.5945
         8       -4226.7723         +21.7369
         9       -4195.4895         +31.2828
        10       -4181.3741         +14.1154


training class theo
(2959, 1) [51, 17, 22, 37, 57, 34, 39, 79, 11, 23, 11, 21, 29, 27, 21, 30, 40, 25, 48, 19, 29, 33, 40, 34, 27, 15, 37, 33, 39, 42, 56, 25, 26, 18, 13, 80, 63, 17, 38, 33, 26, 25, 15, 64, 32, 39, 14, 36, 15, 46, 22, 73, 21, 23, 63, 46, 32, 30, 29, 25, 29, 31, 27, 33, 22, 42, 31, 14, 28, 21, 28, 21, 30, 53, 28, 24, 15, 98, 24, 50, 14, 31, 39, 34, 39, 32, 49, 57] 88


         1       -8915.7606             +nan
         2       -7030.2281       +1885.5325
         3       -5874.3444       +1155.8837
         4       -5552.8236        +321.5208
         5       -5477.5787         +75.2449
         6       -5434.9760         +42.6027
         7       -5392.5397         +42.4363
         8       -5356.1419         +36.3978
         9       -5318.8036         +37.3383
        10       -5263.9498         +54.8538


Training done
Testing and Labeling
dich
('dich', -37.153073397616176)
('dich', -41.081385869422164)
('dich', -53.22378389712849)
('dich', -43.71379827001732)
('dich', -39.34400283827685)
('dich', -19.68232168317255)
('dich', -74.27183164727704)
('dich', -42.40460487916775)
('dich', -35.97224198546943)
('benh_nhan', -49.71952349209906)
('dich', -55.32630030224254)
('dich', -49.83428235957208)
('dich', -60.31376141094624)
('dich', -41.192642173985014)
('dich', -49.145317354421515)
('dich', -42.64800681606836)
('dich', -30.22189452258994)
('dich', -32.321864383296514)
('dich', -63.509490390696634)
('dich', -47.530701859644466)
('dich', -26.85470697773236)
('dich', -29.37771491740995)
('dich', -41.96436521234285)
('dich', -37.36110559082894)
('dich', -36.00837403296849)
('dich', -36.037873853313016)
('dich', -46.897020612822054)
('dich', -41.86721820660246)
('dich', -45.50090680060496)
('dich', -41.20052224778914)
('dich', -55.16993613059968)
('dich', -65.17097582365905)
('dich', -41.36282