# Experimento 1: HMM con un solo locutor

In [30]:
from os import listdir
from os.path import isdir, join
import librosa
import random
import numpy as np
import matplotlib.pyplot as plt
import python_speech_features
from scipy.io import wavfile


In [31]:
from hmmlearn import hmm

# Class to handle HMM processing (source: Python-Machine-Learning-Cookbook)
class HMMTrainer(object):
    """Wrapper that builds, fits and scores a single hmmlearn Gaussian HMM.

    Parameters
    ----------
    model_name : str
        Model family to build. Only ``'GaussianHMM'`` is supported.
    n_components : int
        Number of hidden states, forwarded to ``hmm.GaussianHMM``.
    cov_type : str
        Forwarded to ``hmm.GaussianHMM`` as ``covariance_type``.
    n_iter : int
        Forwarded to ``hmm.GaussianHMM`` as ``n_iter``.
    random_state : optional
        Forwarded to ``hmm.GaussianHMM`` as ``random_state`` so a run can be
        made repeatable. The default ``None`` keeps the previous behaviour.

    Raises
    ------
    TypeError
        If ``model_name`` is anything other than ``'GaussianHMM'``.
    """

    def __init__(self, model_name='GaussianHMM', n_components=4, cov_type='diag', n_iter=100,
                 random_state=None):
        self.model_name = model_name
        self.n_components = n_components
        self.cov_type = cov_type
        self.n_iter = n_iter
        self.random_state = random_state
        # Every object returned by fit() is recorded here (see train()).
        self.models = []

        if self.model_name == 'GaussianHMM':
            self.model = hmm.GaussianHMM(n_components=self.n_components,
                    covariance_type=self.cov_type, n_iter=self.n_iter,
                    random_state=self.random_state)
        else:
            raise TypeError('Invalid model type')

    def train(self, X, lengths):
        """Fit the wrapped model and record the fitted object in ``self.models``.

        Parameters
        ----------
        X : 2D numpy array
            Feature matrix of the stacked samples, one row per observation and
            one column per feature.
        lengths : list of int
            Number of rows of ``X`` belonging to each individual sequence;
            passed straight to ``fit``.
        """
        # Silence floating-point warnings only while fitting. The previous
        # np.seterr(all='ignore') call changed NumPy's error handling for the
        # whole kernel and never restored it.
        with np.errstate(all='ignore'):
            self.models.append(self.model.fit(X, lengths))

    def get_score(self, input_data, lengths):
        """Return ``self.model.score(input_data, lengths)`` for the given data."""
        # Scoring used to run under the global 'ignore' setting left behind by
        # train(); keep it equally quiet, but scoped to this call.
        with np.errstate(all='ignore'):
            return self.model.score(input_data, lengths)

In [32]:
# Load features
# Open the pre-computed feature archive; later cells read arrays from it by key.
feature_sets_file = 'mfcc_sets_BD1.npz'
feature_sets = np.load(feature_sets_file)
# Bare last expression: the notebook displays the names of the stored arrays.
feature_sets.files

['x', 'y', 'words']

In [33]:
# Vocabulary stored under the 'words' key; later cells index it with the
# integer class label to recover the spoken word.
words = feature_sets['words']
print(words)

['apple' 'banana' 'kiwi' 'lime' 'orange' 'peach' 'pineapple']


In [34]:
# Pull the feature tensor ('x') and the matching class labels ('y') out of the
# archive in one go, then dump both for a quick visual check.
x, y = (feature_sets[key] for key in ('x', 'y'))
print(x)
print(y)

[[[ 1.60494287e+01  1.77168291e+01  2.00614752e+01 ...  1.65701323e+01
    1.50300061e+01  1.50654579e+01]
  [-2.65705047e+01 -2.39843626e+01 -3.33263386e+01 ...  9.04678799e-01
   -5.10792941e+00 -9.06651798e+00]
  [-3.15408914e+01 -3.98528340e+01 -4.17731441e+01 ... -2.80965052e+01
   -2.27616027e+01 -1.79807146e+01]
  ...
  [-4.26363748e+00  2.06917916e+00  1.34309924e+00 ...  2.09495754e-02
    9.76663975e+00  1.04019958e+00]
  [-2.85982016e+00  3.20241210e-01 -1.67534570e+00 ... -2.54961293e+00
   -1.61451492e+00 -1.11973593e+00]
  [ 2.65413344e+00 -8.50487760e-01 -4.38296565e+00 ... -4.04542126e+00
   -1.65963718e+00 -1.40887032e-01]]

 [[ 1.28163430e+01  1.35013287e+01  1.67616013e+01 ...  1.66162332e+01
    1.56226491e+01  1.64999523e+01]
  [-9.63858126e+00 -1.98680115e+01 -2.92952031e+01 ...  3.39053169e+00
   -2.05213563e+00  5.88430041e+00]
  [-1.38066260e+01 -2.71507641e+01 -4.88929503e+01 ... -2.40848664e+01
   -2.07413033e+01 -2.57768080e+01]
  ...
  [-5.81450548e+00  9.3

In [35]:
# Split the MFCC coefficients into random train and test subsets
# (20% held out, fixed seed so the partition is repeatable).
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Report the size of each partition, then the labels that ended up in it.
print("Train files : ", len(x_train))
print("Test files : ", len(x_test))
print("y_train", y_train, sep="\n")
print("y_test", y_test, sep="\n")

Train files :  84
Test files :  21
y_train
[3. 1. 5. 2. 4. 1. 2. 2. 0. 5. 0. 6. 5. 1. 5. 0. 4. 3. 2. 2. 1. 4. 2. 0.
 2. 4. 4. 1. 1. 5. 1. 4. 3. 0. 1. 0. 1. 2. 0. 5. 0. 4. 2. 6. 5. 3. 3. 4.
 3. 6. 4. 6. 6. 2. 3. 3. 6. 3. 5. 2. 6. 3. 4. 5. 2. 1. 6. 0. 3. 1. 0. 1.
 5. 6. 4. 5. 5. 1. 4. 4. 0. 6. 3. 6.]
y_test
[2. 4. 4. 3. 3. 6. 6. 3. 0. 0. 1. 2. 5. 6. 5. 0. 5. 2. 0. 1. 6.]


## HMM

#### Train HMM models

In [36]:
# Group the training features by word label.
#   X[label]       -> every training sample of that word, transposed and
#                     stacked along axis 0
#   lengths[label] -> number of training samples seen for that word
# Chunks are collected in plain lists and concatenated once per label;
# calling np.append inside the loop recopies the accumulated array each time.
frames_by_label = [[] for _ in range(len(words))]

# Iterate through the training samples together with their labels
for mfcc_features, raw_label in zip(x_train, y_train):

    # Labels are stored as floats; cast so they can be used as list indices
    label = int(raw_label)

    # Transpose each sample before stacking (same orientation as before)
    frames_by_label[label].append(mfcc_features.T)

lengths = [len(chunks) for chunks in frames_by_label]

# A label with no training samples keeps the original empty-array placeholder
X = [np.concatenate(chunks, axis=0) if chunks else np.array([])
     for chunks in frames_by_label]

In [37]:
# Spot-check the stacked feature matrices of three of the words
# (labels 0, 1 and 6), one shape per line.
print(*(X[k].shape for k in (0, 1, 6)), sep="\n")

(363, 20)
(429, 20)
(363, 20)


In [38]:
# Number of training samples accumulated per label, in label order
print(lengths)

[11, 13, 12, 12, 13, 12, 11]


In [39]:
hmm_models = []  # one trained model per label (word)

for label in range(len(X)):
    word = words[label]
    n_samples = lengths[label]
    if n_samples == 0:
        raise ValueError('no training samples for word: %s' % word)

    # Derive the number of rows each sample contributes from the data itself
    # instead of hard-coding it (it was fixed at 33). All samples of a word are
    # expected to contribute the same number of rows to X[label].
    frames_per_sample, leftover = divmod(X[label].shape[0], n_samples)
    if leftover:
        raise ValueError('samples of word %s do not all have the same length' % word)
    lengths_in = [frames_per_sample] * n_samples
    print('training word:', word, X[label].shape, lengths[label])

    # Train and save HMM model; the label and word are kept next to the
    # trainer so predictions can be mapped back to a class later.
    hmm_trainer = HMMTrainer()
    hmm_trainer.train(X[label], lengths_in)
    hmm_models.append((hmm_trainer, label, word))

training word: apple (363, 20) 11
training word: banana (429, 20) 13
training word: kiwi (396, 20) 12
training word: lime (396, 20) 12
training word: orange (429, 20) 13
training word: peach (396, 20) 12
training word: pineapple (363, 20) 11


#### Test

In [40]:
# Classify every test sample with the model that gives it the highest score.
predicted_labels = []

# Iterate through the test samples
for mfcc_features in x_test:

    # Transpose the sample the same way the training data was transposed
    observation = mfcc_features.T

    # The sample is scored as one sequence whose length is its own row count
    # (previously hard-coded as 33)
    lengths_in = [observation.shape[0]]

    # Best candidate so far; nothing is carried over from earlier cells
    max_score = float('-inf')
    output_label = None

    # Iterate through all HMM models and pick
    # the one with the highest score
    for hmm_model, label, _word in hmm_models:
        score = hmm_model.get_score(observation, lengths_in)
        if score > max_score:
            max_score = score
            output_label = label

    predicted_labels.append(output_label)

# Build the array once, as floats like before. If no model returned a usable
# score for a sample, its entry is NaN.
y_pred = np.array(predicted_labels, dtype=float)

In [41]:
# Sanity check: there should be exactly one prediction per test label
print(len(y_pred), len(y_test))

21 21


In [42]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Pin the full label set so the confusion matrix always has one row/column per
# word and the report rows always line up with `words`, even if the small test
# split happens to miss a class. The labels are floats to match y_test / y_pred.
label_ids = np.arange(len(words), dtype=float)

results = confusion_matrix(y_test, y_pred, labels=label_ids)
print('Confusion Matrix :')
print(results)
# accuracy_score returns a fraction; it is shown here as a percentage
print('Accuracy Score :', accuracy_score(y_test, y_pred)*100)
print('Report : ')
print(classification_report(y_test, y_pred, labels=label_ids, target_names=words))

Confusion Matrix :
[[4 0 0 0 0 0 0]
 [0 2 0 0 0 0 0]
 [0 0 3 0 0 0 0]
 [0 0 0 3 0 0 0]
 [0 0 0 0 2 0 0]
 [0 0 0 0 0 3 0]
 [0 0 0 0 0 0 4]]
Accuracy Score : 100.0
Report : 
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00         4
      banana       1.00      1.00      1.00         2
        kiwi       1.00      1.00      1.00         3
        lime       1.00      1.00      1.00         3
      orange       1.00      1.00      1.00         2
       peach       1.00      1.00      1.00         3
   pineapple       1.00      1.00      1.00         4

    accuracy                           1.00        21
   macro avg       1.00      1.00      1.00        21
weighted avg       1.00      1.00      1.00        21

