In [45]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [46]:
import zipfile
zip_ref = zipfile.ZipFile("./drive/My Drive/hmm-speech-recognition-0.1.zip", 'r')
zip_ref.extractall()
zip_ref.close()

In [47]:
!pip install hmmlearn
!pip install features



In [48]:
!pip install librosa



In [49]:
import os
import argparse

import numpy as np
from scipy.io import wavfile
from hmmlearn import hmm
import librosa


In [50]:
from librosa.feature import mfcc

In [51]:
class HMMTrainer(object):
    def __init__(self, model_name='GaussianHMM', n_components=4, cov_type='diag', n_iter=1000):
        self.model_name = model_name
        self.n_components = n_components
        self.cov_type = cov_type
        self.n_iter = n_iter
        self.models = []

        if self.model_name == 'GaussianHMM':
            self.model = hmm.GaussianHMM(n_components=self.n_components,
                    covariance_type=self.cov_type, n_iter=self.n_iter)
        else:
            raise TypeError('Invalid model type')

    # X is a 2D numpy array where each row is 13D
    def train(self, X):
        np.seterr(all='ignore')
        self.models.append(self.model.fit(X))

    # Run the model on input data
    def get_score(self, input_data):
        return self.model.score(input_data)

In [52]:
input_folder='./hmm-speech-recognition-0.1/audio'

In [53]:
 os.listdir(input_folder)

['banana', 'apple', 'orange', 'peach', 'pineapple', 'kiwi', 'lime']

In [54]:
for dirname in os.listdir(input_folder):
        # Get the name of the subfolder
      subfolder = os.path.join(input_folder, dirname)
      #print(subfolder)
      label = subfolder[subfolder.rfind('/') + 1:]
      print(label)

banana
apple
orange
peach
pineapple
kiwi
lime


In [55]:
hmm_models = []
for dirname in os.listdir(input_folder):
    subfolder = os.path.join(input_folder, dirname)
    if not os.path.isdir(subfolder):
         continue
    label = subfolder[subfolder.rfind('/') + 1:]
    X = np.array([])
    y_words = []
    for filename in [x for x in os.listdir(subfolder) if x.endswith('.wav')][:-1]:
            filepath = os.path.join(subfolder, filename)
            sampling_freq, audio = librosa.load(filepath)
            mfcc_features = librosa.feature.mfcc(y=sampling_freq, sr=audio)
            if len(X) == 0:
                X = mfcc_features[:,:15]
            else:
                X = np.append(X, mfcc_features[:,:15], axis=0)
            y_words.append(label)
    print('X.shape =', X.shape)
    hmm_trainer = HMMTrainer()
    hmm_trainer.train(X)
    hmm_models.append((hmm_trainer, label))
    hmm_trainer = None


X.shape = (280, 15)
X.shape = (280, 15)
X.shape = (280, 15)
X.shape = (280, 15)
X.shape = (280, 15)
X.shape = (280, 15)
X.shape = (280, 15)


In [56]:
# Test files
input_files = [
            './hmm-speech-recognition-0.1/audio/pineapple/pineapple11.wav',
            './hmm-speech-recognition-0.1/audio/orange/orange01.wav',
            './hmm-speech-recognition-0.1/audio/apple/apple13.wav',
            './hmm-speech-recognition-0.1/audio/kiwi/kiwi11.wav'
            ]


In [57]:
for input_file in input_files:
      sampling_freq, audio = librosa.load(input_file)

        # Extract MFCC features
      mfcc_features = librosa.feature.mfcc(y=sampling_freq, sr=audio)
      mfcc_features=mfcc_features[:,:15]

      scores=[]
      for item in hmm_models:
          hmm_model, label = item

          score = hmm_model.get_score(mfcc_features)
          scores.append(score)
      index=np.array(scores).argmax()
      print("\nTrue:", input_file[input_file.find('/')+1:input_file.rfind('/')])
      print("Predicted:", hmm_models[index][1])




True: hmm-speech-recognition-0.1/audio/pineapple
Predicted: pineapple

True: hmm-speech-recognition-0.1/audio/orange
Predicted: orange

True: hmm-speech-recognition-0.1/audio/apple
Predicted: apple

True: hmm-speech-recognition-0.1/audio/kiwi
Predicted: kiwi
