# 1. Install the necessary libraries

In [1]:
# Installation (only for Google Colab)
!pip install python_speech_features==0.6 hmmlearn==0.2.1
!pip install noisereduce
!pip install pyrubberband

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting python_speech_features==0.6
  Downloading python_speech_features-0.6.tar.gz (5.6 kB)
Collecting hmmlearn==0.2.1
  Downloading hmmlearn-0.2.1.tar.gz (150 kB)
[K     |████████████████████████████████| 150 kB 3.8 MB/s 
Building wheels for collected packages: python-speech-features, hmmlearn
  Building wheel for python-speech-features (setup.py) ... [?25l[?25hdone
  Created wheel for python-speech-features: filename=python_speech_features-0.6-py3-none-any.whl size=5888 sha256=eed8f1146eb7596f5fe286b39a5fbe7c0d1be5816ac27513c87ddc109cdf690f
  Stored in directory: /root/.cache/pip/wheels/b0/0e/94/28cd6afa3cd5998a63eef99fe31777acd7d758f59cf24839eb
  Building wheel for hmmlearn (setup.py) ... [?25l[?25hdone
  Created wheel for hmmlearn: filename=hmmlearn-0.2.1-cp37-cp37m-linux_x86_64.whl size=368851 sha256=3c37ccab6d8a9702cf242dedf766ea239067448a84d2cddde9304a0b8fecd5ee
  Stored i

In [2]:
!pip install numpy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
!apt-get install rubberband-cli

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following package was automatically installed and is no longer required:
  libnvidia-common-460
Use 'apt autoremove' to remove it.
The following NEW packages will be installed:
  rubberband-cli
0 upgraded, 1 newly installed, 0 to remove and 4 not upgraded.
Need to get 81.5 kB of archives.
After this operation, 237 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 rubberband-cli amd64 1.8.1-7ubuntu2 [81.5 kB]
Fetched 81.5 kB in 1s (58.1 kB/s)
Selecting previously unselected package rubberband-cli.
(Reading database ... 123942 files and directories currently installed.)
Preparing to unpack .../rubberband-cli_1.8.1-7ubuntu2_amd64.deb ...
Unpacking rubberband-cli (1.8.1-7ubuntu2) ...
Setting up rubberband-cli (1.8.1-7ubuntu2) ...
Processing triggers for man-db (2.8.3-2ubuntu0.1) ...


In [4]:
!pip install librosa
!pip install pydub

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


# 2. Mount Google Drive and import the necessary libraries

In [5]:
from google.colab import drive
drive.mount('/content/drive')

# Change working directory to be workshop folder
import os
os.chdir('/content/drive/My Drive/MTech in IS/5001Project/new_dataset')
!ls


Mounted at /content/drive
test  train


In [6]:
# Import packages
import numpy as np
import noisereduce as nr
from scipy.io import wavfile 
from pydub import AudioSegment
from pydub import effects

# For HMM model and audio feature extraction
from hmmlearn import hmm
from python_speech_features import mfcc

import matplotlib.pyplot as plt
%matplotlib inline


# 3. Define the HMM trainer

In [7]:
# Class to handle all HMM related processing
class HMMTrainer(object):
    """Wrapper around a single ``hmm.GaussianHMM`` instance.

    The constructor only builds the (unfitted) model; training data is
    supplied afterwards through :meth:`train`.

    Parameters
    ----------
    model_name : str
        Stored on the instance for reference only. A ``GaussianHMM`` is
        always built, whatever value is passed here.
    n_components : int
        Forwarded to ``GaussianHMM`` as ``n_components``.
    cov_type : str
        Forwarded to ``GaussianHMM`` as ``covariance_type``.
    n_iter : int
        Forwarded to ``GaussianHMM`` as ``n_iter``.
    """

    def __init__(self, model_name='GaussianHMM', n_components=4, cov_type='diag', n_iter=1000):
        self.model_name = model_name
        self.n_components = n_components
        self.cov_type = cov_type
        self.n_iter = n_iter
        # Collects the return value of every fit() call made through train()
        self.models = []
        # The model is intentionally NOT fitted here: there is no training
        # data in scope at construction time (fitting belongs in train()).
        self.model = hmm.GaussianHMM(n_components=self.n_components, covariance_type=self.cov_type, n_iter=self.n_iter)

    # X is a 2D numpy array where each row is 13D
    def train(self, X, lengths=None):
        """Fit the wrapped model on ``X`` and record the fitted result.

        Parameters
        ----------
        X : 2D array, one feature vector per row.
        lengths : optional sequence of ints
            When given, it is forwarded to ``fit`` so that ``X`` is treated
            as several concatenated sequences instead of one long sequence.
            When omitted, ``fit(X)`` is called exactly as before.
        """
        # Silence NumPy floating-point warnings. Note that np.seterr changes
        # NumPy's error handling beyond this call, not just for the fit below.
        np.seterr(all='ignore')
        if lengths is None:
            fitted = self.model.fit(X)
        else:
            fitted = self.model.fit(X, lengths)
        self.models.append(fitted)

    # Run the model on input data
    def get_score(self, input_data):
        """Return ``self.model.score(input_data)`` for the given features."""
        return self.model.score(input_data)

    def display_info(self):
        """Print the model's ``transmat_`` and ``n_features`` attributes."""
        # NOTE(review): these attributes are presumably only available once
        # the model has been fitted - confirm before calling on a new trainer.
        print("transmat_", self.model.transmat_)
        print("n_features", self.model.n_features)

    def predict_state(self, input_data):
        """Return ``self.model.predict(input_data)`` for the given features."""
        return self.model.predict(input_data)


# 4. Train the models

In [15]:
# Build one HMM per class: every subfolder of input_folder is one class,
# and the subfolder name is used as the class label.
input_folder = "train"

# The sample rate read from each WAV file is divided by this factor before
# MFCC extraction. The recognition cells in this notebook use the same 2.5.
SAMPLE_RATE_DIVISOR = 2.5

hmm_models = []
# Parse the input directory
for dirname in os.listdir(input_folder):
    subfolder = os.path.join(input_folder, dirname)

    # Ignore stray files; only directories hold class recordings
    if not os.path.isdir(subfolder):
        continue

    # The class label is the folder name
    label = dirname

    # MFCC matrices of this class, one entry per audio file
    class_features = []

    # Iterate through every .wav file of the class
    for filename in os.listdir(subfolder):
        if not filename.endswith('.wav'):
            continue
        # Read the input file
        filepath = os.path.join(subfolder, filename)
        print("Process the file: %s" % filepath)
        sampling_freq, audio = wavfile.read(filepath)

        sampling_freq = sampling_freq / SAMPLE_RATE_DIVISOR
        if audio.ndim > 1:
            # Multi-channel recording: average the channels into one signal
            audio = np.mean(audio, axis=1)
        else:
            # Mono recording: already one signal, just use the same dtype
            audio = audio.astype(np.float64)

        # Extract MFCC features
        class_features.append(mfcc(audio, sampling_freq))

    # A class without any recording cannot be trained
    if not class_features:
        print("Skip class %s: no .wav files found" % label)
        continue

    # Stack the frames of all files row-wise (one concatenate instead of
    # repeatedly growing the array inside the loop).
    # NOTE(review): the frames of all files are fitted as one long sequence,
    # because only X is handed to train().
    X = np.concatenate(class_features, axis=0)

    # Train and save HMM model
    hmm_trainer = HMMTrainer(n_components=4)
    hmm_trainer.train(X)
    hmm_models.append((hmm_trainer, label))


Process the file: train/metal/can10 (4).wav
Process the file: train/metal/can10 (1).wav
Process the file: train/metal/can10 (5).wav
Process the file: train/metal/can10 (3).wav
Process the file: train/metal/can10 (2).wav
Process the file: train/metal/can20 (5).wav
Process the file: train/metal/can20 (4).wav
Process the file: train/metal/can20 (6).wav
Process the file: train/metal/can20 (7).wav
Process the file: train/metal/can20 (3).wav
Process the file: train/metal/can20 (1).wav
Process the file: train/metal/can20 (2).wav
Process the file: train/carton/box30 (37).wav
Process the file: train/carton/box30 (36).wav
Process the file: train/carton/box30 (35).wav
Process the file: train/carton/box30 (34).wav
Process the file: train/carton/box30 (38).wav
Process the file: train/carton/box30 (33).wav
Process the file: train/carton/box30 (31).wav
Process the file: train/carton/box30 (32).wav
Process the file: train/carton/box30 (30).wav
Process the file: train/carton/box10 (3).wav
Process the f

In [16]:
# Single-file recognition with the models trained above
# 1: Load the test recording
test_file_name = '/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/metal/can20 (10).wav'
sampling_freq, audio = wavfile.read(test_file_name)

# Scale the sample rate, average the channels, then denoise.
# NOTE(review): the training cell does not call nr.reduce_noise, so this
# sample is preprocessed differently from the training data - confirm intent.
sampling_freq /= 2.5
audio = np.mean(audio, axis=1)
audio = nr.reduce_noise(y=audio, sr=sampling_freq, stationary=True)

# 2: Extract MFCC features
mfcc_features = mfcc(audio, sampling_freq)

# 3: Score the features against every model. max() keeps the first entry
#    that reaches the top score, exactly like a strict ">" scan would.
scored_labels = [
    (trainer.get_score(mfcc_features), class_name)
    for trainer, class_name in hmm_models
]
max_score, output_label = max(
    scored_labels, key=lambda pair: pair[0], default=(None, None)
)
print('File:%s,Pred:%s,Score:%.4f'%(test_file_name,output_label,max_score))


File:/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/metal/can20 (10).wav,Pred:metal,Score:-17519.3202


# 5. Export and save the models

In [9]:
import pickle

In [17]:
# Pickle every trainer to model_2_<n>.pkl, numbered from 1 in list order.
# The label is only printed, it is not stored in the file.
for model_number, (hmm_model, label) in enumerate(hmm_models, start=1):
    print(label)
    modelname = 'model_2_' + str(model_number) + ".pkl"
    with open(modelname, "wb") as f:
        pickle.dump(hmm_model, f)
    print("saved %s" %modelname)

metal
saved model_2_1.pkl
carton
saved model_2_2.pkl
plastic
saved model_2_3.pkl


In [19]:
# Pickle file -> class label. The labels are hard-coded, so they must match
# the order in which the models were saved (see the "saved ..." output above).
MODEL_FILES = [
    ('model_2_1.pkl', 'metal'),
    ('model_2_2.pkl', 'carton'),
    ('model_2_3.pkl', 'plastic'),
]

# SECURITY: pickle.load can execute arbitrary code. Only load files that this
# notebook wrote itself; never load a .pkl from an untrusted source.
reloaded_models = []
for model_path, model_label in MODEL_FILES:
    # "with" closes the file handle once the model is loaded
    with open(model_path, 'rb') as fr:
        reloaded_models.append((pickle.load(fr), model_label))

# Individual names kept for any cell that refers to them directly
model_1, model_2, model_3 = [loaded for loaded, _ in reloaded_models]

# 6. Test and get scores

In [20]:
# Single-file recognition with the models reloaded from disk
# 1: Load the test recording
test_file_name = '/content/drive/My Drive/MTech in IS/5001Project/dataset/test/metal/lankycan07.wav'
sampling_freq, audio = wavfile.read(test_file_name)

# Scale the sample rate and average the channels into one signal
sampling_freq /= 2.5
audio = np.mean(audio, axis=1)

# 2: Extract MFCC features
mfcc_features = mfcc(audio, sampling_freq)

# 3: Score the features against every reloaded model. max() keeps the first
#    entry that reaches the top score, exactly like a strict ">" scan would.
scored_labels = [
    (trainer.model.score(mfcc_features), class_name)
    for trainer, class_name in reloaded_models
]
max_score, output_label = max(
    scored_labels, key=lambda pair: pair[0], default=(None, None)
)
print('File:%s,Pred:%s,Score:%.4f'%(test_file_name,output_label,max_score))

File:/content/drive/My Drive/MTech in IS/5001Project/dataset/test/metal/lankycan07.wav,Pred:metal,Score:-11800.5472


In [21]:
from glob import glob

In [22]:
# Classify every recording of the "metal" test folder with the reloaded models
voice_path = '/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/metal'
# The pattern has no dot, so it matches any file name ending in "wav"
files = glob(voice_path+"/*wav")

for wav_path in files:
    sampling_freq, audio = wavfile.read(wav_path)

    # Scale the sample rate and average the channels into one signal
    sampling_freq /= 2.5
    audio = np.mean(audio, axis=1)
    mfcc_features = mfcc(audio, sampling_freq)

    # Score against every model; max() keeps the first entry with the top
    # score, which matches a strict ">" comparison scan.
    candidates = [
        (trainer.model.score(mfcc_features), class_name)
        for trainer, class_name in reloaded_models
    ]
    max_score, output_label = max(
        candidates, key=lambda pair: pair[0], default=(None, None)
    )
    print('File:%s,Pred:%s,Score:%.4f'%(wav_path,output_label,max_score))

File:/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/metal/can10 (7).wav,Pred:metal,Score:-23543.8563
File:/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/metal/can10 (6).wav,Pred:metal,Score:-21836.1685
File:/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/metal/can20 (9).wav,Pred:metal,Score:-12113.1133
File:/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/metal/can20 (8).wav,Pred:metal,Score:-12169.5392
File:/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/metal/can20 (10).wav,Pred:metal,Score:-11521.1503


In [23]:
# Classify every recording of the "plastic" test folder with the reloaded models
voice_path = '/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/plastic'
# The pattern has no dot, so it matches any file name ending in "wav"
files = glob(voice_path+"/*wav")

for wav_path in files:
    sampling_freq, audio = wavfile.read(wav_path)

    # Scale the sample rate and average the channels into one signal
    sampling_freq /= 2.5
    audio = np.mean(audio, axis=1)
    mfcc_features = mfcc(audio, sampling_freq)

    # Score against every model; max() keeps the first entry with the top
    # score, which matches a strict ">" comparison scan.
    candidates = [
        (trainer.model.score(mfcc_features), class_name)
        for trainer, class_name in reloaded_models
    ]
    max_score, output_label = max(
        candidates, key=lambda pair: pair[0], default=(None, None)
    )
    print('File:%s,Pred:%s,Score:%.4f'%(wav_path,output_label,max_score))

File:/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/plastic/bottle100 (12).wav,Pred:plastic,Score:-11453.5069
File:/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/plastic/bottle100 (13).wav,Pred:plastic,Score:-11093.6004
File:/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/plastic/bottle100 (15).wav,Pred:plastic,Score:-14221.2998
File:/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/plastic/bottle100 (16).wav,Pred:plastic,Score:-14321.6866
File:/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/plastic/bottle100 (14).wav,Pred:plastic,Score:-11266.0491
File:/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/plastic/bottle10 (8).wav,Pred:plastic,Score:-18067.0363
File:/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/plastic/bottle10 (9).wav,Pred:plastic,Score:-17005.0078
File:/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/plastic/bottle10 (10).wav,Pred:plastic

In [24]:
# Classify every recording of the "carton" test folder with the reloaded models
voice_path = '/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/carton'
# The pattern has no dot, so it matches any file name ending in "wav"
files = glob(voice_path+"/*wav")

for wav_path in files:
    sampling_freq, audio = wavfile.read(wav_path)

    # Scale the sample rate and average the channels into one signal
    sampling_freq /= 2.5
    audio = np.mean(audio, axis=1)
    mfcc_features = mfcc(audio, sampling_freq)

    # Score against every model; max() keeps the first entry with the top
    # score, which matches a strict ">" comparison scan.
    candidates = [
        (trainer.model.score(mfcc_features), class_name)
        for trainer, class_name in reloaded_models
    ]
    max_score, output_label = max(
        candidates, key=lambda pair: pair[0], default=(None, None)
    )
    print('File:%s,Pred:%s,Score:%.4f'%(wav_path,output_label,max_score))

File:/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/carton/box30 (41).wav,Pred:carton,Score:-10838.8001
File:/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/carton/box30 (40).wav,Pred:carton,Score:-12525.5801
File:/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/carton/box30 (39).wav,Pred:carton,Score:-12370.4425
File:/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/carton/box10 (13).wav,Pred:carton,Score:-5629.4413
File:/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/carton/box10 (15).wav,Pred:carton,Score:-7694.5355
File:/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/carton/box10 (14).wav,Pred:carton,Score:-7737.0563
File:/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/carton/box10 (16).wav,Pred:carton,Score:-7941.4240
File:/content/drive/My Drive/MTech in IS/5001Project/new_dataset/test/carton/box20 (17).wav,Pred:carton,Score:-14015.7592
File:/content/drive/My Drive