<a href="https://colab.research.google.com/github/rbb-99/person-identification-with-footstep-audio/blob/main/IdentifyPersonFromHisFootsteps.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Introduction

This problem can be solved using a couple of approaches like clustering the features extracted from the audio wave (K-means, DBSCAN, GMM, Agglomerative clustering), using the classification approach that uses CNNs/RNNs, statistical methods, template matching, etc.
<br>We can also employ an ensemble of a variety of methods stated above.
<br>
<br>
Although, a problem with deep learning approach would be limited amount of data and using pretrained models won't give out the best results because none of them are characterized by footstep audios.
<br>
<br>
So here, we'll go for one of the unsupervised learning method in machine learning, an extension of KMeans Clustering, called Gaussian Mixture Models (GMM), a more robust model that can be used on real world data using Gaussian components.
<br>
With GMM, we effectively create a generative model for the data X, a probability model, which means we can do a lot of useful tasks like
- sampling new examples that we think are like the data that we measured
- comparing the collection data like the training and the test set to see if they differ
- imputing missing values from our data.

# Get data

In [1]:
!gdown --id '16DpLzP9TFFySH4r1X4E0dJkpDEII1aO6'

Downloading...
From: https://drive.google.com/uc?id=16DpLzP9TFFySH4r1X4E0dJkpDEII1aO6
To: /content/AB.7z
100% 87.4M/87.4M [00:00<00:00, 98.4MB/s]


In [3]:
!pip3 install py7zr

Collecting py7zr
  Downloading py7zr-0.20.5-py3-none-any.whl (66 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/66.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.4/66.4 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting texttable (from py7zr)
  Downloading texttable-1.6.7-py2.py3-none-any.whl (10 kB)
Collecting pycryptodomex>=3.6.6 (from py7zr)
  Downloading pycryptodomex-3.18.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m30.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pyzstd>=0.14.4 (from py7zr)
  Downloading pyzstd-0.15.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (412 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m412.3/412.3 kB[0m [31m31.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pyppmd<1.1.0,>=0.18.1 (from py7zr)
  Downloading pyppm

In [4]:
import py7zr
with py7zr.SevenZipFile('/content/AB.7z', mode='r') as archive:
    archive.extractall(path='/content/data')

In [None]:
!unzip '/content/data/A.zip' -d '/content/data/'

Archive:  /content/data/A.zip
   creating: /content/data/A/
  inflating: /content/data/A/STE-054_person A.wav  
  inflating: /content/data/A/STE-055_person A.wav  
  inflating: /content/data/A/STE-056_person A.wav  
  inflating: /content/data/A/STE-057_person A.wav  
  inflating: /content/data/A/STE-058_person A.wav  
  inflating: /content/data/A/STE-064_person A.wav  
  inflating: /content/data/A/STE-065_person A.wav  
  inflating: /content/data/A/STE-066_person A.wav  
  inflating: /content/data/A/STE-067_person A.wav  
  inflating: /content/data/A/STE-068_person A.wav  
  inflating: /content/data/A/STE-101_person A.wav  
  inflating: /content/data/A/STE-102_person A.wav  
  inflating: /content/data/A/STE-103_person A.wav  
  inflating: /content/data/A/STE-104_person A.wav  
  inflating: /content/data/A/STE-105_person A.wav  
  inflating: /content/data/A/STE-106_person A.wav  
  inflating: /content/data/A/STE-107_person A.wav  
  inflating: /content/data/A/STE-108_person A.wav  
  in

In [None]:
!unzip '/content/data/B.zip' -d '/content/data/'

Archive:  /content/data/B.zip
   creating: /content/data/B/
  inflating: /content/data/B/STE-059_person B.wav  
  inflating: /content/data/B/STE-060_person B.wav  
  inflating: /content/data/B/STE-061_person B.wav  
  inflating: /content/data/B/STE-062_person B.wav  
  inflating: /content/data/B/STE-063_person B.wav  
  inflating: /content/data/B/STE-080_person B.wav  
  inflating: /content/data/B/STE-081_person B.wav  
  inflating: /content/data/B/STE-082_person B.wav  
  inflating: /content/data/B/STE-083_person B.wav  
  inflating: /content/data/B/STE-084_person B.wav  
  inflating: /content/data/B/STE-085_person B.wav  
  inflating: /content/data/B/STE-086_person B.wav  
  inflating: /content/data/B/STE-087_person B.wav  
  inflating: /content/data/B/STE-088_person B.wav  
  inflating: /content/data/B/STE-089_person B.wav  
  inflating: /content/data/B/STE-090_person B.wav  
  inflating: /content/data/B/STE-091_person B.wav  
  inflating: /content/data/B/STE-092_person B.wav  
  in

In [None]:
!rm -r AB.7z data/A.zip data/B.zip

# Library imports

In [None]:
import os
import numpy as np
import pandas as pd
from scipy.io import wavfile as wav
from sklearn import mixture
import python_speech_features
from sklearn import preprocessing
import pickle

import warnings
warnings.filterwarnings("ignore")

# Restructure the folder

In [None]:
path_A = 'data/A'
path_B = 'data/B'

audio_files_A = [file for file in os.listdir(path_A)]
audio_files_B = [file for file in os.listdir(path_B)]

print(audio_files_A)
print(audio_files_B)

count_A = len(audio_files_A)
count_B = len(audio_files_B)

print("Number of audio files in folder A:", count_A)
print("Number of audio files in folder B:", count_B)

['STE-113_person A.wav', 'STE-066_person A.wav', 'STE-106_person A.wav', 'STE-056_person A.wav', 'STE-055_person A.wav', 'STE-054_person A.wav', 'STE-064_person A.wav', 'STE-114_person A.wav', 'STE-101_person A.wav', 'STE-107_person A.wav', 'STE-104_person A.wav', 'STE-067_person A.wav', 'STE-058_person A.wav', 'STE-108_person A.wav', 'STE-109_person A.wav', 'STE-102_person A.wav', 'STE-112_person A.wav', 'STE-105_person A.wav', 'STE-057_person A.wav', 'STE-103_person A.wav', 'STE-068_person A.wav', 'STE-065_person A.wav', 'STE-115_person A.wav', 'STE-110_person A.wav', 'STE-111_person A.wav']
['STE-060_person B.wav', 'STE-083_person B.wav', 'STE-090_person B.wav', 'STE-087_person B.wav', 'STE-085_person B.wav', 'STE-098_person B.wav', 'STE-095_person B.wav', 'STE-080_person B.wav', 'STE-099_person B.wav', 'STE-089_person B.wav', 'STE-084_person B.wav', 'STE-096_person B.wav', 'STE-061_person B.wav', 'STE-059_person B.wav', 'STE-082_person B.wav', 'STE-081_person B.wav', 'STE-088_perso

In [None]:
!mkdir test

In [None]:
# Let's take out any 2 files from B and 1 file from A for testing
!mv 'data/A/STE-107_person A.wav' 'test'

In [None]:
!mv 'data/B/STE-085_person B.wav' 'test'
!mv 'data/B/STE-097_person B.wav' 'test'

In [None]:
# A had 25 files and B had 26 files, now both have 24 files

In [None]:
!mkdir Models # save the clustering models here to later perform clustering ensemble

# Quick EDA

In [None]:
filename='data/A/STE-054_person A.wav'

In [None]:
wave_sample_rate, wave_audio = wav.read(filename)
print(wave_sample_rate) ## ~44kHz
print(wave_audio)

44100
[[-586 1593]
 [-536 1605]
 [-500 1581]
 ...
 [ -32  481]
 [ -45  476]
 [ -91  473]]


# Initialize constants

In [None]:
path_A='data/A'
path_B='data/B'

In [None]:
# Create the training paths array
train_paths_array=[]

def create_train_paths_array(folder_paths):
    for folder_path in folder_paths:
        for filename in os.listdir(folder_path):
          if filename.endswith(".wav"):
            file_path = os.path.join(folder_path, filename)
            train_paths_array.append(file_path)

create_train_paths_array([path_A,path_B])

print(train_paths_array)

['data/A/STE-113_person A.wav', 'data/A/STE-066_person A.wav', 'data/A/STE-106_person A.wav', 'data/A/STE-056_person A.wav', 'data/A/STE-055_person A.wav', 'data/A/STE-054_person A.wav', 'data/A/STE-064_person A.wav', 'data/A/STE-114_person A.wav', 'data/A/STE-101_person A.wav', 'data/A/STE-104_person A.wav', 'data/A/STE-067_person A.wav', 'data/A/STE-058_person A.wav', 'data/A/STE-108_person A.wav', 'data/A/STE-109_person A.wav', 'data/A/STE-102_person A.wav', 'data/A/STE-112_person A.wav', 'data/A/STE-105_person A.wav', 'data/A/STE-057_person A.wav', 'data/A/STE-103_person A.wav', 'data/A/STE-068_person A.wav', 'data/A/STE-065_person A.wav', 'data/A/STE-115_person A.wav', 'data/A/STE-110_person A.wav', 'data/A/STE-111_person A.wav', 'data/B/STE-060_person B.wav', 'data/B/STE-083_person B.wav', 'data/B/STE-090_person B.wav', 'data/B/STE-087_person B.wav', 'data/B/STE-098_person B.wav', 'data/B/STE-095_person B.wav', 'data/B/STE-080_person B.wav', 'data/B/STE-099_person B.wav', 'data/B

In [None]:
print(len(train_paths_array))

48


In [None]:
test_paths_array=[]
def create_test_paths_array(folder):
  for filename in os.listdir(folder):
    if filename.endswith(".wav"):
      file_path = os.path.join(folder, filename)
      test_paths_array.append(file_path)
create_test_paths_array('test')
print(test_paths_array)

['test/STE-085_person B.wav', 'test/STE-107_person A.wav', 'test/STE-097_person B.wav']


# Feature Extraction

In [None]:
# get the delta coefficients of the given mfcc_array
def get_delta_coeff(mfcc_array):
    rows,cols = mfcc_array.shape
    delta = np.zeros((rows,20)) # numcep=20
    n = 2
    for i in range(rows):
        index = []
        j=1
        while j<=n:
            if i-j<0:
              first=0
            else:
              first=i-j
            if i+j>rows-1:
                second = rows-1
            else:
                second=i+j
            index.append((second,first))
            j+=1
        delta[i]=(mfcc_array[index[0][0]]-mfcc_array[index[0][1]]+(2*(mfcc_array[index[1][0]]-mfcc_array[index[1][1]])))/10
    return delta

In [None]:
def get_stacked_mfcc_features(audio,rate):
    # trials with nfft=512,1024
    mfcc_feature = python_speech_features.mfcc(audio, rate, winlen=0.025, winstep=0.01, numcep=20, nfft=1280, appendEnergy=True)
    # print(mfcc_feature)
    mfcc_feature = preprocessing.scale(mfcc_feature)
    # print(mfcc_feature)
    delta_mfcc_feature = get_delta_coeff(mfcc_feature)
    # print(delta_mfcc_feature)
    stacked_mfccs = np.hstack((mfcc_feature,delta_mfcc_feature))
    return stacked_mfccs

In [None]:
print(get_stacked_mfcc_features(wave_audio,wave_sample_rate))
print(get_stacked_mfcc_features(wave_audio,wave_sample_rate).shape)

[[-0.4732636  -0.76344992  0.13700461 ... -0.33265516  0.04565319
   0.48109093]
 [-0.54838327 -0.43633851 -0.35400001 ... -0.33922419  0.27606469
   0.68510456]
 [ 0.04033391 -0.15148427  0.08769237 ... -0.07659365  0.36834332
   0.52607129]
 ...
 [-1.58444926 -0.94348499  0.3071221  ... -0.25329237 -0.26925286
   0.20468219]
 [-1.56440196 -1.10653256  0.07080522 ... -0.01639888 -0.08260036
   0.47396134]
 [-1.71951627 -1.18431966  0.01329227 ...  0.19524037  0.10577802
   0.30333364]]
(2158, 40)


# Train the GMM model

In [None]:
file_index_in_array = 1
features = np.asarray(())

for filename in train_paths_array:
    wave_sample_rate,wave_audio=wav.read(filename)
    vector=get_stacked_mfcc_features(wave_audio,wave_sample_rate)
    if features.size==0:
        features=vector
    else:
        features=np.vstack((features, vector))

    if file_index_in_array == 24:
        gmm = mixture.GaussianMixture(n_components=18, max_iter=250, covariance_type='diag',n_init=3)
        gmm.fit(features)
        model = filename[5]+".gmm"
        pickle.dump(gmm,open('Models/' + model,'wb'))
        print(model,"saved with data shape:",features.shape)
        features = np.asarray(())
        file_index_in_array = 0
    file_index_in_array = file_index_in_array + 1

A.gmm saved with data shape: (58377, 40)
B.gmm saved with data shape: (49787, 40)


# Test the GMM model

In [None]:
# initialize constants
model_paths_array=[]
def create_model_paths_array(folder):
    for filename in os.listdir(folder):
        if filename.endswith(".gmm"):
            file_path = os.path.join(folder, filename)
            model_paths_array.append(file_path)
create_model_paths_array('Models')
print(model_paths_array)

['Models/A.gmm', 'Models/B.gmm']


In [None]:
# load models
models_array = [pickle.load(open(filename,'rb')) for filename in model_paths_array if filename.endswith('.gmm')]
models_array

[GaussianMixture(covariance_type='diag', max_iter=250, n_components=18, n_init=3),
 GaussianMixture(covariance_type='diag', max_iter=250, n_components=18, n_init=3)]

In [None]:
person_arr = [filename[7] for filename in model_paths_array if filename.endswith('.gmm')]
person_arr

['A', 'B']

In [None]:
for filename in test_paths_array:
  wave_sample_rate,wave_audio = wav.read(filename)
  vector= get_stacked_mfcc_features(wave_audio,wave_sample_rate)
  likelihood = np.zeros(len(models_array))
  print('Actual person',filename[-5])
  for i in range(len(models_array)):
    gmm = models_array[i]
    score = np.array(gmm.score(vector))
    likelihood[i] = score.sum() # log likelihood

  res = np.argmax(likelihood)
  print("Predicted person", person_arr[res])
  print()

Actual person B
Predicted person B

Actual person A
Predicted person A

Actual person B
Predicted person B



------END OF CODE------