In [1]:
import numpy as np
import os
from scipy.io import wavfile
from python_speech_features import mfcc, logfbank
import matplotlib.pyplot as plt
import pickle
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn import preprocessing
%matplotlib inline

from utils import *

#### Get data

In [2]:
fpaths = []
labels = []
word_spoken = []


dataset = '41'
input_folder = 'data\{}'.format(dataset)

# pars the input directory that contains audio files
# get audio files and their lables

for f in os.listdir(input_folder):
    for w in os.listdir(input_folder+'\\'+ f):
        # check wheter files is wav or not
        
        if (w.find('wav')!=-1):
            fpaths.append(input_folder+'\\'+f+'\\'+w)
            labels.append(f)
            if f not in word_spoken:
                word_spoken.append(f)
print("Spoken words: "+ str(word_spoken))

Spoken words: ['0_zero', '1_one', '2_two', '3_three', '4_four', '5_five', '6_six', '7_seven', '8_eight', '9_nine', 'aboard', 'adjusted & locked', 'All switches', 'Alternate air door', 'A_C Documents', 'Battery+Main bus', 'Cabin doors', 'Checked', 'Circuit Breakers', 'Closed', 'Cockpit', 'Cockpit checklist completed', 'Completed', 'decimal', 'Flight Controls', 'Fuel Quantity', 'Fuel Selector', 'Fuel Shutoff Valve', 'Fuel Temperature', 'in', 'locked', 'off', 'On', 'open', 'preflight_inspection', 'removed', 'Seats & Belts', 'Shut-off cabin heat', 'sufficient', 'Towbar', 'Weight and balance']


In [3]:
# size of dataset
print(len(labels))

21433


## Extracting frequeny domain features

At the second stage we convet a signal into the freqency domain. In monst modern speech recognitoon freqeency-domain features are used as key component. In case of multispeakers MFFC feature extraction works best. After convert a signal into a freq domain, it's requered to convert it into a useable form. **Mel Frequency Cepstral Coefficients (MFCC)** is a good way to do that. *MFCC* takes the power spectrum of a signal and then uses a combination of filter banks and disrete cosinetransform to extract pattern of phones or features.

After extracting **MFFC** features we exract data into single data matrix, and a label vector with the correct label for eac data file is ceated. 

In [4]:
from scipy.io import wavfile

data = []

mfcc_max_length = 0

# first file desitination name and index
file_name = ''
word_spoken_index = 0

for n,file in enumerate(fpaths):
    
    # show current desintation 
    if (file.find(file_name)<=0):
        file_name=word_spoken[word_spoken_index]
        print(word_spoken[word_spoken_index])
        word_spoken_index+=1

    # read file 
    sampling_freq, audio = wavfile.read(file)
    # Extract MFCC features
    mfcc_features = mfcc(audio, sampling_freq)

    
    mfcc_len=mfcc_features.shape[0]*mfcc_features.shape[1]
    # get length of largets feature array
    if mfcc_len>mfcc_max_length:    
        mfcc_max_length=mfcc_len
     
    # flat data into 2D array
    mfcc_features=np.resize(mfcc_features,(1,mfcc_len))
    
    data.insert(n,mfcc_features)

0_zero
1_one
2_two
3_three
4_four
5_five
6_six
7_seven
8_eight
9_nine
aboard
adjusted & locked
All switches
Alternate air door
A_C Documents
Battery+Main bus
Cabin doors
Checked
Circuit Breakers
Closed
Cockpit
Cockpit checklist completed
Completed
decimal
Flight Controls
Fuel Quantity
Fuel Selector
Fuel Shutoff Valve
Fuel Temperature
in
locked
off
On
open
preflight_inspection
removed
Seats & Belts
Shut-off cabin heat
sufficient
Towbar
Weight and balance


#### Get Labels

In [5]:
label_idx_dic={} # dict for storing labels

#Each sample file is one row in data, and has one entry in labels
print('Number of files total:', len(data))
all_labels = np.zeros(len(data),dtype=int)
for n, l in enumerate(set(labels)):
    label_idx_dic[n]=l
    all_labels[np.array([i for i, _ in enumerate(labels) if _ == l])] = n
print('Labels and label indices', all_labels)

Number of files total: 21433
Labels and label indices [33 33 33 ...  3  3  3]


In [16]:
# save label dictionary
def save_dict_to_file(dic):
    f = open('dict.txt','w')
    f.write(str(dic))
    f.close()

# load label dictionary
def load_dict_from_file():
    f = open('dict.txt','r')
    data=f.read()
    f.close()
    return eval(data)

# save dict
save_dict_to_file(label_idx_dic)

# read dict
label_idx_dic = load_dict_from_file()

In [17]:
print(label_idx_dic)

{0: 'decimal',
 1: 'Seats & Belts',
 2: '8_eight',
 3: 'locked',
 4: 'Cabin doors',
 5: '6_six',
 6: 'Shut-off cabin heat',
 7: 'Closed',
 8: 'Checked',
 9: '0_zero',
 10: 'in',
 11: 'Completed',
 12: '2_two',
 13: '3_three',
 14: '5_five',
 15: '7_seven',
 16: 'Fuel Shutoff Valve',
 17: 'Fuel Temperature',
 18: 'preflight_inspection',
 19: '1_one',
 20: 'sufficient',
 21: 'off',
 22: 'Cockpit',
 23: 'On',
 24: '4_four',
 25: 'Battery+Main bus',
 26: 'Weight and balance',
 27: 'Flight Controls',
 28: '9_nine',
 29: 'Fuel Quantity',
 30: 'Fuel Selector',
 31: 'Cockpit checklist completed',
 32: 'aboard',
 33: 'open',
 34: 'removed',
 35: 'adjusted & locked',
 36: 'Circuit Breakers',
 37: 'All switches',
 38: 'Alternate air door',
 39: 'A_C Documents',
 40: 'Towbar'}

#### Pad Zeros to Small Arrays

In [9]:
x_data = np.zeros((len(data),mfcc_max_length+1), dtype=float)

for i,_d in enumerate(data):
    x_data[i,1:_d.shape[1]+1]=_d
    x_data[i,0]=all_labels[i]

In [11]:
# save all features data
np.savetxt('data.csv', x_data, delimiter=',')