In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pdb
from keras.utils import to_categorical
from scipy.fftpack import fft
from scipy.io import wavfile
from sklearn.preprocessing import LabelEncoder
from python_speech_features import mfcc

Using TensorFlow backend.


## Data used: Speech Commands Data Set v0.01

In [2]:
# Notebooks have no __file__ variable, so the *string* '__file__' is resolved
# against the current working directory and dirname() strips it off again:
# ROOT_DIR ends up being the (symlink-resolved) working directory.
ROOT_DIR = os.path.normpath(os.path.dirname(os.path.realpath('__file__')))

# DATA_DIR is walked for .wav clips; DATA_INFO holds the split list files.
DATA_DIR = os.path.join(ROOT_DIR, 'data_asr', 'data')
DATA_INFO = os.path.join(ROOT_DIR, 'data_asr', 'data_info')

In [3]:
# Holds the path of every .wav clip found under DATA_DIR once the directory walk has run.
data_list = list()

In [4]:
# Read the test / validation split lists. Only the first space-separated column
# is kept; each entry is later joined onto DATA_DIR, and its directory part is
# used as the clip's label.
test_files = pd.read_csv(
    os.path.join(DATA_INFO, 'testing_list.txt'), header=None, sep=' '
).loc[:, 0].tolist()
val_files = pd.read_csv(
    os.path.join(DATA_INFO, 'validation_list.txt'), header=None, sep=' '
).loc[:, 0].tolist()

In [5]:
# Label of a held-out clip = name of the folder that contains it.
# * The '.wav' filter mirrors the one used to build test_list / val_list, so the
#   labels stay index-aligned with the paths even if a list contains a stray
#   non-wav entry.
# * basename(dirname(...)) matches how train_lab is derived, so a nested or
#   './'-prefixed entry still yields the bare class name.
test_lab = [os.path.basename(os.path.dirname(f)) for f in test_files if f.endswith('.wav')]
val_lab = [os.path.basename(os.path.dirname(f)) for f in val_files if f.endswith('.wav')]

In [6]:
# Full paths of the held-out clips; entries that do not end in '.wav' are dropped.
test_list = [
    os.path.join(DATA_DIR, rel_path)
    for rel_path in test_files
    if rel_path.endswith('.wav')
]
val_list = [
    os.path.join(DATA_DIR, rel_path)
    for rel_path in val_files
    if rel_path.endswith('.wav')
]

In [7]:
# Build the list of every .wav under DATA_DIR in a single assignment so the
# cell is idempotent: re-running it can no longer append duplicate paths.
# The literal '/' separator is kept on purpose: the later set difference
# against test_list / val_list compares raw strings, so paths must be spelled
# the same way here (presumably the split lists use '/' too; verify).
# NOTE(review): every sub-folder is included; if DATA_DIR also holds
# non-command audio (e.g. a background-noise folder) it is picked up as well.
data_list = [
    root + '/' + f
    for root, _dirs, files in os.walk(DATA_DIR)
    for f in files
    if f.endswith('.wav')
]

In [8]:
# Training set = every clip on disk that is in neither the test nor the
# validation split.
# sorted() pins the order. Iteration order of a set of strings changes between
# interpreter sessions (string hashing is randomised per process), so without
# it labels and features written to disk in different sessions could end up in
# different orders.
train_list = sorted(set(data_list) - set(test_list) - set(val_list))

In [None]:
# The class label of a training clip is the name of its parent directory.
train_lab = [os.path.split(os.path.dirname(wav_path))[1] for wav_path in train_list]

In [None]:
# Fit the encoder ONCE, on the training labels, and reuse that single
# label -> integer mapping for every split. Re-fitting per split would let
# each split build its own table, so the same word could receive different ids
# whenever a split is missing a class.
lab = LabelEncoder()

train_encode = lab.fit_transform(train_lab)
# The ids are already integers; encoding them a second time adds nothing.
y_encode = train_encode

# transform() raises ValueError if a split contains a label unseen in training.
val_encode = lab.transform(val_lab)
yval_encode = val_encode

test_encode = lab.transform(test_lab)
ytest_encode = test_encode

In [None]:
# One-hot encode all three splits with the SAME width. Left to itself,
# to_categorical infers the width from the largest id in the array it is given,
# so a split that lacks the highest class would come out with fewer columns.
n_classes = int(max(np.max(y_encode), np.max(yval_encode), np.max(ytest_encode))) + 1

ytrain = to_categorical(y_encode, num_classes=n_classes)
yval = to_categorical(yval_encode, num_classes=n_classes)
ytest = to_categorical(ytest_encode, num_classes=n_classes)

In [None]:
# Display the shapes of the three one-hot label matrices (train, val, test).
tuple(onehot.shape for onehot in (ytrain, yval, ytest))

In [None]:
# Persist the one-hot training labels to the working directory.
np.save(file='ytrain.npy', arr=ytrain)

In [None]:
# Persist the one-hot validation labels to the working directory.
np.save(file='yval.npy', arr=yval)

In [None]:
# Persist the one-hot test labels to the working directory.
np.save(file='ytest.npy', arr=ytest)

In [9]:
def wav2np(wav_list):
    """Read every WAV file in ``wav_list``.

    Parameters
    ----------
    wav_list : iterable of str
        Paths passed one by one to ``scipy.io.wavfile.read``.

    Returns
    -------
    (samp_list, x_data) : tuple of two lists
        ``samp_list[k]`` is the sample rate and ``x_data[k]`` the sample array
        returned for ``wav_list[k]``; both lists keep the input order.
    """
    decoded = [wavfile.read(wav_path) for wav_path in wav_list]
    samp_list = [rate for rate, _ in decoded]
    x_data = [samples for _, samples in decoded]
    return samp_list, x_data

In [10]:
# Decode the three splits in the order train, validation, test; each call
# yields a (sample-rate list, sample-array list) pair.
(samp_train, x_train), (samp_val, x_val), (samp_test, x_test) = (
    wav2np(split_paths) for split_paths in (train_list, val_list, test_list)
)

In [11]:
#Returns mfcc features with 40 coefficients per frame (noted as (98, 40) for the clips used here)
def mfcc_gen(arr):
    """Compute MFCC features for one audio signal.

    Parameters
    ----------
    arr : array-like
        Audio samples handed straight to ``mfcc``.

    Returns
    -------
    The result of ``mfcc`` with a 0.03 s analysis window, 40 filterbank
    channels and 40 cepstral coefficients. The number of frames depends on the
    length of ``arr``; every other ``mfcc`` setting is left at its default.
    """
    return mfcc(arr, nfilt = 40, numcep = 40, winlen = 0.03)

In [12]:
#Zero-pads (or trims) a feature matrix along the frame axis so every clip has the same input shape
def len_pad(arr, target_len=98):
    """Force a 2-D feature matrix to exactly ``target_len`` rows.

    Parameters
    ----------
    arr : numpy.ndarray, shape (n_frames, n_features)
        Per-frame features of one clip.
    target_len : int, optional
        Number of rows the result must have (default 98, the value that was
        previously hard-coded).

    Returns
    -------
    numpy.ndarray, shape (target_len, n_features)
        * ``arr`` itself when it already has ``target_len`` rows;
        * the first ``target_len`` rows when ``arr`` is longer (this case used
          to crash with a negative dimension in ``np.zeros``);
        * ``arr`` followed by rows of zeros when it is shorter.
    """
    n_frames = len(arr)

    if n_frames == target_len:
        return arr

    if n_frames > target_len:
        # Trim instead of asking np.zeros for a negative number of rows.
        return arr[:target_len]

    # Take the column count from the input rather than assuming 40.
    n_features = np.shape(arr)[1]
    filler = np.zeros((target_len - n_frames, n_features))
    return np.vstack((arr, filler))

In [13]:
def data_preprocess(x):
    """Turn a collection of raw audio signals into one stacked feature array.

    Each element of ``x`` is passed through ``mfcc_gen`` and the result through
    ``len_pad``; the per-clip matrices are then combined with ``np.asarray``.

    Parameters
    ----------
    x : iterable
        Audio signals, one per clip.

    Returns
    -------
    numpy.ndarray
        The padded feature matrices in the same order as ``x``.
    """
    return np.asarray([len_pad(mfcc_gen(clip)) for clip in x])

## Load and pre-process each array individually to save memory

In [14]:
# MFCC features for the training clips, padded to a common length.
train_feat = data_preprocess(x=x_train)

In [15]:
# Display the shape of the stacked training feature array.
np.shape(train_feat)

(51088, 98, 40)

In [16]:
# Persist the training features to the working directory.
np.save(file='train_feat.npy', arr=train_feat)

In [17]:
# MFCC features for the validation clips, padded to a common length.
val_feat = data_preprocess(x=x_val)

In [18]:
# Persist the validation features to the working directory.
np.save(file='val_feat.npy', arr=val_feat)

In [19]:
# MFCC features for the test clips, padded to a common length.
test_feat = data_preprocess(x=x_test)

In [20]:
# Persist the test features to the working directory.
np.save(file='test_feat.npy', arr=test_feat)