In [8]:
import warnings
from os.path import dirname, join as pjoin

import numpy as np
import scipy.io
from scipy.io import wavfile

# Both DSP packages export a function named `generate_features`; with plain
# imports the later one (mfcc.dsp) silently rebinds the name. The aliased
# imports give each extractor an unambiguous name regardless of import order.
from mfe.dsp import generate_features
from mfcc.dsp import generate_features
from mfe.dsp import generate_features as generate_mfe_features
from mfcc.dsp import generate_features as generate_mfcc_features

warnings.filterwarnings('ignore')

## Load File and Raw Features Extraction

In [3]:
# wavfile.read returns a (sample_rate, samples) pair; only the samples are
# used from here on, converted to Python-int-sized integers.
_sample_rate, _samples = wavfile.read("../audio/test_audio.wav")
raw_features = _samples.astype('int')  # astype already yields a fresh ndarray
print(raw_features.shape)

(16000,)


## Feature Extraction Using MFE

In [5]:
# Run the MFE feature extractor on the raw samples.
#
# The call goes through `generate_mfe_features` (the alias for
# mfe.dsp.generate_features). The bare name `generate_features` is bound to
# whichever DSP module was imported last -- mfcc.dsp in the imports cell -- so
# using it here would invoke the MFCC extractor on a fresh Restart & Run All.
mfe_extracted = generate_mfe_features(
    implementation_version=3,
    draw_graphs=False,
    raw_data=raw_features,
    axes=[""],
    sampling_freq=16000,  # assumed to equal the WAV's sample rate -- TODO confirm
    frame_length=0.032,
    frame_stride=0.016,
    num_filters=40,
    fft_length=256,
    low_frequency=300,
    high_frequency=0,  # NOTE(review): 0 presumably selects a default upper bound -- confirm in mfe.dsp
    win_size=101,
    noise_floor_db=-52,
)
# The result is a dict; its 'features' entry is read by the next cell.
print(mfe_extracted)

{'features': [0.12109375, 0.140625, 0.140625, 0.109375, 0.109375, 0.11328125, 0.12890625, 0.12890625, 0.0625, 0.1328125, 0.1328125, 0.125, 0.1640625, 0.14453125, 0.10546875, 0.015625, 0.0546875, 0.14453125, 0.25390625, 0.2421875, 0.2265625, 0.26953125, 0.29296875, 0.28515625, 0.25, 0.26171875, 0.25, 0.33203125, 0.21484375, 0.34375, 0.47265625, 0.47265625, 0.70703125, 0.69140625, 0.5078125, 0.4609375, 0.45703125, 0.46875, 0.71484375, 0.70703125, 0.203125, 0.19921875, 0.19921875, 0.19921875, 0.19921875, 0.2265625, 0.20703125, 0.20703125, 0.21484375, 0.20703125, 0.20703125, 0.203125, 0.2265625, 0.2265625, 0.19921875, 0.2734375, 0.23828125, 0.234375, 0.265625, 0.30078125, 0.23828125, 0.25, 0.265625, 0.31640625, 0.25, 0.30078125, 0.34765625, 0.3203125, 0.34765625, 0.3828125, 0.5703125, 0.5390625, 0.71875, 0.69921875, 0.48046875, 0.39453125, 0.29296875, 0.35546875, 0.6953125, 0.69921875, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.015625, 0.03515625, 0.03515625, 0.0, 0.0, 0.01171875, 0.1445312

In [6]:
# Pull the 'features' entry out of the extractor result, turn it into an
# ndarray, and display the first 40 values as a quick sanity check.
_mfe_values = mfe_extracted['features']
mfe_feature = np.array(_mfe_values)
mfe_feature[0:40]

array([0.12109375, 0.140625  , 0.140625  , 0.109375  , 0.109375  ,
       0.11328125, 0.12890625, 0.12890625, 0.0625    , 0.1328125 ,
       0.1328125 , 0.125     , 0.1640625 , 0.14453125, 0.10546875,
       0.015625  , 0.0546875 , 0.14453125, 0.25390625, 0.2421875 ,
       0.2265625 , 0.26953125, 0.29296875, 0.28515625, 0.25      ,
       0.26171875, 0.25      , 0.33203125, 0.21484375, 0.34375   ,
       0.47265625, 0.47265625, 0.70703125, 0.69140625, 0.5078125 ,
       0.4609375 , 0.45703125, 0.46875   , 0.71484375, 0.70703125])

## Feature Extraction Using MFCC

In [9]:
# Run the MFCC feature extractor on the same raw samples.
#
# Uses the explicit `generate_mfcc_features` alias (mfcc.dsp.generate_features)
# rather than the bare `generate_features`, whose binding depends on which DSP
# module happened to be imported last.
mfcc_extracted = generate_mfcc_features(
    implementation_version=3,
    draw_graphs=False,
    raw_data=raw_features,
    axes=[""],
    sampling_freq=16000,  # assumed to equal the WAV's sample rate -- TODO confirm
    frame_length=0.02,
    frame_stride=0.02,
    num_filters=13,
    fft_length=256,
    num_cepstral=13,
    win_size=101,
    low_frequency=300,
    high_frequency=0,  # NOTE(review): 0 presumably selects a default upper bound -- confirm in mfcc.dsp
    pre_cof=0.98,
    pre_shift=1,
)
# The result is a dict; its 'features' entry is read by the next cell.
print(mfcc_extracted)

{'features': [0.5091413259506226, -0.3626023530960083, 1.111251950263977, -0.11953943967819214, 1.587409257888794, -0.36993980407714844, -1.2605228424072266, 1.8133779764175415, -0.4062010943889618, 0.20932216942310333, 0.3076834976673126, -1.5279442071914673, 0.21145418286323547, -0.7300497889518738, -0.5796818733215332, 0.9377856254577637, 0.001270917011424899, -0.6550707817077637, 0.8046610951423645, -0.24838072061538696, 1.5722869634628296, -0.4555283784866333, 1.0309737920761108, -1.882876992225647, -0.9356431365013123, 1.4833976030349731, -0.6036334037780762, 0.36360082030296326, 0.4347515404224396, 1.71733820438385, -1.7151834964752197, 0.6324889063835144, -0.831133246421814, 1.2677009105682373, -0.8558818101882935, 0.9404685497283936, -1.919230580329895, 0.37870490550994873, 0.34631505608558655, 1.346851110458374, 0.21161358058452606, 2.109105110168457, -0.15427200496196747, -1.0935367345809937, 0.32806840538978577, -0.2100473791360855, 0.5953222513198853, -0.29571181535720825,

In [11]:
# Pull the 'features' entry out of the extractor result, turn it into an
# ndarray, and display the first 40 values as a quick sanity check.
_mfcc_values = mfcc_extracted['features']
mfcc_feature = np.array(_mfcc_values)
mfcc_feature[0:40]

array([ 5.09141326e-01, -3.62602353e-01,  1.11125195e+00, -1.19539440e-01,
        1.58740926e+00, -3.69939804e-01, -1.26052284e+00,  1.81337798e+00,
       -4.06201094e-01,  2.09322169e-01,  3.07683498e-01, -1.52794421e+00,
        2.11454183e-01, -7.30049789e-01, -5.79681873e-01,  9.37785625e-01,
        1.27091701e-03, -6.55070782e-01,  8.04661095e-01, -2.48380721e-01,
        1.57228696e+00, -4.55528378e-01,  1.03097379e+00, -1.88287699e+00,
       -9.35643137e-01,  1.48339760e+00, -6.03633404e-01,  3.63600820e-01,
        4.34751540e-01,  1.71733820e+00, -1.71518350e+00,  6.32488906e-01,
       -8.31133246e-01,  1.26770091e+00, -8.55881810e-01,  9.40468550e-01,
       -1.91923058e+00,  3.78704906e-01,  3.46315056e-01,  1.34685111e+00])

## Save Features to NumPy Files

In [15]:
from numpy import save

# Expose the two feature vectors under short names, then write each one to its
# own .npy file in the current working directory.
mfe, mfcc = mfe_feature, mfcc_feature
for _npy_path, _vector in (('x_train_mfe.npy', mfe), ('x_train_mfcc.npy', mfcc)):
    save(_npy_path, _vector)

In [18]:
# Round-trip check: read the saved MFE file back in and report its shape.
data = np.load(file='x_train_mfe.npy')
print(np.shape(data))

(2440,)
