# Wavelet Transform Resolution Power

In this example we take the wavelet transform of the input signal using a Daubechies-4 tap filter and for every resolution we find the square power.

In [1]:
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import Audio
import librosa
import librosa.display
import pywt

In [2]:
from SimpleSpeechCommands import get_word_dict, read_list, load_data
from SimpleSpeechCommands import append_examples,partition_directory, reduce_examples

In [3]:
word_to_label,label_to_word = get_word_dict()
path_dataset = '/home/edoardobucheli/TFSpeechCommands/train/audio'
#path_dataset = '/Users/edoardobucheli/Documents/MCC/Tesis/Kaggle_SpeechCommands/train/audio'
sr = 16000
file_length = 16000

Load Data

In [4]:
training_files = read_list(path_dataset,'training_files.txt')
validation_files = read_list(path_dataset,'validation_files.txt')
testing_files = read_list(path_dataset,'testing_files.txt')

We will work only with 10 known commands, no unknown.

In [5]:
training_files = reduce_examples(training_files,10,0,word_to_label)
validation_files = reduce_examples(validation_files,10,0,word_to_label)
testing_files = reduce_examples(testing_files,10,0,word_to_label)

In [6]:
x_train,y_train = load_data(training_files,sr,file_length,path_dataset,word_to_label)
x_val,y_val = load_data(validation_files,sr,file_length,path_dataset,word_to_label)
x_test,y_test = load_data(testing_files,sr,file_length,path_dataset,word_to_label)

100%|██████████| 18538/18538 [00:06<00:00, 2999.09it/s]
100%|██████████| 2577/2577 [00:00<00:00, 3056.57it/s]
100%|██████████| 2567/2567 [00:00<00:00, 3021.32it/s]


In [7]:
print(x_train.shape)
print(y_train.shape)
print(x_val.shape)
print(y_val.shape)
print(x_test.shape)
print(y_test.shape)

(18538, 16000)
(18538,)
(2577, 16000)
(2577,)
(2567, 16000)
(2567,)


### Preprocess Data

In [8]:
from pywt import wavedec
from tqdm import tqdm

In [9]:
test = wavedec(x_train[0],'db4')
level = len(test)

In [14]:
x_train_2 = np.zeros((len(x_train),level*2))

for i, wave in enumerate(tqdm(x_train)):
    
    wt_this =  wavedec(wave,'db4')
    
    for j,band in enumerate(wt_this):
        
        band /= np.max(np.abs(band))
        
        this_pow = np.sum(np.square(band))
        this_std = np.std(band)
        
        x_train_2[i,j] = this_pow
        x_train_2[i,j+level] = this_std

100%|██████████| 18538/18538 [00:13<00:00, 1387.60it/s]


In [15]:
x_val_2 = np.zeros((len(x_val),level*2))

for i, wave in enumerate(tqdm(x_val)):
    
    wt_this =  wavedec(wave,'db4')
    
    for j,band in enumerate(wt_this):
        
        band /= np.max(np.abs(band))
        
        this_pow = np.sum(np.square(band))
        this_std = np.std(band)
        
        x_val_2[i,j] = this_pow
        x_val_2[i,j+level] = this_std

100%|██████████| 2577/2577 [00:01<00:00, 1406.55it/s]


In [16]:
x_test_2 = np.zeros((len(x_test),level*2))

for i, wave in enumerate(tqdm(x_test)):
    
    wt_this =  wavedec(wave,'db4')
    
    for j,band in enumerate(wt_this):
        
        band /= np.max(np.abs(band))
        
        this_pow = np.sum(np.square(band))
        this_std = np.std(band)
        
        x_test_2[i,j] = this_pow
        x_test_2[i,j+level] = this_std

100%|██████████| 2567/2567 [00:01<00:00, 1337.24it/s]


## Other Processing

Create One-Hot Encoding

In [17]:
from Utilities import make_oh

In [18]:
N_train, _ = x_train_2.shape
N_val, _ = x_val_2.shape
N_test, _ = x_test_2.shape

n_classes = len(np.unique(y_train))

In [19]:
y_train_oh = make_oh(y_train)
y_val_oh = make_oh(y_val)
y_test_oh = make_oh(y_test)

In [20]:
print(x_train_2.shape)
print(y_train_oh.shape)
print(x_val_2.shape)
print(y_val_oh.shape)
print(x_test_2.shape)
print(y_test_oh.shape)

(18538, 24)
(18538, 10)
(2577, 24)
(2577, 10)
(2567, 24)
(2567, 10)


## Multi Layer Perceptron

In [21]:
from tensorflow.keras.layers import Dropout, Dense, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [36]:
def DNN_3HL(input_shape,n_classes):
    X_input = Input(input_shape)
        
    X = Dense(64,activation='sigmoid')(X_input)
    
    X = Dense(64,activation='sigmoid')(X)
    
    X = Dense(n_classes,activation='softmax')(X)
    
    model = Model(inputs = X_input, outputs= X)
    
    return model

In [37]:
input_shape = (level*2,)
lr = 0.01

In [38]:
model1 = DNN_3HL(input_shape, n_classes)
model1.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         (None, 24)                0         
_________________________________________________________________
dense_8 (Dense)              (None, 64)                1600      
_________________________________________________________________
dense_9 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_10 (Dense)             (None, 10)                650       
Total params: 6,410
Trainable params: 6,410
Non-trainable params: 0
_________________________________________________________________


In [39]:
model1.compile(optimizer=Adam(lr),loss='categorical_crossentropy',metrics = ['accuracy'])

In [40]:
model1.fit(x_train_2,y_train_oh,
           batch_size=256, epochs = 50,
           validation_data=[x_val_2,y_val_oh],
           shuffle=True,verbose = 1)

Train on 18538 samples, validate on 2577 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7f2df47d27f0>

## Random Forest

In [41]:
from sklearn.ensemble import RandomForestClassifier

In [42]:
clf = RandomForestClassifier()
clf.fit(x_train_2,y_train)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [43]:
clf.score(x_val_2,y_val)

0.32945285215366704

## GB Classifier

In [44]:
from sklearn.ensemble import GradientBoostingClassifier

In [45]:
clfgb = GradientBoostingClassifier()
clfgb.fit(x_train_2,y_train)

GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, n_estimators=100,
              n_iter_no_change=None, presort='auto', random_state=None,
              subsample=1.0, tol=0.0001, validation_fraction=0.1,
              verbose=0, warm_start=False)

In [46]:
clfgb.score(x_val_2,y_val)

0.3942568878540939