# Keyword Spotting with different architectures

In [1]:
# Seed NumPy's global random number generator right after importing it.
# NOTE(review): only NumPy is seeded in this cell; no TensorFlow seed is set here.
import numpy as np
np.random.seed(1234)

# Path helpers
from os.path import join as pjoin
from pathlib import Path
# TensorFlow / Keras
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.initializers import glorot_uniform
# Project-local helpers: dataset loaders and the CNN model builder
from load_utils import load_dataset, load_dataset_keywords
from CNNarchitectures import KWS_CNN_model
# Directory that trained models are saved to and reloaded from
CHECKPOINTS_PATH = 'models/'

2022-12-19 18:46:21.038385: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-19 18:46:21.217231: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2022-12-19 18:46:21.223372: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-12-19 18:46:21.223390: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore 

## Training with an increasing number of keywords

In [None]:
# Training with different keywords
# One CNN is trained, saved and evaluated for every keyword-set size from 4 to 10.
# The keyword set of size n is the first n entries of `categories`.
data_dir = 'speechdataset/processed_data'

categories = ['marvin', 'no', 'yes', 'bed', 'down', 'forward', 'happy', 'house', 'tree', 'visual', 'bird',
              'eight', 'four', 'learn','right', 'stop', 'two', 'wow', 'cat', 'five', 'nine', 'one', 'six',
              'go', 'left', 'off', 'seven', 'up', 'backward', 'dog', 'follow', 'on', 'sheila', 'three', 'zero']

# Early stopping criteria: stop once val_loss has not improved by at least 1e-5 for one epoch
early_stop_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=1e-5, patience=1)


# Plain `range` yields Python ints, so no cast is needed for slicing or for the one-hot depth.
for n_keywords in range(4, 11):
    keywords = categories[:n_keywords]
    n_labels = n_keywords + 1 # number of keywords + not a keyword

    print('Training with ', n_keywords, 'keywords:', keywords)

    print('Loading data...')
    # Load data
    X_train, Y_train, X_test, Y_test = load_dataset_keywords(data_dir, keywords, categories, frames=50,
                                                             winlen=0.025, winstep=0.02, nfilt=26)

    # Move the last axis to the front before feeding the model.
    # NOTE(review): presumably the loader stores the sample index on the last axis - confirm.
    train_inputs = np.transpose(X_train, [2,0,1])
    test_inputs = np.transpose(X_test, [2,0,1])
    train_targets = tf.one_hot(Y_train, n_labels, 1, 0)
    test_targets = tf.one_hot(Y_test, n_labels, 1, 0)

    print('Data loaded. Training...')
    # Dropout is passed as a fraction (0.2), matching the dropout experiments in this notebook
    # which call KWS_CNN_model with rates between 0.1 and 0.5. The previous value of 20 was
    # not a valid rate.
    # NOTE(review): norm is the string 'False', not the boolean - confirm how KWS_CNN_model reads it.
    model = KWS_CNN_model((50,39,1), dropout=0.2, norm='False')
    model.compile(optimizer="adam", loss=tf.keras.losses.CategoricalCrossentropy(), metrics=["accuracy"])

    model.fit(train_inputs, train_targets, epochs=5, batch_size=10,
              validation_split=0.1, callbacks=[early_stop_callback])

    model.save(pjoin(CHECKPOINTS_PATH, f'CNNbestmodel_25ms_20ms_26_{n_keywords}keywords'))

    loss, acc = model.evaluate(test_inputs, test_targets, verbose=2)
    print("Test accuracy: {:5.2f}%".format(100 * acc))

In [None]:
# Results
# Evaluate every saved model on the test split that matches its own keyword set.
# Previously all models were scored on whatever X_test / Y_test / n_labels were left over
# from the last training iteration, so the label depth did not match the model outputs.
#
# NOTE(review): relies on `data_dir` and `categories` as defined in the keyword-training cell
# (keyword set of size n = first n entries of `categories`).
# NOTE(review): assumes load_dataset_keywords returns the same train/test split on every call
# for the same arguments - confirm, otherwise test samples may overlap the training data.

# (number of keywords, checkpoint directory name). The 3-keyword model is the dropout-0.2 checkpoint.
checkpoints = [(3, 'CNNmodel_25ms_20ms_26_dropout20')]
checkpoints += [(n, f'CNNbestmodel_25ms_20ms_26_{n}keywords') for n in range(4, 11)]

for n_keywords, checkpoint_name in checkpoints:
    keywords = categories[:n_keywords]
    n_labels = n_keywords + 1 # number of keywords + not a keyword

    print('Training best model with ', n_keywords, 'keywords:', keywords)

    # Only the test split is needed here; the training split is discarded.
    _X_train, _Y_train, X_test, Y_test = load_dataset_keywords(data_dir, keywords, categories, frames=50,
                                                               winlen=0.025, winstep=0.02, nfilt=26)

    new_model = tf.keras.models.load_model(pjoin(CHECKPOINTS_PATH, checkpoint_name))
    loss, acc = new_model.evaluate(np.transpose(X_test, [2,0,1]), tf.one_hot(Y_test, n_labels, 1, 0), verbose=2)
    print("Test accuracy: {:5.8f}%".format(100 * acc), "\n")

## Training with different dropout rates

In [2]:
# Dataset setup for the dropout experiments.
data_dir = 'speechdataset/processed_data'

# Words the model has to recognise
keywords = ['marvin', 'no', 'yes']

# Full word list handed to the loader together with the keywords
categories = [
    'bed', 'down', 'forward', 'house', 'nine', 'one', 'six', 'tree', 'visual', 'bird', 'eight',
    'four', 'learn', 'no', 'right', 'stop', 'two', 'wow', 'cat', 'five', 'go', 'left', 'off',
    'seven', 'up', 'yes', 'backward', 'dog', 'follow', 'happy', 'marvin', 'on', 'sheila', 'three', 'zero',
]

# One label per keyword, plus one for "not a keyword"
n_labels = 1 + len(keywords)

# Load the train and test splits
X_train, Y_train, X_test, Y_test = load_dataset_keywords(
    data_dir, keywords, categories,
    frames=50, winlen=0.025, winstep=0.02, nfilt=26,
)

In [10]:
# Parameters
# Train, save and evaluate one model per dropout rate.
# NOTE(review): 0.2 is not in this list, yet the evaluation cell further down loads the
# 'dropout20' checkpoint - that model must already exist on disk from an earlier run.
dropouts = [0.1, 0.15, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]
# NOTE(review): this is the string 'False', not the boolean; a non-empty string is truthy.
# Confirm how KWS_CNN_model interprets it.
norm = 'False'

# Early stopping criteria: stop once val_loss has not improved by at least 1e-5 for one epoch
early_stop_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=1e-5, patience=1)

# The inputs and one-hot targets do not depend on the dropout rate, so build them once.
# The transpose moves the last axis of the loaded arrays to the front.
train_inputs = np.transpose(X_train, [2,0,1])
train_targets = tf.one_hot(Y_train, n_labels, 1, 0)
test_inputs = np.transpose(X_test, [2,0,1])
test_targets = tf.one_hot(Y_test, n_labels, 1, 0)

for dropout in dropouts:
    print('Training with ', dropout, 'dropout')

    model = KWS_CNN_model((50,39,1), dropout=dropout, norm=norm)
    model.compile(optimizer="adam", loss=tf.keras.losses.CategoricalCrossentropy(), metrics=["accuracy"])

    model.fit(train_inputs, train_targets, epochs=5, batch_size=10,
              validation_split=0.1, callbacks=[early_stop_callback])

    # round() rather than int(): int() truncates, so a rate such as 0.29 (0.29 * 100 is
    # 28.999999999999996) would be saved under the wrong name. For the rates listed above
    # both give the same checkpoint names.
    model.save(pjoin(CHECKPOINTS_PATH, f'CNNmodel_25ms_20ms_26_dropout{round(dropout * 100)}'))

    loss, acc = model.evaluate(test_inputs, test_targets, verbose=2)
    print("Test accuracy: {:5.2f}%".format(100 * acc))

Training with  0.1 dropout
Epoch 1/5
Epoch 2/5
Epoch 3/5




INFO:tensorflow:Assets written to: models/CNNmodel_25ms_20ms_26_dropout10/assets


INFO:tensorflow:Assets written to: models/CNNmodel_25ms_20ms_26_dropout10/assets


497/497 - 78s - loss: 0.1177 - accuracy: 0.9640 - 78s/epoch - 156ms/step
Test accuracy: 96.40%
Training with  0.15 dropout
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5




INFO:tensorflow:Assets written to: models/CNNmodel_25ms_20ms_26_dropout15/assets


INFO:tensorflow:Assets written to: models/CNNmodel_25ms_20ms_26_dropout15/assets


497/497 - 77s - loss: 0.0770 - accuracy: 0.9744 - 77s/epoch - 156ms/step
Test accuracy: 97.44%
Training with  0.25 dropout
Epoch 1/5
Epoch 2/5




INFO:tensorflow:Assets written to: models/CNNmodel_25ms_20ms_26_dropout25/assets


INFO:tensorflow:Assets written to: models/CNNmodel_25ms_20ms_26_dropout25/assets


497/497 - 78s - loss: 0.1315 - accuracy: 0.9712 - 78s/epoch - 156ms/step
Test accuracy: 97.12%
Training with  0.3 dropout
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5




INFO:tensorflow:Assets written to: models/CNNmodel_25ms_20ms_26_dropout30/assets


INFO:tensorflow:Assets written to: models/CNNmodel_25ms_20ms_26_dropout30/assets


497/497 - 71s - loss: 0.1087 - accuracy: 0.9711 - 71s/epoch - 143ms/step
Test accuracy: 97.11%
Training with  0.35 dropout
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5




INFO:tensorflow:Assets written to: models/CNNmodel_25ms_20ms_26_dropout35/assets


INFO:tensorflow:Assets written to: models/CNNmodel_25ms_20ms_26_dropout35/assets


497/497 - 71s - loss: 0.1128 - accuracy: 0.9724 - 71s/epoch - 142ms/step
Test accuracy: 97.24%
Training with  0.4 dropout
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5




INFO:tensorflow:Assets written to: models/CNNmodel_25ms_20ms_26_dropout40/assets


INFO:tensorflow:Assets written to: models/CNNmodel_25ms_20ms_26_dropout40/assets


497/497 - 77s - loss: 0.1322 - accuracy: 0.9736 - 77s/epoch - 155ms/step
Test accuracy: 97.36%
Training with  0.45 dropout
Epoch 1/5
Epoch 2/5
Epoch 3/5




INFO:tensorflow:Assets written to: models/CNNmodel_25ms_20ms_26_dropout45/assets


INFO:tensorflow:Assets written to: models/CNNmodel_25ms_20ms_26_dropout45/assets


497/497 - 78s - loss: 0.1484 - accuracy: 0.9697 - 78s/epoch - 156ms/step
Test accuracy: 96.97%
Training with  0.5 dropout
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5




INFO:tensorflow:Assets written to: models/CNNmodel_25ms_20ms_26_dropout50/assets


INFO:tensorflow:Assets written to: models/CNNmodel_25ms_20ms_26_dropout50/assets


497/497 - 75s - loss: 0.1100 - accuracy: 0.9740 - 75s/epoch - 151ms/step
Test accuracy: 97.40%


In [11]:
# Reload every saved dropout model and report its accuracy on the test split.
# NOTE(review): relies on X_test, Y_test and n_labels from the data-loading cell of this section,
# and on a 'dropout20' checkpoint that the training loop in the previous cell does not produce.
dropouts = [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]

# The test inputs and one-hot targets are the same for every model, so build them once.
test_inputs = np.transpose(X_test, [2,0,1])
test_targets = tf.one_hot(Y_test, n_labels, 1, 0)

for dropout in dropouts:
    print('Training with', dropout, 'dropout')
    # round() rather than int(): int() truncates, which can produce the wrong checkpoint name
    # for rates whose product with 100 lands just below an integer. For the rates listed above
    # both give the same names.
    checkpoint_dir = pjoin(CHECKPOINTS_PATH, f'CNNmodel_25ms_20ms_26_dropout{round(dropout * 100)}')
    new_model = tf.keras.models.load_model(checkpoint_dir)
    loss, acc = new_model.evaluate(test_inputs, test_targets, verbose=2)
    print("Test accuracy: {:5.8f}%".format(100 * acc), "\n")

Training with 0.1 dropout
497/497 - 76s - loss: 0.1177 - accuracy: 0.9640 - 76s/epoch - 153ms/step
Test accuracy: 96.39685154% 

Training with 0.15 dropout
497/497 - 76s - loss: 0.0770 - accuracy: 0.9744 - 76s/epoch - 154ms/step
Test accuracy: 97.43621945% 

Training with 0.2 dropout
497/497 - 76s - loss: 0.0925 - accuracy: 0.9772 - 76s/epoch - 153ms/step
Test accuracy: 97.71968722% 

Training with 0.25 dropout
497/497 - 77s - loss: 0.1315 - accuracy: 0.9712 - 77s/epoch - 154ms/step
Test accuracy: 97.12126255% 

Training with 0.3 dropout
497/497 - 78s - loss: 0.1087 - accuracy: 0.9711 - 78s/epoch - 157ms/step
Test accuracy: 97.10866213% 

Training with 0.35 dropout
497/497 - 79s - loss: 0.1128 - accuracy: 0.9724 - 79s/epoch - 158ms/step
Test accuracy: 97.24094272% 

Training with 0.4 dropout
497/497 - 78s - loss: 0.1322 - accuracy: 0.9736 - 78s/epoch - 157ms/step
Test accuracy: 97.36062884% 

Training with 0.45 dropout
497/497 - 77s - loss: 0.1484 - accuracy: 0.9697 - 77s/epoch - 155ms