# Keyword Spotting with different architectures

In [1]:
import numpy as np
np.random.seed(1234)

from os.path import join as pjoin
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.initializers import glorot_uniform
from utils import load_dataset

2022-12-15 20:14:48.548545: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-15 20:14:48.750950: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2022-12-15 20:14:48.757477: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-12-15 20:14:48.757494: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore 

## Data loading

In [2]:
data_dir = '/mnt/speechdataset/processed_data'
keywords = ['bed', 'down', 'forward', 'house', 'nine', 'one', 'six', 'tree']

X_train, Y_train, X_test, Y_test = load_dataset(data_dir, keywords)

In [7]:
print(X_train.shape, Y_train.shape)

(99, 39, 19587) (19587,)


## Convolutional Neural Network Architecture

In [None]:
class KWS_CNN(tf.keras.Model):
    
    def __init__(self):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv2D(filters=94, kernel_size=(66,8), strides = (1,1), 
                                            padding = 'valid', kernel_initializer = glorot_uniform(seed=0))
        self.conv2 = tf.keras.layers.Conv2D(filters=94, kernel_size=(20,4), strides = (1,1), 
                               padding = 'valid', kernel_initializer = glorot_uniform(seed=0))
        self.maxpool = tf.keras.layers.MaxPooling2D(pool_size=(2,3), strides=(2,3), padding='valid')
        self.act = tf.keras.layers.Activation('relu')
        self.lin = tf.keras.layers.Dense(99)
        self.dens = tf.keras.layers.Dense(396, activation='sigmoid')
        self.softmx = tf.keras.layers.Softmax()
        self.flat = tf.keras.layers.Flatten()
        self.dropout = tf.keras.layers.Dropout(0.5)
    
    # forward pass
    def call(self, inputs, dropout=False):
        x = self.conv1(inputs)
        #if dropout:
            #x = self.dropout(x)
        x = self.maxpool(x)
        x = self.act(x)
        x = self.conv2(x)
        x = self.act(x)
        x = self.flat(x)
        x = self.lin(x)
        x = self.dens(x)   
        return self.softmx(x)
    
    #loss
    def compute_loss(self, x, y, y_pred)

In [10]:
def KWS_CNN_model(input_shape):
    """
    Arguments:
    :param input_shape: shape of the data of the dataset

    :returns Model: a tf.keras.Model() instance
    """
    
    X_input = tf.keras.Input(input_shape)
    
    # CONV -> pooling -> CONV -> lin -> Dense?
    # First convolution
    X = tf.keras.layers.Conv2D(filters=94, kernel_size=(66,8), strides = (1,1), 
               padding = 'same', kernel_initializer = glorot_uniform(seed=0))(X_input)
    
    #Pooling on time and frequency
    X = tf.keras.layers.MaxPooling2D(pool_size=(2,3), strides=(2,3), padding='valid')(X)
    X = tf.keras.layers.Activation('relu')(X)
    
    #Second convolution
    X = tf.keras.layers.Conv2D(filters=94, kernel_size=(20,4), strides = (1,1), 
                               padding = 'valid', kernel_initializer = glorot_uniform(seed=0))(X)
    X = tf.keras.layers.Activation('relu')(X)
    
    # Linear layer
    X = tf.keras.layers.Flatten()(X)
    X = tf.keras.layers.Dense(32)(X)
    
    # Dense layer
    X = tf.keras.layers.Dense(128, activation='sigmoid')(X)
    
    # Softmax
    X = tf.keras.layers.Softmax()(X)
    
    # MODEL
    model = Model(inputs = X_input, outputs = X, name='KWS_CNN')
    
    return model

In [11]:
model = KWS_CNN_model((99,39,1))
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

In [22]:
# Train the model
num_epochs = 30

early_stop_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

# Fit
history = model.fit(np.transpose(X_train, [2,0,1]), Y_train, epochs=num_epochs, 
                    validation_split=0.1,
                    callbacks=[early_stop_callback])

Epoch 1/30


ValueError: Creating variables on a non-first call to a function decorated with tf.function.

In [21]:
np.transpose(X_train, [2,0,1]).shape

(19587, 99, 39)

## Clustering, RNN, ...