# Keyword Spotting with different architectures

In [14]:
from os.path import join as pjoin

import numpy as np
import tensorflow as tf
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras.models import Model, load_model

from utils import load_dataset

# Seed both NumPy and TensorFlow: seeding NumPy alone leaves TensorFlow's own
# random ops (e.g. the layers' weight initialisers) unseeded between runs.
SEED = 1234
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Location handed to `model.save_weights` / `model.load_weights` further down.
CHECKPOINTS_PATH = 'checkPoints/'

## Data loading

In [2]:
# Path and vocabulary passed to `load_dataset`.
data_dir = 'speechdataset/processed_data'
keywords = [
    'bed', 'down', 'forward', 'house',
    'nine', 'one', 'six', 'tree',
]

# Number of target words (one class per keyword).
n_words = len(keywords)

# `load_dataset` returns the train and test arrays for the selected keywords.
(X_train, Y_train,
 X_test, Y_test) = load_dataset(data_dir, keywords)

In [3]:
# Show the shapes of the training inputs and of the training labels.
print(*(split.shape for split in (X_train, Y_train)))

(99, 39, 19587) (19587,)


## Convolutional Neural Network Architecture

In [4]:
"""
class KWS_CNN(tf.keras.Model):
    
    def __init__(self):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv2D(filters=94, kernel_size=(66,8), strides = (1,1), 
                                            padding = 'valid', kernel_initializer = glorot_uniform(seed=0))
        self.conv2 = tf.keras.layers.Conv2D(filters=94, kernel_size=(20,4), strides = (1,1), 
                               padding = 'valid', kernel_initializer = glorot_uniform(seed=0))
        self.maxpool = tf.keras.layers.MaxPooling2D(pool_size=(2,3), strides=(2,3), padding='valid')
        self.act = tf.keras.layers.Activation('relu')
        self.lin = tf.keras.layers.Dense(99)
        self.dens = tf.keras.layers.Dense(396, activation='sigmoid')
        self.softmx = tf.keras.layers.Softmax()
        self.flat = tf.keras.layers.Flatten()
        self.dropout = tf.keras.layers.Dropout(0.5)
    
    # forward pass
    def call(self, inputs, dropout=False):
        x = self.conv1(inputs)
        #if dropout:
            #x = self.dropout(x)
        x = self.maxpool(x)
        x = self.act(x)
        x = self.conv2(x)
        x = self.act(x)
        x = self.flat(x)
        x = self.lin(x)
        x = self.dens(x)   
        return self.softmx(x)
    
    #loss
    def compute_loss(self, x, y, y_pred)
    """

"\nclass KWS_CNN(tf.keras.Model):\n    \n    def __init__(self):\n        super().__init__()\n        self.conv1 = tf.keras.layers.Conv2D(filters=94, kernel_size=(66,8), strides = (1,1), \n                                            padding = 'valid', kernel_initializer = glorot_uniform(seed=0))\n        self.conv2 = tf.keras.layers.Conv2D(filters=94, kernel_size=(20,4), strides = (1,1), \n                               padding = 'valid', kernel_initializer = glorot_uniform(seed=0))\n        self.maxpool = tf.keras.layers.MaxPooling2D(pool_size=(2,3), strides=(2,3), padding='valid')\n        self.act = tf.keras.layers.Activation('relu')\n        self.lin = tf.keras.layers.Dense(99)\n        self.dens = tf.keras.layers.Dense(396, activation='sigmoid')\n        self.softmx = tf.keras.layers.Softmax()\n        self.flat = tf.keras.layers.Flatten()\n        self.dropout = tf.keras.layers.Dropout(0.5)\n    \n    # forward pass\n    def call(self, inputs, dropout=False):\n        x = self.co

In [10]:
def KWS_CNN_model(input_shape, n_classes=None):
    """
    Build the keyword-spotting CNN with the Keras functional API.

    Layer order: Conv2D -> max-pooling -> ReLU -> Conv2D -> ReLU -> Flatten ->
    Dense without activation (linear) -> three ReLU Dense layers -> softmax.

    Arguments:
    :param input_shape: shape of one sample, batch axis excluded. Axis 0 is
        taken as the number of frames and sizes the height of both convolution
        kernels. This notebook calls the function with (99, 39, 1).
    :param n_classes: number of units of the softmax output layer. When None
        (the default) the notebook-level global `n_words` is used, which is
        the behaviour this function had before the parameter existed.

    :returns Model: an uncompiled tf.keras.Model() instance named 'KWS_CNN'
    """
    if n_classes is None:
        # Backward-compatible fallback: resolved at call time, so the cell that
        # defines `n_words` must have been run before calling without n_classes.
        n_classes = n_words

    layers = tf.keras.layers
    n_frames = input_shape[0]

    # Kernel heights scale with the number of frames. Clamp to 1 so that very
    # short inputs cannot produce a zero-sized (invalid) kernel.
    first_kernel_height = max(1, int(2*n_frames/3))
    second_kernel_height = max(1, int(n_frames/5))

    X_input = tf.keras.Input(input_shape)

    # First convolution ('same' padding keeps the spatial size unchanged)
    X = layers.Conv2D(filters=94, kernel_size=(first_kernel_height, 8), strides=(1, 1),
                      padding='same', name='First_Conv')(X_input)

    # Pooling on time and frequency, then the non-linearity
    X = layers.MaxPooling2D(pool_size=(2, 3), strides=(2, 3), padding='valid',
                            name='MaxPooling')(X)
    X = layers.Activation('relu')(X)

    # Second convolution
    X = layers.Conv2D(filters=94, kernel_size=(second_kernel_height, 4), strides=(1, 1),
                      padding='same', name='Second_Conv')(X)
    X = layers.Activation('relu')(X)

    # Linear layer: Dense with no activation applied to the flattened features
    X = layers.Flatten()(X)
    X = layers.Dense(32, name='Linear')(X)

    # Dense layers
    X = layers.Dense(128, activation='relu', name='Dense1')(X)
    X = layers.Dense(128, activation='relu', name='Dense2')(X)
    X = layers.Dense(128, activation='relu', name='Dense3')(X)

    # Softmax over the target classes
    X = layers.Dense(n_classes, activation='softmax', name='Softmax')(X)

    # MODEL
    model = Model(inputs=X_input, outputs=X, name='KWS_CNN')

    return model

In [11]:
# Instantiate the CNN for inputs of shape (99, 39, 1) and print its
# layer-by-layer summary.
model = KWS_CNN_model(input_shape=(99, 39, 1))
model.summary()

Model: "KWS_CNN"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 99, 39, 1)]       0         
                                                                 
 First_Conv (Conv2D)         (None, 99, 39, 94)        49726     
                                                                 
 MaxPooling (MaxPooling2D)   (None, 49, 13, 94)        0         
                                                                 
 activation_2 (Activation)   (None, 49, 13, 94)        0         
                                                                 
 Second_Conv (Conv2D)        (None, 49, 13, 94)        671630    
                                                                 
 activation_3 (Activation)   (None, 49, 13, 94)        0         
                                                                 
 flatten_1 (Flatten)         (None, 59878)             0   

In [18]:
# Configure training: Adam optimiser, categorical cross-entropy loss and
# accuracy as the reported metric.
model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer="adam",
    metrics=["accuracy"],
)

In [None]:
# Train the model
# Stop once the validation loss has not improved for 5 consecutive epochs.
early_stop_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

# One-hot encode the class-index labels to match the categorical cross-entropy
# loss. The depth follows the keyword list instead of a hard-coded 8.
una_jota = tf.one_hot(Y_train, n_words, 1, 0)
print(una_jota.shape)  # show the shape only, not the whole label tensor

# Fit
# X_train keeps the samples on its last axis, so move that axis to the front.
# The early-stopping callback is now actually passed to `fit`; before, it was
# created but never used, so training always ran the full 100 epochs.
history = model.fit(np.transpose(X_train, [2,0,1]), una_jota, epochs=100, batch_size=10,
                    validation_split=0.1, callbacks=[early_stop_callback])

In [None]:
# Write the model's current weights to the checkpoint location.
model.save_weights(filepath=CHECKPOINTS_PATH)

In [21]:
# Loads the weights previously written to CHECKPOINTS_PATH
model.load_weights(CHECKPOINTS_PATH)

# Re-evaluate the model on the test split.
# The one-hot depth follows the keyword list instead of a hard-coded 8, and the
# sample axis (last axis of X_test) is moved to the front for Keras.
Y_test_onehot = tf.one_hot(Y_test, n_words, 1, 0)
loss, acc = model.evaluate(np.transpose(X_test, [2,0,1]), Y_test_onehot, verbose=2)
print("Restored model, accuracy: {:5.2f}%".format(100 * acc))

109/109 - 49s - loss: 2.0072 - accuracy: 0.1675 - 49s/epoch - 446ms/step
Restored model, accuracy: 16.75%


## Clustering, RNN, ...

In [None]:
# TODO: placeholder for this section; the cell only echoes the `model` object.
model