In [18]:
from os import listdir
from os.path import isdir, join
from tensorflow.keras import layers, models
import numpy as np


In [19]:
# Create list of all targets (minus background noise).
# The '_background_noise_' folder is filtered out while the list is built rather
# than removed afterwards: list.remove() raises ValueError when the folder is
# absent, which would stop the notebook even though nothing is actually wrong.
# NOTE(review): the listdir order is kept as-is because labels are later matched
# by position in this list — presumably it has to equal the order used when the
# feature sets were extracted; confirm before sorting.
dataset_path = 'C:\\\\MCT - IoT Engineer\\\\Research Project\\\\data_speech_commands_v0.02'
all_targets = [
    name
    for name in listdir(dataset_path)
    if name != '_background_noise_' and isdir(join(dataset_path, name))
]
print('Available targets:', all_targets)


Available targets: ['backward', 'bed', 'bird', 'cat', 'dog', 'down', 'eight', 'five', 'follow', 'forward', 'four', 'go', 'happy', 'house', 'learn', 'left', 'marvin', 'nine', 'no', 'off', 'on', 'one', 'right', 'seven', 'sheila', 'six', 'stop', 'three', 'tree', 'two', 'up', 'visual', 'wow', 'yes', 'zero']


In [20]:
# Settings
# Directory holding the feature-set archive; the trained model is written here too.
feature_sets_path = 'C:\\MCT - IoT Engineer\\Research Project\\data train'
# Name of the .npz archive that contains the train / validation / test arrays.
feature_sets_filename = 'all_targets_mfcc_sets.npz'
# File name under which the trained model is saved.
model_filename = 'wake_word_on_go_model.h5'
# Words the classifier should recognise; every other label is grouped into a
# single extra "unknown" class.
target_words = ['on', 'go']  # Replace with your target words


In [21]:
# Read the feature-set archive from the configured directory and list the
# arrays it contains
feature_sets_file = join(feature_sets_path, feature_sets_filename)
feature_sets = np.load(feature_sets_file)
print('Feature sets loaded:', feature_sets.files)


Feature sets loaded: ['x_train', 'y_train', 'x_val', 'y_val', 'x_test', 'y_test']


In [22]:
# Pull the three splits (features and labels) out of the loaded archive
x_train, y_train = feature_sets['x_train'], feature_sets['y_train']
x_val, y_val = feature_sets['x_val'], feature_sets['y_val']
x_test, y_test = feature_sets['x_test'], feature_sets['y_test']

# Append a trailing channel axis of size 1 to every feature array
# (the same layout a grayscale image would have)
x_train = x_train[..., np.newaxis]
x_val = x_val[..., np.newaxis]
x_test = x_test[..., np.newaxis]

print(f"x_train shape: {x_train.shape}, x_val shape: {x_val.shape}, x_test shape: {x_test.shape}")


x_train shape: (77509, 16, 16, 1), x_val shape: (9667, 16, 16, 1), x_test shape: (9680, 16, 16, 1)


In [23]:
# Relabel the dataset to include only the target words and an "unknown" class.
# Classes 0..len(target_words)-1 follow the order of target_words; every other
# label collapses into one extra class whose index is len(target_words).
unknown_class_index = len(target_words)
target_indices = [all_targets.index(word) for word in target_words]


def relabel(labels):
    """Map original label indices onto the reduced class set.

    Args:
        labels: 1-D array-like of original labels; each one is compared
            against the positions of the target words in all_targets.

    Returns:
        Integer array with one entry per input label: the position of the
        label in target_indices, or unknown_class_index when the label is
        not one of the target words.
    """
    labels = np.asarray(labels)
    relabeled = np.full(labels.shape, unknown_class_index, dtype=int)
    # Walk the targets back to front so that, should a word be listed twice,
    # the first position wins — the same answer list.index() would give.
    for class_id in reversed(range(len(target_indices))):
        relabeled[labels == target_indices[class_id]] = class_id
    return relabeled


# Always relabel from the labels stored in the archive, never from the current
# y_* variables: those are overwritten below, so feeding them back in on a
# second run of this cell would turn every label into "unknown".
y_train = relabel(feature_sets['y_train'])
y_val = relabel(feature_sets['y_val'])
y_test = relabel(feature_sets['y_test'])

print(f"Unique labels in y_train: {np.unique(y_train)}")
print(f"Unique labels in y_val: {np.unique(y_val)}")
print(f"Unique labels in y_test: {np.unique(y_test)}")


Unique labels in y_train: [0 1 2]
Unique labels in y_val: [0 1 2]
Unique labels in y_test: [0 1 2]


In [24]:
# Assemble the classifier: two convolution + max-pooling stages followed by a
# dense head with one softmax output per class
input_shape = x_train.shape[1:]
num_classes = len(target_words) + 1  # one per target word, plus the unknown class

model = models.Sequential([
    layers.Input(shape=input_shape),
    layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(units=128, activation='relu'),
    layers.Dense(units=num_classes, activation='softmax'),
])
# Labels are plain integer class ids, hence the sparse variant of the loss
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()


In [25]:
# Fit the network on the training split and report validation metrics each epoch
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,  # samples per gradient step; tune as needed
    epochs=10,  # passes over the training data; tune as needed
    validation_data=(x_val, y_val),
)


Epoch 1/10
[1m2423/2423[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.9307 - loss: 0.2452 - val_accuracy: 0.9458 - val_loss: 0.1634
Epoch 2/10
[1m2423/2423[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.9547 - loss: 0.1338 - val_accuracy: 0.9610 - val_loss: 0.1164
Epoch 3/10
[1m2423/2423[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.9607 - loss: 0.1147 - val_accuracy: 0.9624 - val_loss: 0.1117
Epoch 4/10
[1m2423/2423[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.9659 - loss: 0.0996 - val_accuracy: 0.9677 - val_loss: 0.1059
Epoch 5/10
[1m2423/2423[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.9689 - loss: 0.0909 - val_accuracy: 0.9645 - val_loss: 0.1100
Epoch 6/10
[1m2423/2423[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.9726 - loss: 0.0815 - val_accuracy: 0.9644 - val_loss: 0.1055
Epoch 7/10
[1m

In [26]:
# Evaluate on the test split, then save the trained model
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'Test accuracy: {test_acc:.2f}')

# Build the destination once so the path handed to save() and the path reported
# below cannot drift apart. The file is written into feature_sets_path, not the
# working directory, so the message shows the full location instead of the bare
# file name.
model_path = join(feature_sets_path, model_filename)
model.save(model_path)
print(f'Model saved to {model_path}')


[1m303/303[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9670 - loss: 0.0993
Test accuracy: 0.97




Model saved to wake_word_on_go_model.h5
