# Lab 5: Bird Song Classification (adapted from the Google Speech Commands lab)

## Imports

In [27]:
import copy
import os
import random
import wave
from pathlib import Path

import librosa
import numpy as np
import soundfile as sf
import tensorflow as tf
import xenocanto
from keras.activations import softmax
from keras.layers import Input, Conv1D, AvgPool1D, MaxPool1D, ZeroPadding1D, BatchNormalization, Flatten, Dense, Activation
from keras.models import Sequential
from keras.models import load_model
from keras.utils import get_file
from keras.utils import to_categorical



# Defining the classes

In [41]:
# Class names; each one is also the name of the folder holding that bird's recordings.
birds = [
    'Yellowhammer',
    'SpottedFlycatcher',
    'CommonCuckoo',
    'CirlBunting',
]
# Root folder of the dataset, relative to the notebook's working directory.
dataset_dir = Path('dataset')


# Download and store bird recordings


In [15]:

if not (dataset_dir/'audio/testing_list.txt').exists(): # Assume dataset already downloaded/extracted if testing list is present
    for bird in birds : 
        xenocanto.metadata([bird,"type:song","q:A"])
        xenocanto.metadata([bird,"type:song","q:B"])
        await xenocanto.download([bird,"type:song","q:A"],2)
        await xenocanto.download([bird,"type:song","q:B"],2)
        if bird == 'Muscicapa striata' :
            xenocanto.metadata([bird,"type:song","q:C"])
            await xenocanto.download([bird,"type:song","q:C"],2)


Retrieving metadata...
Downloading metadata page 1...
Metadata retrieval complete.
Retrieving metadata...
Downloading metadata page 1...
Metadata retrieval complete.
Retrieving metadata...
Downloading metadata page 1...
Metadata retrieval complete.
313 recordings found, downloading...
Creating recording folder at dataset/audio/CirlBunting/
Download complete.
Retrieving metadata...
Downloading metadata page 1...
Metadata retrieval complete.
234 recordings found, downloading...
Download complete.


# Splitting the recordings into fixed-length segments


In [16]:
def split_audio_file(audio_file_path, output_folder, output_file_name, split_length=1):
    """Cut one recording into consecutive, equally long WAV segments.

    The audio is loaded at its native sample rate, the trailing part that does
    not fill a whole segment is dropped, and every segment is written to
    ``output_folder`` as
    ``splitted_<output_file_name>_<index>_of_<number of segments>.wav``.

    Args:
        audio_file_path: Path of the recording to split.
        output_folder: Existing folder that receives the segment files.
        output_file_name: Base name (without extension) used in segment names.
        split_length: Segment duration in seconds (default 1).

    Returns:
        None. Nothing is written when the recording is itself a segment, when
        segments of this recording already exist in ``output_folder``, or when
        the recording is shorter than one segment.

    Raises:
        ValueError: If ``split_length`` does not cover at least one sample.
    """
    segment_marker = "splitted_"

    # if filename starts with splitted_ then skip
    if output_file_name.startswith(segment_marker):
        print("file already splitted, skipping")
        return

    segment_prefix = segment_marker + output_file_name + "_"

    def _is_segment_of_recording(file_name):
        """Tell whether file_name looks like '<segment_prefix><i>_of_<n>.wav'."""
        if not (file_name.startswith(segment_prefix) and file_name.endswith(".wav")):
            return False
        index, separator, total = file_name[len(segment_prefix):-len(".wav")].partition("_of_")
        return bool(separator) and index.isdigit() and total.isdigit()

    # Match the full segment name pattern instead of a bare substring, so that
    # a recording named "ab" is not skipped because segments of "abc" exist.
    if any(_is_segment_of_recording(f) for f in os.listdir(output_folder)):
        print("file already exists, skipping")
        return

    audio_signal, sample_rate = librosa.load(audio_file_path, sr=None)

    split_length_samples = int(split_length * sample_rate)
    if split_length_samples <= 0:
        raise ValueError("split_length must cover at least one sample")

    # Only whole segments are kept; the remainder at the end is discarded.
    number_of_segments = len(audio_signal) // split_length_samples
    if number_of_segments == 0:
        return

    segments = np.split(audio_signal[:number_of_segments * split_length_samples], number_of_segments)

    # The "_of_" suffix is the real number of segments written for this file.
    number_of_files = str(number_of_segments)
    for index, y_split in enumerate(segments):
        sf.write(
            os.path.join(output_folder,
                         segment_prefix + str(index + 1) + "_of_" + number_of_files + ".wav"),
            y_split, sample_rate)
    # The source recording is left in place: os.remove(audio_file_path) is disabled.

# Choosing training records

In [30]:
import os
import numpy as np

# Build dataset/audio/testing_list.txt: a random selection of segment files.
# The loading cell further down appends the files listed here to x_train and
# the unlisted ones to x_test, so despite its name this file selects the
# training split.
# NOTE(review): no random seed is set, so the selection differs on every run.
# NOTE(review): only files whose path contains "splitted_" are listed; if this
# cell runs before the splitting cell on a fresh dataset the list stays empty.
recordings_folder = "dataset/audio"
main_bird = "Yellowhammer"

file_name = "testing_list.txt"

# Number of paths written to the list so far.
count = 0
with open(os.path.join(recordings_folder, file_name), "w", encoding="utf-8") as f:
    for bird_type in os.listdir(recordings_folder):
        
        bird_folder = os.path.join(recordings_folder, bird_type)
        # Only bird folders are scanned; the Jupyter checkpoint folder is excluded.
        if os.path.isdir(bird_folder) and bird_folder != "dataset/audio/.ipynb_checkpoints":
            print(bird_folder)
            for recording in os.listdir(bird_folder):
                sound_filename = os.path.join(bird_folder, recording)
                if os.path.isfile(sound_filename) and "splitted_" in sound_filename:
                    # Each segment is listed when the draw exceeds 0.3.
                    if np.random.rand() > 0.3:
                        f.write(sound_filename + "\n")
                        count += 1
                        
    # Listed paths (all birds) divided by the number of entries in the main bird's folder.
    ratio = count / len(os.listdir(os.path.join(recordings_folder, main_bird.replace(" ", "_"))))
    # Listed paths divided by (number of sub-folders of recordings_folder - 1);
    # used below as the cap on extra main-bird entries.
    number_of_main_bird_recordings = count / (
                sum(os.path.isdir(os.path.join(recordings_folder, f)) for f in os.listdir(recordings_folder)) - 1)

    # From here on, count tracks the extra main-bird entries written below.
    count = 0

    # Second pass over every entry of the main bird's folder (not only segments).
    # NOTE(review): entries already written above can be written again here, and
    # nothing is written at all when ratio >= 1 because rand() is below 1.
    for recording in os.listdir(os.path.join(recordings_folder, main_bird.replace(" ", "_"))):
        if count > number_of_main_bird_recordings:
            break

        # if a random number between 0 and 1 is greater than ratio then add to testing_list.txt
        if np.random.rand() > ratio:
            count += 1
            f.write(os.path.join(recordings_folder, main_bird.replace(" ", "_"), recording) + "\n")

print("Done")

dataset/audio/CirlBunting
dataset/audio/Yellowhammer
dataset/audio/SpottedFlycatcher
dataset/audio/CommonCuckoo
Done


In [None]:
# Split up to `limit` original recordings of every bird folder into 3-second segments.
print("splitting audio into multiple recordings")
limit = 200  # maximum number of original recordings processed per bird folder
for bird_type in os.listdir(recordings_folder):

    bird_folder = os.path.join(recordings_folder, bird_type)
    if os.path.isdir(bird_folder) and bird_folder != "dataset/audio/.ipynb_checkpoints":
        print("looking into folder:", bird_folder)
        # Only original recordings are candidates: segments written by an earlier
        # run ("splitted_" prefix) and non-file entries must not use up the limit.
        # Sorting makes the same recordings get picked on every run.
        originals = sorted(
            entry for entry in os.listdir(bird_folder)
            if os.path.isfile(os.path.join(bird_folder, entry)) and not entry.startswith("splitted_")
        )
        number_of_recordings = len(originals)
        for index, recording in enumerate(originals[:limit]):
            print("splitting recording", str(index+1), recording, "out of", number_of_recordings, "for bird", bird_type)
            recording_path = os.path.join(bird_folder, recording)
            # The base name passed on is everything before the first dot.
            split_audio_file(recording_path, bird_folder, recording.split(".")[0], 3)

# Loading the recordings

In [52]:
# Read every segment file of the known classes into fixed-size arrays and
# distribute them over the train and test splits.
dataset_dir = Path('dataset/audio')

CLASSES = birds

with (dataset_dir/ 'testing_list.txt').open(encoding='utf-8') as list_file:
    testing_list = list_file.read().splitlines()
# Set copy of the list for constant-time membership tests in the loop below.
listed_paths = set(testing_list)

x_train = []
y_train = []
x_test = []
y_test = []
# Number of training samples collected so far, per class index.
train_count_per_label = [0] * len(CLASSES)
for recording in dataset_dir.glob('**/*.wav'):
    # Keep only segment files that sit directly inside a class folder.
    if recording.parent.name not in CLASSES:
        continue
    if "splitted_" not in str(recording):
        continue
    label = CLASSES.index(recording.parent.name)

    # The frames are interpreted as 16-bit integer samples.
    with wave.open(str(recording)) as wav_file:
        data = np.frombuffer(wav_file.readframes(wav_file.getnframes()), dtype=np.int16)

    data = data.astype(np.float32)
    # Truncate or zero-pad to exactly 16000 samples with one channel axis.
    # NOTE(review): segments are written at the recording's native sample rate,
    # so 16000 samples may cover only the beginning of a segment - confirm.
    data.resize((16000, 1))
    # Files named in testing_list.txt make up the training split.
    if str(recording) in listed_paths:
        x_train.append(data)
        y_train.append(label)
        train_count_per_label[label] += 1
    # NOTE(review): the cap on test samples looks at the number of *training*
    # samples of the class (kept as in the original) - confirm this is intended.
    elif train_count_per_label[label] < 2400:
        x_test.append(data)
        y_test.append(label)
x_train = np.array(x_train)
# num_classes is fixed so both one-hot matrices have one column per class even
# when a class is missing from a split.
y_train = to_categorical(np.array(y_train), num_classes=len(CLASSES))
x_test = np.array(x_test)
y_test = to_categorical(np.array(y_test), num_classes=len(CLASSES))

In [53]:
# Report the array shapes: test inputs and labels first, then train inputs and labels.
for split_array in (x_test, y_test, x_train, y_train):
    print(split_array.shape)

(4102, 16000, 1)
(4102, 4)
(13087, 16000, 1)
(13087, 4)


In [55]:
# Standardise both splits in place with the statistics of the training inputs.
# The statistics are taken before any array is modified.
x_mean, x_std = x_train.mean(), x_train.std()

for split_inputs in (x_train, x_test):
    split_inputs -= x_mean
    split_inputs /= x_std


In [56]:
# Export the test split as CSV: one flattened sample per row for the inputs,
# one one-hot row per sample for the labels.
flat_x_test = x_test.reshape(x_test.shape[0], -1)
for csv_name, csv_rows in (('x_test.csv', flat_x_test), ('y_test.csv', y_test)):
    np.savetxt(csv_name, csv_rows, delimiter=',', fmt='%s')

# Defining the model

In [59]:
# To be modified: network architecture and training settings.

# Layers in forward order; the input is a 16000-sample, single-channel signal.
layer_stack = [
    Input(shape=(16000, 1)),
    MaxPool1D(pool_size=20, padding='valid'),
    Conv1D(filters=8, kernel_size=40, activation='relu'),
    MaxPool1D(pool_size=4, padding='valid'),
    Conv1D(filters=16, kernel_size=3, activation='relu'),
    MaxPool1D(pool_size=4, padding='valid'),
    Conv1D(filters=32, kernel_size=3, activation='relu'),
    MaxPool1D(pool_size=4, padding='valid'),
    AvgPool1D(pool_size=8),
    Flatten(),
    Dense(units=4),
    # SoftMax activation needs to be separate from Dense to remove it later on
    Activation('softmax'),
]

model = Sequential()
for stacked_layer in layer_stack:
    model.add(stacked_layer)

# EXPLORE Learning Rate
# 1e-3 is the same value as the former literal 10e-4.
opt = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.summary()
model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 max_pooling1d_4 (MaxPoolin  (None, 800, 1)            0         
 g1D)                                                            
                                                                 
 conv1d_3 (Conv1D)           (None, 761, 8)            328       
                                                                 
 max_pooling1d_5 (MaxPoolin  (None, 190, 8)            0         
 g1D)                                                            
                                                                 
 conv1d_4 (Conv1D)           (None, 188, 16)           400       
                                                                 
 max_pooling1d_6 (MaxPoolin  (None, 47, 16)            0         
 g1D)                                                            
                                                      

### Training

In [61]:
# Train for 20 epochs in batches of 100 samples; the test split doubles as
# validation data, so the reported validation metrics are test-set metrics.
model.fit(
    x_train,
    y_train,
    epochs=20,
    batch_size=100,
    validation_data=(x_test, y_test),
)

2024-04-27 16:44:01.981289: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 837568000 exceeds 10% of free system memory.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x7f9f16ab3350>

### Evaluating

In [62]:
# Loss and accuracy on the test split, then the confusion matrix
# (first argument: true class, second argument: predicted class).
model.evaluate(x_test, y_test, verbose=2)
pred_test = model.predict(x_test)
true_classes = y_test.argmax(axis=1)
predicted_classes = pred_test.argmax(axis=1)
print(tf.math.confusion_matrix(true_classes, predicted_classes))

129/129 - 1s - loss: 1.0975 - categorical_accuracy: 0.5215 - 1s/epoch - 11ms/step
tf.Tensor(
[[830 119  34  49]
 [475 445  45  86]
 [178 122 649  81]
 [563 150  61 215]], shape=(4, 4), dtype=int32)


In [64]:
# Cut the model after the Dense layer so that it outputs the values fed to SoftMax.
# The guard makes the cell safe to re-run: without it a second execution would
# cut off the Dense layer as well.
if isinstance(model.layers[-1], Activation) and model.layers[-1].activation == softmax:
    model = tf.keras.Model(model.input, model.layers[-2].output, name=model.name)

## Save trained model

In [101]:
# Write the current `model` to lab_gsc.h5 in the working directory; the
# "Remove SoftMax layer" cell loads it back from this path.
model.save('lab_gsc.h5')

  saving_api.save_model(


## Remove SoftMax layer

In [5]:


# Load the model
# load_model is imported from keras.models in the import cell at the top.
model = load_model('lab_gsc.h5')

# Remove the trailing SoftMax activation when the saved model still ends with one.
final_layer = model.layers[-1]
if isinstance(final_layer, Activation) and final_layer.activation == softmax:
    model = tf.keras.Model(model.input, model.layers[-2].output, name=model.name)
else:
    print('Error: last layer is not SoftMax Activation')

## Install Qualia-CodeGen for C inference code generation

In [69]:
%pip install qualia_codegen_core
import qualia_codegen_core
from qualia_codegen_core.graph.KerasModelGraph import KerasModelGraph
from qualia_codegen_core.graph.Quantization import Quantization
from qualia_codegen_core.graph.RoundMode import RoundMode

from importlib.resources import files
main_path = str((files('qualia_codegen_core.examples')/'Linux'/'main.cpp').resolve())

Collecting qualia_codegen_core
  Downloading qualia_codegen_core-2.2.0-py3-none-any.whl.metadata (4.3 kB)
Downloading qualia_codegen_core-2.2.0-py3-none-any.whl (4.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m0m
[?25hInstalling collected packages: qualia_codegen_core
Successfully installed qualia_codegen_core-2.2.0
Note: you may need to restart the kernel to use updated packages.


Cannot find PyTorch, PyTorch framework will be unavailable


## Convert Keras Model to Qualia-CodeGen's internal representation

In [70]:
# Convert the Keras model into Qualia-CodeGen's graph representation and print
# it; the code generation cells below work on deep copies of `modelgraph`.
modelgraph = KerasModelGraph(model).convert()
print(modelgraph)

—————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————
Inputs                                           | Layer                                            | Outputs                                          | Input shape                                      | Output shape                                    
—————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————
                                                 | input_2                                          | max_pooling1d_4                                  | (1, 16000, 1)                                    | ((1, 16000, 1),)                   

## Generate C code for the trained model with 32-bit floating-point representation

In [11]:
# Work on a private copy so the annotations do not leak into `modelgraph`.
float_modelgraph = copy.deepcopy(modelgraph)

# layer quantization annotations for float32
# No scale factor if not fixed-point quantization on integers
float32_settings = dict(
    number_type=float,
    width=32,
    long_width=32,
    weights_scale_factor=0,
    output_scale_factor=0,
    weights_round_mode=RoundMode.NONE,
    output_round_mode=RoundMode.NONE,
)
# Every node receives its own Quantization instance.
for graph_node in float_modelgraph.nodes:
    graph_node.q = Quantization(**float32_settings)

float_converter = qualia_codegen_core.Converter(output_path=Path('gsc_output_floating'))
float_res = float_converter.convert_model(float_modelgraph)

# Keep a copy of the value returned by the converter in a single header file.
with open('gsc_model_floating.h', 'w') as header_file:
    header_file.write(float_res)

Graphviz not available


## Compile the 32-bit floating-point C code for x86 and evaluate on small dataset

In [None]:
!g++ -std=c++17 -Wall -Wextra -pedantic -Ofast -o gsc_floating -include gsc_output_floating/include/defines.h -Igsc_output_floating/include gsc_output_floating/model.c {main_path}
!./gsc_floating x_test_gsc_250.csv y_test_gsc_250.csv

## Generate C code for the trained model with 16-bit fixed-point representation

In [71]:
# Work on a private copy so the annotations do not leak into `modelgraph`.
fixed_modelgraph = copy.deepcopy(modelgraph)

# layer quantization annotations for int16 Q9.7
int16_settings = dict(
    number_type=int,
    width=16,
    long_width=32,
    weights_scale_factor=7,
    output_scale_factor=7,
    weights_round_mode=RoundMode.FLOOR,
    output_round_mode=RoundMode.FLOOR,
)
# Every node receives its own Quantization instance.
for graph_node in fixed_modelgraph.nodes:
    graph_node.q = Quantization(**int16_settings)

fixed_converter = qualia_codegen_core.Converter(output_path=Path('gsc_output_fixed'))
fixed_res = fixed_converter.convert_model(fixed_modelgraph)

# Keep a copy of the value returned by the converter in a single header file.
with open('gsc_model_fixed.h', 'w') as header_file:
    header_file.write(fixed_res)

Graphviz not available


## Compile the 16-bit fixed-point C code for x86 and evaluate on small dataset

In [73]:
# Build the generated fixed-point model together with the example main.cpp
# ({main_path} is interpolated from the Python variable), then run the binary
# on the x_test.csv / y_test.csv files written earlier in this notebook.
!g++ -std=c++17 -Wall -Wextra -pedantic -Ofast -o gsc_fixed -include gsc_output_fixed/include/defines.h -Igsc_output_fixed/include gsc_output_fixed/model.c {main_path}
!./gsc_fixed x_test.csv y_test.csv

Testing accuracy: 0.477328
