# Packages

## Global Packages

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [2]:
import sys
import json
import math
import wave
import shutil
import pathlib
import librosa
import numpy as np
import pandas as pd
import seaborn as sns
import soundfile as sf
import librosa.display
import tensorflow as tf
# import sounddevice as sd
import matplotlib.pyplot as plt
from IPython.display import Audio, display
from tensorflow.keras import layers, models
from tensorflow.keras.models import load_model

## Constants

In [3]:
SEED = 42
BATCH_SIZE = 64
SAMPLE_RATE = 16000
VALIDATION_SPLIT = 0.2


# MFCC parameters
N_MFCC = 13
N_FFT = 2048
HOP_LENGTH = 512
TRACK_DURATION = 2
SAMPLES_PER_TRACK = SAMPLE_RATE * TRACK_DURATION


# Path to the datasets

The dataset's audio clips are of 6 classes and stored in 6 folders corresponding to each speech command: 
- `ddyo`- `kkono` - `mu maaso` - `emabega` - `yimirira` - `unknown`

In [4]:
train_data_dir = pathlib.Path('Dataset/Train')
test_data_dir = pathlib.Path('Dataset/Test')

train_data_needs_preprocessing = pathlib.Path('Dataset/Train_need_preprocessing')
test_data_needs_preprocessing = pathlib.Path('Dataset/Test_need_preprocessing')

TRAIN_JSON_PATH = pathlib.Path('Dataset/json/mfcc_train_data.json')
TEST_JSON_PATH = pathlib.Path('Dataset/json/mfcc_test_data.json')

# Labels

In [5]:
def list_directory_contents(directory, label):
    contents = np.array(tf.io.gfile.listdir(str(directory)))
    print(f'{label} commands labels: {contents}')
    return contents

In [6]:
train_commands = list_directory_contents(train_data_dir, 'Train')
test_commands = list_directory_contents(test_data_dir, 'Test')

Train commands labels: ['ddyo' 'mumaaso' 'unknown' 'kkono' 'emabega' 'yimirira']
Test commands labels: ['ddyo' 'mumaaso' 'unknown' 'kkono' 'emabega' 'yimirira']


# Load sample audio files

In [7]:
emabega_file_path = os.path.join(train_data_dir, 'emabega', 'emabega.wav')
ddyo_file_path = os.path.join(train_data_dir, 'ddyo', 'ddyo.wav')
yimirira_file_path = os.path.join(train_data_dir, 'yimirira', 'yimirira.wav')
kkono_file_path = os.path.join(train_data_dir, 'kkono', 'kkono.wav')
mumasso_file_path = os.path.join(train_data_dir, 'mumaaso', 'mumaaso.wav')
unknown_file_path = os.path.join(train_data_dir, 'unknown', 'unknown_001.wav')

file_paths = [emabega_file_path, ddyo_file_path, yimirira_file_path, kkono_file_path, mumasso_file_path, unknown_file_path]

# Dataset Preparaption

In [8]:
# Function to prepare dataset
def prepare_dataset(data_dir, json_path, num_mfcc=13, n_fft=2048, hop_length=512, num_segments=2):
    data = {
        "mapping": [],
        "mfcc": [],
        "labels": [],
        "files": []
    }

    samples_per_segment = int(SAMPLES_PER_TRACK / num_segments)
    num_mfcc_vectors_per_segment = math.ceil(samples_per_segment / hop_length)

    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(data_dir)):
        if dirpath is not data_dir:
            semantic_label = dirpath.split("/")[-1]
            data["mapping"].append(semantic_label)
            print("\nProcessing: {}".format(semantic_label))

            for f in filenames:
                file_path = os.path.join(dirpath, f)
                signal, sample_rate = librosa.load(file_path, sr=SAMPLE_RATE, duration=TRACK_DURATION)

                if len(signal) >= SAMPLE_RATE:
                    signal = signal[:SAMPLE_RATE]

                    for d in range(num_segments):
                        start = samples_per_segment * d
                        finish = start + samples_per_segment

                        mfcc = librosa.feature.mfcc(y=signal[start:finish], sr=sample_rate, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)
                        mfcc = mfcc.T

                        if len(mfcc) == num_mfcc_vectors_per_segment:
                            data["mfcc"].append(mfcc.tolist())
                            data["labels"].append(i-1)
                            data["files"].append(file_path)
                            print("{}, segment:{}".format(file_path, d+1))

    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)

In [None]:
def save_mfcc(dataset_path, json_path, num_mfcc=13, n_fft=2048, hop_length=512, num_segments=6):

    # dictionary to store mapping, labels, and MFCCs
    data = {
        "mapping": [],
        "labels": [],
        "mfcc": []
    }

    samples_per_segment = int(SAMPLES_PER_AUDIO / num_segments)
    num_mfcc_vectors_per_segment = math.ceil(samples_per_segment / hop_length)

    # loop through all sub-folders
    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):

        # ensure we're processing the sub-folder level
        if dirpath is not dataset_path:

            # save genre label (i.e., sub-folder name) in the mapping
            semantic_label = dirpath.split("/")[-1]
            data["mapping"].append(semantic_label)
            print(f"\nProcessing: {semantic_label}")

            # process all audio files in genre sub-dir
            for f in filenames:

                # load audio file
                file_path = os.path.join(dirpath, f)
                signal, sample_rate = librosa.load(file_path, sr=SAMPLE_RATE)

                # process all segments of audio file
                for d in range(num_segments):

                    # calculate start and finish sample for the current segment
                    start = samples_per_segment * d
                    finish = start + samples_per_segment

                    # extract mfcc
                    mfcc = librosa.feature.mfcc(y=signal[start:finish],
                                                sr=sample_rate,
                                                n_mfcc=num_mfcc,
                                                n_fft=n_fft,
                                                hop_length=hop_length)

                    mfcc = mfcc.T

                    # store only mfcc feature with the expected number of vectors
                    if len(mfcc) == num_mfcc_vectors_per_segment:
                        data["mfcc"].append(mfcc.tolist())
                        data["labels"].append(i-1)
                        print(f"{file_path}, segment:{d+1}")

    # save MFCCs to json file
    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)

    # return the data dictionary
    return data

data = save_mfcc(TRAIN_DATASET_PATH, JSON_PATH, num_segments=6)

def load_data(data):

    # convert lists to numpy arrays
    X = np.array(data["mfcc"])
    y = np.array(data["labels"])

    print("Data successfully loaded!")

    return X, y

X, y = load_data(data)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [9]:
prepare_dataset(train_data_dir, TRAIN_JSON_PATH)
prepare_dataset(test_data_dir, TEST_JSON_PATH)


Processing: Train

Processing: ddyo
Dataset/Train/ddyo/ddyo_324.wav, segment:1
Dataset/Train/ddyo/ddyo_936.wav, segment:1
Dataset/Train/ddyo/ddyo_1531.wav, segment:1
Dataset/Train/ddyo/ddyo_1195.wav, segment:1
Dataset/Train/ddyo/ddyo_1179.wav, segment:1
Dataset/Train/ddyo/ddyo_704.wav, segment:1




Dataset/Train/ddyo/ddyo_806.wav, segment:1
Dataset/Train/ddyo/ddyo_252.wav, segment:1
Dataset/Train/ddyo/ddyo_1549.wav, segment:1
Dataset/Train/ddyo/ddyo_679.wav, segment:1
Dataset/Train/ddyo/ddyo_250.wav, segment:1
Dataset/Train/ddyo/ddyo_294.wav, segment:1
Dataset/Train/ddyo/ddyo_473.wav, segment:1
Dataset/Train/ddyo/ddyo_1568.wav, segment:1
Dataset/Train/ddyo/ddyo_291.wav, segment:1
Dataset/Train/ddyo/ddyo_416.wav, segment:1
Dataset/Train/ddyo/ddyo_546.wav, segment:1
Dataset/Train/ddyo/ddyo_462.wav, segment:1
Dataset/Train/ddyo/ddyo_1245.wav, segment:1
Dataset/Train/ddyo/ddyo_1262.wav, segment:1
Dataset/Train/ddyo/ddyo_1422.wav, segment:1
Dataset/Train/ddyo/ddyo_1333.wav, segment:1
Dataset/Train/ddyo/ddyo_204.wav, segment:1
Dataset/Train/ddyo/ddyo_304.wav, segment:1
Dataset/Train/ddyo/ddyo_337.wav, segment:1
Dataset/Train/ddyo/ddyo_288.wav, segment:1
Dataset/Train/ddyo/ddyo_1327.wav, segment:1
Dataset/Train/ddyo/ddyo_647.wav, segment:1
Dataset/Train/ddyo/ddyo_677.wav, segment:1
Data

## Train and validation Datasets

In [10]:
# Function to load train and validation datasets
def load_train_dataset(json_path, batch_size, validation_split=0.2):
    # Load MFCCs from JSON and create TensorFlow dataset
    with open(json_path, "r") as fp:
        data = json.load(fp)

    mfccs = np.array(data["mfcc"])
    labels = np.array(data["labels"])

    dataset = tf.data.Dataset.from_tensor_slices((mfccs, labels))
    dataset = dataset.shuffle(len(mfccs)).batch(batch_size)

    train_size = int((1 - validation_split) * len(mfccs))
    train_ds = dataset.take(train_size)
    val_ds = dataset.skip(train_size)

    train_ds = train_ds.cache().prefetch(tf.data.AUTOTUNE)
    val_ds = val_ds.cache().prefetch(tf.data.AUTOTUNE)

    return train_ds, val_ds, data["mapping"]

In [13]:
train_mfcc_ds, val_mfcc_ds, mapping = load_train_dataset(TRAIN_JSON_PATH, BATCH_SIZE, VALIDATION_SPLIT)

## Test dataset

In [15]:
# Function to load test dataset
def load_test_dataset(json_path, batch_size):
    # Load MFCCs from JSON and create TensorFlow dataset
    with open(json_path, "r") as fp:
        data = json.load(fp)

    mfccs = np.array(data["mfcc"])
    labels = np.array(data["labels"])

    dataset = tf.data.Dataset.from_tensor_slices((mfccs, labels))
    dataset = dataset.shuffle(len(mfccs)).batch(batch_size)

    test_ds = dataset.cache().prefetch(tf.data.AUTOTUNE)

    return test_ds, data["mapping"]



In [16]:
test_mfcc_ds, mapping = load_test_dataset(TEST_JSON_PATH, BATCH_SIZE)

# Model 1

### Input shape 

In [17]:
example_spectrograms = next(iter(train_mfcc_ds))[0]
input_shape = example_spectrograms.shape[1:]

print('Input shape:', input_shape)
num_labels = len(mapping)

Input shape: (32, 13)


In [18]:
print(f'Labels {mapping}')
print(f'Number of labels: {num_labels}')

Labels ['Test', 'ddyo', 'mumaaso', 'unknown', 'kkono', 'emabega', 'yimirira']
Number of labels: 7


In [19]:
# Model artitecture 1
def model(input_shape, num_labels):
    model = models.Sequential([
        layers.Input(shape=input_shape),
        layers.Conv2D(16, 3, activation='relu', padding='same'),
        layers.MaxPooling2D(),
        layers.Conv2D(32, 3, activation='relu', padding='same'),
        layers.MaxPooling2D(),
        layers.Conv2D(64, 3, activation='relu', padding='same'),
        layers.MaxPooling2D(),
        layers.Conv2D(128, 3, activation='relu', padding='same'),
        layers.GlobalMaxPooling2D(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_labels, activation='softmax')
    ])

    return model

model = model(input_shape, num_labels)

ValueError: Input 0 of layer "conv2d" is incompatible with the layer: expected min_ndim=4, found ndim=3. Full shape received: (None, 32, 13)

### Model Architecture

In [None]:
model.summary()

### Compile and Train the model

In [None]:
Epochs = 35
patience = 10
learning_rate = 0.001
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)

In [None]:
# Function to compile and train the model
def compile_and_train_model(model, train_ds, val_ds, learning_rate=learning_rate):
    try:
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
        early_stopping = EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True)
        reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=patience, min_lr=1e-6)
        history = model.fit(train_ds, validation_data=val_ds, epochs=Epochs, callbacks=[early_stopping, reduce_lr])
        return history
    except Exception as e:
        print(f"An error occurred during model compilation and training: {str(e)}")

In [None]:
history = compile_and_train_model(model, train_mfcc_ds, val_mfcc_ds)

### Plot Accuracy and Loss

In [None]:

# Function to plot the training history
def plot_training_history(history):
    try:
        acc = history.history['accuracy']
        val_acc = history.history['val_accuracy']
        loss = history.history['loss']
        val_loss = history.history['val_loss']

        epochs = range(len(acc))

        plt.figure(figsize=(12, 4))
        plt.subplot(1, 2, 1)
        plt.plot(epochs, acc, 'r', label='Training accuracy')
        plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
        plt.title('Training and validation accuracy')
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.legend()

        plt.subplot(1, 2, 2)
        plt.plot(epochs, loss, 'r', label='Training loss')
        plt.plot(epochs, val_loss, 'b', label='Validation loss')
        plt.title('Training and validation loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()

        plt.tight_layout()
        plt.show()
    except Exception as e:
        print(f"An error occurred during plotting the training history: {str(e)}")

In [None]:
plot_training_history(history)

### Evaluate the model performance

Run the model on the test set and check the model's performance:

In [None]:
# Function to evaluate the model on the test dataset
def evaluate_model(model, test_ds):
    try:
        y_true = []
        y_pred = []
        for audio, labels in test_ds:
            predictions = model.predict(audio, verbose=0)
            y_true.extend(labels.numpy())
            y_pred.extend(tf.argmax(predictions, axis=1).numpy())

        loss, accuracy = model.evaluate(test_ds, verbose=0)
        precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
        recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
        f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)

        print(f"Test accuracy:      {int(accuracy * 100)}%")
        print(f"Test loss:          {loss}")
        print(f"Precision:          {precision}")
        print(f"Recall:             {recall}")
        print(f"F1-score:           {f1}")
    except Exception as e:
        print(f"An error occurred during model evaluation: {str(e)}")

In [None]:
evaluate_model(model, test_mfcc_ds)

## Confusion matrix

In [None]:
y_pred = model.predict(test_mfcc_ds)
y_pred = tf.argmax(y_pred, axis=1)
y_true = tf.concat(list(test_mfcc_ds.map(lambda s,lab: lab)), axis=0)
label_names_slice = ['ddyo', 'emabega', 'gaali', 'kkono', 'mumaaso', 'unknown', 'yimirira']

In [None]:
# Function to plot the confusion matrix
def plot_confusion_matrix(y_true, y_pred, label_names):
    try:
        confusion_mtx = tf.math.confusion_matrix(y_true, y_pred)
        plt.figure(figsize=(8, 6))
        sns.heatmap(confusion_mtx,
                    xticklabels=label_names,
                    yticklabels=label_names,
                    annot=True, fmt='g')
        plt.xlabel('Prediction')
        plt.ylabel('Label')
        plt.title('Confusion Matrix')
        plt.show()
    except Exception as e:
        print(f"An error occurred during plotting the confusion matrix: {str(e)}")

In [None]:
plot_confusion_matrix(y_true, y_pred, label_names_slice)

## save the Keras model

In [None]:
KERAS_MODEL_PATH = "model/mfcc_model_1.keras"

In [None]:
model.save(KERAS_MODEL_PATH)

## Size of the keras model

In [None]:
# Function to get the file size
def get_and_convert_file_size(file_path, unit=None):
    size = os.path.getsize(file_path)
    if unit == "KB":
        return print('File size: ' + str(round(size / 1024, 3)) + ' Kilobytes')
    elif unit == "MB":
        return print('File size: ' + str(round(size / (1024 * 1024), 3)) + ' Megabytes')
    else:
        return print('File size: ' + str(size) + ' bytes')


In [None]:
keras_model_size = get_and_convert_file_size(KERAS_MODEL_PATH, 'MB')

In [None]:
model.save(KERAS_MODEL_PATH)
keras_model_size = get_and_convert_file_size(KERAS_MODEL_PATH, 'KB')

In [None]:
# Assuming you have a Keras model named 'model'
# import tensorflow as tf

# converter = tf.lite.TFLiteConverter.from_keras_model(model)
# tflite_model = converter.convert()
# with open('model.tflite', 'wb') as f:
#     f.write(tflite_model)

# Run an inference

In [None]:
from tensorflow.keras.models import load_model

KERAS_MODEL_PATH = "model/model_1.keras"
model = load_model(KERAS_MODEL_PATH)

In [None]:
from modules.inference import predict_audio

In [None]:
# file_path_inference = kkono_file_path
file_path_inference = 'ras1.wav'
predicted_label, probability = predict_audio(file_path_inference, model, SAMPLE_RATE)
print(f"Predicted label: {predicted_label}, Probability: {probability}")

In [None]:
file_path_inference = ddyo_file_path
predicted_label, probability = predict_audio(file_path_inference, model, SAMPLE_RATE)
print(f"Predicted label: {predicted_label}, Probability: {probability}")

In [None]:
file_path_inference = gaali_file_path
predicted_label, probability = predict_audio(file_path_inference, model, SAMPLE_RATE)
print(f"Predicted label: {predicted_label}, Probability: {probability}")

In [None]:
file_path_inference = yimirira_file_path
predicted_label, probability = predict_audio(file_path_inference, model, SAMPLE_RATE)
print(f"Predicted label: {predicted_label}, Probability: {probability}")


In [None]:
file_path_inference = emabega_file_path
predicted_label, probability = predict_audio(file_path_inference, model, SAMPLE_RATE)
print(f"Predicted label: {predicted_label}, Probability: {probability}")

In [None]:
file_path_inference = mumasso_file_path
predicted_label, probability = predict_audio(file_path_inference, model, SAMPLE_RATE)
print(f"Predicted label: {predicted_label}, Probability: {probability}") 
