# Packages, Libraries, and Constants
- Packages and libraries used throughout the notebook

- Constants and parameters used throughout the notebook (all imported from `packages.utils`)

In [1]:
from packages.utils import *

2024-09-29 20:44:51.926355: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-29 20:44:51.960866: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-29 20:44:51.969080: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-29 20:44:51.991933: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Path to the datasets

1. Speech Intent Classification (SIC) Dataset
 - `emabega` - `ddyo` - `unknown` - `kkono` - `yimirira` - `mumaaso`

2. Datasets
- `Original train data` - `Test data`


In [2]:
from packages.utils import train_data_dir, test_data_dir

## Directory Labels

In [4]:
from packages.data_processing import list_directory_contents


# Inspect both dataset directories in one go: training split first, test split second.
# The second argument ('Train' / 'Test') is presumably the split name the helper
# uses when it reports the labels it found.
train_commands, test_commands = (
    list_directory_contents(train_data_dir, 'Train'),
    list_directory_contents(test_data_dir, 'Test'),
)

Train commands labels: ['unknown' 'ddyo' 'emabega' 'yimirira' 'mumaaso' 'kkono']
Test commands labels: ['unknown' 'ddyo' 'emabega' 'yimirira' 'mumaaso' 'kkono']


# Dataset Pre-processing

## 1. Train and validation Datasets

- `Creating the train and validation datasets`

In [None]:
from packages.data_processing import create_train_val_audio_dataset


# Split and batch the data into training and validation sets, and extract the labels.
# The helper returns three values: the training dataset, the validation dataset
# and the label names, which are printed below.
train_ds, val_ds, label_names = create_train_val_audio_dataset(train_data_dir)
print(f'Labels: {label_names}')

## 2. Test dataset

In [None]:
from packages.data_processing import create_test_audio_dataset


# Build the batched test dataset from the test data directory.
# Only the dataset is returned here; no label names are extracted for the test split.
test_ds = create_test_audio_dataset(test_data_dir)

# Data Processing

- `Feature Extraction`

In [None]:
from packages.data_processing import preprocess_melspec_audio_datasets


# Extract Mel-spectrograms from the audio files.
# The three datasets are passed as (train, validation, test); the three results are
# presumably returned in that same order — confirm in packages.data_processing.
train_mel_spec_ds, val_mel_spec_ds, test_mel_spec_ds = preprocess_melspec_audio_datasets(train_ds, val_ds, test_ds)

### Shape consistency

In [None]:
# Print the element spec of each split (train, validation, test — in that order)
# so their shapes and dtypes can be compared by eye.
for mel_spec_ds in (train_mel_spec_ds, val_mel_spec_ds, test_mel_spec_ds):
    print(mel_spec_ds.element_spec)

# Model

### Input shape 

In [None]:
# Take the first element of the training dataset and keep its first component,
# the spectrograms.
first_train_element = next(iter(train_mel_spec_ds))
example_spectrograms = first_train_element[0]

# Drop the leading axis (presumably the batch axis, since the datasets are batched)
# to get the per-example shape the model expects.
input_shape = example_spectrograms.shape[1:]
print('Input shape:', input_shape)

# One output unit per class label.
num_labels = len(label_names)
print(f'Labels {label_names}')

### Model Architecture

In [None]:
# Import the model factory under an alias so that it is not overwritten by the
# model instance created below. Without the alias, `model` would first name the
# factory function and then the built network, and the factory would no longer
# be reachable in this kernel.
from packages.model import model as build_model


# Build the NN model for the spectrogram input shape and the number of classes.
# The instance keeps the name `model`, which the later cells rely on.
model = build_model(input_shape, num_labels)
model.summary()

### Compile and Train the model

In [None]:
from packages.model import compile_and_train_model


# Compile, train and validate the model.
# Only the model and the train/validation datasets are passed here; optimizer, loss
# and epoch count are presumably defined inside compile_and_train_model — check
# packages.model to tune them.
# `history` is kept so the training curves can be plotted afterwards.
history = compile_and_train_model(model, train_mel_spec_ds, val_mel_spec_ds)

### Plot Accuracy and Loss

In [None]:
from packages.model import plot_training_history


# Plot the training history returned by compile_and_train_model
# (accuracy and loss, per the section heading).
plot_training_history(history)

### Evaluate the model performance

Run the model on the test set and check the model's performance:

In [None]:
from packages.model import evaluate_model


# Evaluate the model using the test dataset.
# The result is not bound to a name, so any metrics are only visible as this
# cell's output.
evaluate_model(model, test_mel_spec_ds)

## Confusion matrix

In [None]:
from packages.model import plot_confusion_matrix

# Predicted class index per test example: arg-max over the model's output scores.
# NOTE(review): `tf` is not imported in this cell — presumably it comes from the
# wildcard import of packages.utils at the top of the notebook; confirm.
y_pred = tf.argmax(model.predict(test_mel_spec_ds), axis=1)

# Ground-truth labels, gathered batch by batch in a second pass over the dataset.
# NOTE(review): this assumes test_mel_spec_ds yields its elements in the same order
# on every pass (i.e. it is not reshuffled per iteration) — confirm, otherwise
# y_true and y_pred would be misaligned.
y_true = tf.concat([labels for _, labels in test_mel_spec_ds], axis=0)

# Class names used for the matrix axes.
# NOTE(review): hard-coded; presumably must follow the dataset's label index
# order — verify against `label_names`.
label_names_slice = ['ddyo', 'emabega', 'kkono', 'mumaaso', 'unknown', 'yimirira']


# Plot the confusion matrix
plot_confusion_matrix(y_true, y_pred, label_names_slice)

## Save the Keras model

In [None]:
from pathlib import Path

# Destination of the trained model. Kept as a plain string because the
# model-size cell further down passes it on unchanged.
KERAS_MODEL_PATH = "model/Model_spec_1.keras"

# Make sure the target folder exists so that saving does not depend on the
# directory having been created by hand before a fresh run.
Path(KERAS_MODEL_PATH).parent.mkdir(parents=True, exist_ok=True)

model.save(KERAS_MODEL_PATH)
print('Model has been successfully saved')

## Size of the model

In [None]:
from packages.utils import get_and_convert_file_size


# Get the size of the trained model file saved at KERAS_MODEL_PATH.
# 'MB' is passed as the second argument, presumably the unit to convert the size
# to — confirm in packages.utils.
keras_model_size = get_and_convert_file_size(KERAS_MODEL_PATH, 'MB')