# CNN

#### CNN with MelSpectogram

In [3]:
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras import layers, models

# Définir le chemin vers votre dataset
path_to_your_dataset = 'donateacry_corpus_cleaned_and_updated_data'

# Listez les sous-dossiers correspondant à chaque classe
subfolders = ['belly_pain', 'burping', 'discomfort', 'hungry', 'tired']

# Initialiser des listes pour stocker les chemins des fichiers audio et les étiquettes
data = []    # Cette liste stockera les chemins des fichiers audio
labels = []  # Cette liste stockera les étiquettes correspondantes (noms de classe)

# Parcourez chaque sous-dossier
for label, subfolder in enumerate(subfolders):
    folder_path = os.path.join(path_to_your_dataset, subfolder)
    
    # Parcourez chaque fichier audio dans le sous-dossier
    for audio_file in os.listdir(folder_path):
        if audio_file.endswith(".wav"):
            audio_path = os.path.join(folder_path, audio_file)
            data.append(audio_path)
            labels.append(subfolder)

# Convertissez les étiquettes en nombres
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Divisez les données en ensembles de formation et de test (80% formation, 20% test)
train_data, test_data, train_labels, test_labels = train_test_split(data, encoded_labels, test_size=0.2, random_state=42)

# Fonction pour normaliser les fichiers audio
def normalize_audio(audio_path):
    audio, _ = librosa.load(audio_path, res_type='kaiser_fast')
    normalized_audio = librosa.util.normalize(audio)
    return normalized_audio

# Normalisez les fichiers audio dans les ensembles de formation et de test
train_data = [normalize_audio(audio_path) for audio_path in train_data]
test_data = [normalize_audio(audio_path) for audio_path in test_data]
# Fonction pour extraire les caractéristiques audio (spectrogrammes)
def extract_spectrogram(audio):
    spectrogram = librosa.feature.melspectrogram(y=audio, sr=22050, n_mels=128, fmax=8000)
    spectrogram_db = librosa.power_to_db(spectrogram, ref=np.max)
    return spectrogram_db

# Créez des ensembles de formation et de test de spectrogrammes
X_train = [extract_spectrogram(audio) for audio in train_data]
X_test = [extract_spectrogram(audio) for audio in test_data]

# Trouvez la forme minimale des spectrogrammes
min_shape = min([spectrogram.shape[1] for spectrogram in X_train + X_test])

# Fonction pour remodeler et tronquer les spectrogrammes
def reshape_and_truncate(spectrogram, target_shape):
    if spectrogram.shape[1] > target_shape:
        return spectrogram[:, :target_shape]
    else:
        return np.pad(spectrogram, ((0, 0), (0, target_shape - spectrogram.shape[1])))

# Appliquer reshape_and_truncate à tous les spectrogrammes
X_train = [reshape_and_truncate(spectrogram, min_shape) for spectrogram in X_train]
X_test = [reshape_and_truncate(spectrogram, min_shape) for spectrogram in X_test]

# Convertir les listes en tableaux NumPy
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(train_labels)
y_test = np.array(test_labels)


# Définissez l'architecture du modèle
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(X_train.shape[1], X_train.shape[2], 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(len(subfolders), activation='softmax'))

# Compilez le modèle
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Ajoutez une dimension pour indiquer les canaux (1 pour les niveaux de gris)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)

# Entraînez le modèle
model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))





Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x16fd2d2a8d0>

In [4]:
# Évaluez le modèle sur l'ensemble de test
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy}")

# Faites des prédictions sur l'ensemble de test
predictions = model.predict(X_test)

# Convertissez les prédictions en classes
predicted_classes = np.argmax(predictions, axis=1)

# Analysez les métriques telles que la précision, le rappel, et la précision
from sklearn.metrics import classification_report
print(classification_report(y_test, predicted_classes, target_names=subfolders))


Test Accuracy: 0.782608687877655
              precision    recall  f1-score   support

  belly_pain       0.00      0.00      0.00         4
     burping       0.00      0.00      0.00         2
  discomfort       0.00      0.00      0.00         7
      hungry       0.78      1.00      0.88        72
       tired       0.00      0.00      0.00         7

    accuracy                           0.78        92
   macro avg       0.16      0.20      0.18        92
weighted avg       0.61      0.78      0.69        92



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#### CNN with MFCC

In [17]:
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models
from sklearn.preprocessing import LabelEncoder

# Define the path to your dataset
path_to_your_dataset = 'donateacry_corpus_cleaned_and_updated_data'

# List subfolders corresponding to each class
subfolders = ['belly_pain', 'burping', 'discomfort', 'hungry', 'tired']

# Initialize lists to store file paths and labels
data = []    # This list will store the paths of audio files
labels = []  # This list will store the corresponding labels (class names)

# Iterate through each subfolder
for label, subfolder in enumerate(subfolders):
    folder_path = os.path.join(path_to_your_dataset, subfolder)
    
    # Iterate through each audio file in the subfolder
    for audio_file in os.listdir(folder_path):
        if audio_file.endswith(".wav"):
            audio_path = os.path.join(folder_path, audio_file)
            data.append(audio_path)
            labels.append(subfolder)

# Convert labels to numbers
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Split the data into training and testing sets (80% train, 20% test)
train_data, test_data, train_labels, test_labels = train_test_split(data, encoded_labels, test_size=0.2, random_state=42)

# Function to extract audio features (MFCCs)
def extract_mfcc(audio_path):
    audio, _ = librosa.load(audio_path, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=22050, n_mfcc=13)
    return mfccs

# Create training and testing sets
X_train = [extract_mfcc(audio_path) for audio_path in train_data]
X_test = [extract_mfcc(audio_path) for audio_path in test_data]

# Find the minimum shape of the MFCCs
min_shape = min([mfccs.shape[1] for mfccs in X_train + X_test])

# Function to reshape and truncate the MFCCs
def reshape_and_truncate(mfccs, target_shape):
    if mfccs.shape[1] > target_shape:
        return mfccs[:, :target_shape]
    else:
        return np.pad(mfccs, ((0, 0), (0, target_shape - mfccs.shape[1])))

# Apply reshape_and_truncate to all MFCCs
X_train = [reshape_and_truncate(mfccs, min_shape) for mfccs in X_train]
X_test = [reshape_and_truncate(mfccs, min_shape) for mfccs in X_test]

# Convert lists to NumPy arrays
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(train_labels)
y_test = np.array(test_labels)

# Reshape the input data for the CNN model
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)

# Define the CNN model architecture
model_cnn_mfcc = models.Sequential()
model_cnn_mfcc.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(X_train.shape[1], X_train.shape[2], 1)))
model_cnn_mfcc.add(layers.MaxPooling2D((2, 2)))
model_cnn_mfcc.add(layers.Flatten())
model_cnn_mfcc.add(layers.Dense(64, activation='relu'))
model_cnn_mfcc.add(layers.Dense(len(subfolders), activation='softmax'))

# Compile the model
model_cnn_mfcc.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
model_cnn_mfcc.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x16f93182350>

In [18]:
# Évaluez le modèle sur l'ensemble de test
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy with MFCC: {test_accuracy}")

# Faites des prédictions sur l'ensemble de test
predictions = model_cnn_mfcc.predict(X_test)

# Convertissez les prédictions en classes
predicted_classes = np.argmax(predictions, axis=1)

# Analysez les métriques telles que la précision, le rappel, et la précision
from sklearn.metrics import classification_report
print(classification_report(y_test, predicted_classes, target_names=subfolders))

Test Accuracy with MFCC: 0.782608687877655
              precision    recall  f1-score   support

  belly_pain       0.00      0.00      0.00         4
     burping       0.00      0.00      0.00         2
  discomfort       0.00      0.00      0.00         7
      hungry       0.77      0.94      0.85        72
       tired       0.00      0.00      0.00         7

    accuracy                           0.74        92
   macro avg       0.15      0.19      0.17        92
weighted avg       0.60      0.74      0.67        92



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#### Save model

In [21]:
model.save('cnn_model.h5')

In [20]:
model_cnn_mfcc.save('cnn_mfcc_model.h5')

# RNN

#### RNN using MFCC

In [12]:
# Iterate through each subfolder
for label, subfolder in enumerate(subfolders):
    folder_path = os.path.join(path_to_your_dataset, subfolder)
    
    # Iterate through each audio file in the subfolder
    for audio_file in os.listdir(folder_path):
        if audio_file.endswith(".wav"):
            audio_path = os.path.join(folder_path, audio_file)
            data.append(audio_path)
            labels.append(subfolder)

# Convert labels to numbers
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Split the data into training and testing sets (80% train, 20% test)
train_data, test_data, train_labels, test_labels = train_test_split(data, encoded_labels, test_size=0.2, random_state=42)

# Function to extract audio features (MFCCs)
def extract_features(audio_path):
    audio, _ = librosa.load(audio_path, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=22050, n_mfcc=13)
    return mfccs

# Create training and testing sets
X_train = [extract_features(audio_path) for audio_path in train_data]
X_test = [extract_features(audio_path) for audio_path in test_data]

# Find the minimum shape of the MFCCs
min_shape = min([mfccs.shape[1] for mfccs in X_train + X_test])

# Function to reshape and truncate the MFCCs
def reshape_and_truncate(mfccs, target_shape):
    if mfccs.shape[1] > target_shape:
        return mfccs[:, :target_shape]
    else:
        return np.pad(mfccs, ((0, 0), (0, target_shape - mfccs.shape[1])))

# Apply reshape_and_truncate to all MFCCs
X_train = [reshape_and_truncate(mfccs, min_shape) for mfccs in X_train]
X_test = [reshape_and_truncate(mfccs, min_shape) for mfccs in X_test]

# Convert lists to NumPy arrays
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(train_labels)
y_test = np.array(test_labels)

# Reshape the input data for LSTM
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], X_train.shape[2]))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], X_test.shape[2]))

# One-hot encode labels
y_train_one_hot = to_categorical(y_train)
y_test_one_hot = to_categorical(y_test)

# Create the LSTM model
model_rnn_mfcc = models.Sequential()
model_rnn_mfcc.add(layers.LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2])))
model_rnn_mfcc.add(layers.Dense(len(subfolders), activation='softmax'))

# Compile the model
model_rnn_mfcc.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model_rnn_mfcc.fit(X_train, y_train_one_hot, epochs=10, validation_data=(X_test, y_test_one_hot))

# Evaluate the model on the test set
test_loss, test_accuracy = model_rnn_mfcc.evaluate(X_test, y_test_one_hot)
print(f"Test Accuracy with mfcc: {test_accuracy}")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy with mfcc: 0.9863387942314148


#### RNN using MelSpectogram

In [8]:
import os
import librosa
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical

# Define the base path of your dataset
path_to_your_dataset = 'donateacry_corpus_cleaned_and_updated_data'

# List the subfolders corresponding to each class
subfolders = ['belly_pain', 'burping', 'discomfort', 'hungry', 'tired']

# Initialize lists to store file paths and labels
data = []    # This list will store the paths of audio files
labels = []  # This list will store the corresponding labels (class names)

# Iterate through each subfolder
for label, subfolder in enumerate(subfolders):
    folder_path = os.path.join(path_to_your_dataset, subfolder)
    
    # Iterate through each audio file in the subfolder
    for audio_file in os.listdir(folder_path):
        if audio_file.endswith(".wav"):
            audio_path = os.path.join(folder_path, audio_file)
            data.append(audio_path)
            labels.append(subfolder)

# Convert labels to numbers
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Split the data into training and testing sets (80% train, 20% test)
train_data, test_data, train_labels, test_labels = train_test_split(data, encoded_labels, test_size=0.2, random_state=42)

# Function to extract audio features (Mel Spectrogram)
def extract_features(audio_path):
    audio, _ = librosa.load(audio_path, res_type='kaiser_fast')
    mel_spectrogram = librosa.feature.melspectrogram(y=audio, sr=22050, n_mels=128, fmax=8000)
    return mel_spectrogram

# Create training and testing sets
X_train = [extract_features(audio_path) for audio_path in train_data]
X_test = [extract_features(audio_path) for audio_path in test_data]

# Find the minimum shape of the mel spectrograms
min_shape = min([mel.shape[1] for mel in X_train + X_test])

# Function to reshape and truncate the mel spectrograms
def reshape_and_truncate(mel, target_shape):
    if mel.shape[1] > target_shape:
        return mel[:, :target_shape]
    else:
        return np.pad(mel, ((0, 0), (0, target_shape - mel.shape[1])))

# Apply reshape_and_truncate to all mel spectrograms
X_train = [reshape_and_truncate(mel, min_shape) for mel in X_train]
X_test = [reshape_and_truncate(mel, min_shape) for mel in X_test]

# Convert lists to NumPy arrays
X_train = np.array(X_train)
X_test = np.array(X_test)

# Reshape the input data for LSTM
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], X_train.shape[2]))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], X_test.shape[2]))

# One-hot encode labels
y_train_one_hot = to_categorical(train_labels)
y_test_one_hot = to_categorical(test_labels)

# Create the LSTM model
model_rnn_mel = models.Sequential()
model_rnn_mel.add(layers.LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2])))
model_rnn_mel.add(layers.Dense(len(subfolders), activation='softmax'))

# Compile the model
model_rnn_mel.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model_rnn_mel.fit(X_train, y_train_one_hot, epochs=10, validation_data=(X_test, y_test_one_hot))

# Evaluate the model on the test set
test_loss_rnn_mel, test_accuracy_rnn_mel = model_rnn_mel.evaluate(X_test, y_test_one_hot)
print(f"Test Accuracy (RNN with Mel Spectrogram): {test_accuracy_rnn_mel}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy (RNN with Mel Spectrogram): 0.782608687877655


#### Save Model

In [13]:
model_rnn_mfcc.save('rnn_mfcc_model.h5')

  saving_api.save_model(


In [14]:
model_rnn_mel.save('rnn_mel_model.h5')

# RCNN

#### RCNN with MelSpectogram

In [23]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Flatten

import os
import librosa
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical

# Define the base path of your dataset
path_to_your_dataset = 'donateacry_corpus_cleaned_and_updated_data'

# List the subfolders corresponding to each class
subfolders = ['belly_pain', 'burping', 'discomfort', 'hungry', 'tired']

# Initialize lists to store file paths and labels
data = []    # This list will store the paths of audio files
labels = []  # This list will store the corresponding labels (class names)

# Iterate through each subfolder
for label, subfolder in enumerate(subfolders):
    folder_path = os.path.join(path_to_your_dataset, subfolder)
    
    # Iterate through each audio file in the subfolder
    for audio_file in os.listdir(folder_path):
        if audio_file.endswith(".wav"):
            audio_path = os.path.join(folder_path, audio_file)
            data.append(audio_path)
            labels.append(subfolder)

# Convert labels to numbers
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Split the data into training and testing sets (80% train, 20% test)
train_data, test_data, train_labels, test_labels = train_test_split(data, encoded_labels, test_size=0.2, random_state=42)# Modify these variables according to your data
def extract_features(audio_path):
    audio, _ = librosa.load(audio_path, res_type='kaiser_fast')
    mel_spectrogram = librosa.feature.melspectrogram(y=audio, sr=22050, n_mels=128, fmax=8000)
    return mel_spectrogram

# Create training and testing sets
X_train = [extract_features(audio_path) for audio_path in train_data]
X_test = [extract_features(audio_path) for audio_path in test_data]

# Find the minimum shape of the mel spectrograms
min_shape = min([mel.shape[1] for mel in X_train + X_test])

# Function to reshape and truncate the mel spectrograms
def reshape_and_truncate(mel, target_shape):
    if mel.shape[1] > target_shape:
        return mel[:, :target_shape]
    else:
        return np.pad(mel, ((0, 0), (0, target_shape - mel.shape[1])))

# Apply reshape_and_truncate to all mel spectrograms
X_train = [reshape_and_truncate(mel, min_shape) for mel in X_train]
X_test = [reshape_and_truncate(mel, min_shape) for mel in X_test]

# Convert lists to NumPy arrays
X_train = np.array(X_train)
X_test = np.array(X_test)

# Reshape the input data for LSTM
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], X_train.shape[2]))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], X_test.shape[2]))

# One-hot encode labels
y_train_one_hot = to_categorical(train_labels)
y_test_one_hot = to_categorical(test_labels)
# Create the RCNN model
model_rcnn_mel = Sequential()

# Convolutional layer
model_rcnn_mel.add(Conv1D(32, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model_rcnn_mel.add(MaxPooling1D(pool_size=2))

# LSTM layer
model_rcnn_mel.add(LSTM(64, activation='relu', return_sequences=True))

# Flatten layer
model_rcnn_mel.add(Flatten())

# Dense layers
model_rcnn_mel.add(Dense(64, activation='relu'))
model_rcnn_mel.add(Dense(len(subfolders), activation='softmax'))

# Compile the model
model_rcnn_mel.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Print a summary of the model architecture
model_rcnn_mel.summary()

# Train the model
model_rcnn_mel.fit(X_train, y_train_one_hot, epochs=10, validation_data=(X_test, y_test_one_hot))

# Evaluate the model on the test set
test_loss_rcnn_mel, test_accuracy_rcnn_mel = model_rcnn_mel.evaluate(X_test, y_test_one_hot)
print(f"Test Accuracy (RCNN with Mel Spectrogram): {test_accuracy_rcnn_mel}")


Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 126, 32)           27008     
                                                                 
 max_pooling1d (MaxPooling1  (None, 63, 32)            0         
 D)                                                              
                                                                 
 lstm_4 (LSTM)               (None, 63, 64)            24832     
                                                                 
 flatten_3 (Flatten)         (None, 4032)              0         
                                                                 
 dense_10 (Dense)            (None, 64)                258112    
                                                                 
 dense_11 (Dense)            (None, 5)                 325       
                                                      

#### RCNN with MFCC

In [24]:
import os
import librosa
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Flatten
from tensorflow.keras.utils import to_categorical

# Define the base path of your dataset
path_to_your_dataset = 'donateacry_corpus_cleaned_and_updated_data'

# List the subfolders corresponding to each class
subfolders = ['belly_pain', 'burping', 'discomfort', 'hungry', 'tired']

# Initialize lists to store file paths and labels
data = []    # This list will store the paths of audio files
labels = []  # This list will store the corresponding labels (class names)

# Iterate through each subfolder
for label, subfolder in enumerate(subfolders):
    folder_path = os.path.join(path_to_your_dataset, subfolder)
    
    # Iterate through each audio file in the subfolder
    for audio_file in os.listdir(folder_path):
        if audio_file.endswith(".wav"):
            audio_path = os.path.join(folder_path, audio_file)
            data.append(audio_path)
            labels.append(subfolder)

# Convert labels to numbers
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Split the data into training and testing sets (80% train, 20% test)
train_data, test_data, train_labels, test_labels = train_test_split(data, encoded_labels, test_size=0.2, random_state=42)

# Function to extract audio features (MFCC)
def extract_features(audio_path):
    audio, _ = librosa.load(audio_path, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=22050, n_mfcc=13)
    return mfccs

# Create training and testing sets
X_train = [extract_features(audio_path) for audio_path in train_data]
X_test = [extract_features(audio_path) for audio_path in test_data]

# Find the minimum shape of the MFCCs
min_shape = min([mfccs.shape[1] for mfccs in X_train + X_test])

# Function to reshape and truncate the MFCCs
def reshape_and_truncate(mfccs, target_shape):
    if mfccs.shape[1] > target_shape:
        return mfccs[:, :target_shape]
    else:
        return np.pad(mfccs, ((0, 0), (0, target_shape - mfccs.shape[1])))

# Apply reshape_and_truncate to all MFCCs
X_train = [reshape_and_truncate(mfccs, min_shape) for mfccs in X_train]
X_test = [reshape_and_truncate(mfccs, min_shape) for mfccs in X_test]

# Convert lists to NumPy arrays
X_train = np.array(X_train)
X_test = np.array(X_test)

# Reshape the input data for LSTM
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], X_train.shape[2]))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], X_test.shape[2]))

# One-hot encode labels
y_train_one_hot = to_categorical(train_labels)
y_test_one_hot = to_categorical(test_labels)

# Create the RCNN model with MFCC features
model_rcnn_mfcc = Sequential()

# Convolutional layer
model_rcnn_mfcc.add(Conv1D(32, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model_rcnn_mfcc.add(MaxPooling1D(pool_size=2))

# LSTM layer
model_rcnn_mfcc.add(LSTM(64, activation='relu', return_sequences=True))

# Flatten layer
model_rcnn_mfcc.add(Flatten())

# Dense layers
model_rcnn_mfcc.add(Dense(64, activation='relu'))
model_rcnn_mfcc.add(Dense(len(subfolders), activation='softmax'))

# Compile the model
model_rcnn_mfcc.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Print a summary of the model architecture
model_rcnn_mfcc.summary()

# Train the model
model_rcnn_mfcc.fit(X_train, y_train_one_hot, epochs=10, validation_data=(X_test, y_test_one_hot))

# Evaluate the model on the test set
test_loss_rcnn_mfcc, test_accuracy_rcnn_mfcc = model_rcnn_mfcc.evaluate(X_test, y_test_one_hot)
print(f"Test Accuracy (RCNN with MFCC): {test_accuracy_rcnn_mfcc}")


Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_1 (Conv1D)           (None, 11, 32)            27008     
                                                                 
 max_pooling1d_1 (MaxPoolin  (None, 5, 32)             0         
 g1D)                                                            
                                                                 
 lstm_5 (LSTM)               (None, 5, 64)             24832     
                                                                 
 flatten_4 (Flatten)         (None, 320)               0         
                                                                 
 dense_12 (Dense)            (None, 64)                20544     
                                                                 
 dense_13 (Dense)            (None, 5)                 325       
                                                      

In [25]:
model_rcnn_mel.save('rcnn_mel_model.h5')

  saving_api.save_model(


In [26]:
model_rcnn_mfcc.save('rcnn_mfcc_model.h5')

  saving_api.save_model(
