# Data Preprocessing

In [8]:
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Définir le chemin vers votre dataset
path_to_your_dataset = 'donateacry_corpus_cleaned_and_updated_data'

# Listez les sous-dossiers correspondant à chaque classe
subfolders = ['belly_pain', 'burping', 'discomfort', 'hungry', 'tired']

# Initialiser des listes pour stocker les chemins des fichiers audio et les étiquettes
data = []    # Cette liste stockera les chemins des fichiers audio
labels = []  # Cette liste stockera les étiquettes correspondantes (noms de classe)

# Parcourez chaque sous-dossier
for label, subfolder in enumerate(subfolders):
    folder_path = os.path.join(path_to_your_dataset, subfolder)
    
    # Parcourez chaque fichier audio dans le sous-dossier
    for audio_file in os.listdir(folder_path):
        if audio_file.endswith(".wav"):
            audio_path = os.path.join(folder_path, audio_file)
            data.append(audio_path)
            labels.append(subfolder)

# Convertissez les étiquettes en nombres
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Divisez les données en ensembles de formation et de test (80% formation, 20% test)
train_data, test_data, train_labels, test_labels = train_test_split(data, encoded_labels, test_size=0.2, random_state=42)

# Fonction pour normaliser les fichiers audio
def normalize_audio(audio_path):
    audio, _ = librosa.load(audio_path, res_type='kaiser_fast')
    normalized_audio = librosa.util.normalize(audio)
    return normalized_audio

# Normalisez les fichiers audio dans les ensembles de formation et de test
train_data = [normalize_audio(audio_path) for audio_path in train_data]
test_data = [normalize_audio(audio_path) for audio_path in test_data]


In [3]:
# Fonction pour extraire les caractéristiques audio (spectrogrammes)
def extract_spectrogram(audio):
    spectrogram = librosa.feature.melspectrogram(y=audio, sr=22050, n_mels=128, fmax=8000)
    spectrogram_db = librosa.power_to_db(spectrogram, ref=np.max)
    return spectrogram_db

# Créez des ensembles de formation et de test de spectrogrammes
X_train = [extract_spectrogram(audio) for audio in train_data]
X_test = [extract_spectrogram(audio) for audio in test_data]

# Trouvez la forme minimale des spectrogrammes
min_shape = min([spectrogram.shape[1] for spectrogram in X_train + X_test])

# Fonction pour remodeler et tronquer les spectrogrammes
def reshape_and_truncate(spectrogram, target_shape):
    if spectrogram.shape[1] > target_shape:
        return spectrogram[:, :target_shape]
    else:
        return np.pad(spectrogram, ((0, 0), (0, target_shape - spectrogram.shape[1])))

# Appliquer reshape_and_truncate à tous les spectrogrammes
X_train = [reshape_and_truncate(spectrogram, min_shape) for spectrogram in X_train]
X_test = [reshape_and_truncate(spectrogram, min_shape) for spectrogram in X_test]

# Convertir les listes en tableaux NumPy
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(train_labels)
y_test = np.array(test_labels)


# CNN

In [4]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Définissez l'architecture du modèle
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(X_train.shape[1], X_train.shape[2], 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(len(subfolders), activation='softmax'))

# Compilez le modèle
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Ajoutez une dimension pour indiquer les canaux (1 pour les niveaux de gris)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)

# Entraînez le modèle
model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))





Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x186c3313b90>

In [5]:
# Évaluez le modèle sur l'ensemble de test
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy}")

# Faites des prédictions sur l'ensemble de test
predictions = model.predict(X_test)

# Convertissez les prédictions en classes
predicted_classes = np.argmax(predictions, axis=1)

# Analysez les métriques telles que la précision, le rappel, et la précision
from sklearn.metrics import classification_report
print(classification_report(y_test, predicted_classes, target_names=subfolders))


Test Accuracy: 0.782608687877655
              precision    recall  f1-score   support

  belly_pain       0.00      0.00      0.00         4
     burping       0.00      0.00      0.00         2
  discomfort       0.00      0.00      0.00         7
      hungry       0.80      1.00      0.89        72
       tired       0.00      0.00      0.00         7

    accuracy                           0.78        92
   macro avg       0.16      0.20      0.18        92
weighted avg       0.63      0.78      0.70        92



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# Data Augmentation

In [6]:


import os
import random
import numpy as np
import soundfile as sf
import librosa
from scipy.io import wavfile
import shutil
import hashlib

input_folder = r"C:\\Users\Dell\\ENSIAS\S3\\Infant_Monitoring_Device_\\cry_analysis\donateacry_corpus_cleaned_and_updated_data"  # Replace with the actual path to your input folder
output_folder = r"C:\\Users\Dell\\ENSIAS\S3\\Infant_Monitoring_Device_\\cry_analysis\donateacry_corpus_cleaned_and_updated_data_augmentated"  # Replace with the actual path to your output folder

desired_num_audios = 10  # number of desired audios
random_seed = 42
np.random.seed(random_seed)
random.seed(random_seed)

crop_duration = 3  # Random cropping duration in seconds
noise_level = 0.05  # Standard deviation for random normal distribution noise
volume_factor_range = [0.8, 1.2]  # Range for volume adjustment
speed_factor_range = [0.8, 1.2]  # Range for speed perturbation
pitch_semitones_range = [-2, 2]  # Range for pitch variation

# Define a set to store unique hashes of generated audio files
generated_audio_hashes = set()

if not os.path.exists(output_folder):
    os.makedirs(output_folder)

audio_files = os.listdir(input_folder)
num_input_audios = len(audio_files)

# Copy the original audios
for audio_file in audio_files:
    input_audio_path = os.path.join(input_folder, audio_file)
    output_audio_path = os.path.join(output_folder, audio_file)
    shutil.copy(input_audio_path, output_audio_path)

# Function to generate a unique hash for an audio file
def generate_audio_hash(audio_data):
    return hashlib.sha1(audio_data).hexdigest()

# Augment the audio files
generated_audios = 0
while generated_audios < desired_num_audios:
    input_audio_file = random.choice(audio_files)
    input_audio_path = os.path.join(input_folder, input_audio_file)

    original_audio, sr = librosa.load(input_audio_path, sr=None)

    # Random cropping
    crop_start = np.random.uniform(0, len(original_audio) - crop_duration * sr)
    cropped_audio = original_audio[int(crop_start):int(crop_start + crop_duration * sr)]

    # Generate a hash for the augmented audio
    audio_hash = generate_audio_hash(cropped_audio)

    # Check if the hash is already in the set (duplicate)
    if audio_hash not in generated_audio_hashes:
        augmented_audio = cropped_audio.copy()
        output_audio_path = os.path.join(output_folder, f"random_cropping_{generated_audios + 1}.wav")
        wavfile.write(output_audio_path, sr, augmented_audio.astype(np.float32))
        print(f"Random cropping audio {generated_audios + 1}/{desired_num_audios} saved: {output_audio_path}")

        # Add the hash to the set
        generated_audio_hashes.add(audio_hash)

        generated_audios += 1

    # Continue to the next iteration if duplicates are found
    else:
        continue

    # Noise injection
    noise = np.random.normal(0, noise_level, len(cropped_audio))
    augmented_audio = cropped_audio + noise
    audio_hash = generate_audio_hash(augmented_audio)
    if audio_hash not in generated_audio_hashes:
        output_audio_path = os.path.join(output_folder, f"noise_injection_{generated_audios + 1}.wav")
        wavfile.write(output_audio_path, sr, augmented_audio.astype(np.float32))
        print(f"Noise injection audio {generated_audios + 1}/{desired_num_audios} saved: {output_audio_path}")
        generated_audio_hashes.add(audio_hash)
        generated_audios += 1

    # Volume adjustment
    volume_factor = np.random.uniform(*volume_factor_range)
    augmented_audio = cropped_audio * volume_factor
    audio_hash = generate_audio_hash(augmented_audio)
    if audio_hash not in generated_audio_hashes:
        output_audio_path = os.path.join(output_folder, f"volume_adjustment_{generated_audios + 1}.wav")
        wavfile.write(output_audio_path, sr, augmented_audio.astype(np.float32))
        print(f"Volume adjustment audio {generated_audios + 1}/{desired_num_audios} saved: {output_audio_path}")
        generated_audio_hashes.add(audio_hash)
        generated_audios += 1

    # Speed perturbation
    speed_factor = np.random.uniform(*speed_factor_range)
    augmented_audio = librosa.effects.time_stretch(cropped_audio, speed_factor)
    audio_hash = generate_audio_hash(augmented_audio)
    if audio_hash not in generated_audio_hashes:
        output_audio_path = os.path.join(output_folder, f"speed_perturbation_{generated_audios + 1}.wav")
        wavfile.write(output_audio_path, sr, augmented_audio.astype(np.float32))
        print(f"Speed perturbation audio {generated_audios + 1}/{desired_num_audios} saved: {output_audio_path}")
        generated_audio_hashes.add(audio_hash)
        generated_audios += 1

    # Pitch variation
    pitch_semitones = np.random.uniform(*pitch_semitones_range)
    augmented_audio = librosa.effects.pitch_shift(cropped_audio, sr, pitch_semitones)
    audio_hash = generate_audio_hash(augmented_audio)
    if audio_hash not in generated_audio_hashes:
        output_audio_path = os.path.join(output_folder, f"pitch_variation_{generated_audios + 1}.wav")
        wavfile.write(output_audio_path, sr, augmented_audio.astype(np.float32))
        print(f"Pitch variation audio {generated_audios + 1}/{desired_num_audios} saved: {output_audio_path}")
        generated_audio_hashes.add(audio_hash)
        generated_audios += 1

    # Continue to the next iteration if duplicates are found
    else:
        continue

     

PermissionError: [Errno 13] Permission denied: 'C:\\\\Users\\Dell\\\\ENSIAS\\S3\\\\Infant_Monitoring_Device_\\\\cry_analysis\\donateacry_corpus_cleaned_and_updated_data\\belly_pain'

# RNN

In [9]:
# Iterate through each subfolder
for label, subfolder in enumerate(subfolders):
    folder_path = os.path.join(path_to_your_dataset, subfolder)
    
    # Iterate through each audio file in the subfolder
    for audio_file in os.listdir(folder_path):
        if audio_file.endswith(".wav"):
            audio_path = os.path.join(folder_path, audio_file)
            data.append(audio_path)
            labels.append(subfolder)

# Convert labels to numbers
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Split the data into training and testing sets (80% train, 20% test)
train_data, test_data, train_labels, test_labels = train_test_split(data, encoded_labels, test_size=0.2, random_state=42)

# Function to extract audio features (MFCCs)
def extract_features(audio_path):
    audio, _ = librosa.load(audio_path, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=22050, n_mfcc=13)
    return mfccs

# Create training and testing sets
X_train = [extract_features(audio_path) for audio_path in train_data]
X_test = [extract_features(audio_path) for audio_path in test_data]

# Find the minimum shape of the MFCCs
min_shape = min([mfccs.shape[1] for mfccs in X_train + X_test])

# Function to reshape and truncate the MFCCs
def reshape_and_truncate(mfccs, target_shape):
    if mfccs.shape[1] > target_shape:
        return mfccs[:, :target_shape]
    else:
        return np.pad(mfccs, ((0, 0), (0, target_shape - mfccs.shape[1])))

# Apply reshape_and_truncate to all MFCCs
X_train = [reshape_and_truncate(mfccs, min_shape) for mfccs in X_train]
X_test = [reshape_and_truncate(mfccs, min_shape) for mfccs in X_test]

# Convert lists to NumPy arrays
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(train_labels)
y_test = np.array(test_labels)

# Reshape the input data for LSTM
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], X_train.shape[2]))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], X_test.shape[2]))

# One-hot encode labels
y_train_one_hot = to_categorical(y_train)
y_test_one_hot = to_categorical(y_test)

# Create the LSTM model
model = models.Sequential()
model.add(layers.LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(layers.Dense(len(subfolders), activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train_one_hot, epochs=10, validation_data=(X_test, y_test_one_hot))

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test_one_hot)
print(f"Test Accuracy: {test_accuracy}")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy: 0.9071038365364075
