In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from PIL import Image
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, LSTM, Bidirectional, Dropout, Reshape, Attention
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import sys

In [None]:
# Choose where the spectrogram PNGs live. Outside Colab a path relative to the
# working directory is used; inside Colab, Google Drive is mounted first and the
# images are read from it.
if 'google.colab' not in sys.modules:
    spectrograms_dir = "audio_representations/spectrograms"
else:
    from google.colab import drive
    drive.mount('/content/drive')
    spectrograms_dir = "/content/drive/MyDrive/audio_representations/RAVDESS/spectrograms"

In [None]:
# Load every spectrogram PNG as a 256x256 RGB array together with its class id,
# then build normalised tensors and a 70 / 15 / 15 train / validation / test split.
images = []
labels = []

# os.listdir() returns entries in an arbitrary, filesystem-dependent order.
# Sorting makes the sample order (and therefore the seeded split below)
# reproducible from one machine to another.
for file_name in sorted(os.listdir(spectrograms_dir)):
    if not file_name.endswith(".png"):
        continue
    file_path = os.path.join(spectrograms_dir, file_name)

    # The context manager releases the file handle as soon as the pixels
    # have been copied into the numpy array.
    with Image.open(file_path) as img:
        img_array = np.array(img.convert("RGB").resize((256, 256)))
    images.append(img_array)

    # The third dash-separated field of the file name is the 1-based class
    # id; shift it so that class ids start at 0.
    labels.append(int(file_name.split("-")[2]) - 1)

if not images:
    # Fail early with a clear message instead of an obscure error further down.
    raise FileNotFoundError(f"No .png spectrograms found in {spectrograms_dir!r}")

labels = np.array(labels)

# Normalize to [0, 1]. float32 is used explicitly: dividing the uint8 array by
# 255.0 would promote the whole dataset to float64 and double its memory use.
images = np.array(images, dtype=np.float32)
images /= 255.0

# One hot. The width is derived from the largest class id rather than from the
# number of distinct ids, so a class that happens to be absent from the folder
# does not make to_categorical() fail on an out-of-range label.
num_classes = int(labels.max()) + 1
labels_one_hot = to_categorical(labels, num_classes=num_classes)

# 70 % train, then the remaining 30 % is halved into validation and test.
X_train, X_temp, y_train, y_temp = train_test_split(images, labels_one_hot, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

print(f"Ensemble d'entraînement : {X_train.shape}, {y_train.shape}")
print(f"Ensemble de validation : {X_val.shape}, {y_val.shape}")
print(f"Ensemble de test : {X_test.shape}, {y_test.shape}")

In [None]:
def create_cnn_blstm_attention_model(input_shape, num_classes):
    """Build and compile a CNN -> BiLSTM -> self-attention image classifier.

    Three Conv2D/MaxPooling2D stages extract a feature map, which is turned
    into a sequence (one step per feature-map column), fed through a
    bidirectional LSTM, re-weighted with dot-product self-attention, averaged
    over the sequence axis and classified by a small dense head.

    Args:
        input_shape: Per-sample input shape ``(height, width, channels)``.
            The height must be a fixed integer because the per-step feature
            size is computed from the static shape of the CNN output.
        num_classes: Number of output classes (size of the softmax layer).

    Returns:
        A compiled ``tf.keras.Model`` (Adam optimizer, categorical
        cross-entropy loss, accuracy metric) mapping ``(batch, *input_shape)``
        to ``(batch, num_classes)`` class probabilities.
    """
    inputs = tf.keras.Input(shape=input_shape)

    # CNN part: three convolution + pooling stages with growing filter counts.
    x = Conv2D(32, (3, 3), activation='relu')(inputs)
    x = MaxPooling2D((2, 2))(x)

    x = Conv2D(64, (3, 3), activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)

    x = Conv2D(128, (3, 3), activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)

    # Convert the (height, width, channels) feature map into a sequence with
    # one step per column. Flattening everything into a single time step would
    # leave the recurrent and attention layers with nothing to iterate over.
    # NOTE(review): this treats the image width as the time axis of the
    # spectrogram - confirm against how the PNGs were rendered.
    _, height, _, channels = x.shape
    x = tf.keras.layers.Permute((2, 1, 3))(x)    # -> (width, height, channels)
    x = Reshape((-1, height * channels))(x)      # -> (width, height * channels)

    # BLSTM part: return the full sequence so attention can weight every step.
    x = Bidirectional(LSTM(64, return_sequences=True))(x)

    # Attention must be called on a [query, value] list; using the BLSTM output
    # for both gives self-attention. (It cannot be added to a Sequential model,
    # which would hand it a single tensor.)
    x = Attention()([x, x])

    # Collapse the sequence axis so the classifier emits one prediction per
    # sample, matching the (batch, num_classes) one-hot targets.
    x = tf.keras.layers.GlobalAveragePooling1D()(x)

    # Fully connected head with dropout to limit overfitting.
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.5)(x)

    # Output layer
    outputs = Dense(num_classes, activation='softmax')(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    # Compile model
    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

    return model

# Instantiate the network for 256x256 three-channel (RGB) inputs and print a
# layer-by-layer overview.
input_shape = (256, 256, 3)
model = create_cnn_blstm_attention_model(
    input_shape=input_shape,
    num_classes=num_classes,
)

model.summary()


In [None]:
# Train for 20 epochs with mini-batches of 32 samples; the validation split is
# scored after every epoch and recorded in `history`.
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), batch_size=32, epochs=20)

# Final score on the held-out test split (loss first, then the accuracy metric).
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Loss on the test set : {test_loss}")
print(f"Accuracy on the test set : {test_accuracy}")

In [None]:
# Output folder for the trained model and the figures. Google Drive is only
# available (and mounted) when running in Colab, so fall back to a folder
# relative to the working directory everywhere else.
if 'google.colab' in sys.modules:
    save_path = '/content/drive/MyDrive/models/cnn_blstm_attention_ravdess_spectrograms'
else:
    save_path = os.path.join("models", "cnn_blstm_attention_ravdess_spectrograms")

# Make sure the folder exists before anything is written into it.
os.makedirs(save_path, exist_ok=True)

# Despite the file name, model.save() stores the whole model, not only its weights.
model.save(save_path + "/weights.h5")

In [None]:
# One figure per tracked metric: training vs. validation value for each epoch.
# Each figure is written into `save_path` and closed without being displayed.
curve_specs = (
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss',
     "Loss Evolution", "Loss", "loss_curve.png"),
    ('accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy',
     "Accuracy Evolution", "Accuracy", "accuracy_curve.png"),
)

for train_key, val_key, train_label, val_label, title, y_label, out_name in curve_specs:
    plt.figure(figsize=(6, 5))
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.title(title)
    plt.xlabel("Epochs")
    plt.ylabel(y_label)
    plt.legend()
    plt.savefig(os.path.join(save_path, out_name))
    plt.close()


In [None]:
# Display names for class ids 0..7, in class-id order.
emotion_labels = [
    "Neutral", "Calm", "Happy", "Sad", "Angry",
    "Fearful", "Disgust", "Surprised"
]

# Turn predicted probabilities and one-hot targets back into class ids.
y_pred = model.predict(X_test)
y_pred_labels = np.argmax(y_pred, axis=1)
y_true_labels = np.argmax(y_test, axis=1)

# Pass the full list of class ids explicitly: otherwise a class that occurs
# neither in the test targets nor in the predictions is dropped from the
# matrix, which then no longer matches the eight display labels.
conf_matrix = confusion_matrix(
    y_true_labels,
    y_pred_labels,
    labels=np.arange(len(emotion_labels)),
)

disp = ConfusionMatrixDisplay(
    confusion_matrix=conf_matrix,
    display_labels=emotion_labels
)
disp.plot(cmap=plt.cm.Blues, xticks_rotation=45)
plt.title("Confusion Matrix CNN-BLSTM with attention Spectrograms")
conf_matrix_path = os.path.join(save_path, "confusion_matrix.png")
plt.savefig(conf_matrix_path)
plt.close()
