In [1]:
import os
import cv2
import numpy as np
import tensorflow as tf
from keras.callbacks import ModelCheckpoint

In [2]:
# Define directory paths
#train_dir = 'D:/Files/projects/yapay-zeka-proje/emotion-recognition-from-voice/Train and Test Spectrograms/Train'
#test_dir = 'D:/Files/projects/yapay-zeka-proje/emotion-recognition-from-voice/Train and Test Spectrograms/Test'

In [3]:
# New Spectrograms directories

train_dir = './My Spectrograms/Train'
test_dir = './My Spectrograms/Test'


In [4]:
# # Define image size and batch size
img_height, img_width = 400, 600
batch_size = 32

In [5]:
# Function to load images and labels from directory
def load_images_from_directory(directory):
    images = []
    labels = []
    label_to_index = {'Angry': 0, 'Calm': 1, 'Happy': 2, 'Sad': 3}
    
    for emotion in os.listdir(directory):
        emotion_dir = os.path.join(directory, emotion)
        label = label_to_index[emotion]
        
        for filename in os.listdir(emotion_dir):
            img_path = os.path.join(emotion_dir, filename)
            img = cv2.imread(img_path)
            img = cv2.resize(img, (img_height, img_width))
            images.append(img)
            labels.append(label)
    
    return np.array(images), np.array(labels)

In [6]:
# Load train and test images
X_train, y_train = load_images_from_directory(train_dir)
X_test, y_test = load_images_from_directory(test_dir)

In [7]:
# Normalize pixel values
X_train = X_train / 255.0
X_test = X_test / 255.0

In [8]:
# Define model architecture
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(4, activation='softmax')  # Adjust the output size according to the number of classes
])

  super().__init__(


In [9]:
# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [10]:
# create a model checkpoint callback
mc = ModelCheckpoint('Models/spectrogram_best_model.keras', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)

In [11]:
# Train the model
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32, callbacks=[mc])

Epoch 1/10
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.3003 - loss: 2.8629
Epoch 1: val_accuracy improved from -inf to 0.40433, saving model to Models/spectrogram_best_model.keras
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m157s[0m 4s/step - accuracy: 0.3022 - loss: 2.8343 - val_accuracy: 0.4043 - val_loss: 1.2805
Epoch 2/10
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.4901 - loss: 1.1274
Epoch 2: val_accuracy improved from 0.40433 to 0.51986, saving model to Models/spectrogram_best_model.keras
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 3s/step - accuracy: 0.4926 - loss: 1.1232 - val_accuracy: 0.5199 - val_loss: 1.1404
Epoch 3/10
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.7579 - loss: 0.6335
Epoch 3: val_accuracy improved from 0.51986 to 0.79783, saving model to Models/spectrogram_best_model.keras
[1m35/35[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x17ace941e20>

In [12]:
# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(X_test, y_test)
print('Test accuracy:', test_acc)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 567ms/step - accuracy: 0.8654 - loss: 0.5305
Test accuracy: 0.8592057824134827


SAVE MODEL

In [13]:
# Save the trained model
#model.save('MySpectrograms_spectrogram_model.h5')

In [14]:
np.save('Train Test Splits/S_X_train.npy', X_train)
np.save('Train Test Splits/S_X_test.npy', X_test)
np.save('Train Test Splits/S_y_train.npy', y_train)
np.save('Train Test Splits/S_y_test.npy', y_test)

LOAD MODEL

In [15]:
# Load the trained model
model = tf.keras.models.load_model('Models/spectrogram_best_model.keras')  

PREPROCESS NEW AUDIO

In [16]:
import librosa

def preprocess_audio(audio_path, sr=22050, duration=2, n_mels=128, image_size=(600, 400)):
    # Load audio file
    audio, _ = librosa.load(audio_path, sr=sr, duration=duration, mono=True)

    # Convert audio to spectrogram
    spectrogram = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=n_mels)
    spectrogram = librosa.power_to_db(spectrogram, ref=np.max)

    # Resize spectrogram to match the input shape of the model
    spectrogram = cv2.resize(spectrogram, dsize=image_size[::-1], interpolation=cv2.INTER_CUBIC)


    # Convert single-channel spectrogram to three-channel image
    spectrogram_rgb = np.stack((spectrogram,) * 3, axis=-1)




    return spectrogram_rgb[:, :, :3]




In [18]:
#audio_path = './test/Angry/7895_kz_japon.wav' 
audio_path = './test/Calm/7895_sk_japon.wav' 
#audio_path = './test/Sad/7895_hl_japon.wav' 

In [19]:
# Load and preprocess the audio recording
#audio_path = './test/Angry/7895_kz_saf.wav' # path to your audio recording

spectrogram = preprocess_audio(audio_path)  # Function to convert audio to spectrogram and preprocess it

In [20]:
def normalize_spectrogram(spectrogram):
    # Normalize the spectrogram data
    normalized_spectrogram = ((spectrogram - np.min(spectrogram)) / (np.max(spectrogram) - np.min(spectrogram)) * 255).astype(np.uint8)
    return normalized_spectrogram


In [21]:
# Normalize the spectrogram
spectrogram = normalize_spectrogram(spectrogram)  # Function to normalize the spectrogram

In [22]:
plt.imshow(spectrogram)
plt.axis('off')  # Turn off axis labels
plt.show()

NameError: name 'plt' is not defined

In [None]:
# Define emotion labels mapping
emotion_labels = ['Angry', 'Calm', 'Happy', 'Sad']

In [None]:
# Predict the emotion category
try:
    predictions = model.predict(np.expand_dims(spectrogram, axis=0))
    predicted_label = emotion_labels[np.argmax(predictions)]
    print('Predicted Emotion:', predicted_label)
except Exception as e:
    print('Error during prediction:', e)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
Predicted Emotion: Angry
