In [2]:
# Con data augmentation

import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, AveragePooling2D, Flatten, Dense, Input, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import RMSprop

seed_value = 32
os.environ['PYTHONHASHSEED'] = str(seed_value)
np.random.seed(seed_value)
tf.random.set_seed(seed_value)

train_dir = "data/spectrograms/speech"
test_dir = "data/spectrograms_test/speech"

img_width, img_height = 224, 224
batch_size = 32

# Data generators
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.125,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)
# neutral_datagen = ImageDataGenerator(
#     rescale=1./255,
#     validation_split=0.125,
#     shear_range=0.4,
#     zoom_range=0.4,
#     horizontal_flip=True,
#     rotation_range=20,
#     width_shift_range=0.2,
#     height_shift_range=0.2
# )
test_datagen = ImageDataGenerator(rescale=1./255)

# # Neutral generator
# neutral_generator = neutral_datagen.flow_from_directory(
#     train_dir,
#     target_size=(img_width, img_height),
#     batch_size=batch_size,
#     class_mode='categorical',
#     subset='training',
#     classes=['neutral']
# )

# # Other emotions generator
# other_generator = train_datagen.flow_from_directory(
#     train_dir,
#     target_size=(img_width, img_height),
#     batch_size=batch_size,
#     class_mode='categorical',
#     subset='training',
#     classes=[c for c in os.listdir(train_dir) if c != 'neutral']
# )

# Combined generators
train_generator = tf.keras.preprocessing.image.DirectoryIterator(
    directory=train_dir,
    image_data_generator=train_datagen,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'
)

val_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical'
)

def create_cnn_x_model(input_shape, num_classes):
    inputs = Input(shape=input_shape)
    
    x = Conv2D(8, (3, 3), activation='relu')(inputs)
    x = AveragePooling2D((2, 2))(x)
    
    x = Conv2D(16, (3, 3), activation='relu')(x)
    x = AveragePooling2D((2, 2))(x)
    
    x = Conv2D(32, (3, 3), activation='relu')(x)
    x = AveragePooling2D((2, 2))(x)
    
    x = Flatten()(x)
    x = Dense(2048, activation='relu')(x)
    x = Dropout(0.5)(x)  
    x = Dense(2048, activation='relu')(x)
    x = Dropout(0.5)(x)  
    outputs = Dense(num_classes, activation='softmax')(x)
    
    model = Model(inputs, outputs)
    return model

input_shape = (img_width, img_height, 3)
num_classes = 8
learning_rate = 0.0001  
momentum = 0.8
epochs = 50  

model = create_cnn_x_model(input_shape, num_classes)

optimizer = RMSprop(learning_rate=learning_rate, momentum=momentum)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()

history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // batch_size,
    validation_data=val_generator,
    validation_steps=val_generator.samples // batch_size,
    epochs=epochs
)

test_loss, test_accuracy = model.evaluate(test_generator, steps=test_generator.samples // batch_size)
print(f"Test accuracy: {test_accuracy * 100:.2f}%")

# Save the model
# model.save('cnn_x_model.h5')


Found 67 images belonging to 1 classes.
Found 947 images belonging to 7 classes.
Found 1014 images belonging to 8 classes.
Found 142 images belonging to 8 classes.
Found 300 images belonging to 8 classes.


Epoch 1/50


  self._warn_if_super_not_called()


[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 700ms/step - accuracy: 0.1459 - loss: 2.0721 - val_accuracy: 0.1328 - val_loss: 2.0614
Epoch 2/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.1250 - loss: 2.0374 - val_accuracy: 0.3571 - val_loss: 2.0476
Epoch 3/50


  self.gen.throw(value)


[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 701ms/step - accuracy: 0.1497 - loss: 2.0461 - val_accuracy: 0.2500 - val_loss: 1.9770
Epoch 4/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.0938 - loss: 2.1512 - val_accuracy: 0.2143 - val_loss: 1.9420
Epoch 5/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 686ms/step - accuracy: 0.2663 - loss: 1.9073 - val_accuracy: 0.3516 - val_loss: 1.8451
Epoch 6/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.3125 - loss: 1.8395 - val_accuracy: 0.2857 - val_loss: 1.9037
Epoch 7/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 661ms/step - accuracy: 0.2919 - loss: 1.7756 - val_accuracy: 0.3984 - val_loss: 1.7496
Epoch 8/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.3125 - loss: 1.7486 - val_accuracy: 0.1429 - val_loss: 2.1566
Epoch 9/50
[1m31/31[0m [32m━━━━━━━━━━━━