In [1]:
# Colab-only step: mount the user's Google Drive at /content/drive so the
# dataset archive stored there can be read by the following cells.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!cp /content/drive/MyDrive/AIExam/data.zip /content
!unzip data.zip

In [10]:
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import LSTM, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import csv
from PIL import Image
import numpy as np

# Function to load data from CSV and images
def load_data(file_path, image_size=(64, 64)):
    """Load (text, image, emotion) samples listed in a CSV file.

    The first CSV row is treated as a header and skipped. Every following
    non-blank row must contain exactly three columns: the text, the path of
    an image file, and the emotion label. Each image is opened, converted to
    RGB, resized to ``image_size`` and divided by 255.0.

    Args:
        file_path: Path of the CSV file (read as UTF-8).
        image_size: ``(width, height)`` tuple handed to ``Image.resize``.
            Defaults to ``(64, 64)``.

    Returns:
        A list with one dict per sample, with keys ``'text'``,
        ``'image_data'`` (NumPy array of the rescaled pixels) and
        ``'emotion'``. The list is empty when the CSV has no data rows.

    Raises:
        ValueError: If a non-blank row does not have exactly three columns.
        OSError: If the CSV or one of the referenced images cannot be opened.
    """
    data = []
    with open(file_path, newline='', encoding='utf-8') as csvfile:
        csv_reader = csv.reader(csvfile)
        # Skip the header; the default keeps an entirely empty file from raising.
        next(csv_reader, None)
        for row in csv_reader:
            # csv.reader yields [] for blank lines (e.g. a trailing newline);
            # skip them instead of failing on the 3-way unpack below.
            if not row:
                continue
            text, image_path, emotion = row
            # Image paths are used as written in the CSV, so relative paths
            # resolve against the current working directory.
            # The context manager releases the underlying file handle as soon
            # as the pixel data has been copied by convert().
            with Image.open(image_path) as raw_image:
                img = raw_image.convert('RGB').resize(image_size)
            img_array = np.array(img) / 255.0
            entry = {'text': text, 'image_data': img_array, 'emotion': emotion}
            data.append(entry)
    return data


# CSV index of the dataset (relative to the current working directory);
# each row is turned into one sample dict by load_data.
file_path = "data/processed/text_image_emotion.csv"
data = load_data(file_path=file_path)

In [None]:
# --- Unpack the loaded samples into parallel collections -------------------
texts = [sample['text'] for sample in data]
emotions = [sample['emotion'] for sample in data]
images = np.array([sample['image_data'] for sample in data])

# --- Text: fit a vocabulary capped at `max_words`, map to ids, pad ---------
max_words = 10000
tokenizer = Tokenizer(num_words=max_words, oov_token='<OOV>')
tokenizer.fit_on_texts(texts)
text_sequences = tokenizer.texts_to_sequences(texts)
# No maxlen is given, so the padded width is decided by pad_sequences itself.
text_padded = pad_sequences(text_sequences, padding='post')

# --- Labels: emotion string -> integer id -> one-hot vector ----------------
label_encoder = LabelEncoder()
encoded_emotions = label_encoder.fit_transform(emotions)
categorical_emotions = tf.keras.utils.to_categorical(
    encoded_emotions,
    num_classes=len(label_encoder.classes_),
)

# --- Split: 80% train, then the remaining 20% halved into test / val -------
# Both calls use random_state=42.
train_holdout_parts = train_test_split(
    text_padded,
    images,
    categorical_emotions,
    test_size=0.2,
    random_state=42,
)
(text_train, text_test_temp,
 image_train, image_test_temp,
 emotion_train, emotion_test_temp) = train_holdout_parts

test_val_parts = train_test_split(
    text_test_temp,
    image_test_temp,
    emotion_test_temp,
    test_size=0.5,
    random_state=42,
)
(text_test, text_val,
 image_test, image_val,
 emotion_test, emotion_val) = test_val_parts

In [14]:
# Build the two-branch (text + image) emotion classifier.

# Text branch: padded token ids -> 16-d embeddings -> LSTM(16).
text_input = layers.Input(shape=(text_padded.shape[1],), dtype=tf.int32)
# `input_length` is no longer passed: it is deprecated in recent Keras
# releases and the sequence length is already fixed by `text_input`.
# NOTE(review): the text is post-padded and no masking is configured here, so
# the LSTM also consumes the padding ids; consider `mask_zero=True` -- confirm
# the effect on results before changing.
embedding_layer = layers.Embedding(input_dim=max_words, output_dim=16)(text_input)
lstm_layer = LSTM(16)(embedding_layer)
# NOTE(review): LSTM without return_sequences should already yield a 2-D
# tensor, which would make this Flatten a no-op; kept so the layer graph and
# variable names stay as they were.
text_flatten = layers.Flatten()(lstm_layer)

# Image branch: 64x64 RGB input -> one 3x3 conv (32 filters) -> 2x2 max-pool -> flatten.
image_input = layers.Input(shape=(64, 64, 3))
conv_layer = layers.Conv2D(32, (3, 3), activation='relu')(image_input)
pooling_layer = layers.MaxPooling2D((2, 2))(conv_layer)
flatten_layer = layers.Flatten()(pooling_layer)

# Concatenate the flattened text and image layers
concatenated = layers.Concatenate()([text_flatten, flatten_layer])
# Add Dropout for regularization
dropout_layer = layers.Dropout(0.5)(concatenated)
dense_layer = layers.Dense(64, activation='relu')(dropout_layer)
# One softmax unit per emotion class known to the label encoder.
output_layer = layers.Dense(len(label_encoder.classes_), activation='softmax')(dense_layer)

model = models.Model(inputs=[text_input, image_input], outputs=output_layer)

# Compile the model with a lower learning rate
optimizer = Adam(learning_rate=0.0001)
# categorical_crossentropy matches the one-hot encoded labels.
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Add ModelCheckpoint callback to save the best model
# (the best epoch is selected by the highest validation accuracy).
checkpoint_callback = ModelCheckpoint("best_model.h5", save_best_only=True, monitor="val_accuracy", mode="max")

# Create an ImageDataGenerator for data augmentation
# NOTE(review): the generator is only constructed here; the training call
# visible in this notebook does not use it, so no augmentation is applied.
image_data_generator = ImageDataGenerator(rotation_range=20, width_shift_range=0.2, height_shift_range=0.2, shear_range=0.2, zoom_range=0.2, horizontal_flip=True)

# Early-stopping callback: stop after 5 epochs without val_loss improvement
# and restore the best weights.
# NOTE(review): this monitors val_loss while the checkpoint monitors
# val_accuracy, so the two may pick different "best" epochs.
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 34)]                 0         []                            
                                                                                                  
 input_2 (InputLayer)        [(None, 64, 64, 3)]          0         []                            
                                                                                                  
 embedding (Embedding)       (None, 34, 16)               160000    ['input_1[0][0]']             
                                                                                                  
 conv2d (Conv2D)             (None, 62, 62, 32)           896       ['input_2[0][0]']             
                                                                                              

In [15]:

# Train for at most 30 epochs on the training split, evaluating on the
# validation split; the checkpoint and early-stopping callbacks are attached.
model.fit(
    x=[text_train, image_train],
    y=emotion_train,
    epochs=30,
    validation_data=([text_val, image_val], emotion_val),
    callbacks=[checkpoint_callback, early_stopping_callback],
)


Epoch 1/30

  saving_api.save_model(


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30


<keras.src.callbacks.History at 0x7c4ecf0e96f0>

In [17]:
# Score the model on the held-out test split; with the metrics configured at
# compile time the result is indexed as [loss, accuracy].
test_inputs = [text_test, image_test]
evaluation_results = model.evaluate(test_inputs, emotion_test)
print(f"Loss: {evaluation_results[0]}, Accuracy: {evaluation_results[1]}")

# TODO: change the data
#predictions = model.predict([text_test, image_test])
#print(predictions)

Loss: 1.2395706176757812, Accuracy: 0.5675213932991028
