## LSTM

In [None]:
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Embedding, Dropout, LSTM, Dense
from keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from keras.callbacks import EarlyStopping
from sklearn.model_selection import  RandomizedSearchCV
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from keras_tuner import RandomSearch
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense, Bidirectional, Layer
import tensorflow.keras as keras
from sklearn.metrics import classification_report, confusion_matrix
from matplotlib import pyplot as plt
import seaborn as sns
from numpy import asarray
from numpy import zeros
import fasttext ###
from keras.regularizers import L2
import tensorflow.keras.backend as K


### Dataset Loading and Splitting

In [None]:
# Read the pre-processed train / validation / test splits, in that order.
# NOTE(review): the test file name ends in "_p_new" while the other two end in
# "_pre_new" -- confirm this is the intended file.
df_train, df_val, df_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../../data/New dataset/LSTM/preprocessing/train_tweets_LSTM_pre_new.csv',
        '../../data/New dataset/LSTM/preprocessing/eval_tweets_LSTM_pre_new.csv',
        '../../data/New dataset/LSTM/preprocessing/test_tweets_LSTM_p_new.csv',
    )
)

In [None]:
# For every split: the tweet text is the model input, the cyberbullying type is the target.
X_train = df_train["tweet_text"]
y_train = df_train["cyberbullying_type"]

X_val = df_val["tweet_text"]
y_val = df_val["cyberbullying_type"]

X_test = df_test["tweet_text"]
y_test = df_test["cyberbullying_type"]

In [None]:
# Number of whitespace-separated tokens in each training tweet.
df_train['tweet_length'] = df_train['tweet_text'].map(lambda tweet: len(tweet.split()))

# Histogram (with KDE overlay) of the token counts.
length_fig, length_ax = plt.subplots(figsize=(10, 6))
sns.histplot(df_train['tweet_length'], bins=30, kde=True, ax=length_ax)
length_ax.set_title('Tweet Length distribution')
length_ax.set_xlabel('Tweet length')
length_ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Share of training tweets that exceed the padding length `max_len`.
# Token counts come from a whitespace split; a missing tweet yields NaN
# (which never compares as "longer") instead of raising.
df_train['text_len'] = df_train['tweet_text'].str.split().str.len()
max_len = 45

# Strictly greater: a tweet of exactly `max_len` tokens is not "longer than"
# `max_len`, which is what the message below reports.
count = (df_train['text_len'] > max_len).sum()
total_tweets = len(df_train)
# Guard against an empty frame to avoid a division by zero.
percentage = (count / total_tweets) * 100 if total_tweets else 0.0
print(f"Percentage of tweets longer than {max_len} tokens: {percentage:.2f}%")

### Embeddings

In [None]:
# Labels and texts are read straight from the data frames rather than from the
# y_* / X_* variables this cell overwrites, so the cell can be re-run without
# encoding already-encoded labels or tokenizing already-padded arrays.

# Encode the string class labels as integers; the encoder is fitted on the
# training labels only and re-used for validation and test.
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(df_train["cyberbullying_type"])
y_val = label_encoder.transform(df_val["cyberbullying_type"])
y_test = label_encoder.transform(df_test["cyberbullying_type"])

# Build the word -> integer index vocabulary from the training tweets only.
word_tokenizer = Tokenizer()
word_tokenizer.fit_on_texts(df_train["tweet_text"])

# Replace every tweet by the list of indices of its words.
X_train = word_tokenizer.texts_to_sequences(df_train["tweet_text"])
X_test = word_tokenizer.texts_to_sequences(df_test["tweet_text"])
X_val = word_tokenizer.texts_to_sequences(df_val["tweet_text"])

# One extra slot on top of the word index; the models below treat index 0 as
# padding (mask_zero=True).
vocab_length = len(word_tokenizer.word_index) + 1

print("The vocaboulary length is:", vocab_length)

# Left-pad every sequence to exactly `max_len` indices.
X_train = pad_sequences(X_train, padding = 'pre', maxlen = max_len)
X_test = pad_sequences(X_test, padding = 'pre', maxlen = max_len)
X_val = pad_sequences(X_val, padding = 'pre', maxlen = max_len)

#### Glove

In [None]:
# Load the pre-trained GloVe vectors into a {word: vector} dictionary, then
# build the embedding matrix whose row `index` holds the vector of the word the
# tokenizer mapped to `index`.
embedding_dim = 50  # ATTENTION: must match the dimension of the GloVe file below
glove_path = 'Embeddings/glove.twitter.27B.50d.txt'  # ATTENTION: change together with embedding_dim

embeddings_dictionary = dict()
# The context manager closes the file even if parsing fails half-way.
with open(glove_path, encoding="utf8") as glove_file:
    for line in glove_file:
        records = line.split()
        # Skip blank or malformed lines: a valid line is the word followed by
        # exactly `embedding_dim` numbers.
        if len(records) != embedding_dim + 1:
            continue
        word = records[0]
        embeddings_dictionary[word] = asarray(records[1:], dtype='float32')

# Rows of words without a GloVe vector (and row 0) stay all-zero.
embedding_matrix = zeros((vocab_length, embedding_dim))

oov_count = 0
for word, index in word_tokenizer.word_index.items():
    embedding_vector = embeddings_dictionary.get(word)

    if embedding_vector is not None:
        embedding_matrix[index] = embedding_vector
    else:
        oov_count += 1

# The percentage is taken over the actual vocabulary words, i.e. without the
# extra slot that `vocab_length` adds on top of the word index.
n_vocab_words = len(word_tokenizer.word_index)
oov_percentage = (oov_count / n_vocab_words) * 100 if n_vocab_words else 0.0

print("The number of out-of-vocabulary words is:", oov_count)
print("The percentage of out-of-vocabulary words is:", oov_percentage)

#### Fasttext

In [None]:
# Alternative to GloVe: load a pre-computed fastText embedding matrix (uncomment to use)
#embedding_matrix = np.load("Embeddings/fasttext_embeddingmatrix_no.npy") 

#num_words, embedding_dim = embedding_matrix.shape

In [None]:
# Sanity check: show one padded training sequence and the shape of every array.

print(X_train[1])
print(
    f'X_train shape: {X_train.shape}',
    f'y_train shape: {y_train.shape}',
    f'X_val shape: {X_val.shape}',
    f'y_val shape: {y_val.shape}',
    f'X_test shape: {X_test.shape}',
    f'y_test shape: {y_test.shape}',
    sep='\n',
)

## LSTM (baseline)

## Baseline - no dense layer

### Model Creation and Random Search

In [None]:
def build_model(hp):
    """Build and compile the baseline LSTM classifier (no hidden dense layer).

    Layer stack: embedding initialised from the module-level
    ``embedding_matrix`` (trainable, index 0 masked) -> dropout -> LSTM with
    L2 kernel regularisation -> dropout -> 5-unit softmax output.

    Args:
        hp: hyperparameter container providing ``Float`` and ``Choice``; it is
            queried for 'dropout_1', 'units' and 'dropout_2', in that order.

    Returns:
        The compiled model (Adam optimizer, sparse categorical cross-entropy
        loss, 'acc' metric).
    """
    # The list is evaluated top to bottom, so the hyperparameters are requested
    # in the same order as the layers are stacked.
    model = Sequential([
        Embedding(
            input_dim=vocab_length,
            output_dim=embedding_matrix.shape[1],
            weights=[embedding_matrix],
            trainable=True,  # depends on which embeddings are used
            mask_zero=True,
        ),
        Dropout(hp.Float('dropout_1', min_value=0.3, max_value=0.5, step=0.1)),
        LSTM(units=hp.Choice('units', values=[16, 32, 64]), kernel_regularizer=L2(0.01)),
        Dropout(hp.Float('dropout_2', min_value=0.1, max_value=0.5, step=0.1)),
        # No hidden dense layer in this variant: the output layer follows directly.
        Dense(5, activation='softmax'),
    ])

    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['acc'],
    )
    return model

In [None]:
# Early stopping on the validation loss: stop after 10 epochs without
# improvement and restore the best weights.
callback = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=10, verbose=1, restore_best_weights=True
)

# Random search over the hyperparameters declared in build_model.
tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=10,           # maximum number of different configurations to test
    executions_per_trial=1,  # trainings per configuration (different weight initialisations)
    overwrite=True,
    # ATTENTION: change according to the configuration being tried
    # (LSTM type: baseline, bidirectional, attention layer) + dense layer or not.
    directory='baseline_nodense_rs',
    # ATTENTION: encode the parameter configuration: dataset (pre or no), num_embeddings.
    project_name='pre_50_rs',
)

fixed_batch_size = 64
tuner.search(
    X_train, y_train,
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[callback],
    batch_size=fixed_batch_size,
)

In [None]:
# Inspect the outcome of the search: how many configurations were tried,
# followed by the tuner's own summary.
num_trials = len(tuner.oracle.trials)
print(f'Number of evaluated configurations: {num_trials}')
tuner.results_summary()

### Model Training

As suggested in the official Keras documentation, we retrain the model from scratch using the best hyperparameters found by the search.

In [None]:
# Retrieve the best hyperparameter set found by the search
# (a list is returned; element 0 is used below).
best_hps = tuner.get_best_hyperparameters()

# Build a fresh model with the best hyperparameters.
model = build_model(best_hps[0])

# Model training. The validation data is passed as an (x, y) tuple, the form
# documented by Keras; a list is not interpreted consistently across versions.
history = model.fit(x=X_train, y=y_train,
                    epochs=50,
                    validation_data=(X_val, y_val),
                    callbacks=[callback],
                    batch_size=fixed_batch_size)

In [None]:
# Training vs validation curves: loss on the left, accuracy on the right.
curves_fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_title('Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

acc_ax.plot(history.history['acc'], label='Training Accuracy')
acc_ax.plot(history.history['val_acc'], label='Validation Accuracy')
acc_ax.set_title('Accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()

plt.show()

### Model Assessment

In [None]:
# Loss and accuracy of the retrained baseline model on the test split.
test_loss, test_accuracy = model.evaluate(X_test, y_test)

print(f'Test Loss: {test_loss}', f'Test Accuracy: {test_accuracy}', sep='\n')

In [None]:
# Original (string) class names, in the order of their integer codes.
class_names = label_encoder.classes_

# Predicted class = index of the highest softmax output.
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)

print(classification_report(y_test, y_pred_classes))
print("Class Names: ", class_names)

conf_matrix = confusion_matrix(y_test, y_pred_classes)

# Confusion matrix heatmap, annotated with the raw counts.
cm_fig, cm_ax = plt.subplots(figsize=(10, 7))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=cm_ax,
)
cm_ax.set_xlabel('Predicted Labels')
cm_ax.set_ylabel('True Labels')
cm_ax.set_title('Confusion Matrix')
plt.show()

## Baseline - dense layer

### Model creation and Random Search

In [None]:
def build_model_dense(hp):
    """Build and compile the baseline LSTM classifier with a hidden dense layer.

    Layer stack: embedding initialised from the module-level
    ``embedding_matrix`` (trainable, index 0 masked) -> dropout -> LSTM with
    L2 kernel regularisation -> dropout -> Dense(64, relu) -> 5-unit softmax.

    Args:
        hp: hyperparameter container providing ``Float`` and ``Choice``; it is
            queried for 'dropout_1', 'units' and 'dropout_2', in that order.

    Returns:
        The compiled model (Adam optimizer, sparse categorical cross-entropy
        loss, 'acc' metric).
    """
    # The list is evaluated top to bottom, so the hyperparameters are requested
    # in the same order as the layers are stacked.
    model = Sequential([
        Embedding(
            input_dim=vocab_length,
            output_dim=embedding_matrix.shape[1],
            weights=[embedding_matrix],
            trainable=True,  # ATTENTION: depends on which embeddings are used
            mask_zero=True,
        ),
        Dropout(hp.Float('dropout_1', min_value=0.3, max_value=0.5, step=0.1)),
        LSTM(units=hp.Choice('units', values=[16, 32, 64]), kernel_regularizer=L2(0.01)),
        Dropout(hp.Float('dropout_2', min_value=0.1, max_value=0.5, step=0.1)),
        Dense(64, activation='relu'),
        Dense(5, activation='softmax'),
    ])

    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['acc'],
    )
    return model

In [None]:
# Early stopping on the validation loss: stop after 10 epochs without
# improvement and restore the best weights.
callback = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=10, verbose=1, restore_best_weights=True
)

# Random search over the hyperparameters declared in build_model_dense.
tuner = RandomSearch(
    build_model_dense,
    objective='val_loss',
    max_trials=10,           # maximum number of different configurations to test
    executions_per_trial=1,  # trainings per configuration (different weight initialisations)
    overwrite=True,
    # ATTENTION: change according to the configuration being tried
    # (LSTM type: baseline, bidirectional, attention layer) + dense layer or not.
    directory='baseline_dense_rs',
    # ATTENTION: encode the parameter configuration: dataset (pre or no), num_embeddings.
    project_name='pre_50_rs',
)

fixed_batch_size = 64
tuner.search(
    X_train, y_train,
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[callback],
    batch_size=fixed_batch_size,
)

In [None]:
# Inspect the outcome of the search: how many configurations were tried,
# followed by the tuner's own summary.
num_trials = len(tuner.oracle.trials)
print(f'Number of evaluated configurations: {num_trials}')
tuner.results_summary()

### Model training

In [None]:
# Retrieve the best hyperparameter set found by the search
# (a list is returned; element 0 is used below).
best_hps = tuner.get_best_hyperparameters()

# Build a fresh model with the best hyperparameters.
model_dense = build_model_dense(best_hps[0])

# Model training. The validation data is passed as an (x, y) tuple, the form
# documented by Keras; a list is not interpreted consistently across versions.
history = model_dense.fit(x=X_train, y=y_train,
                          epochs=50,
                          validation_data=(X_val, y_val),
                          callbacks=[callback],
                          batch_size=fixed_batch_size)

In [None]:
# Training vs validation curves: loss on the left, accuracy on the right.
curves_fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_title('Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

acc_ax.plot(history.history['acc'], label='Training Accuracy')
acc_ax.plot(history.history['val_acc'], label='Validation Accuracy')
acc_ax.set_title('Accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()

plt.show()

### Model Assessment

In [None]:
# Loss and accuracy of the retrained baseline + dense model on the test split.
test_loss, test_accuracy = model_dense.evaluate(X_test, y_test)

print(f'Test Loss: {test_loss}', f'Test Accuracy: {test_accuracy}', sep='\n')

In [None]:
# Original (string) class names, in the order of their integer codes.
class_names = label_encoder.classes_

# Predicted class = index of the highest softmax output.
y_pred = model_dense.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)

print(classification_report(y_test, y_pred_classes))
print("Class Names: ", class_names)

conf_matrix = confusion_matrix(y_test, y_pred_classes)

# Confusion matrix heatmap, annotated with the raw counts.
cm_fig, cm_ax = plt.subplots(figsize=(10, 7))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=cm_ax,
)
cm_ax.set_xlabel('Predicted Labels')
cm_ax.set_ylabel('True Labels')
cm_ax.set_title('Confusion Matrix')
plt.show()

## LSTM (Bidirectional Layer)

## Bidirectional - no dense layer

In [None]:
def build_model_bi(hp):
    """Build and compile the bidirectional LSTM classifier (no hidden dense layer).

    Layer stack: embedding initialised from the module-level
    ``embedding_matrix`` (trainable, index 0 masked) -> dropout ->
    Bidirectional(LSTM with L2 kernel regularisation) -> dropout ->
    5-unit softmax output.

    Args:
        hp: hyperparameter container providing ``Float`` and ``Choice``; it is
            queried for 'dropout_1', 'units' and 'dropout_2', in that order.

    Returns:
        The compiled model (Adam optimizer, sparse categorical cross-entropy
        loss, 'acc' metric).
    """
    # The list is evaluated top to bottom, so the hyperparameters are requested
    # in the same order as the layers are stacked.
    model = Sequential([
        Embedding(
            input_dim=vocab_length,
            output_dim=embedding_matrix.shape[1],
            weights=[embedding_matrix],
            trainable=True,  # ATTENTION: depends on which embeddings are used
            mask_zero=True,
        ),
        Dropout(hp.Float('dropout_1', min_value=0.3, max_value=0.5, step=0.1)),
        Bidirectional(
            LSTM(units=hp.Choice('units', values=[16, 32, 64]), kernel_regularizer=L2(0.01))
        ),
        Dropout(hp.Float('dropout_2', min_value=0.1, max_value=0.5, step=0.1)),
        # No hidden dense layer in this variant: the output layer follows directly.
        Dense(5, activation='softmax'),
    ])

    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['acc'],
    )
    return model

    # RandomSearch configuration
# Early stopping on the validation loss: stop after 10 epochs without
# improvement and restore the best weights.
callback = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=10, verbose=1, restore_best_weights=True
)

# Random search over the hyperparameters declared in build_model_bi.
tuner = RandomSearch(
    build_model_bi,
    objective='val_loss',
    max_trials=10,           # maximum number of different configurations to test
    executions_per_trial=1,  # trainings per configuration (different weight initialisations)
    overwrite=True,
    # ATTENTION: change according to the configuration being tried
    # (LSTM type: baseline, bidirectional, attention layer) + dense layer or not.
    directory='bidirectional_nodense_rs',
    # ATTENTION: encode the parameter configuration: dataset (pre or no), num_embeddings.
    project_name='pre_50_rs',
)

fixed_batch_size = 64
tuner.search(
    X_train, y_train,
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[callback],
    batch_size=fixed_batch_size,
)

In [None]:
# Inspect the outcome of the search: how many configurations were tried,
# followed by the tuner's own summary.
num_trials = len(tuner.oracle.trials)
print(f'Number of evaluated configurations: {num_trials}')
tuner.results_summary()

In [None]:
# Retrieve the best hyperparameter set found by the search
# (a list is returned; element 0 is used below).
best_hps = tuner.get_best_hyperparameters()

# Build a fresh model with the best hyperparameters.
model_bi = build_model_bi(best_hps[0])

# Model training. The validation data is passed as an (x, y) tuple, the form
# documented by Keras; a list is not interpreted consistently across versions.
history = model_bi.fit(x=X_train, y=y_train,
                       epochs=50,
                       validation_data=(X_val, y_val),
                       callbacks=[callback],
                       batch_size=fixed_batch_size)

In [None]:
# Training vs validation curves: loss on the left, accuracy on the right.
curves_fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_title('Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

acc_ax.plot(history.history['acc'], label='Training Accuracy')
acc_ax.plot(history.history['val_acc'], label='Validation Accuracy')
acc_ax.set_title('Accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()

plt.show()

In [None]:
# Loss and accuracy of the retrained bidirectional model on the test split.
test_loss, test_accuracy = model_bi.evaluate(X_test, y_test)

print(f'Test Loss: {test_loss}', f'Test Accuracy: {test_accuracy}', sep='\n')

In [None]:
# Original (string) class names, in the order of their integer codes.
class_names = label_encoder.classes_

# Predicted class = index of the highest softmax output.
y_pred = model_bi.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)

print(classification_report(y_test, y_pred_classes))
print("Class Names: ", class_names)

conf_matrix = confusion_matrix(y_test, y_pred_classes)

# Confusion matrix heatmap, annotated with the raw counts.
cm_fig, cm_ax = plt.subplots(figsize=(10, 7))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=cm_ax,
)
cm_ax.set_xlabel('Predicted Labels')
cm_ax.set_ylabel('True Labels')
cm_ax.set_title('Confusion Matrix')
plt.show()

## Bidirectional - dense layer

In [None]:
def build_model_bi_dense(hp):
    """Build and compile the bidirectional LSTM classifier with a hidden dense layer.

    Layer stack: embedding initialised from the module-level
    ``embedding_matrix`` (trainable, index 0 masked) -> dropout ->
    Bidirectional(LSTM with L2 kernel regularisation) -> dropout ->
    Dense(64, relu) -> 5-unit softmax output.

    Args:
        hp: hyperparameter container providing ``Float`` and ``Choice``; it is
            queried for 'dropout_1', 'units' and 'dropout_2', in that order.

    Returns:
        The compiled model (Adam optimizer, sparse categorical cross-entropy
        loss, 'acc' metric).
    """
    # The list is evaluated top to bottom, so the hyperparameters are requested
    # in the same order as the layers are stacked.
    model = Sequential([
        Embedding(
            input_dim=vocab_length,
            output_dim=embedding_matrix.shape[1],
            weights=[embedding_matrix],
            trainable=True,  # ATTENTION: depends on which embeddings are used
            mask_zero=True,
        ),
        Dropout(hp.Float('dropout_1', min_value=0.3, max_value=0.5, step=0.1)),
        Bidirectional(
            LSTM(units=hp.Choice('units', values=[16, 32, 64]), kernel_regularizer=L2(0.01))
        ),
        Dropout(hp.Float('dropout_2', min_value=0.1, max_value=0.5, step=0.1)),
        Dense(64, activation='relu'),
        Dense(5, activation='softmax'),
    ])

    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['acc'],
    )
    return model

    # RandomSearch configuration
# Early stopping on the validation loss: stop after 10 epochs without
# improvement and restore the best weights.
callback = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=10, verbose=1, restore_best_weights=True
)

# Random search over the hyperparameters declared in build_model_bi_dense.
tuner = RandomSearch(
    build_model_bi_dense,
    objective='val_loss',
    max_trials=10,           # maximum number of different configurations to test
    executions_per_trial=1,  # trainings per configuration (different weight initialisations)
    overwrite=True,
    # ATTENTION: change according to the configuration being tried
    # (LSTM type: baseline, bidirectional, attention layer) + dense layer or not.
    directory='bidirectional_dense_rs',
    # ATTENTION: encode the parameter configuration: dataset (pre or no), num_embeddings.
    project_name='pre_50_rs',
)

fixed_batch_size = 64
tuner.search(
    X_train, y_train,
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[callback],
    batch_size=fixed_batch_size,
)

In [None]:
# Inspect the outcome of the search: how many configurations were tried,
# followed by the tuner's own summary.
num_trials = len(tuner.oracle.trials)
print(f'Number of evaluated configurations: {num_trials}')
tuner.results_summary()

In [None]:
# Retrieve the best hyperparameter set found by the search
# (a list is returned; element 0 is used below).
best_hps = tuner.get_best_hyperparameters()

# Build a fresh model with the best hyperparameters.
model_bi_dense = build_model_bi_dense(best_hps[0])

# Model training. The validation data is passed as an (x, y) tuple, the form
# documented by Keras; a list is not interpreted consistently across versions.
history = model_bi_dense.fit(x=X_train, y=y_train,
                             epochs=50,
                             validation_data=(X_val, y_val),
                             callbacks=[callback],
                             batch_size=fixed_batch_size)

In [None]:
# Training vs validation curves: loss on the left, accuracy on the right.
curves_fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_title('Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

acc_ax.plot(history.history['acc'], label='Training Accuracy')
acc_ax.plot(history.history['val_acc'], label='Validation Accuracy')
acc_ax.set_title('Accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()

plt.show()

In [None]:
# Loss and accuracy of the retrained bidirectional + dense model on the test split.
test_loss, test_accuracy = model_bi_dense.evaluate(X_test, y_test)

print(f'Test Loss: {test_loss}', f'Test Accuracy: {test_accuracy}', sep='\n')

In [None]:
# Original (string) class names, in the order of their integer codes.
class_names = label_encoder.classes_

# Predicted class = index of the highest softmax output.
y_pred = model_bi_dense.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)

print(classification_report(y_test, y_pred_classes))
print("Class Names: ", class_names)

conf_matrix = confusion_matrix(y_test, y_pred_classes)

# Confusion matrix heatmap, annotated with the raw counts.
cm_fig, cm_ax = plt.subplots(figsize=(10, 7))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=cm_ax,
)
cm_ax.set_xlabel('Predicted Labels')
cm_ax.set_ylabel('True Labels')
cm_ax.set_title('Confusion Matrix')
plt.show()

## Attention Layer

In [None]:
class AttentionLayer(Layer):
    """Attention pooling over the time axis of a sequence.

    For an input ``x`` of shape (batch_size, sequence_length, features) the
    layer scores every time step with ``tanh(x . W + b)``, turns the scores
    into weights with a softmax over the time axis (masked steps receive a
    large negative score first) and returns the weighted sum of the time
    steps, of shape (batch_size, features).

    The bias holds one value per time step, so the sequence length must be
    known when the layer is built.
    """

    def __init__(self, **kwargs):
        # Forward the standard layer arguments (name, dtype, ...) to the base
        # class; without this the layer cannot be given a name or be
        # re-created from its config.
        super(AttentionLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the scoring weights.

        Args:
            input_shape: shape of the input, (batch_size, sequence_length, features).

        Raises:
            ValueError: if the sequence length is not known at build time.
        """
        sequence_length, features = input_shape[-2], input_shape[-1]
        if sequence_length is None:
            raise ValueError(
                "AttentionLayer needs a fixed sequence length because its bias "
                "has one entry per time step; got input shape "
                f"{tuple(input_shape)}."
            )

        # Projection from the feature vector of a time step to a scalar score:
        # shape (features, 1).
        self.W = self.add_weight(name='attention_weights',
                                 shape=(features, 1),
                                 initializer='normal',
                                 trainable=True)

        # One bias per time step: shape (sequence_length, 1).
        self.b = self.add_weight(name='attention_bias',
                                 shape=(sequence_length, 1),
                                 initializer='zeros',
                                 trainable=True)

        super(AttentionLayer, self).build(input_shape)

    def call(self, x, mask=None):
        """Return the attention-weighted sum of the time steps of ``x``.

        Args:
            x: input tensor of shape (batch_size, sequence_length, features).
            mask: optional tensor of shape (batch_size, sequence_length);
                steps where it is 0/False are excluded from the softmax.

        Returns:
            Context vectors of shape (batch_size, features).
        """
        # Raw scores: (batch_size, sequence_length, 1).
        attention_scores = K.dot(x, self.W)
        # The per-step bias broadcasts over the batch axis.
        attention_scores += self.b
        attention_scores = K.tanh(attention_scores)

        if mask is not None:
            # (mask - 1) is 0 for kept steps and -1 for masked ones, so masked
            # steps get a score of about -1e9 and a softmax weight of ~0.
            mask = K.cast(mask, K.floatx())
            mask = K.expand_dims(mask, axis=-1)  # match the shape of attention_scores
            attention_scores += (mask - 1) * 1e9

        # Normalise across the sequence axis to obtain the attention weights.
        attention_weights = K.softmax(attention_scores, axis=1)

        # Weight every time step (broadcast over the feature axis) ...
        weighted_input = x * attention_weights

        # ... and sum over the sequence axis to get the context vector.
        context_vector = K.sum(weighted_input, axis=1)

        return context_vector

    def compute_output_shape(self, input_shape):
        """Output is one context vector per example: (batch_size, features)."""
        return (input_shape[0], input_shape[-1])

    def compute_mask(self, inputs, mask=None):
        """Return None: the sequence axis is gone, so no mask is passed on."""
        return None


## Attention - nodense

In [None]:
def build_model_att(hp):
    """Build and compile the LSTM + attention classifier (no hidden dense layer).

    Layer stack: embedding initialised from the module-level
    ``embedding_matrix`` (trainable, index 0 masked) -> dropout -> LSTM with
    L2 kernel regularisation returning the full sequence -> AttentionLayer ->
    dropout -> 5-unit softmax output.

    Args:
        hp: hyperparameter container providing ``Float`` and ``Choice``; it is
            queried for 'dropout_1', 'units' and 'dropout_2', in that order.

    Returns:
        The compiled model (Adam optimizer, sparse categorical cross-entropy
        loss, 'acc' metric).
    """
    # The list is evaluated top to bottom, so the hyperparameters are requested
    # in the same order as the layers are stacked.
    model = Sequential([
        Embedding(
            input_dim=vocab_length,
            output_dim=embedding_matrix.shape[1],
            weights=[embedding_matrix],
            trainable=True,  # ATTENTION: depends on which embeddings are used
            mask_zero=True,
        ),
        Dropout(hp.Float('dropout_1', min_value=0.3, max_value=0.5, step=0.1)),
        # return_sequences=True: the attention layer needs every time step.
        LSTM(
            units=hp.Choice('units', values=[16, 32, 64]),
            kernel_regularizer=L2(0.01),
            return_sequences=True,
        ),
        AttentionLayer(),
        Dropout(hp.Float('dropout_2', min_value=0.1, max_value=0.5, step=0.1)),
        # No hidden dense layer in this variant: the output layer follows directly.
        Dense(5, activation='softmax'),
    ])

    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['acc'],
    )
    return model

In [None]:
# Random search over the hyperparameters declared in build_model_att.
tuner = RandomSearch(
    build_model_att,
    objective='val_loss',
    max_trials=10,  # Max number of different configurations to test
    executions_per_trial=1,  # Number of times each config is executed (different weight initialisations)
    overwrite=True,
    directory='attention_nodense_rs',  # ATTENTION: change according to the configuration being tried (LSTM type: baseline, bidirectional, attention layer) + dense layer or not
    project_name='pre_50_rs'  # ATTENTION: encode the parameter configuration: dataset (pre or no), num_embeddings
)

# EarlyStopping callback: stop after 10 epochs without improvement of the
# validation loss and restore the best weights.
callback = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    verbose=1,
    restore_best_weights=True
)

fixed_batch_size = 64
tuner.search(X_train, y_train,
             epochs=100,
             validation_data=(X_val, y_val),
             callbacks=[callback],
             batch_size=fixed_batch_size)

# Retrieve the best hyperparameter set found by the search
# (a list is returned; element 0 is used below).
best_hps = tuner.get_best_hyperparameters()

# Build a fresh model with the best hyperparameters.
model_att = build_model_att(best_hps[0])

# Model training. The validation data is passed as an (x, y) tuple, the form
# documented by Keras; a list is not interpreted consistently across versions.
history = model_att.fit(x=X_train, y=y_train,
                        epochs=50,
                        validation_data=(X_val, y_val),
                        callbacks=[callback],
                        batch_size=fixed_batch_size)

In [None]:
# Training vs validation curves: loss on the left, accuracy on the right.
curves_fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_title('Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

acc_ax.plot(history.history['acc'], label='Training Accuracy')
acc_ax.plot(history.history['val_acc'], label='Validation Accuracy')
acc_ax.set_title('Accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()

plt.show()

In [None]:
# Loss and accuracy of the retrained attention model on the test split.
test_loss, test_accuracy = model_att.evaluate(X_test, y_test)

print(f'Test Loss: {test_loss}', f'Test Accuracy: {test_accuracy}', sep='\n')

In [None]:
# Original (string) class names, in the order of their integer codes.
class_names = label_encoder.classes_

# Predicted class = index of the highest softmax output.
y_pred = model_att.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)

print(classification_report(y_test, y_pred_classes))
print("Class Names: ", class_names)

conf_matrix = confusion_matrix(y_test, y_pred_classes)

# Confusion matrix heatmap, annotated with the raw counts.
cm_fig, cm_ax = plt.subplots(figsize=(10, 7))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=cm_ax,
)
cm_ax.set_xlabel('Predicted Labels')
cm_ax.set_ylabel('True Labels')
cm_ax.set_title('Confusion Matrix')
plt.show()

## Attention - dense layer

In [None]:
def build_model_att_dense(hp):
    """Build and compile the LSTM + attention classifier with a hidden dense layer.

    Layer stack: embedding initialised from the module-level
    ``embedding_matrix`` (trainable, index 0 masked) -> dropout -> LSTM with
    L2 kernel regularisation returning the full sequence -> AttentionLayer ->
    dropout -> Dense(64, relu) -> 5-unit softmax output.

    Args:
        hp: hyperparameter container providing ``Float`` and ``Choice``; it is
            queried for 'dropout_1', 'units' and 'dropout_2', in that order.

    Returns:
        The compiled model (Adam optimizer, sparse categorical cross-entropy
        loss, 'acc' metric).
    """
    model = Sequential()
    model.add(Embedding(input_dim=vocab_length,
                        output_dim=embedding_matrix.shape[1],
                        weights=[embedding_matrix],
                        trainable=True,  # ATTENTION: depends on which embeddings are used
                        mask_zero=True))
    model.add(Dropout(hp.Float('dropout_1', min_value=0.3, max_value=0.5, step=0.1)))
    # return_sequences=True: the attention layer needs every time step.
    model.add(LSTM(units=hp.Choice('units', values=[16, 32, 64]),
                   kernel_regularizer=L2(0.01),
                   return_sequences=True))
    model.add(AttentionLayer())
    model.add(Dropout(hp.Float('dropout_2', min_value=0.1, max_value=0.5, step=0.1)))
    # The hidden dense layer is what distinguishes this variant from
    # build_model_att; it was previously commented out, which made the two
    # builders identical.
    model.add(Dense(64, activation='relu'))
    model.add(Dense(5, activation='softmax'))

    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy', metrics=['acc'])
    return model

In [None]:
# Random search over the hyperparameters declared in build_model_att_dense.
tuner = RandomSearch(
    build_model_att_dense,
    objective='val_loss',
    max_trials=10,  # Max number of different configurations to test
    executions_per_trial=1,  # Number of times each config is executed (different weight initialisations)
    overwrite=True,
    directory='attention_dense_rs',  # ATTENTION: change according to the configuration being tried (LSTM type: baseline, bidirectional, attention layer) + dense layer or not
    project_name='pre_50_rs'  # ATTENTION: encode the parameter configuration: dataset (pre or no), num_embeddings
)

# EarlyStopping callback: stop after 10 epochs without improvement of the
# validation loss and restore the best weights.
callback = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    verbose=1,
    restore_best_weights=True
)

fixed_batch_size = 64
tuner.search(X_train, y_train,
             epochs=100,
             validation_data=(X_val, y_val),
             callbacks=[callback],
             batch_size=fixed_batch_size)

# Retrieve the best hyperparameter set found by the search
# (a list is returned; element 0 is used below).
best_hps = tuner.get_best_hyperparameters()

# Build a fresh model with the best hyperparameters.
model_att_dense = build_model_att_dense(best_hps[0])

# Model training. The validation data is passed as an (x, y) tuple, the form
# documented by Keras; a list is not interpreted consistently across versions.
history = model_att_dense.fit(x=X_train, y=y_train,
                              epochs=50,
                              validation_data=(X_val, y_val),
                              callbacks=[callback],
                              batch_size=fixed_batch_size)

In [None]:
# Training vs validation curves: loss on the left, accuracy on the right.
curves_fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_title('Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

acc_ax.plot(history.history['acc'], label='Training Accuracy')
acc_ax.plot(history.history['val_acc'], label='Validation Accuracy')
acc_ax.set_title('Accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()

plt.show()

In [None]:
# Loss and accuracy of the retrained attention + dense model on the test split.
test_loss, test_accuracy = model_att_dense.evaluate(X_test, y_test)

print(f'Test Loss: {test_loss}', f'Test Accuracy: {test_accuracy}', sep='\n')

In [None]:
# Original (string) class names, in the order of their integer codes.
class_names = label_encoder.classes_

# Predicted class = index of the highest softmax output.
y_pred = model_att_dense.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)

print(classification_report(y_test, y_pred_classes))
print("Class Names: ", class_names)

conf_matrix = confusion_matrix(y_test, y_pred_classes)

# Confusion matrix heatmap, annotated with the raw counts.
cm_fig, cm_ax = plt.subplots(figsize=(10, 7))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=cm_ax,
)
cm_ax.set_xlabel('Predicted Labels')
cm_ax.set_ylabel('True Labels')
cm_ax.set_title('Confusion Matrix')
plt.show()