**Notebook submitted by Ayesha Binte Safiullah and Alishba Ishrat**

# **Import Dataset**

In [None]:
import numpy as np
import pandas as pd
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
from wordcloud import WordCloud

In [None]:
import kagglehub

# Fetch the Jigsaw toxic-comment dataset through kagglehub; `path` is whatever
# dataset_download returns and is used below as the directory holding train.csv.
path = kagglehub.dataset_download("julian3833/jigsaw-toxic-comment-classification-challenge")
print("Path to dataset files:", path)

In [None]:
# Load train.csv from the downloaded dataset location into the working DataFrame.
data = pd.read_csv(path + '/train.csv')

# **Dataset Statistics**

In [None]:
# Preview the first rows of the dataset.
data.head()

In [None]:
# Column names, dtypes and non-null counts.
data.info()

In [None]:
# Summary statistics of the numeric columns.
data.describe()

In [None]:
# (rows, columns) of the loaded DataFrame.
print('data shape:', data.shape)

In [None]:
# Count of missing values per column.
data.isnull().sum()

# **EDA**

### Average Comment Length

In [None]:
# Character length of every comment, followed by the dataset-wide mean.
all_X = data['comment_text'].map(len)
print(f"The average comment text length is {all_X.mean()}")

### **Pie Chart showing Distribution of Labels**

In [None]:
# The six binary label columns of the dataset; reused by later cells.
COLUMNS = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

# Number of positive annotations per label, as a one-column frame sorted ascending.
df_distribution = (
    data[COLUMNS]
    .sum()
    .to_frame(name='count')
    .sort_values('count')
)

# Pie chart of the label totals; the legend is moved outside the axes so it
# does not cover the wedges. (The stray trailing '/' in the title was removed.)
df_distribution.plot.pie(y='count',
                         title='Label distribution over comments',
                         figsize=(5, 5))\
               .legend(loc='center left', bbox_to_anchor=(1.3, 0.5))

In [None]:
# Same label totals as the pie chart, largest first.
df_distribution.sort_values('count',ascending=False)

### **Calculate number of comments for each label combination**

In [None]:
# Count how many comments carry each distinct combination of the six labels,
# most frequent combination first.
df_comb = (
    data.groupby(COLUMNS)
        .size()
        .sort_values(ascending=False)
        .reset_index(name='count')
)

df_comb.head(n=10)

### **Multi Label Comments**

In [None]:
# Per-comment number of positive labels, stored in a temporary helper column.
df_labels = data[COLUMNS]
data['Multi_labele_comment'] = df_labels.sum(axis=1)

# A comment is "multi label" when more than one label is set.
print(f"Multi label comments : {data['Multi_labele_comment'].gt(1).sum()}")

In [None]:
# Remove the temporary per-comment label count so `data` keeps its original columns.
data = data.drop('Multi_labele_comment', axis=1)

### **Correlation Matrix**

In [None]:
# Pairwise correlation between the label columns, drawn as an annotated heatmap.
f, ax=plt.subplots(figsize=(9,6))

f.suptitle('Correlation matrix for categories: ')

sns.heatmap(data[COLUMNS].corr(), annot=True, linewidths=.5,ax=ax)

### **Co-occurrence**

In [None]:
#Co-occurrence

label_columns = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

# Create a co-occurrence matrix by transposing and dot product
# (assuming 0/1 label values: entry [a, b] counts comments tagged with both a and b,
# and the diagonal holds each label's own total)
co_occurrence_matrix = data[label_columns].T.dot(data[label_columns])

# Plot the heatmap
plt.figure(figsize=(10, 6))
sns.heatmap(co_occurrence_matrix, annot=True, cmap="YlGnBu", fmt="d", cbar=True)
plt.title("Category Co-occurrence Heatmap")
plt.show()

In [None]:
# Word Count distribution
plt.figure(figsize=(8.7, 6))

# Temporary helper columns: whitespace-separated word count per comment and
# the number of positive labels per comment.
data['word_count'] = data['comment_text'].apply(lambda x: len(x.split()))
# NOTE(review): tag_count leaves out 'severe_toxic' and is not read anywhere in
# this cell - confirm whether it is still needed.
data['tag_count'] = data[['toxic', 'obscene', 'threat', 'insult', 'identity_hate']].sum(axis=1)

# Plot the histogram with improved customization
ax = data['word_count'].plot(kind='hist', bins=30, color='skyblue', edgecolor='black', alpha=0.7)

# Add gridlines for better readability
plt.grid(True, linestyle='--', alpha=0.5)

# Add a vertical line for the mean
mean = data['word_count'].mean()
plt.axvline(mean, color='red', linestyle='dashed', linewidth=2, label=f'Mean: {mean:.2f}')

# Add a vertical line for the median
median = data['word_count'].median()
plt.axvline(median, color='green', linestyle='dashed', linewidth=2, label=f'Median: {median}')

# Add a title and labels with improved readability
plt.title('Word Count Distribution of Comments', fontsize=16, fontweight='bold')
plt.xlabel('Number of Words', fontsize=10)
plt.ylabel('Frequency', fontsize=10)

# Add a legend
plt.legend()

# Improve layout to avoid clipping
plt.tight_layout()

# Show the plot
plt.show()

In [None]:
# Discard both temporary helper columns in a single call.
data = data.drop(columns=['word_count', 'tag_count'])

### **Venn Diagram**

In [None]:
from matplotlib_venn import venn2
from matplotlib_venn import venn3

In [None]:
# Boolean membership masks for the three labels being compared.
is_toxic, not_toxic = data['toxic'] == 1, data['toxic'] == 0
is_insult, not_insult = data['insult'] == 1, data['insult'] == 0
is_obscene, not_obscene = data['obscene'] == 1, data['obscene'] == 0

# Sizes of the regions that belong to exactly one label.
t = int((is_toxic & not_insult & not_obscene).sum())
i = int((not_toxic & is_insult & not_obscene).sum())
o = int((not_toxic & not_insult & is_obscene).sum())

# Sizes of the regions shared by exactly two labels.
t_i = int((is_toxic & is_insult & not_obscene).sum())
t_o = int((is_toxic & not_insult & is_obscene).sum())
i_o = int((not_toxic & is_insult & is_obscene).sum())

# Size of the region shared by all three labels.
t_i_o = int((is_toxic & is_insult & is_obscene).sum())


# Make the diagram; the subsets are passed in the same order as the original
# call: (toxic only, insult only, toxic+insult, obscene only, toxic+obscene,
# insult+obscene, all three).
plt.figure(figsize=(8, 8))
plt.title("Venn diagram for 'toxic', 'insult' and 'obscene'")
venn3(subsets = (t, i, t_i, o, t_o, i_o, t_i_o),
      set_labels=('toxic', 'insult', 'obscene'))

In [None]:
# Membership masks for the two labels being compared.
toxic_on, toxic_off = data['toxic'] == 1, data['toxic'] == 0
severe_on, severe_off = data['severe_toxic'] == 1, data['severe_toxic'] == 0

# Region sizes: toxic only, severe_toxic only, and both.
t = int((toxic_on & severe_off).sum())
s = int((toxic_off & severe_on).sum())

t_s = int((toxic_on & severe_on).sum())


# Make the diagram
plt.figure(figsize=(8, 8))
plt.title("Venn diagram for 'toxic' and 'severe_toxic'")
venn2(subsets = (t, s, t_s),
      set_labels=('toxic', 'severe_toxic'))
plt.show()

### **Number of Words in each comment**

In [None]:
def NumberOfWords(column_name):
    """
    Print summary statistics of the per-comment word count for one label.

    Only comments whose `column_name` value equals 1 are considered. As a side
    effect the word counts are written to the 'len' column of the global
    `data` frame (rows that are not selected end up without a value there).

    Parameters:
    column_name (str): Label column to filter on; it must be one of the
        columns at positions 2-7 of `data`.

    Returns:
    None
    """
    allowed_labels = data.columns[2:8]
    if column_name not in allowed_labels:
        print(f"Column '{column_name}' does not exist in the DataFrame.")
        return

    # Select the text of the positively labelled comments, skipping missing text.
    is_positive = data[column_name] == 1
    selected_text = data.loc[is_positive, 'comment_text'].dropna()

    # Whitespace-separated word count per selected comment, aligned back on the index.
    data['len'] = selected_text.map(lambda comment: len(comment.split()))

    print(f"Analysis of Number of words in {column_name}:")
    print(data['len'].describe())

In [None]:
# Word-count summary for every label, in the same order as COLUMNS
# (toxic, severe_toxic, obscene, threat, insult, identity_hate).
for label_name in COLUMNS:
    NumberOfWords(label_name)

In [None]:
# Remove the 'len' helper column that NumberOfWords writes into `data`.
data = data.drop('len', axis=1)

### **Distribution across Labels**

In [None]:
# Set a style for the plot
sns.set(style="whitegrid")

# Positive count per label. All six labels in COLUMNS are included (the
# previous version left out 'severe_toxic'), and the sum is computed once.
label_totals = data[COLUMNS].sum()

# Visualize the distribution of the target labels
plt.figure(figsize=(8.5, 7))
barplot = sns.barplot(
    x=label_totals.index,
    y=label_totals.values,
    palette='viridis', edgecolor='black'
)

# Add counts on top of each bar
for p in barplot.patches:
    barplot.annotate(f'{int(p.get_height())}',
                     (p.get_x() + p.get_width() / 2., p.get_height()),
                     ha='center', va='bottom',
                     fontsize=10, color='black',
                     xytext=(0, 5),  # Adjusts the position of the text
                     textcoords='offset points')

# Enhance title and labels
plt.title("Distribution of Comments Across Labels", fontsize=16, fontweight='bold')
plt.ylabel("Count", fontsize=14)

plt.xlabel("Categories", fontsize=14)
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

In [None]:
# Show the rows whose 'toxic' label is set.
data[data['toxic'] == 1]

In [None]:
from wordcloud import WordCloud

def plot_wordcloud(column_name, width=720, height=520):
    """
    Draw a word cloud for the comments of one label and print its top words.

    The comments whose `column_name` value equals 1 are joined into a single
    text, rendered as a word cloud, saved to ``word_cloud_<column_name>.png``
    and shown. The 20 first entries of the cloud's word frequencies are then
    printed, scaled by 100.

    Parameters:
    column_name (str): Label column to filter on; it must be one of the
        columns at positions 2-7 of the global `data` frame.
    width (int): Width of the generated word cloud image.
    height (int): Height of the generated word cloud image.

    Returns:
    None
    """
    if column_name not in data.columns[2:8]:
        print(f"Column '{column_name}' does not exist in the DataFrame.")
        return

    text = ' '.join(data[data[column_name] == 1]['comment_text'])
    wordcloud_ = WordCloud(width=width, height=height, background_color='black').generate(text)

    plt.figure(figsize=(10, 7))
    plt.imshow(wordcloud_, interpolation='bilinear')
    plt.axis('off')
    plt.title(f'Word Cloud for {column_name} Comments')
    # One file per label: a fixed file name made every call overwrite the
    # image written by the previous label.
    plt.savefig(f"word_cloud_{column_name}.png")
    plt.show()

    word_frequencies = wordcloud_.words_

    # First 20 entries of the frequency table for this label
    # (presumably ordered most frequent first - verify against WordCloud.words_).
    words = pd.DataFrame(word_frequencies, index=[0]).T[:20]
    words.reset_index(inplace=True)
    words.rename(columns={0:'Normalized Frequencies', 'index':'Word'}, inplace=True)
    words['Normalized Frequencies'] = words['Normalized Frequencies'] * 100
    print(f'Top 20 words used in {column_name} comments:')
    print(words.head(20))

In [None]:
# Word cloud and top-word table for every label, in the same order as COLUMNS
# (toxic, severe_toxic, obscene, threat, insult, identity_hate).
for label_name in COLUMNS:
    plot_wordcloud(label_name)

### **Positive and Negative Class Frequency in each label**

In [None]:
# One bar chart per label showing how many comments are negative (0) and positive (1).
fig, ax = plt.subplots(2, 3, figsize=(13, 10))
columns = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
ax = ax.flatten()

for i, col in enumerate(columns):
    # value_counts() orders by frequency, so force the 0-then-1 order that the
    # hard-coded tick labels below assume (a missing class is shown as 0).
    class_counts = data[col].value_counts().reindex([0, 1], fill_value=0)
    class_counts.plot(kind='bar', ax=ax[i], title=col, color=['skyblue', 'salmon'])
    ax[i].set_xticklabels(['0 (Not Toxic)', '1 (Toxic)'], rotation=0)

plt.tight_layout()

plt.show()

# **Text Preprocessing**

In [None]:
# Sequence-length budget: mean plus one standard deviation of the per-comment
# token count (tokens obtained by splitting on single spaces), rounded to an integer.
# NOTE(review): this is measured on comment_text as it is at this point, i.e.
# before clean_text is applied further down - confirm that is intended.
data["document_length"] = data["comment_text"].apply(lambda words: len(words.split(" ")))
max_seq_len = np.round(data["document_length"].mean() + data["document_length"].std()).astype(int)
print(max_seq_len)

In [None]:
# Remove the temporary token-count column again.
data = data.drop('document_length', axis=1)

In [None]:
import nltk
# Fetch the NLTK resources requested by this notebook (stop-word lists,
# the punkt_tab tokenizer data and WordNet).
nltk.download('stopwords')
nltk.download('punkt_tab')
nltk.download('wordnet')

In [None]:
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

In [None]:
# English NLTK stop words followed by project-specific additions: wiki terms,
# punctuation tokens, spelled-out numbers and a few filler words.
stop_words = [
    *stopwords.words('english'),
    'wiki', 'wikipedia', '.', ',', '"', "'", ':', ';', '(', ')', '[', ']', '{', '}', '_',
    'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten', 'may',
    'also', 'across', 'among', 'beside', 'however', 'yet', 'within',
]

### **Clean Text**

In [None]:
def clean_text(text, stopwords_to_remove=None):
    """
    Normalise one comment for tokenisation.

    Steps: lower-case, expand common English contractions, replace every
    non-word character with a space, delete digits, collapse whitespace and
    drop stop words.

    Parameters:
    text (str): Raw comment text.
    stopwords_to_remove (iterable of str, optional): Words to drop from the
        result. Defaults to the notebook-level `stop_words` list.

    Returns:
    str: The cleaned text, words separated by single spaces.
    """
    if stopwords_to_remove is None:
        stopwords_to_remove = stop_words
    # Set membership is O(1); scanning a list for every word of every comment is not.
    if not isinstance(stopwords_to_remove, (set, frozenset)):
        stopwords_to_remove = set(stopwords_to_remove)

    text = text.lower()
    text = re.sub(r"what's", "what is ", text)
    # Must run before the generic 's rule, which would otherwise turn
    # "'scuse" into " cuse" and make this substitution unreachable.
    text = re.sub(r"'scuse", " excuse ", text)
    text = re.sub(r"'s", " ", text)
    text = re.sub(r"'ve", " have ", text)
    # "can't" has to be handled before the generic n't rule.
    text = re.sub(r"can't", "cannot ", text)
    text = re.sub(r"n't", " not ", text)
    text = re.sub(r"i'm", "i am ", text)
    text = re.sub(r"'re", " are ", text)
    text = re.sub(r"'d", " would ", text)
    text = re.sub(r"'ll", " will ", text)
    # Raw strings avoid the invalid-escape warnings of '\W' / '\s+'. After this
    # step only word characters and spaces remain, so no further punctuation or
    # newline handling is needed.
    text = re.sub(r"\W", " ", text)
    text = re.sub(r"\d+", "", text)
    # split() with no argument also collapses repeated whitespace and trims the ends.
    return ' '.join(word for word in text.split() if word not in stopwords_to_remove)

In [None]:
# Replace every comment by its cleaned version (values are cast to str first).
data['comment_text'] = data['comment_text'].astype(str).apply(clean_text)

In [None]:
# Check the updated dataset
print(f"Dataset shape after cleaning: {data.shape}")
print("Sample cleaned data:")
# Five randomly sampled rows; no seed is set, so the rows differ between runs.
display(data.sample(5))

In [None]:
# Model input text. NOTE: despite the name, comment_text has already been
# passed through clean_text at this point.
raw_comments_data = data['comment_text']

### **Train-Val-Test Split**

In [None]:
# Label matrix: one row per comment, one column per label, as a NumPy array.
y_train_tmp = data[['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']].values

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the comments as the test set (shuffled, fixed seed).
X_train_split, X_test, y_train_split, y_test = train_test_split(raw_comments_data, y_train_tmp, test_size=0.2, random_state=42, shuffle=True)

In [None]:
# Carve 10% of the remaining data out as the validation set
# (overall roughly 72% train / 8% validation / 20% test).
X_train, X_val, y_train, y_val = train_test_split(X_train_split, y_train_split, test_size=0.1, random_state=42)

In [None]:
# Convert the text splits to plain Python lists before they are handed to the tokenizer.
X_train = X_train.tolist()
X_val = X_val.tolist()
X_test = X_test.tolist()

In [None]:
# Inspect the test label matrix.
y_test

### **Hyperparameters**

In [None]:
# Embedding dimension (reassigned later from the shape of the loaded GloVe vectors).
embed_size=50
# Vocabulary cap passed to the tokenizer and used when building the embedding matrix.
max_features=20000
# Padded sequence length, taken from the mean + std token count computed earlier.
maxlen=max_seq_len

### **Tokenization and Padding**

In [None]:
import tensorflow
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# Fit the vocabulary on the training text only, so validation and test words
# do not influence the word index.
tokenizer = Tokenizer(num_words=max_features, lower=True)
tokenizer.fit_on_texts(X_train)

# Map every split to integer sequences ...
list_tokenized_train, list_tokenized_val, list_tokenized_test = (
    tokenizer.texts_to_sequences(texts) for texts in (X_train, X_val, X_test)
)

# ... and pad/truncate each of them to the common length `maxlen`.
X_train_padded, X_val_padded, X_test_padded = (
    pad_sequences(sequences, maxlen=maxlen)
    for sequences in (list_tokenized_train, list_tokenized_val, list_tokenized_test)
)

print("Tokenization done")

In [None]:
# Word -> integer index mapping learned by the tokenizer.
word_index = tokenizer.word_index
print(f'Found {len(word_index)} unique tokens.')

In [None]:
# Shape of the padded training matrix.
X_train_padded.shape

Model Training

# **Feature Representation and Vectorization**

### Import and unzip GloVe txt file

In [None]:
!wget http://nlp.stanford.edu/data/glove.6B.zip

In [None]:
!unzip glove*.zip

In [None]:
print('Indexing word vectors.')

# word -> float32 vector, read from the GloVe text file in which every line
# is a word followed by its space-separated coefficients.
embeddings_index={}
# The context manager closes the file even if parsing a line raises.
with open('glove.6B.50d.txt', encoding='utf-8') as f:
    for line in f:
        values = line.strip().split(' ')
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs

print('Found %s word vectors.' % len(embeddings_index))

In [None]:
# Stack all GloVe vectors to get their global mean / standard deviation,
# which are used to initialise rows of words that have no GloVe vector.
all_embs = np.stack(list(embeddings_index.values()))  # Convert to list first
emb_mean, emb_std = all_embs.mean(), all_embs.std()
# Use the dimensionality of the loaded vectors as the embedding size.
embed_size = all_embs.shape[1]
# Kept as the last expression so the cell actually displays the two statistics
# (in the middle of the cell the bare tuple had no effect).
emb_mean, emb_std

### **Embeddings Matrix**

In [None]:
print('preparing embedding matrix...')
words_not_found = []
# One row per token index the tokenizer can emit (index 0 is the padding index).
nb_words = min(max_features, len(word_index) + 1)
# Rows start as draws from a normal distribution matching the GloVe statistics.
# A dedicated seeded generator keeps the rows of out-of-vocabulary words (which
# are never trained because the embedding layer is frozen) identical across runs.
embedding_rng = np.random.default_rng(42)
embedding_matrix = embedding_rng.normal(emb_mean, emb_std, (nb_words, embed_size))
for word, i in word_index.items():
    # Indices at or above the vocabulary cap have no row in the matrix.
    if i >= max_features: continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector
    else:
        # No GloVe vector: the random initialisation is kept for this word.
        words_not_found.append(word)
print('Done.')

# **Tackling Class Imbalance**

### **Computing weights**

In [None]:
import numpy as np
from sklearn.utils.class_weight import compute_class_weight

def calculating_class_weights(y_true):
    """
    Compute 'balanced' class weights separately for every label column.

    Parameters:
    y_true (np.ndarray): 2-D label matrix, one column per label, with values 0 and 1.

    Returns:
    np.ndarray: Array of shape (number of labels, 2); row i holds the weights
        of class 0 and class 1 for label i.
    """
    number_dim = np.shape(y_true)[1]
    weights = np.empty([number_dim, 2])  # Assuming binary classification per class
    # compute_class_weight expects `classes` as an ndarray; recent scikit-learn
    # versions reject a plain list during parameter validation.
    binary_classes = np.array([0, 1])
    for i in range(number_dim):
        # Compute class weights for each class
        weights[i] = compute_class_weight('balanced', classes=binary_classes, y=y_true[:, i])
    return weights

# Assuming y_train is your training labels
class_weights = calculating_class_weights(y_train)
print(class_weights)

### Computing loss function

In [None]:
from tensorflow.keras import backend as K

def get_weighted_loss(weights):
    """
    Build a class-weighted binary cross-entropy loss.

    Parameters:
    weights: Array indexed as weights[:, 0] (weight of class 0) and
        weights[:, 1] (weight of class 1) for every label.

    Returns:
    A loss function taking (y_true, y_pred).
    """
    def weighted_loss(y_true, y_pred):
        # For a 0/1 target exactly one of the two exponents is 1 and the other
        # is 0, so the product selects the weight of the true class.
        negative_scale = weights[:, 0] ** (1 - y_true)
        positive_scale = weights[:, 1] ** (y_true)
        scaled_bce = negative_scale * positive_scale * K.binary_crossentropy(y_true, y_pred)
        return K.mean(scaled_bce, axis=-1)
    return weighted_loss

# **Bi-LSTM Model Training**

In [None]:
from keras.layers import Dense, Input, LSTM, Embedding, Dropout, Conv1D, BatchNormalization
from keras.layers import Bidirectional, GlobalMaxPooling1D, GlobalAveragePooling1D, concatenate, SpatialDropout1D
from keras.models import Model
from keras.optimizers import Adam
from tensorflow.keras.models import Sequential  # Import Sequential from Keras

In [None]:
# Bidirectional LSTM over frozen GloVe embeddings, max-pooled over time and
# followed by a small dense head with one sigmoid output per label.
model_LSTM = Sequential([
    Input(shape=(maxlen,)),  # This line is optional, as the input shape is inferred in Sequential.
    # input_dim is taken from the embedding matrix itself so that the layer and
    # its pretrained weights always agree, even when the vocabulary is smaller
    # than max_features.
    Embedding(embedding_matrix.shape[0], embed_size, weights=[embedding_matrix], trainable=False),
    Bidirectional(LSTM(50, return_sequences=True, dropout=0.1, recurrent_dropout=0.1, kernel_initializer='he_normal')),
    GlobalMaxPooling1D(),
    Dense(50, activation="relu"),
    Dropout(0.1),
    Dense(6, activation="sigmoid")  # Sigmoid gives independent probabilities for each class
], name='model_LSTM')  # name set at construction; assigning model.name afterwards is not supported by every Keras version

In [None]:
# Compile the model
# Loss: class-weighted binary cross-entropy built from class_weights; optimizer: Adam with learning rate 1e-4.
model_LSTM.compile(loss=get_weighted_loss(class_weights), optimizer=Adam(learning_rate=0.0001), metrics=['accuracy'])

# Display the model summary
model_LSTM.summary()

In [None]:
# Training schedule shared by both models below.
batch_size=128
epochs=20

### **Model Checkpoint and Early Stopping**

In [None]:
from keras.callbacks import Callback
from sklearn.metrics import roc_auc_score

class RocAucEvaluation(Callback):
    """
    Keras callback that prints the ROC-AUC on a validation set during training.

    Parameters:
    validation_data (tuple): (inputs, labels) used to compute the score.
    interval (int): The score is computed on epochs whose zero-based index is
        a multiple of this value.
    """

    def __init__(self, validation_data=(), interval=1):
        # super(Callback, self) would skip Callback.__init__ and leave the
        # base class uninitialised; initialise it properly instead.
        super().__init__()

        self.interval = interval
        self.X_val_padded, self.y_val = validation_data

    def on_epoch_end(self, epoch, logs=None):
        # `logs` is unused here; None replaces the shared mutable default {}.
        if epoch % self.interval == 0:
            y_pred = self.model.predict(self.X_val_padded, verbose=0)
            score = roc_auc_score(self.y_val, y_pred)
            print("\n ROC-AUC - epoch: {:d} - score: {:.6f}".format(epoch+1, score))

In [None]:
from keras.callbacks import EarlyStopping,ModelCheckpoint

# Weights file written by the checkpoint callback.
filepath="best.weights.h5"
# Keep only the weights of the epoch with the lowest validation loss.
mcp = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True,  save_weights_only=True, mode='min')
# Stop when validation accuracy has not improved for 4 epochs.
# NOTE(review): the checkpoint tracks val_loss while early stopping tracks
# val_accuracy - confirm that using two different criteria is intentional.
earlystop = EarlyStopping(monitor="val_accuracy", mode="max", patience=4)
# Print the validation ROC-AUC after every epoch.
RocAuc_val = RocAucEvaluation(validation_data=(X_val_padded, y_val), interval = 1)

callbacks_list = [RocAuc_val, mcp, earlystop]

In [None]:
# Train the Bi-LSTM; the returned History object is used for the learning curves below.
history_model_LSTM = model_LSTM.fit(X_train_padded,
                                    y_train,
                                    batch_size=batch_size,
                                    epochs=epochs,
                                    validation_data=(X_val_padded, y_val),
                                    callbacks = callbacks_list,
                                    verbose=1)

# **Bi-LSTM Model Evaluation**

In [None]:
# Restore the checkpointed weights (lowest validation loss) before evaluating,
# so this score describes the same model as the per-class metrics, report and
# confusion matrices below, which are also computed after load_weights(filepath).
model_LSTM.load_weights(filepath)
test_loss, test_accuracy = model_LSTM.evaluate(X_test_padded, y_test, verbose=1)
print(f"Test Accuracy: {test_accuracy:.4f}")

In [None]:
from sklearn.metrics import accuracy_score

list_classes = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

# Use the checkpointed weights for all test-set predictions.
model_LSTM.load_weights(filepath)
# Make predictions on the evaluation data
y_pred = model_LSTM.predict(X_test_padded, verbose=1)

# For each class, compute accuracy
for i, class_name in enumerate(list_classes):  # Assuming list_classes contains the class names
    y_true_class = y_test[:, i]  # True labels for the class
    # Threshold the probabilities at 0.5. A bare .astype(int) truncates towards
    # zero and therefore turns practically every probability into 0.
    y_pred_class = (y_pred[:, i] > 0.5).astype(int)
    class_accuracy = accuracy_score(y_true_class, y_pred_class)
    print(f"Accuracy for {class_name}: {class_accuracy:.4f}")

###

### **Classification Report**

In [None]:
from sklearn.metrics import classification_report

# Per-label precision / recall / F1 on the test set, with probabilities rounded to 0/1.
cr = classification_report(y_test, np.round(y_pred), target_names = list_classes)
print(cr)

### **Confusion Matrix**

In [None]:
from sklearn.metrics import multilabel_confusion_matrix

#you can fine tune the threshold for increasing recall or precision
# One decision threshold per label, in list_classes order. The array uses the
# dtype of the predictions so each comparison matches comparing that column
# against the plain Python float.
class_thresholds = np.asarray([0.4, 0.5, 0.3, 0.5, 0.5, 0.5], dtype=y_pred.dtype)

# Broadcasting applies every threshold to its own column in one step.
y_pred_colTot = np.where(y_pred > class_thresholds, 1, 0)

# One 2x2 confusion matrix per label.
mcm = multilabel_confusion_matrix(y_test, y_pred_colTot, sample_weight=None, samplewise=False)

fig = plt.figure(figsize = (12,10))
for position, (class_name, class_matrix) in enumerate(zip(list_classes, mcm), start=1):
    plt.subplot(2, 3, position)
    # Alternate the colour map between neighbouring panels.
    cmap = "Reds" if position % 2 == 0 else "Blues"
    sns.set(font_scale=0.8)
    plt.title('{}'.format(class_name), fontsize = 15)
    sns.heatmap(class_matrix, cmap=cmap, square=True, fmt='.0f', cbar=False, annot=True)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [None]:
# Names of the metrics recorded during training.
history_model_LSTM.history.keys()

### **Training-Validation Accuracy**

In [None]:
import matplotlib.pyplot as plt

# Plot training and validation accuracy over epochs
# (the x axis is the position in the history lists, i.e. the zero-based epoch)
plt.plot(history_model_LSTM.history['accuracy'], label='Training Accuracy')  # training accuracy
plt.plot(history_model_LSTM.history['val_accuracy'], label='Validation Accuracy')  # validation accuracy
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

### **Training-Validation Loss**

In [None]:
import matplotlib.pyplot as plt

# Plot training and validation loss over epochs
plt.plot(history_model_LSTM.history['loss'], label='Training Loss')  # training loss
plt.plot(history_model_LSTM.history['val_loss'], label='Validation Loss')  # validation loss
plt.xlabel('Epochs')
plt.ylabel('Loss')  # this figure shows the loss curves, not accuracy
plt.legend()
plt.show()

# **Bi-GRU Model Training**

In [None]:
from tensorflow.keras.layers import GRU

In [None]:
from keras.callbacks import EarlyStopping,ModelCheckpoint

# Separate weights file and callback instances for the GRU model.
filepath1="best_gru.weights.h5"
# Keep only the weights of the epoch with the lowest validation loss.
mcp1 = ModelCheckpoint(filepath1, monitor='val_loss', verbose=1, save_best_only=True,  save_weights_only=True, mode='min')
# Stop when validation accuracy has not improved for 4 epochs.
# NOTE(review): checkpointing follows val_loss, early stopping follows
# val_accuracy - confirm that using two different criteria is intentional.
earlystop1 = EarlyStopping(monitor="val_accuracy", mode="max", patience=4)
# Print the validation ROC-AUC after every epoch.
RocAuc_val1 = RocAucEvaluation(validation_data=(X_val_padded, y_val), interval = 1)

callbacks_list1 = [RocAuc_val1, mcp1, earlystop1]

In [None]:
# Bidirectional GRU over frozen GloVe embeddings, max-pooled over time and
# followed by a small dense head with one sigmoid output per label.
model_GRU = Sequential([
    Input(shape=(maxlen,)),
    # input_dim is taken from the embedding matrix itself so that the layer and
    # its pretrained weights always agree, even when the vocabulary is smaller
    # than max_features.
    Embedding(embedding_matrix.shape[0], embed_size, weights=[embedding_matrix], trainable=False),
    Bidirectional(GRU(64, return_sequences=True, dropout=0.1, recurrent_dropout=0.1, kernel_initializer='he_normal')),
    GlobalMaxPooling1D(),
    Dense(50, activation="relu"),
    Dropout(0.1),
    Dense(6, activation="sigmoid")
])

In [None]:
# Same loss (class-weighted binary cross-entropy), optimizer and metric as the Bi-LSTM model.
model_GRU.compile(loss=get_weighted_loss(class_weights), optimizer=Adam(learning_rate=0.0001), metrics=['accuracy'])
model_GRU.summary()

In [None]:
# Train the Bi-GRU; the returned History object is used for the learning curves below.
history_model_GRU = model_GRU.fit(X_train_padded,
                                    y_train,
                                    batch_size=batch_size,
                                    epochs=epochs,
                                    validation_data=(X_val_padded, y_val),
                                    callbacks = callbacks_list1,
                                    verbose=1)

# **Bi-GRU Model Evaluation**

In [None]:
# Restore the checkpointed weights (lowest validation loss) before evaluating,
# so this score describes the same model as the per-class metrics, report and
# confusion matrices below, which are also computed after load_weights(filepath1).
model_GRU.load_weights(filepath1)
test_loss_gru, test_accuracy_gru = model_GRU.evaluate(X_test_padded, y_test, verbose=1)
print(f"Test Accuracy: {test_accuracy_gru:.4f}")

In [None]:
from sklearn.metrics import accuracy_score

list_classes = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

# Use the checkpointed weights for all test-set predictions.
model_GRU.load_weights(filepath1)
# Make predictions on the evaluation data
y_pred1 = model_GRU.predict(X_test_padded, verbose=1)

# For each class, compute accuracy
for i, class_name in enumerate(list_classes):  # Assuming list_classes contains the class names
    y_true_class = y_test[:, i]  # True labels for the class
    # Threshold the probabilities at 0.5. A bare .astype(int) truncates towards
    # zero and therefore turns practically every probability into 0.
    y_pred_class = (y_pred1[:, i] > 0.5).astype(int)
    class_accuracy = accuracy_score(y_true_class, y_pred_class)
    print(f"Accuracy for {class_name}: {class_accuracy:.4f}")

### **Training-Validation Accuracy**

In [None]:
import matplotlib.pyplot as plt

# Plot training and validation accuracy over epochs
# (the x axis is the position in the history lists, i.e. the zero-based epoch)
plt.plot(history_model_GRU.history['accuracy'], label='Training Accuracy')  # training accuracy
plt.plot(history_model_GRU.history['val_accuracy'], label='Validation Accuracy')  # validation accuracy
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

### **Training-Validation Loss**

In [None]:
# Plot training and validation loss over epochs
plt.plot(history_model_GRU.history['loss'], label='Training Loss')  # training loss
plt.plot(history_model_GRU.history['val_loss'], label='Validation Loss')  # validation loss
plt.xlabel('Epochs')
plt.ylabel('Loss')  # this figure shows the loss curves, not accuracy
plt.legend()
plt.show()

### **Classification Report**

In [None]:
from sklearn.metrics import classification_report

# Per-label precision / recall / F1 on the test set, with probabilities rounded to 0/1.
cr = classification_report(y_test, np.round(y_pred1), target_names = list_classes)
print(cr)

### **Confusion Matrix**

In [None]:
from sklearn.metrics import multilabel_confusion_matrix

#you can fine tune the threshold for increasing recall or precision
# One decision threshold per label, in list_classes order. The array uses the
# dtype of the predictions so each comparison matches comparing that column
# against the plain Python float.
class_thresholds = np.asarray([0.5, 0.5, 0.5, 0.4, 0.5, 0.5], dtype=y_pred1.dtype)

# Broadcasting applies every threshold to its own column in one step.
y_pred_colTot = np.where(y_pred1 > class_thresholds, 1, 0)

# One 2x2 confusion matrix per label.
mcm = multilabel_confusion_matrix(y_test, y_pred_colTot, sample_weight=None, samplewise=False)

fig = plt.figure(figsize = (12,10))
for position, (class_name, class_matrix) in enumerate(zip(list_classes, mcm), start=1):
    plt.subplot(2, 3, position)
    # Alternate the colour map between neighbouring panels.
    cmap = "Reds" if position % 2 == 0 else "Blues"
    sns.set(font_scale=0.8)
    plt.title('{}'.format(class_name), fontsize = 15)
    sns.heatmap(class_matrix, cmap=cmap, square=True, fmt='.0f', cbar=False, annot=True)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')