In [2]:
#Bi-directional Gated Recurrent Unit Neural Network with token Level Attention Mechanism
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Bidirectional, GRU, Dense, Attention, Concatenate
from tensorflow.keras.layers import GlobalMaxPooling1D

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load your dataset
df = pd.read_csv('/content/final_dataset - Sheet1.csv')

# Split the data into training and testing sets
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Tokenize the comments
max_words = 10000
tokenizer = Tokenizer(num_words=max_words, oov_token='<OOV>')
tokenizer.fit_on_texts(train_df['Comment'])

# Convert comments to sequences
train_sequences = tokenizer.texts_to_sequences(train_df['Comment'])
test_sequences = tokenizer.texts_to_sequences(test_df['Comment'])

# Pad sequences
max_sequence_length = max(len(seq) for seq in train_sequences)
X_train = pad_sequences(train_sequences, maxlen=max_sequence_length)
X_test = pad_sequences(test_sequences, maxlen=max_sequence_length)

# Prepare target data
y_train = np.array(train_df['Stance'])
y_test = np.array(test_df['Stance'])

# Prepare target-specific information
target_info_train = np.array(train_df['Target'])
target_info_test = np.array(test_df['Target'])

# Build ATGRU model
embedding_dim = 100

# Input layers
input_comment = Input(shape=(max_sequence_length,), name='input_comment')
input_target = Input(shape=(1,), name='input_target')

# Embedding layer for comments
embedding_layer = Embedding(input_dim=max_words, output_dim=embedding_dim)(input_comment)

# Bidirectional GRU layer
gru_layer = Bidirectional(GRU(64, return_sequences=True))(embedding_layer)

# Attention layer
# attention_layer = Attention()([gru_layer, tf.tile(tf.expand_dims(input_target, axis=1), multiples=[1, max_sequence_length, 1])])
attention_layer = Attention()([gru_layer, tf.tile(tf.expand_dims(input_target, axis=-1), multiples=[1, 1, 128])])


# Concatenate attention output with target information
merged_layer = Concatenate()([gru_layer, attention_layer])

# GlobalMaxPooling1D layer
global_max_pooling_layer = GlobalMaxPooling1D()(merged_layer)

# Dense layers
dense_layer = Dense(32, activation='relu')(global_max_pooling_layer)
output_layer = Dense(1, activation='sigmoid', name='output')(dense_layer)

# Build and compile the model
model = Model(inputs=[input_comment, input_target], outputs=output_layer)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit([X_train, target_info_train], y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate the model on the test set
y_pred = model.predict([X_test, target_info_test])
y_pred_binary = (y_pred > 0.5).astype(int)

# Overall metrics
test_loss, test_accuracy = model.evaluate([X_test, target_info_test], y_test)
print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy * 100:.2f}%')

# Calculate metrics for each target class
unique_targets = df['Target'].unique()

for target_class in unique_targets:
    indices = test_df[test_df['Target'] == target_class].index
    indices = indices[indices < len(y_test)]  # Ensure indices are within bounds

    y_test_target = y_test[indices]
    y_pred_target = y_pred_binary[indices]

    accuracy = accuracy_score(y_test_target, y_pred_target)
    precision = precision_score(y_test_target, y_pred_target)
    recall = recall_score(y_test_target, y_pred_target)
    f1 = f1_score(y_test_target, y_pred_target)

    print(f'Target {target_class} Metrics:')
    print(f'  Accuracy: {accuracy * 100:.2f}%')
    print(f'  Precision: {precision:.4f}')
    print(f'  Recall: {recall:.4f}')
    print(f'  F1 Score: {f1:.4f}')


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 0.3377, Test Accuracy: 91.85%
Target 1 Metrics:
  Accuracy: 91.57%
  Precision: 0.8571
  Recall: 0.8889
  F1 Score: 0.8727
Target 0 Metrics:
  Accuracy: 50.00%
  Precision: 0.0000
  Recall: 0.0000
  F1 Score: 0.0000
Target 5 Metrics:
  Accuracy: 90.00%
  Precision: 1.0000
  Recall: 0.7500
  F1 Score: 0.8571
Target 3 Metrics:
  Accuracy: 91.67%
  Precision: 0.9091
  Recall: 0.7895
  F1 Score: 0.8451
Target 4 Metrics:
  Accuracy: 100.00%
  Precision: 0.0000
  Recall: 0.0000
  F1 Score: 0.0000
Target 2 Metrics:
  Accuracy: 66.67%
  Precision: 0.5000
  Recall: 1.0000
  F1 Score: 0.6667


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
