# b)  Deep learning-based approaches that do not employ transformer architectures

The final model is a bidirectional GRU with GloVe word embeddings.

Link to the model: https://drive.google.com/file/d/1WC6CifsnJe2Xs8NFO86beBxayjRTLt-a/view?usp=sharing

In [None]:
import pandas as pd
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import numpy as np
from keras.models import Model
from keras.layers import Input, GRU, Embedding, Dense, concatenate, Dropout, Bidirectional, Attention
import tensorflow as tf

### Read Data ###

In [None]:
# Load the training and development splits from CSV files in the working directory
training_data, dev_data = (
    pd.read_csv(csv_name) for csv_name in ('train.csv', 'dev.csv')
)

### Data Preprocess ###
1. Fit the tokenizer on the training claims and evidence
2. Convert the text to a sequence of integers
3. Pad all sequences to make them the same length

In [None]:
# Fixed sequence length, set to the max length of all sentences in training data
max_len = 307

# Build the vocabulary from both text columns of the training set
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts([*training_data['Claim'], *training_data['Evidence']])


def _encode_and_pad(texts):
    """Convert texts to integer sequences and pad them all to max_len."""
    return pad_sequences(tokenizer.texts_to_sequences(texts), maxlen=max_len)


# Encoded, equal-length model inputs for the training and dev splits
train_claim_seq = _encode_and_pad(training_data['Claim'])
train_evidence_seq = _encode_and_pad(training_data['Evidence'])
dev_claim_seq = _encode_and_pad(dev_data['Claim'])
dev_evidence_seq = _encode_and_pad(dev_data['Evidence'])

# Target labels as arrays
train_labels = training_data['label'].values
dev_labels = dev_data['label'].values

### Word Embedding ###
1. Upload the GloVe embedding file: https://drive.google.com/file/d/1MZnzxd-XPZd7kGS4iE1KmLYQWUGHfpOP/view?usp=sharing
2. Create the embedding matrix from the words in the training data

In [None]:
def load_glove_embeddings(path):
    """Load GloVe word vectors from a whitespace-separated text file.

    Each non-empty line is expected to hold a word followed by the
    components of its vector.

    Args:
        path: Location of the GloVe text file to read.

    Returns:
        A dict mapping each word to its vector as a float32 NumPy array.
    """
    embeddings_index = {}
    # Read from the ``path`` argument, not a module-level variable, so the
    # function works for whichever file the caller passes in.
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            values = line.split()
            if not values:
                # Skip blank lines instead of failing on values[0].
                continue
            word, *components = values
            embeddings_index[word] = np.asarray(components, dtype='float32')
    return embeddings_index

# GloVe embeddings file, resolved relative to the working directory
glove_path = 'glove.6B.100d.txt'

# Word -> vector lookup built from that file
embeddings_index = load_glove_embeddings(path=glove_path)

In [None]:
# Embedding width; must match the vector length in the GloVe file
embedding_dim = 100

# One row per tokenizer index, plus row 0 which no word is assigned to here
vocab_size = len(tokenizer.word_index) + 1

# Start from an all-zero matrix and copy in the GloVe vector of every word
# that has one; words missing from GloVe keep their zero row.
# NOTE(review): the tokenizer is created with num_words=5000, so rows for
# higher indices are presumably never looked up - confirm before resizing.
embedding_matrix = np.zeros((vocab_size, embedding_dim))
for token, row in tokenizer.word_index.items():
    if row >= vocab_size:
        continue
    glove_vector = embeddings_index.get(token)
    if glove_vector is None:
        continue
    embedding_matrix[row] = glove_vector

### Build Bi-GRU Model  ###

The process and accuracy of each step of building the model
1. One layer of GRU without word embedding: 77.17%
2. Two layers of Bidirectional GRU and an Attention layer without word embedding: 79.26%
3. Two layers of Bidirectional GRU and an Attention layer with GloVe word embedding: 81.20%
4. Hyperparameter tuning based on the previous model: 83.13%

In [None]:
# Input layers: one padded integer sequence per claim and per evidence text
claim_input = Input(shape=(max_len,), dtype='int32')
evidence_input = Input(shape=(max_len,), dtype='int32')


def _frozen_glove_embedding():
    """Return a new non-trainable Embedding layer initialised from embedding_matrix."""
    return Embedding(
        input_dim=vocab_size,
        output_dim=embedding_dim,
        embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix),
        input_length=max_len,
        trainable=False,
    )


# Embedding layers: claim and evidence each get their own frozen layer
claim_emb = _frozen_glove_embedding()(claim_input)
evidence_emb = _frozen_glove_embedding()(evidence_input)

# Separate Bidirectional GRU layers of claims and evidence
# Second Bidirectional GRU layer is to condense information at the output
claim_gru = Bidirectional(GRU(units=128, return_sequences=True))(claim_emb)
claim_gru = Bidirectional(GRU(units=128))(claim_gru)

evidence_gru = Bidirectional(GRU(units=128, return_sequences=True))(evidence_emb)
evidence_gru = Bidirectional(GRU(units=128))(evidence_gru)

# Attention layer with the claim encoding as query and the evidence encoding as value
# NOTE(review): both inputs are outputs of a GRU without return_sequences,
# i.e. presumably 2-D (batch, features), whereas the Keras Attention layer
# documents 3-D (batch, steps, dim) inputs. Confirm this attends over what is
# intended; it is left unchanged here because altering it would change the
# architecture and invalidate the saved weights and reported accuracy.
attention_out = Attention()([claim_gru, evidence_gru])

# Concatenation layer
combined_features = concatenate([claim_gru, evidence_gru, attention_out])

# Dense layer
dense_layer = Dense(128, activation='relu')(combined_features)

# Dropout layer to prevent overfitting
dropout_layer = Dropout(0.7)(dense_layer)

# Output layer: a single sigmoid unit for binary classification
output = Dense(1, activation='sigmoid')(dropout_layer)

# Build and compile the model
model = Model(inputs=[claim_input, evidence_input], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Print the architecture of model.
# summary() prints the table itself and returns None, so it is not wrapped in
# print(), which would emit a stray "None" line after the table.
model.summary()


Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_5 (InputLayer)        [(None, 307)]                0         []                            
                                                                                                  
 input_6 (InputLayer)        [(None, 307)]                0         []                            
                                                                                                  
 embedding_4 (Embedding)     (None, 307, 100)             4088400   ['input_5[0][0]']             
                                                                                                  
 embedding_5 (Embedding)     (None, 307, 100)             4088400   ['input_6[0][0]']             
                                                                                            

### Train the model ###

In [None]:
# Train for 5 epochs in batches of 32, holding out 20% of the training data
# for validation; the returned History object is kept for later inspection.
history = model.fit(
    x=[train_claim_seq, train_evidence_seq],
    y=train_labels,
    batch_size=32,
    epochs=5,
    validation_split=0.2,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Score the trained model on the dev split; evaluate() yields the loss
# followed by the compiled metric (accuracy).
loss, accuracy = model.evaluate(
    x=[dev_claim_seq, dev_evidence_seq],
    y=dev_labels,
)
accuracy_percent = accuracy * 100
print("Accuracy: {:.2f}%".format(accuracy_percent))

Accuracy: 83.13%


### Evaluation ###

The details of the evaluation can be seen at https://colab.research.google.com/drive/1QPrZZIAHhJSoKdOt45ULcuHIytDHC5Vp?usp=sharing