In [1]:
!pip install keras-tuner tensorflow-addons

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tensorflow-addons
  Downloading tensorflow_addons-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (611 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m611.8/611.8 kB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Collecting typeguard<3.0.0,>=2.7 (from tensorflow-addons)
  Downloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Installing collected packages: kt-legacy, typeguard, tensorflow-addons, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5 tensorflow-addons-0.23.0 typeguard-2.13.3


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import nltk
import numpy as np
from pprint import pprint
import re
import os
import requests
import zipfile
import json
import glob
from urllib import request
from tqdm import tqdm
import itertools
from functools import reduce

import keras_tuner as kt
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Concatenate, Dense, Dropout, GlobalAveragePooling1D, Bidirectional, Masking

nltk.download('punkt')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [4]:
path_to_glove = './drive/MyDrive/glove'

In [5]:
# Load dataset
train_data = pd.read_csv('train.csv')
dev_data = pd.read_csv('dev.csv') # dev_data but for now naming it to trial_data
trial_data = pd.read_csv('test.csv')
text = train_data['Evidence'].iloc[78]

In [6]:
# from sklearn.model_selection import train_test_split

# # Split the data into training and validation sets (80/20 split)
# train_data, dev_data = train_test_split(train_data, test_size=0.2, random_state=42)

# # Display the shape of the training and validation data
# train_data.shape, dev_data.shape,trial_data.shape

In [7]:
# analyze the claim and evidence text non-alpha and non-numeric to understand what kind of preprocessing is needed
claim_words = [word for sentence in train_data['Claim'] for word in nltk.word_tokenize(sentence) if not word.isalpha() and not word.isnumeric()]
evidence_words = [word for sentence in train_data['Evidence'] for word in nltk.word_tokenize(sentence) if not word.isalpha() and not word.isnumeric()]

claim_words_occurences = dict(zip(*np.unique(claim_words, return_counts=True)))
evidence_words_occurences = dict(zip(*np.unique(evidence_words, return_counts=True)))

In [8]:
REPLACE_BY_SPACE_RE = re.compile(r'[/(){}\[\]\|@`\']')
REMOVE_BEGINNING_EVIDENCES_RE = re.compile(r'\b[0-9]{1,}')
REMOVE_REF_EVIDENCE_RE = re.compile(r'\[REF|ref\]?\.?')
SPLIT_COMPOUND_RE = re.compile(r'\w+(-)\w+')
GOOD_SYMBOLS_RE = re.compile(r'[^0-9a-z \.;]')

def lower(text: str) -> str:
    """
    Transforms given text to lower case.
    Example:
    Input: 'I really like New York city'
    Output: 'i really like new your city'
    """
    return text.lower()

def remove_beginning_evidence(text: str) -> str:
    """
    Removes the beginning evidence from the text.
    Example:
    Input: '1. I really like New York city'
    Output: 'I really like New York city'
    """
    return REMOVE_BEGINNING_EVIDENCES_RE.sub('', text)

def split_compound_words(text: str) -> str:
    """
    Splits compound words in the text.
    Example:
    Input: 'I really like Winston-Salem city'
    Output: 'I really like Winston Salem city'
    """
    return SPLIT_COMPOUND_RE.sub(' ', text)

def replace_special_characters(text: str) -> str:
    """
    Replaces special characters, such as paranthesis,
    with spacing character
    """
    return REPLACE_BY_SPACE_RE.sub(' ', text)

def remove_reference_markers(text: str) -> str:
    """
    Remove [REF] or [REF at the at of evidence texts
    """
    if isinstance(text, str):
        return REMOVE_REF_EVIDENCE_RE.sub('', text)
    else:
        return ""

def filter_out_uncommon_symbols(text: str) -> str:
    """
    Removes any special character that is not in the
    good symbols list (check regular expression)
    """
    return GOOD_SYMBOLS_RE.sub('', text)

def strip_text(text: str) -> str:
    """
    Removes any left or right spacing (including carriage return) from text.
    Example:
    Input: '  This assignment is cool\n'
    Output: 'This assignment is cool'
    """
    return text.strip()

PREPROCESSING_PIPELINE_CLAIM = [
                          lower,
                          replace_special_characters,
                          split_compound_words,
                          filter_out_uncommon_symbols,
                          strip_text
                          ]

PREPROCESSING_PIPELINE_EVIDENCE = [
                          remove_reference_markers,
                          remove_beginning_evidence,
                          lower,
                          replace_special_characters,
                          split_compound_words,
                          filter_out_uncommon_symbols,
                          strip_text
                          ]


def text_prepare(text, filter_methods):
    """
    Applies a list of pre-processing functions in sequence (reduce).
    Note that the order is important here!
    """
    return reduce(lambda txt, f: f(txt), filter_methods, text)

print('Pre-processing text...')

print()
print('[Debug] Before:\n{}'.format(train_data[['Claim', 'Evidence']][:3]))
print()

# Replace each sentence with its pre-processed version
train_data['Claim'] = train_data['Claim'].apply(lambda txt: text_prepare(txt, PREPROCESSING_PIPELINE_CLAIM))
train_data['Evidence'] = train_data['Evidence'].apply(lambda txt: text_prepare(txt, PREPROCESSING_PIPELINE_EVIDENCE))

dev_data['Claim'] = dev_data['Claim'].apply(lambda txt: text_prepare(txt, PREPROCESSING_PIPELINE_CLAIM))
dev_data['Evidence'] = dev_data['Evidence'].apply(lambda txt: text_prepare(txt, PREPROCESSING_PIPELINE_EVIDENCE))

trial_data['Claim'] = trial_data['Claim'].apply(lambda txt: text_prepare(txt, PREPROCESSING_PIPELINE_CLAIM))
trial_data['Evidence'] = trial_data['Evidence'].apply(lambda txt: text_prepare(txt, PREPROCESSING_PIPELINE_EVIDENCE))

print('[Debug] After:\n{}'.format(train_data[['Claim', 'Evidence']][:3]))
print()

print("Pre-processing completed!")

Pre-processing text...

[Debug] Before:
                                         Claim  \
0  We should legalize the growing of coca leaf   
1       We should ban trans fats usage in food   
2              We should legalize prostitution   

                                            Evidence  
0  Robert W. Sweet, a federal judge, strongly agr...  
1  The net increase in LDL/HDL ratio with trans f...  
2  Pertaining to health, safety and services, the...  

[Debug] After:
                                         Claim  \
0  we should legalize the growing of coca leaf   
1       we should ban trans fats usage in food   
2              we should legalize prostitution   

                                            Evidence  
0  robert w. sweet a federal judge strongly agree...  
1  the net increase in ldl hdl ratio with trans f...  
2  pertaining to health safety and services the r...  

Pre-processing completed!


In [9]:
class NotAdaptedError(Exception):
    pass


class TextVectorizer:
    def __init__(
        self,
        glove_url="http://nlp.stanford.edu/data/glove.6B.zip",
        embedding_dim=100,
        embedding_folder="glove"
    ):
        """
        This class parses the GloVe embeddings, the input documents are expected
        to be in the form of a list of lists.
        [["word1", "word2", ...], ["word1", "word2", ...], ...]

        Parameters
        ----------
        glove_url : The url of the GloVe embeddings.
        embedding_dim : The dimension of the embeddings (pick one of 50, 100, 200, 300).
        embedding_folder : folder where the embedding will be downloaded
        """
        self.embedding_dim = embedding_dim
        self.download_glove_if_needed(
            glove_url=glove_url, embedding_folder=embedding_folder
        )

        # create the embeddings vocabulary
        self.vocabulary = self.parse_glove(embedding_folder)

    def download_glove_if_needed(self, glove_url, embedding_folder):
        """
        Downloads the glove embeddings from the internet

        Parameters
        ----------
        glove_url : The url of the GloVe embeddings.
        embedding_folder: folder where the embedding will be downloaded
        """
        # create embedding folder if it does not exist
        if not os.path.exists(embedding_folder):
            os.makedirs(embedding_folder)

        # extract the embedding if it is not extracted
        if not glob.glob(
            os.path.join(embedding_folder, "**/glove*.txt"), recursive=True
        ):

            # download the embedding if it does not exist
            embedding_zip = os.path.join(embedding_folder, glove_url.split("/")[-1])
            print(embedding_zip)
            if not os.path.exists(embedding_zip):
                print("Downloading the GloVe embeddings...")
                request.urlretrieve(glove_url, embedding_zip)
                print("Successful download!")

            # extract the embedding
            print("Extracting the embeddings...")
            with zipfile.ZipFile(embedding_zip, "r") as zip_ref:
                zip_ref.extractall(embedding_folder)
                print("Successfully extracted the embeddings!")
            os.remove(embedding_zip)

    def parse_glove(self, embedding_folder):
        """
        Parses the GloVe embeddings from their files, filling the vocabulary.

        Parameters
        ----------
        embedding_folder : folder where the embedding files are stored

        Returns
        -------
        dictionary representing the vocabulary from the embeddings
        """
        print("Creating glove vocabulary...")
        vocabulary = {"<pad>": np.zeros(self.embedding_dim)}
        embedding_file = os.path.join(
            embedding_folder, "glove.6B." + str(self.embedding_dim) + "d.txt"
        )
        print(embedding_file)
        with open(embedding_file, encoding="utf8") as f:
            for line in f:
                word, coefs = line.split(maxsplit=1)
                coefs = np.fromstring(coefs, "f", sep=" ")
                vocabulary[word] = coefs
        return vocabulary
    def adapt(self, dataset, columns):
        """
        Computes the OOV words for a single data split, and adds them to the vocabulary and recreate the dictionary of index encodings.
        Then build the embedding matrix.

        Parameters
        ----------
        dataset : The data split (might be training set, validation set, or test set).
        columns : The columns to be adapted.

        Returns
        ----------
        The embedding matrix of shape (vocabulary_size, embedding_dim)
        """
        # create a set containing words from the documents in a given data split
        words = {word for column in columns for sentence in dataset[column] for word in nltk.word_tokenize(sentence)}
        oov_words = words - self.vocabulary.keys()

        # add the OOV words to the vocabulary giving them a random encoding
        for word in oov_words:
            self.vocabulary[word] = np.random.uniform(-1, 1, size=self.embedding_dim)

        # create the dictionary of index encodings for the words in the embedding vocabulary (idx 0 is reserved for padding)
        self.word_to_idx = {word: i for i, word in enumerate(self.vocabulary.keys())}
        self.idx_to_word = {i: word for i, word in enumerate(self.vocabulary.keys())}

        # the embedding matrix shape will be (vocabulary_size, embedding_dim)
        self.embedding_matrix = np.array(list(self.vocabulary.values()))
        print(f"Generated embeddings for {len(oov_words)} OOV words.")

    def transform(self, dataset, columns):
        """
        Transform the data into the input structure for the training. This method should be used always after the adapt method.

        Parameters
        ----------
        dataset : The data split (might be training set, validation set, or test set).
        columns : The columns to be transformed.

        Returns
        -------
        Pair of docuemnts into idx sequences
        """
        X_claim, X_evidence = [], []
        for _, row in tqdm(dataset.iterrows(), total=len(dataset), desc="Converting data into idx sequences..."):
            X_claim.append(self._transform_document_to_encoding(row["Claim"]))
            X_evidence.append(self._transform_document_to_encoding(row["Evidence"]))
        return X_claim, X_evidence


    def _transform_document_to_encoding(self, document):
        """
        Transforms a single document to a list of word encodings.

        Parameters
        ----------
        document : The document to be transformed.

        Returns
        -------
        List of word encodings
        """
        try:
            return [self.word_to_idx[word] for word in nltk.word_tokenize(document)]
        except KeyError:
            raise NotAdaptedError(
                f"The whole document is not in the vocabulary. Please adapt the vocabulary first."
            )

In [10]:
def encode_input(dataset, columns, vectorizer, is_training=False, max_tokens=None):
    """
    Convert the text into a given dataset split into idx sequeces.

    Parameters
    ----------
    dataset : The data split (might be training set, validation set, or test set).
    columns : The columns to be converted.
    vectorizer : The vectorizer to be used.
    is_training : Whether input texts are from the training split or not
    max_tokens : The max token sequence previously computed with

    Return
    ---------
    X_claim: a numpy array of shape (num_documents, max_tokens) representing claims
    X_evidence: a numpy array of shape (num_documents, max_tokens) representing evidences
    max_seq_length: the max token sequence computed with training texts.
    """

    # compute embeddings for terms in the dataset that are out of vocabulary and add them
    vectorizer.adapt(dataset, columns)

    # use the vocabulary of word_to_idx built to convert the claim and the evidences into idx sequences
    X_claim, X_evidence = vectorizer.transform(dataset, columns)

    # compute max_tokens
    if is_training:
        max_tokens = int(np.quantile([len(seq) for seq in X_claim+X_evidence], 0.999))
    else:
        assert max_tokens is not None

    # apply padding to idx sequences
    X_claim = [seq + [0] * (max_tokens - len(seq)) for seq in X_claim]
    X_evidence = [seq + [0] * (max_tokens - len(seq)) for seq in X_evidence]
    X_claim = np.stack([seq[:max_tokens] for seq in X_claim])
    X_evidence = np.stack([seq[:max_tokens] for seq in X_evidence])


    if is_training:
        return X_claim, X_evidence, max_tokens
    else:
        return X_claim, X_evidence


def encode_target(target_series):
    """
    Encodes the target column of the dataset
    """
    return target_series.apply(lambda x: 1 if x == 1 else 0)

In [11]:
# initialize the vectorizer
embedding_dim = 300
vectorizer = TextVectorizer(embedding_dim=embedding_dim, embedding_folder=path_to_glove)

input_columns = ["Evidence", "Claim"]
target_column = "label"

# Train
print("\nTRAINING SET:")
X_claim_train, X_evidence_train, max_tokens = encode_input(train_data, columns=input_columns, vectorizer=vectorizer, is_training=True)
y_train = encode_target(train_data[target_column])
print("Max token sequence: {}".format(max_tokens))

print('X claim train shape: ', X_claim_train.shape)
print('X evidence train shape: ', X_evidence_train.shape)
print('y train shape: ', y_train.shape)

# Validation
print("\DEVELOPMENT SET:")
X_claim_val, X_evidence_val = encode_input(dev_data, columns=input_columns, vectorizer=vectorizer, max_tokens=max_tokens)
y_val = encode_target(dev_data[target_column])

print("X claim dev shape: ", X_claim_val.shape)
print("X evidence dev shape: ", X_evidence_val.shape)
print("y dev shape: ", y_val.shape)

# Test
print("\nTEST SET:")
X_claim_test, X_evidence_test = encode_input(trial_data, columns=input_columns, vectorizer=vectorizer, max_tokens=max_tokens)
# y_test = encode_target(trial_data[target_column])

print("X claim test shape: ", X_claim_test.shape)
print("X evidence test shape: ", X_evidence_test.shape)
# print("y trial shape: ", y_test.shape)

print("\nEmbedding matrix shape: {}".format(vectorizer.embedding_matrix.shape))

Creating glove vocabulary...
./drive/MyDrive/glove/glove.6B.300d.txt

TRAINING SET:
Generated embeddings for 3140 OOV words.


Converting data into idx sequences...: 100%|██████████| 23702/23702 [00:09<00:00, 2547.39it/s]


Max token sequence: 118
X claim train shape:  (23702, 118)
X evidence train shape:  (23702, 118)
y train shape:  (23702,)
\DEVELOPMENT SET:
Generated embeddings for 759 OOV words.


Converting data into idx sequences...: 100%|██████████| 5926/5926 [00:02<00:00, 2580.79it/s]


X claim dev shape:  (5926, 118)
X evidence dev shape:  (5926, 118)
y dev shape:  (5926,)

TEST SET:
Generated embeddings for 489 OOV words.


Converting data into idx sequences...: 100%|██████████| 4691/4691 [00:01<00:00, 2371.65it/s]


X claim test shape:  (4691, 118)
X evidence test shape:  (4691, 118)

Embedding matrix shape: (404390, 300)


In [12]:
# Step 2: Configure GPU memory growth
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

# Additional setup: Check if GPUs are available and memory growth is set
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)
else:
    print("No GPUs found")


def build_model(max_tokens, embedding_matrix, choice_info, compile_info):
    """
    Use functional API of tf.keras to build the model.

    Parameters
    ----------
    max_tokens : The max token sequence previously computed
    embedding_matrix : The embedding matrix to be used.
    choice_info : dictionary containing choice information about embedding and merging mode, and other choice points
    compile_info: dictionary containing compile information

    Return
    ----------
    model: the built keras functional model
    """

    assert choice_info["merging_mode"] in ["concat", "add", "mean"]
    assert choice_info["embedding_mode"] in ["bi_lstm"]
    assert isinstance(choice_info["dropout_rate"], float)
    assert(all(isinstance(item, int) for item in choice_info["classification_units"]))

    regularizer = tf.keras.regularizers.l2(0.01)

    # embedding of claim and evidence layers
    claim_input = Input(shape=(max_tokens), name="claim")
    claim_masking = Masking(mask_value=0, name="claim_masking")(claim_input)
    evidence_input = Input(shape=(max_tokens), name="evidence")
    evidence_masking = Masking(mask_value=0, name="evidence_masking")(evidence_input)

    claim_embedding = Embedding(input_dim=embedding_matrix.shape[0],  # vocab size
                                output_dim=embedding_matrix.shape[1], # embedding dim
                                weights=[embedding_matrix],
                                trainable=False,
                                mask_zero=True,
                                name="claim_embedding")(claim_masking)
    evidence_embedding = Embedding(input_dim=embedding_matrix.shape[0],  # vocab size
                                   output_dim=embedding_matrix.shape[1], # embedding dim
                                   weights=[embedding_matrix],
                                   trainable=False,
                                   mask_zero=True,
                                   name="evidence_embedding")(evidence_masking)

    if choice_info["embedding_mode"] == "bi_lstm":
        claim_encoding = Bidirectional(LSTM(units=embedding_matrix.shape[1], return_sequences=True), merge_mode='concat', name="claim_encoding")(claim_embedding)
        evidence_encoding = Bidirectional(LSTM(units=embedding_matrix.shape[1], return_sequences=True), merge_mode='concat', name="evidence_encoding")(evidence_embedding)
        claim_encoding = GlobalAveragePooling1D(name="claim_encoding_avg")(claim_encoding)
        evidence_encoding = GlobalAveragePooling1D(name="evidence_encoding_avg")(evidence_encoding)

    # merging layer
    if choice_info["merging_mode"] == "concat":
        merged = tf.keras.layers.concatenate([claim_encoding, evidence_encoding])
    elif choice_info["merging_mode"] == "add":
        merged = tf.keras.layers.add([claim_encoding, evidence_encoding])
    elif choice_info["merging_mode"] == "mean":
        merged = tf.keras.layers.average([claim_encoding, evidence_encoding])

    classification_input = merged

    # classification layers
    for i, units in enumerate(choice_info["classification_units"]):
        dense = Dense(units=units, activation="relu", kernel_regularizer=regularizer, name=f"classification_{i+1}")(classification_input)
        classification_input = Dropout(rate=choice_info["dropout_rate"], name=f"dropout_{i+1}")(dense)
    dense = Dense(units=1, activation="sigmoid", kernel_regularizer=regularizer, name="output")(classification_input)

    model = Model(inputs=[claim_input, evidence_input], outputs=dense)
    model.compile(**compile_info)
    return model

1 Physical GPUs, 1 Logical GPUs


In [13]:
import tensorflow as tf

class F1Score(tf.keras.metrics.Metric):
    def __init__(self, name='f1_score', **kwargs):
        super(F1Score, self).__init__(name=name, **kwargs)
        self.precision = tf.keras.metrics.Precision()
        self.recall = tf.keras.metrics.Recall()

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_pred = tf.round(y_pred)
        self.precision.update_state(y_true, y_pred, sample_weight)
        self.recall.update_state(y_true, y_pred, sample_weight)

    def result(self):
        p = self.precision.result()
        r = self.recall.result()
        return 2 * ((p * r) / (p + r + tf.keras.backend.epsilon()))

    def reset_states(self):
        self.precision.reset_state()
        self.recall.reset_state()


In [14]:
# epochs_tuning = 30
# batch_size_tuning = 64

# compile_info_tuning = {
#     'optimizer': tf.keras.optimizers.legacy.Adam(learning_rate=1e-3),
#     'loss': 'binary_crossentropy',
#     'metrics': [F1Score()],
# }



# tuner = kt.Hyperband(lambda hp: build_model(max_tokens,
#                                             vectorizer.embedding_matrix, {
#                                                 "embedding_mode": hp.Choice('embedding_mode', ["bi_lstm"]),
#                                                 "merging_mode": hp.Choice('merging_mode', ["concat", "add", "mean"]),
#                                                 "dropout_rate": hp.Choice('dropout_rate', [0.2, 0.3, 0.4]),
#                                                 "classification_units": json.loads(hp.Choice('classification_units', [json.dumps(units) for l in range(1, 3) for units in itertools.permutations([64, 128, 256], l)])),
#                                                 },
#                                              compile_info_tuning),
#                      objective=kt.Objective("val_f1_score", direction="max"),
#                      max_epochs=epochs_tuning,
#                      directory="tuner",
#                      project_name="tuner_fact_checking")


# tuner.search({"claim": X_claim_train, "evidence": X_evidence_train}, y_train, epochs=epochs_tuning, batch_size=batch_size_tuning, validation_data=({"claim": X_claim_val, "evidence": X_evidence_val}, y_val))
# best_hps = tuner.get_best_hyperparameters()[0]

# print(f"The hyperparameter search is complete.\n"
#       f"The optimal sentence embedding mode is {best_hps.get('embedding_mode')}.\n"
#       f"The optimal sentence merging mode is {best_hps.get('merging_mode')}.\n"
#       f"The optimal rate for Dropout layer is {best_hps.get('dropout_rate')}.\n"
#       f"The optimal classification units are {best_hps.get('classification_units')}.")

In [15]:
#Best values determine from hyperparameter tuning.
choice_info = {
    "embedding_mode": "bi_lstm",
    "merging_mode": "concat",
    "dropout_rate" : 0.3,
    "classification_units": [256, 64]
}

compile_info = {
    'optimizer': keras.optimizers.Adam(learning_rate=1e-3),
    'loss': 'binary_crossentropy',
    'metrics': 'accuracy',
}

model_base = build_model(max_tokens, vectorizer.embedding_matrix, choice_info, compile_info)
model_base.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 claim (InputLayer)          [(None, 118)]                0         []                            
                                                                                                  
 evidence (InputLayer)       [(None, 118)]                0         []                            
                                                                                                  
 claim_masking (Masking)     (None, 118)                  0         ['claim[0][0]']               
                                                                                                  
 evidence_masking (Masking)  (None, 118)                  0         ['evidence[0][0]']            
                                                                                              

In [16]:
batch_size_train = 64
epochs_training = 50
callbacks_training = [keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=5, mode="max", restore_best_weights=True)]

history_base = model_base.fit({"claim": X_claim_train, "evidence": X_evidence_train}, y_train, epochs=epochs_training, batch_size=batch_size_train,  validation_data=({"claim": X_claim_val, "evidence": X_evidence_val}, y_val), callbacks=callbacks_training)
history_base = history_base.history

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50


In [17]:
# save models
# models_path = "models"
models_path_2 = './drive/MyDrive/'
model_base.save(os.path.join(models_path_2, "bi_lstm_pooling"))

In [18]:
# convert the history.history dict to a pandas DataFrame:
path_model_history = os.path.join(models_path_2,'history')
# Now, create the directory if it doesn't exist
os.makedirs(path_model_history, exist_ok=True)  # This will create the directory if it does not exist

In [19]:
df_history_base = pd.DataFrame(history_base)
with open(os.path.join(path_model_history, "history_base.csv"), mode="w") as file:
    df_history_base.to_csv(file)

In [20]:
#Reload model
model_base = tf.keras.models.load_model(os.path.join(models_path_2, "bi_lstm_pooling"))
# Restore history
cols = ['loss','accuracy','val_loss','val_accuracy']
path_model_history = os.path.join(models_path_2, 'history')
history_base = pd.read_csv(os.path.join(path_model_history, "history_base.csv"), usecols=cols) # Use this to plot graph?

In [21]:
import csv
from collections import Counter

def evaluate_model(model, X_claim_test, X_evidence_test, file_name='Group_16_B.csv'):
    """
    Show classification report using model to predict output on X_test,
    write predictions to a CSV file, and count the number of predictions as 1s and 0s.
    """
    # Predicting the outputs
    y_pred = model.predict({"claim": X_claim_test, "evidence": X_evidence_test})
    y_pred = [1 if y > 0.5 else 0 for y in y_pred]

    # Writing predictions to a CSV file
    with open(file_name, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['prediction'])  # Writing the header
        for pred in y_pred:
            writer.writerow([pred])  # Writing each prediction on a new row

    # Counting the number of 1s and 0s
    counts = Counter(y_pred)
    print(f"Number of 1s (SUPPORTED): {counts[1]}")
    print(f"Number of 0s (REFUTED): {counts[0]}")
    return y_pred

In [22]:
predictions = evaluate_model(model_base, X_claim_val, X_evidence_val,'validation_preds.csv')

Number of 1s (SUPPORTED): 1520
Number of 0s (REFUTED): 4406


In [23]:
predictions_test = evaluate_model(model_base,X_claim_test,X_evidence_test)

Number of 1s (SUPPORTED): 2365
Number of 0s (REFUTED): 2326


In [24]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Calculate Accuracy
accuracy = accuracy_score(y_val, predictions)
print(f"Accuracy: {accuracy:.2f}")

# Calculate Precision
precision = precision_score(y_val, predictions, average='macro')
print(f"Precision: {precision:.2f}")

# Calculate Recall
recall = recall_score(y_val, predictions, average='macro')
print(f"Recall: {recall:.2f}")

# Calculate F1 Score
f1 = f1_score(y_val, predictions, average='macro')
print(f"F1 Score: {f1:.2f}")


Accuracy: 0.83
Precision: 0.79
Recall: 0.78
F1 Score: 0.78
