# 2024 COMP90042 Project
*Make sure you change the file name with your group id.*

# Readme
*If there is something to be noted for the marker, please mention here.*

*If you are planning to implement a program with Object Oriented Programming style, please put those the bottom of this ipynb file*

# 0.Setting Colab Method for future model developing
Firstly, run the following block to mount the drive to the colab. Then, drag the data folder/**eval.py** to the "Colab Folder Space" to ensure the code runs successfully.

If data folder updated, attempt to forcibly remount, call `drive.mount("/content/drive", force_remount=True)`.


In [1]:
# from google.colab import drive
# drive.mount('/content/drive')

# 1.DataSet Processing

## 1.1 Reading and gathering data

Using `json` package reading and gathering claims and evidences, then print an output.

In [2]:
import json
from collections import Counter
from statistics import mean

with open('data/train-claims.json', 'r') as input_file:
    train_claim_data = json.load(input_file)

# Read in development data (claim)
with open('data/dev-claims.json', 'r') as input_file:
    dev_claim_data = json.load(input_file)

# Read in test data (claim)
with open('data/test-claims-unlabelled.json', 'r') as input_file:
    test_claim_data = json.load(input_file)

# Read in evidence data
with open('data/evidence.json', 'r') as input_file:
    evi_data = json.load(input_file)

#EDA


claim_count = 0
evi_count = 0
claim_length = []
evidence_count = []
evidence_length = []
labels = []

for key,value in train_claim_data.items():
    claim_count+=1
    claim_length.append(len(value["claim_text"]))
    evidence_count.append(len(value["evidences"]))
    evidence_length += [len(evi_data[x]) for x in value["evidences"]]
    labels.append(value["claim_label"])

for key,value in evi_data.items():
    evi_count+=1

print("claim count: ",claim_count)
print("evidence count: ",evi_count)
print("max claim length: ",max(claim_length))
print("min claim length: ",min(claim_length))
print("mean claim length: ",mean(claim_length))
print("max evidence count: ",max(evidence_count))
print("min evidence count: ",min(evidence_count))
print("mean evidence count: ",mean(evidence_count))
print("max evidence length: ",max(evidence_length))
print("min evidence length: ",min(evidence_length))
print("mean evidence length: ",mean(evidence_length))
print(Counter(labels))



inside = 0
outside = 0

train_evi_id = []
for claim_id,claim_value in train_claim_data.items():
    train_evi_id=train_evi_id+claim_value['evidences']

for claim_id,claim_value in dev_claim_data.items():
    test_evi_id=claim_value['evidences']
    for e in test_evi_id:
        if e in train_evi_id:
            inside += 1
        else:
            outside += 1
print("Dev evi inside train evi", inside)
print("Dev evi outside train evi", outside)

full_evidence_id = list(evi_data.keys())
full_evidence_text  = list(evi_data.values())
train_claim_id = list(train_claim_data.keys())
train_claim_text  = [ v["claim_text"] for v in train_claim_data.values()]
print("Train claim count: ",len(train_claim_id))


claim count:  1228
evidence count:  1208827
max claim length:  332
min claim length:  26
mean claim length:  122.95521172638436
max evidence count:  5
min evidence count:  1
mean evidence count:  3.3566775244299674
max evidence length:  1979
min evidence length:  13
mean evidence length:  173.5
Counter({'SUPPORTS': 519, 'NOT_ENOUGH_INFO': 386, 'REFUTES': 199, 'DISPUTED': 124})
Dev evi inside train evi 154
Dev evi outside train evi 0
Train claim count:  1228


## 1.2 Data preprocessing

### Implementing preprocessing fuctions

In [3]:
import nltk
import string
import re
from nltk.corpus import stopwords
nltk.download('stopwords')
nltk.download('wordnet')

lemmatizer = nltk.stem.wordnet.WordNetLemmatizer()
stopwords = set(stopwords.words('english'))

def lemmatize(word):
    lemma = lemmatizer.lemmatize(word, 'v')
    return lemma if lemma != word else lemmatizer.lemmatize(word, 'n')

def is_pure_english(text):
    english_letters = set(string.ascii_letters)
    cleaned_text = ''.join(char for char in text if char.isalpha() or char.isspace())
    return all(char in english_letters or char.isspace() for char in cleaned_text)

def remove_non_eng(dictionary):
    eng_data = {}
    for key, value in dictionary.items():
        if is_pure_english(value):
            eng_data[key] = value
    return eng_data

def contains_climate_keywords(text, keywords):
    text = text.lower()
    for keyword in keywords:
        if re.search(r"\b" + re.escape(keyword) + r"\b", text):
            return True
    return False

def filter_climate_related(dictionary, keywords):
    cs_data = {}
    for key, value in dictionary.items():
        if contains_climate_keywords(value, keywords):
            cs_data[key] = value
    return cs_data

def text_preprocessing(text, remove_stopwords=False):
    words = [lemmatize(w) for w in text.lower().split()]
    if remove_stopwords:
        words = [w for w in words if w not in stopwords]
    return " ".join(words)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ABC\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\ABC\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


### Implementing **Claim data preprocessing** and **Evidence data preprocessing** functions

In [4]:
# 权威网站 https://www.ucdavis.edu/climate/definitions
climate_keywords = [
    "climate", "environment", "global warming", "greenhouse effect", "carbon", "co2", "carbon dioxide",
    "methane", "renewable energy", "sustainability", "ecology", "biodiversity", "fossil fuels",
    "emissions", "air quality", "ozone", "solar energy", "wind energy", "climate change", "climate crisis",
    "climate adaptation", "climate mitigation", "ocean", "sea levels", "ice melting", "deforestation",
    "reforestation", "pollution"
]

# def filter_evidence_by_train(train_claim_data, evidence_data):

#     # Collect all evidence ids in the training set
#     train_evidence_ids = set()

#     for claim in train_claim_data.values():
#         train_evidence_ids.update(claim['evidences'])

#     # filter evidence data by the evidence ids in the training set
#     filtered_evidence_data = {key: value for key, value in evidence_data.items() if key in train_evidence_ids}

#     return filtered_evidence_data

def preprocess_claim_data(claim_data, existed_evidences_id=None):
    claim_data = remove_non_eng(claim_data)
    claim_data_text = []
    claim_data_id = []
    claim_data_label = []
    claim_evidences = []

    for key in claim_data.keys():
        claim_data[key]["claim_text"] = text_preprocessing(claim_data[key]["claim_text"])
        claim_data_text.append(claim_data[key]["claim_text"])
        claim_data_id.append(key)

        if "claim_label" in claim_data[key]:
            claim_data_label.append(claim_data[key]["claim_label"])
        else:
            claim_data_label.append(None)

        if existed_evidences_id and "evidences" in claim_data[key]:
            valid_evidences = [existed_evidences_id[i] for i in claim_data[key]["evidences"] if i in existed_evidences_id]
            claim_evidences.append(valid_evidences)
        else:
            claim_evidences.append([])

    return claim_data_text, claim_data_id, claim_data_label, claim_evidences

# def preprocess_evi_data(evi_data, climate_keywords, train_claim_data):
#     evi_data = remove_non_eng(evi_data)
#     # cs_evi_data = filter_climate_related(evi_data, climate_keywords)

#     # filter evidence data by the evidence ids in the training set
#     # train_evi_data = filter_evidence_by_train(train_claim_data, cs_evi_data)

#     for key in evi_data.keys():
#         evi_data[key] = text_preprocessing(evi_data[key], remove_stopwords=True)

#     cleaned_evidence_text = list(evi_data.values())
#     cleaned_evidence_id = list(evi_data.keys())

#     return cleaned_evidence_text, cleaned_evidence_id

def preprocess_evi_data(evi_data, climate_keywords):
    evi_data = remove_non_eng(evi_data)
    cs_evi_data = filter_climate_related(evi_data, climate_keywords)

    # filter evidence data by the evidence ids in the training set
    # train_evi_data = filter_evidence_by_train(train_claim_data, cs_evi_data)

    for key in cs_evi_data.keys():
        cs_evi_data[key] = text_preprocessing(cs_evi_data[key], remove_stopwords=True)

    cleaned_evidence_text = list(cs_evi_data.values())
    cleaned_evidence_id = list(cs_evi_data.keys())

    return cleaned_evidence_text, cleaned_evidence_id

### Start dataset preprocessing

In [5]:
# Preprocessing the evidence data, split the data into text and id
# cleaned_evidence_text, cleaned_evidence_id = preprocess_evi_data(evi_data, climate_keywords, train_claim_data)

cleaned_evidence_text, cleaned_evidence_id = preprocess_evi_data(evi_data, climate_keywords)

# Create a dictionary to map evidence ID to index
evidences_id_dict = {evidence_id: index for index, evidence_id in enumerate(cleaned_evidence_id)}

# Preprocessing the claim data, split the data into text, id, label and evidences
train_claim_text, train_claim_id, train_claim_label, train_claim_evidences = preprocess_claim_data(train_claim_data, evidences_id_dict)

dev_claim_text, dev_claim_id, dev_claim_label, dev_claim_evidences = preprocess_claim_data(dev_claim_data, evidences_id_dict)

test_claim_text, test_claim_id, _, _ = preprocess_claim_data(test_claim_data)


In [6]:
print("Train: ", train_claim_evidences)
print("Dev: ", dev_claim_evidences)

Train:  [[7086, 19094, 213], [5473, 18047], [8496], [18805, 12603, 8653, 5674, 16135], [], [3611, 7893, 9796], [], [481], [], [7026], [3469], [9634], [6203, 17249, 15630], [8271, 16319, 12741, 16160, 14064], [7959], [], [2803, 8271, 16160], [5900, 18382, 7339, 1465, 13080], [], [19217], [2928], [14103], [4982], [], [18927], [], [16204], [], [12324], [], [5360], [], [15220, 11696], [1127], [], [6073, 1905, 14345, 12566, 13909], [], [12721], [], [1995, 12306, 15589], [18052, 10626, 17108], [5900, 1079, 7706], [16422], [10899, 11259], [17772, 14807, 8636, 11955], [18695, 7742, 18854], [], [10254, 4637], [], [10097, 6312], [3502, 12407], [], [9727], [13534, 11512, 3499, 7706], [], [6183, 2258, 9939], [7659], [], [6682], [17865], [13413, 18047, 6129, 11650], [1119], [], [1480, 5637], [5900, 2959, 13093, 16657, 15223], [4266], [18805, 4123], [], [18280, 10549, 11802], [], [11041], [11955], [], [4013, 14899], [], [], [3842, 12507], [9868, 14364], [14345], [720], [18047], [], [], [18506], [293

In [7]:
print("Train claim count: ",len(train_claim_text))
print("Dev claim count: ",len(dev_claim_text))
print("Test claim count: ",len(test_claim_text))
print("Evidence count: ",len(cleaned_evidence_text))

Train claim count:  1228
Dev claim count:  154
Test claim count:  153
Evidence count:  19324


## 1.3 Development Set Prediction

In this section, we perform the main tasks of the project on the development set:

1. **Evidence Retrieval**: For each claim, find the most relevant evidence from the corpus.
2. **Claim Classification**: Predict the label for each claim based on the retrieved evidence and the claim's similarity to the training claims.

The code uses TF-IDF vectorization and cosine similarity to measure the relevance between claims and evidence, and between development and training claims. The most similar evidence and training claims are used for prediction.

In [8]:
import operator
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.spatial.distance import euclidean

# Creating two vectorizer
evidence_tfidf_vectorizer = TfidfVectorizer(max_features=30000, use_idf=True)

# fit the vectorizer on the evidence data
evidence_tfidf_vectorizer.fit(cleaned_evidence_text)

# Transform cleaned_evidence_text
transformed_evidence = evidence_tfidf_vectorizer.transform(cleaned_evidence_text)

# Transform claim data
train_claim_tfidf = evidence_tfidf_vectorizer.transform(train_claim_text)
dev_claim_tfidf = evidence_tfidf_vectorizer.transform(dev_claim_text)
test_claim_tfidf = evidence_tfidf_vectorizer.transform(test_claim_text)

In [9]:
print("Transformed evidence shape: ", transformed_evidence.shape)
print("Transformed train claim shape: ", train_claim_tfidf.shape)
print("Transformed dev claim shape: ", dev_claim_tfidf.shape)

Transformed evidence shape:  (19324, 22890)
Transformed train claim shape:  (1228, 22890)
Transformed dev claim shape:  (154, 22890)


In [10]:
# Calculate cosine similarity between train claims and evidence
train_similarity = cosine_similarity(train_claim_tfidf, transformed_evidence)

# Calculate cosine similarity between dev claims and evidence
dev_similarity = cosine_similarity(dev_claim_tfidf, transformed_evidence)

# Calculate cosine similarity between test claims and evidence
test_similarity = cosine_similarity(test_claim_tfidf, transformed_evidence)


In [11]:
print("Train similarity:", train_similarity)
print("Test similarity: ", test_similarity)

Train similarity: [[0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.07193263 0.        ]
 [0.         0.         0.         ... 0.10081634 0.09094364 0.        ]
 ...
 [0.         0.         0.0274485  ... 0.         0.         0.        ]
 [0.         0.         0.04372735 ... 0.         0.07984955 0.03913389]
 [0.         0.         0.01689673 ... 0.         0.         0.        ]]
Test similarity:  [[0.         0.         0.06279525 ... 0.         0.         0.        ]
 [0.07206637 0.         0.04409426 ... 0.         0.06038967 0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [0.09004394 0.         0.02754698 ... 0.         0.         0.        ]
 [0.         0.         0.0306085  ... 0.         0.         0.        ]
 [0.         0.         0.04236803 ... 0.04138184 0.08525478 0.        ]]


In [12]:
# # Calculate Euclidean distance between train claims and evidence
# train_distance = np.zeros((train_claim_tfidf.shape[0], transformed_evidence.shape[0]))
# for i in range(train_claim_tfidf.shape[0]):
#     for j in range(transformed_evidence.shape[0]):
#         train_distance[i, j] = euclidean(train_claim_tfidf[i].toarray().ravel(), transformed_evidence[j].toarray().ravel())

# # Calculate Euclidean distance between dev claims and evidence
# dev_distance = np.zeros((dev_claim_tfidf.shape[0], transformed_evidence.shape[0]))
# for i in range(dev_claim_tfidf.shape[0]):
#     for j in range(transformed_evidence.shape[0]):
#         dev_distance[i, j] = euclidean(dev_claim_tfidf[i].toarray().ravel(), transformed_evidence[j].toarray().ravel())

# # Calculate Euclidean distance between test claims and evidence
# test_distance = np.zeros((test_claim_tfidf.shape[0], transformed_evidence.shape[0]))
# for i in range(test_claim_tfidf.shape[0]):
#     for j in range(transformed_evidence.shape[0]):
#         test_distance[i, j] = euclidean(test_claim_tfidf[i].toarray().ravel(), transformed_evidence[j].toarray().ravel())

# 2. Model Implementation
(You can add as many code blocks and text blocks as you need. However, YOU SHOULD NOT MODIFY the section title)

In [13]:
def spliting_dataset(similarity, claim_texts, claim_evidences, evidence_texts, top_k=5, neg_ratio=1, train_mode=False):
    dataset = []
    labels = []
    
    for i in range(similarity.shape[0]):
        claim_text = claim_texts[i]
        
        top_evidences = np.argsort(-similarity[i])[:top_k]
        
        for evidence_index in top_evidences:
            evidence_text = evidence_texts[evidence_index]
            dataset.append("[cls]" + claim_text + "[sep]" + evidence_text)
            
            if claim_evidences is not None and evidence_index in claim_evidences[i]:
                labels.append(1)
            else:
                labels.append(0)
        
        if train_mode and claim_evidences is not None:
            for evidence_index in claim_evidences[i]:
                if evidence_index not in top_evidences:
                    evidence_text = evidence_texts[evidence_index]
                    dataset.append("[cls]" + claim_text + "[sep]" + evidence_text)
                    labels.append(1)
        
        neg_samples_num = int(neg_ratio * top_k)
        neg_evidences = np.random.choice(
            [j for j in range(similarity.shape[1]) if j not in top_evidences and (not train_mode or j not in claim_evidences[i])], 
            neg_samples_num
        )
        
        for evidence_index in neg_evidences:
            evidence_text = evidence_texts[evidence_index]
            dataset.append("[cls]" + claim_text + "[sep]" + evidence_text)
            labels.append(0)
    
    return dataset, labels

In [14]:
train_dataset, train_dataset_labels = spliting_dataset(
    train_similarity, train_claim_text, train_claim_evidences, cleaned_evidence_text, 
    top_k=50, neg_ratio=1, train_mode=True
)
dev_dataset, dev_dataset_labels = spliting_dataset(
    dev_similarity, dev_claim_text, dev_claim_evidences, cleaned_evidence_text, 
    top_k=50, neg_ratio=1, train_mode=False
)
test_dataset, test_dataset_labels = spliting_dataset(
    test_similarity, test_claim_text, None, cleaned_evidence_text, 
    top_k=50, neg_ratio=1, train_mode=False
)

In [15]:
# def spliting_dataset(similarity, claim_texts, claim_evidences, evidence_texts, threshold=0.5, neg_ratio=1):
#     dataset = []
#     labels = []
    
#     # Calculate the mean similarity for each claim
#     mean_similarities = np.mean(similarity, axis=1)
    
#     # Based on the similarity matrix, find the evidence that has similarity higher than the threshold
#     for i in range(similarity.shape[0]):
#         claim_text = claim_texts[i]
        
#         # Find the evidence with similarity higher than the threshold
#         top_evidences = np.where(similarity[i] > threshold * mean_similarities[i])[0]
        
#         # Add the top evidence to the dataset, label as 1
#         for evidence_index in top_evidences:
#             evidence_text = evidence_texts[evidence_index]
#             dataset.append("[cls]" + claim_text + "[sep]" + evidence_text)
#             labels.append(1)
        
#         # If the claim has evidences, add the evidence to the dataset, label as 1
#         if claim_evidences is not None:
#             for evidence_index in claim_evidences[i]:
#                 evidence_text = evidence_texts[evidence_index]
#                 dataset.append("[cls]" + claim_text + "[sep]" + evidence_text)
#                 labels.append(1)
        
#         # Randomly sample negative samples, label as 0
#         neg_samples_num = int(neg_ratio * len(top_evidences))
        
#         # Randomly sample negative samples that are not in the top evidence
#         neg_evidences = np.random.choice(
#             [j for j in range(similarity.shape[1]) if j not in top_evidences], 
#             neg_samples_num
#         )
        
#         # Add the negative samples to the dataset
#         for evidence_index in neg_evidences:
#             evidence_text = evidence_texts[evidence_index]
#             dataset.append("[cls]" + claim_text + "[sep]" + evidence_text)
#             labels.append(0)
    
#     return dataset, labels

In [16]:
# train_dataset, train_dataset_labels = spliting_dataset(
#     train_similarity, train_claim_text, train_claim_evidences, cleaned_evidence_text, threshold=0.8, neg_ratio=1.1
# )
# dev_dataset, dev_dataset_labels = spliting_dataset(
#     dev_similarity, dev_claim_text, dev_claim_evidences, cleaned_evidence_text, threshold=0.8, neg_ratio=1.1
# )
# test_dataset, test_dataset_labels = spliting_dataset(
#     test_similarity, test_claim_text, None, cleaned_evidence_text, threshold=0.8, neg_ratio=1.1
# )

In [17]:
# Convert the dataset labels to numpy array
train_label_array = np.array(train_dataset_labels)
dev_label_array = np.array(dev_dataset_labels)
test_label_array = np.array(test_dataset_labels)

In [18]:
from collections import Counter
print(Counter(train_label_array))
print(Counter(dev_label_array))

Counter({0: 122188, 1: 1829})
Counter({0: 15399, 1: 1})


In [19]:
244825 / 1829

133.85729907053033

In [20]:
30696 / 104

295.15384615384613

In [21]:
# need to install
from tensorflow.keras.preprocessing.text import Tokenizer

tokenizer = Tokenizer(oov_token="<UNK>")
tokenizer.fit_on_texts(train_dataset)

In [22]:
vocab_size = len(tokenizer.word_index) + 1 # 0 is padding token
print("Vocab size: ", vocab_size)

Vocab size:  24716


In [23]:
# Convert the text data to sequence
train_sequence = tokenizer.texts_to_sequences(train_dataset)
dev_sequence = tokenizer.texts_to_sequences(dev_dataset)
test_sequence = tokenizer.texts_to_sequences(test_dataset)

In [24]:
longest_train_sequence = 0
for i in train_sequence:
    longest_train_sequence = max(longest_train_sequence, len(i))

longest_dev_sequence = 0
for i in dev_sequence:
    longest_dev_sequence = max(longest_dev_sequence, len(i))


In [25]:
print("Longest train sequence: ", longest_train_sequence)
print("Longest dev sequence: ", longest_dev_sequence)

Longest train sequence:  250
Longest dev sequence:  225


In [26]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

padding_length = min(longest_train_sequence, longest_dev_sequence) + 1

padded_train_sequence = pad_sequences(train_sequence, maxlen=padding_length, padding='post')
padded_dev_sequence = pad_sequences(dev_sequence, maxlen=padding_length, padding='post')
padded_test_sequence = pad_sequences(test_sequence, maxlen=padding_length, padding='post')

In [27]:
print("Padded train sequence shape: ", padding_length)

Padded train sequence shape:  226


In [35]:
# from workshop
import tensorflow as tf
from tensorflow.keras.layers import LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.regularizers import l2

def focal_loss(gamma=2., alpha=4.):
    def focal_loss_fixed(y_true, y_pred):
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        return -tf.math.reduce_sum(alpha * tf.pow(1. - pt_1, gamma) * tf.math.log(pt_1))-tf.math.reduce_sum((1-alpha) * tf.pow( pt_0, gamma) * tf.math.log(1. - pt_0))
    return focal_loss_fixed

embedding_dim = 100
hidden_dim = 200

#model definition
# feedforward network (MLP)
model = Sequential(name="retrieval_cls_lstm")
model.add(layers.Embedding(input_dim=vocab_size,
                           output_dim=embedding_dim,
                           input_length=padding_length, embeddings_regularizer=l2(0.02)))

model.add(layers.Dropout(0.4))
# model.add(LSTM(hidden_dim, return_sequences=True, dropout=0.1))
# model.add(LSTM(hidden_dim, dropout=0.1))

model.add(layers.Bidirectional(LSTM(hidden_dim, return_sequences=True, dropout=0.4, kernel_regularizer=l2(0.02))))
model.add(layers.Bidirectional(LSTM(hidden_dim, dropout=0.4)))
# model.add(layers.GlobalMaxPooling1D())
model.add(layers.Dropout(0.4))
model.add(layers.Dense(hidden_dim, activation='tanh', kernel_regularizer=l2(0.02), bias_regularizer=l2(0.02)))
model.add(layers.Dropout(0.4))
model.add(layers.Dense(1, activation='sigmoid'))

#since it's a binary classification problem, we use a binary cross entropy loss here
# model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[keras.metrics.Recall()])
# model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# model.compile(loss='binary_crossentropy', optimizer='adam')

decay_steps = 2000
learning_rate = 1e-4
lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
    learning_rate, decay_steps
)

optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
model.compile(loss=focal_loss(gamma=2., alpha=0.25), optimizer=optimizer)
earlystopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2, verbose=1, mode='min')
model.summary()

Model: "retrieval_cls_lstm"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 226, 100)          2471600   
                                                                 
 dropout_6 (Dropout)         (None, 226, 100)          0         
                                                                 
 bidirectional_4 (Bidirectio  (None, 226, 400)         481600    
 nal)                                                            
                                                                 
 bidirectional_5 (Bidirectio  (None, 400)              961600    
 nal)                                                            
                                                                 
 dropout_7 (Dropout)         (None, 400)               0         
                                                                 
 dense_4 (Dense)             (None, 200)        

In [36]:
# Train the model

model.fit(padded_train_sequence,train_label_array,epochs=10,validation_data=(padded_dev_sequence, dev_label_array),verbose=True,batch_size=500,callbacks=[earlystopping])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 9: early stopping


<keras.callbacks.History at 0x21bbd75e760>

In [None]:
# Save the model
# model.save('retrieval_cls_lstm')

# Load the model
# model = tf.keras.models.load_model('retrieval_cls_lstm')

# 3.Testing and Evaluation
(You can add as many code blocks and text blocks as you need. However, YOU SHOULD NOT MODIFY the section title)

In [37]:
# Start prediction

dev_predictions = model.predict(padded_dev_sequence, batch_size=64)
test_predictions = model.predict(padded_test_sequence, batch_size=64)




In [38]:
print(dev_predictions[:20])
print(test_predictions[:5])

[[0.03551446]
 [0.03342303]
 [0.07416687]
 [0.05250476]
 [0.03596865]
 [0.05270676]
 [0.02163146]
 [0.04320936]
 [0.10461914]
 [0.03555808]
 [0.07287031]
 [0.04586693]
 [0.04586693]
 [0.02871709]
 [0.06668443]
 [0.08728947]
 [0.04896487]
 [0.07246024]
 [0.03225577]
 [0.08874916]]
[[0.0352916 ]
 [0.08790322]
 [0.05348846]
 [0.09915864]
 [0.21329154]]


In [None]:
def evidences_retrieval(claim_evidence_scores, top_k, evidence_ids):
    sorted_indices = np.argsort(claim_evidence_scores)[::-1]  
    top_indices = sorted_indices[:top_k]
    top_evidence_ids = [evidence_ids[idx] for idx in top_indices]
    return top_evidence_ids

select_evidence_k = 6

dev_predictions = dev_predictions.squeeze()
test_predictions = test_predictions.squeeze()

dev_top_evidence_ids = [evidences_retrieval(scores, select_evidence_k, cleaned_evidence_id) for scores in dev_predictions]
test_top_evidence_ids = [evidences_retrieval(scores, select_evidence_k, cleaned_evidence_id) for scores in test_predictions]

pred_dev_claims = {}
pred_test_claims = {}

for claim_id, evidence_ids in zip(dev_claim_id, dev_top_evidence_ids):  
    dev_claim_data[claim_id]['evidences'] = evidence_ids

for claim_id, evidence_ids in zip(test_claim_id, test_top_evidence_ids):
    test_claim_data[claim_id]['evidences'] = evidence_ids

# Save the updated claims to JSON files
with open('data/dev_predict.json', 'w') as f:
    json.dump(pred_dev_claims, f)

with open('data/test-claims-unlabelled.json', 'w') as f:
    json.dump(pred_test_claims, f)


Scores: 0.03831571713089943
Scores: 0.0344076007604599
Scores: 0.07606448233127594
Scores: 0.05909661203622818
Scores: 0.03946265950798988
Scores: 0.060780346393585205
Scores: 0.023824458941817284
Scores: 0.04805546998977661
Scores: 0.09664904326200485
Scores: 0.04032343626022339
Scores: 0.08162851631641388
Scores: 0.04974227771162987
Scores: 0.04974227771162987
Scores: 0.028740329667925835
Scores: 0.06322941929101944
Scores: 0.0960947573184967
Scores: 0.053747840225696564
Scores: 0.07477974146604538
Scores: 0.03771580010652542
Scores: 0.09471600502729416
Scores: 0.059715110808610916
Scores: 0.033625636249780655
Scores: 0.06984996050596237
Scores: 0.09300383180379868
Scores: 0.061591822654008865
Scores: 0.06541626155376434
Scores: 0.06909685581922531
Scores: 0.05456991121172905
Scores: 0.045888856053352356
Scores: 0.09938683360815048
Scores: 0.025147663429379463
Scores: 0.06210901215672493
Scores: 0.06531631946563721
Scores: 0.02015087753534317
Scores: 0.1495744287967682
Scores: 0.0808

In [None]:
print(pred_dev_claims)
print(test_top_evidence_ids)

{'claim-752': {'claim_text': '[south australia] have the most expensive electricity in the world.', 'claim_label': 'SUPPORTS', 'evidences': ['evidence-89'], 'predicted_evidences': ['evidence-89']}, 'claim-375': {'claim_text': 'when 3 per cent of total annual global emission of carbon dioxide be from human and australia prod\xaduces 1.3 per cent of this 3 per cent, then no amount of emission reductio\xadn here will have any effect on global climate.', 'claim_label': 'NOT_ENOUGH_INFO', 'evidences': ['evidence-89'], 'predicted_evidences': ['evidence-89']}, 'claim-1266': {'claim_text': 'this mean that the world be now 1c warmer than it be in pre-industrial time', 'claim_label': 'SUPPORTS', 'evidences': ['evidence-89'], 'predicted_evidences': ['evidence-89']}, 'claim-871': {'claim_text': '“as it happens, zika may also be a good model of the second worry effect — disease mutation.', 'claim_label': 'NOT_ENOUGH_INFO', 'evidences': ['evidence-89'], 'predicted_evidences': ['evidence-89']}, 'clai

In [None]:
# # Update the dev JSON file
# with open('data/dev_predict.json', 'r') as f:
#     dev_claims = json.load(f)

# for claim_id, evidence_indices in zip(dev_claim_id, dev_top_evidence_indices):
#     top_evidence_ids = [cleaned_evidence_id[idx] for idx in evidence_indices]
#     dev_claims[claim_id]['evidences'] = top_evidence_ids

# with open('data/dev_predict.json', 'w') as f:
#     json.dump(dev_claims, f)


In [None]:
# # Update the test JSON file
# with open('data/test-claims-unlabelled.json', 'r') as f:
#     test_claims = json.load(f)

# for claim_id, evidence_indices in zip(test_claim_id, test_top_evidence_indices):
#     top_evidence_ids = [cleaned_evidence_id[idx] for idx in evidence_indices]
#     test_claims[claim_id]['evidences'] = top_evidence_ids

# with open('data/test-claims-unlabelled.json', 'w') as f:
#     json.dump(test_claims, f)

In [None]:
# %%cmd
# python eval.py --predictions dev-claims-baseline.json --groundtruth dev-claims.json
# python eval.py --predictions dev_predict.json --groundtruth dev-claims.json


import subprocess

# proc = subprocess.Popen(["python", "eval.py", "--predictions", "data\dev_predict.json", "--groundtruth", "data\dev-claims.json"
# ], stdout=subprocess.PIPE, shell=True)
# (out, err) = proc.communicate()
# print(str(out))

# 高自动化模型/预处理选择，可以自动读取准确度
output = subprocess.check_output("python eval.py --predictions data/dev_predict.json --groundtruth data/dev-claims.json", shell=True)
output_str = output.decode('utf-8')

# Split the output into lines
output_lines = output_str.strip().split('\n')

# Format the output
formatted_lines = []
for line in output_lines:
    metric, value = line.split('=')
    metric = metric.strip()
    value = value.strip()
    formatted_line = f"{metric}: {value}"
    formatted_lines.append(formatted_line)

# Join the formatted lines into a single string
formatted_output = '\n'.join(formatted_lines)
print(formatted_output)

Evidence Retrieval F-score (F): 0.0
Claim Classification Accuracy (A): 1.0
Harmonic Mean of F and A: 0.0


## Object Oriented Programming codes here

*You can use multiple code snippets. Just add more if needed*