In [1]:
!pip install sklearn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_distances
import numpy
from sklearn.metrics import f1_score,precision_score,recall_score

Collecting sklearn
  Downloading sklearn-0.0.post7.tar.gz (3.6 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: sklearn
  Building wheel for sklearn (setup.py) ... [?25l[?25hdone
  Created wheel for sklearn: filename=sklearn-0.0.post7-py3-none-any.whl size=2951 sha256=a919c2867f040e9e54bbf91948a88e9182a7dd3a2df306fa33363898d29df2b2
  Stored in directory: /root/.cache/pip/wheels/c8/9c/85/72901eb50bc4bc6e3b2629378d172384ea3dfd19759c77fd2c
Successfully built sklearn
Installing collected packages: sklearn
Successfully installed sklearn-0.0.post7


##1) Installations and imports


a. Mount drive (if you are running on colab)

In [3]:
# Colab only: mount Google Drive at /content/drive so the following cells can
# read and write files under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


b. Clone or update competition repository
After cloning, under MyDrive, you will see the Task-2-SemEval-2024 folder, which contains training_data.zip with the training and dev sets, as well as the full list of CTRs.

In [4]:
%cd /content/drive/MyDrive

import os

PROJECT_DIR = '/content/drive/MyDrive/semeval-2024'
PROJECT_GITHUB_URL = 'https://github.com/ai-systems/Task-2-SemEval-2024.git'

if not os.path.isdir(PROJECT_DIR):
  !git clone {PROJECT_GITHUB_URL}
else:
  %cd {PROJECT_DIR}
  !git pull {PROJECT_GITHUB_URL}

/content/drive/MyDrive
Cloning into 'Task-2-SemEval-2024'...
remote: Enumerating objects: 17, done.[K
remote: Counting objects: 100% (17/17), done.[K
remote: Compressing objects: 100% (15/15), done.[K
remote: Total 17 (delta 1), reused 0 (delta 0), pack-reused 0[K
Receiving objects: 100% (17/17), 2.11 MiB | 5.79 MiB/s, done.
Resolving deltas: 100% (1/1), done.


##2) Dataset

In [5]:
# Training data
!unzip /content/drive/MyDrive/Task-2-SemEval-2024/training_data.zip

Archive:  /content/drive/MyDrive/Task-2-SemEval-2024/training_data.zip
   creating: training_data/
  inflating: __MACOSX/._training_data  
  inflating: training_data/.DS_Store  
replace __MACOSX/training_data/._.DS_Store? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: __MACOSX/training_data/._.DS_Store  
   creating: training_data/CT json/
  inflating: __MACOSX/training_data/._CT json  
  inflating: training_data/train.json  
replace __MACOSX/training_data/._train.json? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: __MACOSX/training_data/._train.json  
  inflating: training_data/CT json/NCT01997333.json  
  inflating: __MACOSX/training_data/CT json/._NCT01997333.json  
  inflating: training_data/CT json/NCT02725801.json  
  inflating: __MACOSX/training_data/CT json/._NCT02725801.json  
  inflating: training_data/CT json/NCT00129389.json  
  inflating: __MACOSX/training_data/CT json/._NCT00129389.json  
  inflating: training_data/CT json/NCT01106898.json  
  inflating: __MA

In [None]:
# Dev set
import json

# Parse the development split into `dev`, a mapping keyed by instance UUID
dev_path = "/content/drive/MyDrive/training_data/dev.json"
with open(dev_path) as dev_file:
    dev = json.load(dev_file)

# Example instance: print the second entry (in file order) to show its fields
second_uuid = list(dev)[1]
print(dev[second_uuid])

In [None]:
# UUIDs of all dev instances, in the order they appear in `dev`
uuid_list = list(dev.keys())

# Declared here but not populated in this cell
gold_dev_primary_evidence = []
gold_dev_secondary_evidence = []

# Retrieve all statements from the development set; statements[k] belongs to uuid_list[k]
statements = [dev[uuid]["Statement"] for uuid in uuid_list]

##3) TF-IDF Entailment prediction baseline

In [None]:
# Directory holding one JSON file per clinical trial report (CTR), named <trial id>.json
CTR_DIR = "/content/drive/MyDrive/training_data/CT json"
# Average cosine distance above which a statement is predicted as "Contradiction"
DISTANCE_THRESHOLD = 0.9


def _load_section(trial_id, section_id):
    """Return the `section_id` section of the trial stored in `<trial_id>.json`."""
    ctr_path = os.path.join(CTR_DIR, trial_id + ".json")
    with open(ctr_path) as json_file:
        return json.load(json_file)[section_id]


def _statement_distances(statement, section):
    """Return the cosine distance between `statement` and every entry of `section`.

    The TF-IDF vocabulary is fitted on the section entries only, and the
    statement is projected onto that vocabulary. Note these are cosine
    *distances*: smaller values mean the texts are more alike.
    """
    vectorizer = TfidfVectorizer().fit(section)
    statement_vector = vectorizer.transform([statement])
    section_matrix = vectorizer.transform(section)
    # cosine_distances returns a (1, n_entries) matrix; keep its single row
    return cosine_distances(statement_vector, section_matrix)[0]


# Maps each dev UUID to {"Prediction": "Entailment" | "Contradiction"}
Results = {}

for i in range(len(uuid_list)):
    instance = dev[uuid_list[i]]

    # Distances between the statement and the relevant section of the primary trial
    primary_section = _load_section(instance["Primary_id"], instance["Section_id"])
    scores = list(_statement_distances(statements[i], primary_section))

    # Comparison instances also involve a secondary trial: pool its distances
    # with the primary ones so the average covers all entries of both sections
    if instance["Type"] == "Comparison":
        secondary_section = _load_section(instance["Secondary_id"], instance["Section_id"])
        scores = list(_statement_distances(statements[i], secondary_section)) + scores

    score = numpy.average(scores)

    # An average distance greater than the threshold predicts contradiction,
    # anything else predicts entailment
    if score > DISTANCE_THRESHOLD:
        Prediction = "Contradiction"
    else:
        Prediction = "Entailment"
    Results[str(uuid_list[i])] = {"Prediction": Prediction}


## Save the results in the submission format.

In [None]:
# Show the predictions, then persist them as pretty-printed JSON (the submission file)
print(Results)
results_path = "/content/drive/MyDrive/Task-2-SemEval-2024/results.json"
with open(results_path, 'w') as results_file:
    json.dump(Results, results_file, indent=4)

##4) Evaluation

Compute F1 score, Precision, and Recall. Note that in the final evaluation, systems will be ranked by Faithfulness and Consistency, which cannot be computed on the training and development sets.

In [None]:
def main():
    """Print F1, precision and recall of `Results` against the labels in `dev`.

    Both predictions and gold labels are binarised with "Entailment" as the
    positive class (1) and every other value as 0. Only UUIDs present in
    `Results` are scored.
    """
    references = dev
    predictions = Results
    scored_uuids = list(predictions.keys())

    # Binary encoding: 1 for "Entailment", 0 otherwise
    results_pred = [
        1 if predictions[uuid]["Prediction"] == "Entailment" else 0
        for uuid in scored_uuids
    ]
    gold_labels = [
        1 if references[uuid]["Label"] == "Entailment" else 0
        for uuid in scored_uuids
    ]

    # Compute every metric before printing anything
    f_score, p_score, r_score = (
        metric(gold_labels, results_pred)
        for metric in (f1_score, precision_score, recall_score)
    )

    for template, value in (
        ('F1:{:f}', f_score),
        ('precision_score:{:f}', p_score),
        ('recall_score:{:f}', r_score),
    ):
        print(template.format(value))


if __name__ == '__main__':
    main()