In [None]:
!pip install sklearn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_distances
import numpy
from sklearn.metrics import f1_score,precision_score,recall_score

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting sklearn
  Downloading sklearn-0.0.post1.tar.gz (3.6 kB)
Building wheels for collected packages: sklearn
  Building wheel for sklearn (setup.py) ... [?25l[?25hdone
  Created wheel for sklearn: filename=sklearn-0.0.post1-py3-none-any.whl size=2344 sha256=20d6aebc7e9b1f19c43145829ba82197dde1aadab5df234fa1e04d8c2e59a12d
  Stored in directory: /root/.cache/pip/wheels/42/56/cc/4a8bf86613aafd5b7f1b310477667c1fca5c51c3ae4124a003
Successfully built sklearn
Installing collected packages: sklearn
Successfully installed sklearn-0.0.post1


## 1) Installations and imports


a. Mount drive (if you are running on colab)

In [None]:
# Colab only: mount Google Drive so that files under /content/drive are
# reachable from this runtime. The data paths used in later cells all live
# under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


b. Clone or update competition repository
After cloning, under MyDrive, you will see the `nli4ct` folder with the training and dev set, as well as the full list of CTRs.

In [None]:
%cd /content/drive/MyDrive

import os

PROJECT_DIR = '/content/drive/MyDrive/NLI4CT-semeval-2023'
PROJECT_GITHUB_URL = 'https://github.com/ai-systems/nli4ct.git'

if not os.path.isdir(PROJECT_DIR):
  !git clone {PROJECT_GITHUB_URL}
else:
  %cd {PROJECT_DIR}
  !git pull {PROJECT_GITHUB_URL}

/content/drive/MyDrive
fatal: destination path 'nli4ct' already exists and is not an empty directory.


## 2) Dataset

In [None]:
# Training data
!unzip /content/drive/MyDrive/nli4ct/training_data.zip

Archive:  /content/drive/MyDrive/nli4ct/training_data.zip
replace training_data/.DS_Store? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [None]:
# Dev set
import json

# Parse the development split into a dict keyed by instance uuid.
dev_path = "/content/drive/MyDrive/training_data/dev.json"
with open(dev_path) as dev_file:
    dev = json.loads(dev_file.read())

# Example instance: show the record stored under the second key.
print(dev[list(dev)[1]])

{'Type': 'Comparison', 'Section_id': 'Eligibility', 'Primary_id': 'NCT00425854', 'Secondary_id': 'NCT01224678', 'Statement': 'Patients with significantly elevated ejection fraction are excluded from the primary trial, but can still be eligible for the secondary trial if they are 55 years of age or over', 'Label': 'Contradiction', 'Primary_evidence_index': [15], 'Secondary_evidence_index': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}


In [None]:
# Instance ids of the dev set, in the dict's iteration order.
uuid_list = list(dev)

# Containers for gold evidence indices; this cell leaves them empty.
gold_dev_primary_evidence = []
gold_dev_secondary_evidence = []

# Retrieve all statements from the development set (aligned with uuid_list).
statements = [dev[uuid]["Statement"] for uuid in uuid_list]

## 3) TF-IDF Entailment prediction baseline

In [None]:
# Directory holding one JSON file per clinical trial report (CTR).
CT_JSON_DIR = "/content/drive/MyDrive/training_data/CT json"

# A mean cosine *distance* strictly above this value is labelled
# "Contradiction"; anything else is labelled "Entailment".
CONTRADICTION_THRESHOLD = 0.9


def _section_distances(ctr_id, section_id, statement):
  """Cosine distances between a statement and every entry of one CTR section.

  Args:
    ctr_id: Trial identifier; the file `<ctr_id>.json` is read from CT_JSON_DIR.
    section_id: Key of the section to read from the trial JSON.
    statement: The statement text to compare against the section entries.

  Returns:
    The array returned by `cosine_distances` for the statement (one row)
    against the section entries (one column per entry).

  Note:
    The TF-IDF vocabulary is fitted on the section entries only, so statement
    terms that do not occur in the section do not contribute to the distance.
    The section is assumed to be a list of text entries -- TODO confirm against
    the CTR JSON schema.
  """
  ctr_path = os.path.join(CT_JSON_DIR, ctr_id + ".json")
  with open(ctr_path) as json_file:
    ctr = json.load(json_file)

  #Retrieve the full section from the trial
  section = ctr[section_id]

  #Convert the section entries and the statement to TF-IDF feature matrices
  vectorizer = TfidfVectorizer().fit(section)
  X_s = vectorizer.transform([statement])
  X_p = vectorizer.transform(section)
  #Cosine distance (not similarity): larger means less lexical overlap
  return cosine_distances(X_s, X_p)


def _label_from_score(score):
  """Map a mean cosine distance to the submission label."""
  if score > CONTRADICTION_THRESHOLD:
    return "Contradiction"
  return "Entailment"


Results = {}

for uuid, statement in zip(uuid_list, statements):
  instance = dev[uuid]
  section_id = instance["Section_id"]

  #Distances between the statement and the primary trial's section entries
  primary_scores = _section_distances(instance["Primary_id"], section_id, statement)

  if instance["Type"] == "Comparison":
    #Repeat for the secondary trial
    secondary_scores = _section_distances(instance["Secondary_id"], section_id, statement)
    #Combine and average the cosine distances of all entries from the relevant
    #section of the secondary and primary trial (every entry weighted equally)
    combined_scores = [*secondary_scores[0], *primary_scores[0]]
    score = numpy.average(combined_scores)
  else:
    #Single-trial instance: average over the primary entries only
    score = numpy.average(primary_scores)

  #Mean distance greater than 0.9 -> contradiction, otherwise entailment
  Prediction = _label_from_score(score)
  Results[str(uuid)] = {"Prediction":Prediction}


## Save the results in the submission format.

In [None]:
print(Results)

# Serialise the predictions (uuid -> {"Prediction": label}) as indented JSON.
results_path = "/content/drive/MyDrive/nli4ct/results.json"
with open(results_path, 'w') as out_file:
    json.dump(Results, out_file, indent=4)

{'1adc970c-d433-44d0-aa09-d3834986f7a2': {'Prediction': 'Contradiction'}, '6b9162d0-0816-46d4-81af-c60028dcc63b': {'Prediction': 'Entailment'}, '0b6cc8e3-69ee-4a91-b93d-2ad3fddce65f': {'Prediction': 'Contradiction'}, 'cc1f712a-2116-4e40-9810-f315e3fa5ff8': {'Prediction': 'Entailment'}, '904061c0-14fa-4f13-9118-9a41e24fa8eb': {'Prediction': 'Entailment'}, '43ee7645-ce1e-42d5-9a74-3e379f6f367b': {'Prediction': 'Contradiction'}, '0cef8c8e-7986-46c7-a597-c5733a9899c0': {'Prediction': 'Contradiction'}, '43ce26e5-03fa-4e9d-b0eb-6ea356295753': {'Prediction': 'Contradiction'}, '3facad41-0221-42f8-834d-470e65c4aad5': {'Prediction': 'Entailment'}, '9cbc00e9-3a2d-4471-a93e-72c95132fb6a': {'Prediction': 'Entailment'}, '8b91cab9-d858-45f3-bf8d-3d6fc55b4818': {'Prediction': 'Entailment'}, '4a75574c-fa86-4e62-a210-81c7b98a3807': {'Prediction': 'Contradiction'}, 'd0b50aeb-aad8-4a8d-aae6-5c58a7d382c7': {'Prediction': 'Entailment'}, 'b0b61978-57db-4a1c-812c-509e8b05f2dc': {'Prediction': 'Contradiction'}

## 4) Evaluation

Run the task 1 evaluation script.

In [None]:
def main():
    """Score the predictions in `Results` against the gold labels in `dev`.

    Labels are binarised with "Entailment" as the positive class (1) and any
    other value as 0. Only the uuids present in `Results` are evaluated.
    Prints F1, precision and recall.
    """
    gold = dev
    results = Results

    def to_binary(label):
        # "Entailment" is the positive class; everything else maps to 0.
        return 1 if label == "Entailment" else 0

    evaluated_uuids = list(results.keys())
    results_pred = [to_binary(results[uuid]["Prediction"]) for uuid in evaluated_uuids]
    gold_labels = [to_binary(gold[uuid]["Label"]) for uuid in evaluated_uuids]

    # Compute every metric before printing anything.
    report = [
        ('F1:{:f}', f1_score(gold_labels, results_pred)),
        ('precision_score:{:f}', precision_score(gold_labels, results_pred)),
        ('recall_score:{:f}', recall_score(gold_labels, results_pred)),
    ]
    for template, value in report:
        print(template.format(value))

if '__main__' == __name__:
    main()

F1:0.502415
precision_score:0.485981
recall_score:0.520000
