In [None]:
##################################################
# REMEMBER TO CHANGE THE INPUT & OUTPUT FILENAMES
##################################################

# Install Python packages and alternative "SentencePiece" tokenizer (instead of the BERT tokenizer)
!pip install datasets evaluate transformers[sentencepiece]

# Set up the environment
import numpy as np
import pandas as pd
from transformers import pipeline
from tqdm import tqdm

In [None]:
# Load the zero-shot classifier (MODEL 3).
# Alternative checkpoints -- swap the `model` value below to use one of them:
#   MODEL 1: "facebook/bart-large-mnli"
#   MODEL 2: "MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli"
classifier = pipeline(
    task="zero-shot-classification",
    model="Narsil/deberta-large-mnli-zero-cls",
)

In [None]:
# Load the dataset (upload csv file first)
# Change INPUT_CSV to point at the file that should be scored.
INPUT_CSV = 'complete_dataset.csv'

# Columns that calculate_scores reads from every row.
REQUIRED_COLUMNS = ['Meaning', 'Target_1', 'Target_2', 'Target_3']

results_table = pd.read_csv(INPUT_CSV)

# Fail fast with a clear message instead of hitting a KeyError on the first
# row of the (slow) scoring loop.
missing_columns = [name for name in REQUIRED_COLUMNS if name not in results_table.columns]
if missing_columns:
    raise KeyError(f"{INPUT_CSV} is missing required column(s): {missing_columns}")

In [None]:
# Define a function to calculate the scores for each row
def calculate_scores(row):
    """Score one dataset row against each of its three target keywords.

    The classifier is called once per keyword with that keyword as the only
    candidate label, so the three scores come from separate calls and are not
    normalised against each other.

    Args:
        row: Mapping-like row (e.g. a ``pd.Series`` yielded by
            ``DataFrame.iterrows``) providing the keys "Meaning",
            "Target_1", "Target_2" and "Target_3".

    Returns:
        pd.Series: The three scores in Target_1, Target_2, Target_3 order.
    """
    # Text to classify comes from the "Meaning" column
    text = row["Meaning"]
    # Collect the target keywords for this row
    target_keywords = [row['Target_1'], row['Target_2'], row['Target_3']]
    # Make a prediction for each target keyword
    scores = []
    for keyword in target_keywords:
        # Pass the keyword as a one-element list. A bare string is treated by
        # the zero-shot pipeline as a comma-separated list of labels, so a
        # keyword containing a comma would be split into several labels and
        # scores[0] would no longer be the score of the keyword itself.
        prediction = classifier(text, candidate_labels=[keyword])
        scores.append(prediction['scores'][0])
    return pd.Series(scores)

In [None]:
# Score every row of the dataset, with a progress bar over the rows.
# extend() consumes the generator one item at a time, so scores gathered so far
# stay in scores_list if a later row raises.
scores_list = []
row_iterator = tqdm(results_table.iterrows(), total=len(results_table))
scores_list.extend(calculate_scores(record) for _, record in row_iterator)

In [None]:
# Stack the per-row score Series into a single table (one table row per Series)
scores_table = pd.DataFrame(data=scores_list)

In [None]:
# Name the score columns after the target keyword columns they correspond to
score_column_names = ['Target_1_Score', 'Target_2_Score', 'Target_3_Score']
scores_table.columns = score_column_names

In [None]:
# Place the score columns next to the original dataset columns
# (concat with axis='columns' lines the two tables up by row index)
frames = [results_table, scores_table]
new_table = pd.concat(frames, axis='columns')

# Write the combined table to a CSV file, leaving out the row index
new_table.to_csv(path_or_buf='complete_dataset_with_scores.csv', index=False)