In [None]:
import pandas as pd
import os
import random 
from difflib import Differ
import nltk
from spacy.lang.en import English

# Fixed seed for Python's built-in RNG so any random choices are repeatable.
SEED = 42
random.seed(SEED)

def flip_sentiment(x):
    """
    Return the opposite sentiment label.

    Parameters:
    - x: The label to flip; must compare equal to 'Negative' or 'Positive'.
    Returns:
    - str: 'Positive' when x is 'Negative', 'Negative' when x is 'Positive'.
    Raises:
    - ValueError: If x is neither of the two supported labels.
    """
    if x == 'Negative':
        return 'Positive'
    if x == 'Positive':
        return 'Negative'
    # Name the offending value so a malformed row is easy to track down.
    raise ValueError(
        "Unknown sentiment label: {!r} (expected 'Negative' or 'Positive')".format(x)
    )


def compare_sentences(sentence1, sentence2):
    """
    Diff two sentences word by word.

    Both sentences are split on whitespace and compared with difflib.Differ.

    Returns:
    - tuple(list, list): (added_words, removed_words), i.e. the words the diff
      marks with '+ ' and the words it marks with '- ', each in diff order.
    """
    added_words, removed_words = [], []
    for entry in Differ().compare(sentence1.split(), sentence2.split()):
        # Each diff entry is a two-character marker followed by the word itself.
        marker, token = entry[:2], entry[2:]
        if marker == '+ ':
            added_words.append(token)
        elif marker == '- ':
            removed_words.append(token)
    return added_words, removed_words



_TOKENIZER = None  # spaCy tokenizer shared by every score_minimality call, built lazily


def _get_tokenizer():
    """Return the shared spaCy English tokenizer, creating it on first use."""
    global _TOKENIZER
    if _TOKENIZER is None:
        _TOKENIZER = English().tokenizer
    return _TOKENIZER


def score_minimality(orig_sent: str, edited_sent: str, normalized: bool = True) -> float:
    """
    Calculate the token-level Levenshtein distance between two sentences.

    Both sentences are tokenized with the spaCy English tokenizer, and the edit
    distance is computed over the resulting token lists with nltk.edit_distance.

    Parameters:
    - orig_sent (str): The original sentence before editing.
    - edited_sent (str): The edited version of the sentence.
    - normalized (bool, optional): If True, returns the distance divided by the
      number of tokens in the original sentence. If False, returns the raw edit
      distance value.
    Returns:
    - float: The calculated minimality score. If 'normalized' is True, the score
      is the proportion of changes relative to the original sentence length.
      When the original has no tokens, the denominator is clamped to 1 so the
      call returns the raw distance instead of raising ZeroDivisionError.
    """
    tokenizer = _get_tokenizer()
    tokenized_original = [t.text for t in tokenizer(orig_sent)]
    tokenized_edited = [t.text for t in tokenizer(edited_sent)]
    levenshtein_dist = nltk.edit_distance(tokenized_original, tokenized_edited)
    if not normalized:
        return levenshtein_dist
    # Clamp the denominator: an empty original must not crash a whole batch run.
    return levenshtein_dist / max(len(tokenized_original), 1)


def compute_dist(s1, s2):
    """
    Score every aligned pair of sentences with score_minimality.

    Parameters:
    - s1: Sized iterable of sentences, each passed as the original sentence.
    - s2: Sized iterable of sentences, each passed as the edited sentence.
      Must have the same length as s1 (enforced with an assert).
    Returns:
    - list: One score_minimality result per pair, in input order.
    """
    assert len(s1) == len(s2)
    return [score_minimality(original, edited) for original, edited in zip(s1, s2)]


In [None]:
# --- Configuration ---------------------------------------------------------
SPLITS = ['test']
TASK = 'sentiment'
TASK_expert = 'IMDb'
split_name = SPLITS[0]

# One entry per model run directory under ../llms-ppl-preds/.
LLMS = ['gpt3.5-20240313', 'gpt4-20240318', 'llama2_70b-20240318', 'llama2-20231209', 'mistral_56b-20240320', 'mistral-20240118']

# --- Reference data --------------------------------------------------------
# These frames do not depend on the LLM and are not used inside the loop
# below, so they are read once here instead of on every iteration.
df = pd.read_csv('../counterfactually-augmented-data/{}/combined/paired/{}_paired.tsv'.format(TASK, split_name), sep='\t')

df_crowd = df.iloc[::2].reset_index(drop=True)  # rows at even positions
df_crowd_cfs = df.iloc[1::2].reset_index(drop=True)  # rows at odd positions

df_expert = pd.read_csv('../contrast-sets/{}/data/{}_original.tsv'.format(TASK_expert, split_name), sep='\t')
df_expert_cfs = pd.read_csv('../contrast-sets/{}/data/{}_contrast.tsv'.format(TASK_expert, split_name), sep='\t')

# --- Per-LLM distances -----------------------------------------------------
for LLM in LLMS:
    df_llm = pd.read_csv('../llms-ppl-preds/{}/{}/orig/{}.tsv'.format(LLM, TASK, split_name),
                         sep='\t')

    cfs_path = '../llms-ppl-preds/{}/{}/new/{}.tsv'.format(LLM, TASK, split_name)
    df_llm_cfs = pd.read_csv(cfs_path, sep='\t')

    # The two files are paired row by row, so they must be the same length.
    assert len(df_llm) == len(df_llm_cfs)

    # Print the last pair as a quick visual check that the files line up.
    print(LLM)
    print(df_llm.iloc[-1]['Text'])
    print(df_llm_cfs.iloc[-1]['Text'])
    print('-'*20)

    # Original text first, counterfactual second: score_minimality normalizes
    # by the length of its first argument, and the crowd cell uses this order.
    df_llm_cfs['dist'] = compute_dist(df_llm['Text'], df_llm_cfs['Text'])

    # Write the new 'dist' column back into the counterfactual file in place.
    df_llm_cfs.to_csv(cfs_path, sep='\t', index=False)

In [None]:
# Crowd-sourced pairs: the paired TSV is split into its even-position and
# odd-position rows, which are then compared row by row.
paired_template = '../counterfactually-augmented-data/{}/combined/paired/{}_paired.tsv'
df = pd.read_csv(paired_template.format(TASK, split_name), sep='\t')

df_crowd = df.iloc[0::2].reset_index(drop=True)      # rows 0, 2, 4, ...
df_crowd_cfs = df.iloc[1::2].reset_index(drop=True)  # rows 1, 3, 5, ...

# Predictions file that receives the distance column.
preds_path = '../llms-ppl-preds/counterfactually-augmented-data/{}/new/{}.tsv'.format(TASK, split_name)
df_preds = pd.read_csv(preds_path, sep='\t')

# One prediction row is expected per odd-position row.
assert len(df_crowd_cfs) == len(df_preds)

crowd_dist = compute_dist(df_crowd['Text'], df_crowd_cfs['Text'])
df_preds['dist'] = crowd_dist
df_preds.to_csv(preds_path, sep='\t', index=False)

In [None]:
# Expert (contrast-set) originals and their contrast versions.
df_expert = pd.read_csv('../contrast-sets/{}/data/{}_original.tsv'.format('IMDb', split_name), sep='\t')
df_expert_cfs = pd.read_csv('../contrast-sets/{}/data/{}_contrast.tsv'.format('IMDb', split_name), sep='\t')

# Predictions file that receives the distance column.
preds_path = '../llms-ppl-preds/contrast-sets/IMDb/data/{}.tsv'.format(split_name)
df_preds_expert = pd.read_csv(preds_path, sep='\t')

assert len(df_expert) == len(df_preds_expert)

# Distances must come from the expert pairs loaded above. This line used to
# pass the crowd frames, which stored crowd distances in the expert file.
# NOTE(review): assumes the contrast-set TSVs have a 'Text' column like the
# other inputs - confirm.
df_preds_expert['dist'] = compute_dist(df_expert['Text'], df_expert_cfs['Text'])
df_preds_expert

In [None]:
# Write the expert predictions, including the 'dist' column, back to preds_path.
df_preds_expert.to_csv(
    path_or_buf=preds_path,
    sep='\t',
    index=False,
)