In [None]:
import pandas as pd
import os
import random 
from difflib import Differ
import nltk
from spacy.lang.en import English
import numpy as np


random.seed(42)

LST = ['crowd', 'expert', 'LLM']

_TOKENIZER = None


def _get_tokenizer():
    """Return a shared spaCy English tokenizer, creating it on first use."""
    global _TOKENIZER
    if _TOKENIZER is None:
        # Building the blank English pipeline does not depend on the input,
        # so do it once instead of once per sentence pair.
        _TOKENIZER = English().tokenizer
    return _TOKENIZER


def score_minimality(orig_sent: str, edited_sent: str, normalized: bool = True) -> float:
    """Return the token-level Levenshtein distance between two sentences.

    Both sentences are split with spaCy's English tokenizer, and the edit
    distance is computed over the resulting token lists, i.e. the minimum
    number of single-token edits needed to turn one list into the other.

    Parameters:
    - orig_sent (str): The original sentence before editing.
    - edited_sent (str): The edited version of the sentence.
    - normalized (bool, optional): If True, the distance is divided by the
      number of tokens in the original sentence. If False, the raw edit
      distance is returned.

    Returns:
    - float: The minimality score. With ``normalized=True`` this is the
      number of token edits per original token. If the original sentence
      yields no tokens, the denominator is treated as 1 so the call returns
      the raw distance instead of raising ``ZeroDivisionError``.
    """
    tokenizer = _get_tokenizer()
    tokenized_original = [t.text for t in tokenizer(orig_sent)]
    tokenized_edited = [t.text for t in tokenizer(edited_sent)]
    levenshtein_dist = nltk.edit_distance(tokenized_original, tokenized_edited)
    if not normalized:
        return levenshtein_dist
    # Guard against an original that tokenizes to nothing (e.g. "").
    return levenshtein_dist / max(len(tokenized_original), 1)


def compute_dist(s1, s2):
    """Score every aligned pair drawn from two equally long collections.

    The i-th item of ``s1`` is paired with the i-th item of ``s2`` and passed
    to ``score_minimality`` with its default arguments. The scores are
    returned as a list in the same order.

    An ``AssertionError`` is raised when the two inputs differ in length.
    """
    assert len(s1) == len(s2)
    return [score_minimality(before, after) for before, after in zip(s1, s2)]

In [None]:
SPLITS = ['test']
split_name = SPLITS[0]

# Path templates; the placeholders are filled with (LLM run name, revised
# side, split name) in that order.
path_raw = '../llms-raw/{}/NLI/{}/{}.csv'
path_preds = '../llms-ppl-preds/{}/NLI/{}/{}.tsv'

lst_dfs = []
LLMS = ['gpt3.5-20240313', 'gpt4-20240318', 'llama2_70b-20240318', 'llama2-20231209', 'mistral_56b-20240320', 'mistral-20240118']


def _load_raw(llm, revised_side, split):
    """Read one raw CSV and drop every row that has an empty or missing cell."""
    raw = pd.read_csv(path_raw.format(llm, revised_side, split))
    return raw.replace('', np.nan).dropna()


def _check_aligned(raw, preds, llm, revised_side):
    """Raise ValueError if the filtered raw rows and prediction rows differ in count."""
    if len(raw) != len(preds):
        raise ValueError(
            '{}/{}: {} raw rows after filtering but {} prediction rows'.format(
                llm, revised_side, len(raw), len(preds)
            )
        )


for LLM in LLMS:
    df_llm_hypothesis = _load_raw(LLM, 'revised_hypothesis', split_name)
    df_llm_premise = _load_raw(LLM, 'revised_premise', split_name)

    df_llm_hypothesis_preds = pd.read_csv(path_preds.format(LLM, 'revised_hypothesis', split_name), sep='\t')
    df_llm_premise_preds = pd.read_csv(path_preds.format(LLM, 'revised_premise', split_name), sep='\t')

    # The distances are attached to the prediction frames positionally, so
    # each raw frame must have exactly as many rows as its prediction frame.
    _check_aligned(df_llm_hypothesis, df_llm_hypothesis_preds, LLM, 'revised_hypothesis')
    _check_aligned(df_llm_premise, df_llm_premise_preds, LLM, 'revised_premise')

    # Each example is scored as "sentence1 sentence2". For the premise files
    # 'contrast text' takes the place of sentence 1; for the hypothesis files
    # it takes the place of sentence 2. Column-wise concatenation replaces a
    # Python-level lambda call per row.
    premise_f = df_llm_premise['original_sentence1'] + ' ' + df_llm_premise['original_sentence2']
    premise_cf = df_llm_premise['contrast text'] + ' ' + df_llm_premise['original_sentence2']

    hypothesis_f = df_llm_hypothesis['original_sentence1'] + ' ' + df_llm_hypothesis['original_sentence2']
    hypothesis_cf = df_llm_hypothesis['original_sentence1'] + ' ' + df_llm_hypothesis['contrast text']

    dist_premise = compute_dist(premise_f, premise_cf)
    dist_hypothesis = compute_dist(hypothesis_f, hypothesis_cf)

    df_llm_hypothesis_preds['dist'] = dist_hypothesis
    df_llm_premise_preds['dist'] = dist_premise

    # The prediction files are overwritten in place with the extra 'dist' column.
    df_llm_premise_preds.to_csv(path_preds.format(LLM, 'revised_premise', split_name), sep='\t', index=False)
    df_llm_hypothesis_preds.to_csv(path_preds.format(LLM, 'revised_hypothesis', split_name), sep='\t', index=False)
