In [45]:
import pandas as pd
from ast import literal_eval
import numpy as np

In [46]:
# Input CSV locations, relative to the notebook's working directory.
# Raw strings keep every backslash literal: the previous plain literals relied
# on invalid escape sequences (`\p`, `\o`, `\l`, `\m`), which emit a
# SyntaxWarning on recent Python versions. The resulting values are unchanged.
# NOTE(review): these use Windows-style separators, so they presumably only
# resolve on Windows — confirm before running on another OS.
ANNOTATORS_PATH = r'..\annotator_labels\paragraph_gold_annotator.csv'
LLAMA_PATH = r'..\ollama\llama3-instruct-8b-q4\llama3_prediction.csv'
MISTRAL_PATH = r'..\ollama\mistral_7b-instruct-q4_K_M\mistral_prediction.csv'

In [47]:
# Gold labels plus the per-paragraph annotator votes.
annotators_df = pd.read_csv(ANNOTATORS_PATH, index_col=False)

# Each 'annotator_paragraph' cell is turned into a list of ints: the cell is
# stringified, surrounding brackets are stripped, and the remainder is split
# on whitespace. 'nan' tokens are dropped, so a missing cell becomes [].
annotators_df['annotator_paragraph'] = annotators_df['annotator_paragraph'].apply(
    lambda cell: [
        int(token)
        for token in str(cell).strip('[]').split()
        if token != 'nan'
    ]
)

# Model prediction tables, one per LLM.
llama_df = pd.read_csv(LLAMA_PATH, index_col=False)
mistral_df = pd.read_csv(MISTRAL_PATH, index_col=False)

In [48]:
# Gold label and parsed annotator votes for every row.
labels, annotators = annotators_df['label'], annotators_df['annotator_paragraph']

# literal_eval is applied to every prediction cell, so the cells are expected
# to contain Python literals serialised as text (presumably lists of labels).
llama = llama_df['llama3_labels'].apply(literal_eval)
mistral = mistral_df['mistral_labels'].apply(literal_eval)

In [49]:
# remove the last text: only the first 157 entries of each series are kept
annotators, labels, llama, mistral = (
    series[:157] for series in (annotators, labels, llama, mistral)
)

In [50]:
def calculate_percentages(labels, annotators, llm):
    """Compute, per item, the share of annotator and LLM votes matching the gold label.

    Parameters
    ----------
    labels : sequence of int
        Gold label per item. A label of ``0`` is scored by the share of zeros;
        any other label is scored by the share of ones.
    annotators : sequence of sequence of int
        Annotator votes per item (assumed to be 0/1, since the share of zeros
        is derived as ``1 - mean``).
    llm : sequence of list of int
        Model predictions per item. Only the first ``len(annotators[i])``
        predictions are counted; the denominator is always the number of
        annotator votes, so a shorter prediction list lowers the share.

    Returns
    -------
    tuple of (list, list)
        ``(ann_percentage, llm_percentage)``, each with one entry per label.
        Items without any annotator votes yield ``np.nan`` in both lists.

    Notes
    -----
    Inputs are read positionally and are never modified. Earlier revisions
    wrote the truncated predictions back into ``llm`` and indexed by label,
    which altered the caller's data and depended on a 0..n-1 index.
    """
    # Materialise positional views so pandas Series with arbitrary indexes
    # behave exactly like plain lists.
    label_values = list(labels)
    annotator_votes = list(annotators)
    llm_votes = list(llm)

    ann_percentage = []
    llm_percentage = []
    for position, label in enumerate(label_values):
        votes = annotator_votes[position]
        n_votes = len(votes)
        # Local slice only: the caller's prediction list is left untouched.
        predictions = llm_votes[position][:n_votes]

        if n_votes == 0:
            # No votes to compare against: record NaN instead of dividing by zero.
            ann_percentage.append(np.nan)
            llm_percentage.append(np.nan)
            continue

        if label == 0:
            ann_percentage.append(1 - np.mean(votes))
            llm_percentage.append(predictions.count(0) / n_votes)
        else:
            ann_percentage.append(np.mean(votes))
            llm_percentage.append(predictions.count(1) / n_votes)
    return ann_percentage, llm_percentage

In [51]:
# Score the Llama 3 predictions against the gold labels and annotator votes.
ann_percentage, llama_percentage = calculate_percentages(
    labels=labels,
    annotators=annotators,
    llm=llama,
)

In [52]:
# Score the Mistral predictions; the annotator percentages are recomputed
# here under a separate name.
ann_percentage1, mistral_percentage = calculate_percentages(
    labels=labels,
    annotators=annotators,
    llm=mistral,
)

In [53]:
# Sanity check: both scoring runs must report the same annotator percentages.
assert ann_percentage1 == ann_percentage

In [54]:
# Text identifiers, limited to the first 157 rows.
text_ids = annotators_df['text-id'][:157]

In [55]:
# Assemble one row per text: id, gold label, and the three percentages.
df = pd.DataFrame(dict(
    text_ids=text_ids,
    labels=labels,
    annotators_percentage=ann_percentage,
    llama_percentage=llama_percentage,
    mistral_percentage=mistral_percentage,
))

# Write the table to the working directory, omitting the index column.
df.to_csv('percentages.csv', index=False)