In [16]:
import pandas as pd
import glob
import sys
import polars as pl
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from utils_iaa import analyze_all_cases, analyze_cases, process_annotations_and_disagreements, compute_oa, cohen_kappa_pairs, compute_cohen_kappa_all_combinations, get_lists_from_columns, compute_observed_agreement_combinations

# Linguistic Synesthesia - yes or no? Computing IAA on backtranslations

## Computing IAA

In [2]:
#reading the files
backtranslations_annotations = {} #creating a df for each annotated file with translated with LLM
for filename in glob.glob("./data/backtranslations/all_annotators*"):
    name = filename.split('/')[-1].split('.')[0].split('annotators_')[-1] #name of the df of each file
    print(name)
    backtranslations_annotations[name] = pd.read_csv(filename, sep=",")

DE_gpt
DE_google
ES_gpt
ES_google
DE_deepL
ES_deepL


In [3]:
backtranslations_annotations['ES_gpt'].head()

Unnamed: 0,instance,R,H,A,annotator-llm
0,A bored head among windy spaces.,No,No,No,no
1,A fine and pedantic sun with a satin vest.,No,No,No,yes
2,A silence permeates the story and a softness t...,Yes,No,No,yes
3,And painful must be the storm,Yes,No,No,no
4,"But through the pale, thin water, the harmless...",No,No,No,yes


## Cohen's Kappa

In [4]:
columns = ['R', 'H', 'A', 'annotator-llm']
for name, df in backtranslations_annotations.items():
    print(f'IAA - {name}')
    print()
    analyze_all_cases(df, columns)

IAA - DE_gpt

All instances' Cohen's kappa and IAA (all cases):

Cohen's kappa between R and H: 0.0
Cohen's kappa between R and A: 0.49
Cohen's kappa between R and annotator-llm: 0.2
Cohen's kappa between H and A: 0.43
Cohen's kappa between H and annotator-llm: 0.14
Cohen's kappa between A and annotator-llm: 0.17

Inter-Annotator Agreement (Cohen's kappa) score - all cases: 0.24
----------------------------------------------
IAA - DE_google

All instances' Cohen's kappa and IAA (all cases):

Cohen's kappa between R and H: 0.22
Cohen's kappa between R and A: 0.18
Cohen's kappa between R and annotator-llm: 0.41
Cohen's kappa between H and A: 0.0
Cohen's kappa between H and annotator-llm: -0.05
Cohen's kappa between A and annotator-llm: 0.25

Inter-Annotator Agreement (Cohen's kappa) score - all cases: 0.17
----------------------------------------------
IAA - ES_gpt

All instances' Cohen's kappa and IAA (all cases):

Cohen's kappa between R and H: 0.59
Cohen's kappa between R and A: 0.79


## Fleiss Kappa

In [5]:
for name, df in backtranslations_annotations.items():
    print(f'IAA - {name}')
    print()
    analyze_all_cases(df, columns,iaa='fleiss')

IAA - DE_gpt

Inter-Annotator Agreement (Fleiss Kappa) score - all cases: 0.01
----------------------------------------------
IAA - DE_google

Inter-Annotator Agreement (Fleiss Kappa) score - all cases: -0.07
----------------------------------------------
IAA - ES_gpt

Inter-Annotator Agreement (Fleiss Kappa) score - all cases: 0.15
----------------------------------------------
IAA - ES_google

Inter-Annotator Agreement (Fleiss Kappa) score - all cases: 0.07
----------------------------------------------
IAA - DE_deepL

Inter-Annotator Agreement (Fleiss Kappa) score - all cases: 0.05
----------------------------------------------
IAA - ES_deepL

Inter-Annotator Agreement (Fleiss Kappa) score - all cases: 0.02
----------------------------------------------


## Observed Agreement

In [6]:
for name, df in backtranslations_annotations.items():
    print(f'IAA - {name}')
    print()
    analyze_all_cases(df, columns,iaa='oa')

IAA - DE_gpt

All instances' Observed Agreement and IAA (all cases):

Observed agreement between R and H: 0.57
Observed agreement between R and A: 0.77
Observed agreement between R and annotator-llm: 0.63
Observed agreement between H and A: 0.73
Observed agreement between H and annotator-llm: 0.6
Observed agreement between A and annotator-llm: 0.6

Inter-Annotator Agreement (Observed Agreement) score - all cases: 0.65
----------------------------------------------
IAA - DE_google

All instances' Observed Agreement and IAA (all cases):

Observed agreement between R and H: 0.6
Observed agreement between R and A: 0.57
Observed agreement between R and annotator-llm: 0.7
Observed agreement between H and A: 0.63
Observed agreement between H and annotator-llm: 0.5
Observed agreement between A and annotator-llm: 0.67

Inter-Annotator Agreement (Observed Agreement) score - all cases: 0.61
----------------------------------------------
IAA - ES_gpt

All instances' Observed Agreement and IAA (all

## Save IAA for future use

In [8]:
# Save the current standard output
original_stdout = sys.stdout

# Specify the file path where you want to save the output
output_file_path = 'iaa.txt'

# Open the file in write mode
with open(output_file_path, 'a') as f:
    # Redirect standard output to the file
    sys.stdout = f
    print("IAA on Linguistic Synesthesia - Yes or No Backtranslations")
    print()
    # Call the function
    for name, df in backtranslations_annotations.items():
        print(f'IAA - {name}')
        print()
        analyze_all_cases(df, columns)
        analyze_all_cases(df, columns,iaa='fleiss')
        analyze_all_cases(df, columns,iaa='oa')
    print("----------------------------------------------")

# Restore the original standard output
sys.stdout = original_stdout

print(f"IAA function output saved to {output_file_path}")

IAA function output saved to iaa.txt
