In [1]:
# Install necessary libraries
import sys
libraries = ["polars", "openai", "instructor", "pydantic"]

for library in libraries:
    try:
        __import__(library)
        print(f"{library} is already installed.")
    except ImportError:
        print(f"{library} not found, installing...")
        !{sys.executable} -m pip install {library}


polars is already installed.
openai is already installed.
instructor is already installed.
pydantic is already installed.


In [2]:
import glob
import pandas as pd
from utils_iaa import transpose_dataframe, rename_columns_with_prefix, prepare_df
import polars as pl
from llm_utils import annotate_synesthesia

# Linguistic Synesthesia - yes or no?: Annotating backtranslations with LLM

## Reading and preparing the files

In [3]:
#reading the files
backtranslations_annotations = {} #creating a df for each annotated file with translated with LLM
for filename in glob.glob("./data/backtranslations/translations*"):
    name = filename.split('/')[-1].split('.')[0].split('annotations_')[-1] #name of the df of each file
    print(name)
    backtranslations_annotations[name] = pd.read_csv(filename, sep=",")

ES_gpt
DE_gpt
ES_google
ES_deepL
DE_deepL
DE_google


In [4]:
# operate on DataFrame 'df' for file each file
result = {}
for name, df in backtranslations_annotations.items():
    df = prepare_df(df)
    data = pl.DataFrame(df) #converting to polars df for the next steps
    result[name] = data

## Annotating with LLM

In [5]:
# Adding a placeholder column for the annotation
annotate_with_llm = []
for df in result.values():
    data = df.with_columns(pl.Series("annotator-llm", range(0, df.height)))
    annotate_with_llm.append(data)

In [6]:
my_instructions =  """Linguistic synaesthesia involves conceptual conflicts created by two concepts from two distinct sensory domains. 
In most cases, linguistic synaesthesia is realized when a sensory lexeme from one sensory domain describes another sensory lexeme from a different sensory domain. 
For example, in a phrase sweet voice, sweet is a concept originated in TASTE while voice is a HEARING concept. (Zhong and Ahrens, 2021)
Here we aim to identify the whether instances selected from poetry from different time periods contain synesthesia or not.

Do you idenfity lingustic synesthesia in the following text?"""


In [7]:
list_annotations = []
for data in annotate_with_llm:
    annotation_dict = annotate_synesthesia(data, my_instructions)
    list_annotations.append(annotation_dict)

 --- I HAVE ANNOTATED TEXT 0 ---
Linguistic synaesthesia involves conceptual conflicts created by two concepts from two distinct sensory domains. 
In most cases, linguistic synaesthesia is realized when a sensory lexeme from one sensory domain describes another sensory lexeme from a different sensory domain. 
For example, in a phrase sweet voice, sweet is a concept originated in TASTE while voice is a HEARING concept. (Zhong and Ahrens, 2021)
Here we aim to identify the whether instances selected from poetry from different time periods contain synesthesia or not.

Do you idenfity lingustic synesthesia in the following text? A bored head among windy spaces.
option='no'


 --- I HAVE ANNOTATED TEXT 1 ---
Linguistic synaesthesia involves conceptual conflicts created by two concepts from two distinct sensory domains. 
In most cases, linguistic synaesthesia is realized when a sensory lexeme from one sensory domain describes another sensory lexeme from a different sensory domain. 
For exampl

In [8]:
# Put the annotation in the dataframe and save
for df, annotation_dict, name in zip(annotate_with_llm,list_annotations,result.keys()):
    data = df.with_columns(pl.col("annotator-llm").replace(annotation_dict))
    data.write_csv(f"./data/backtranslations/all_annotators_{name}.csv")
    print(data)

shape: (30, 5)
┌─────────────────────────────────┬─────┬─────┬─────┬───────────────┐
│ instance                        ┆ R   ┆ H   ┆ A   ┆ annotator-llm │
│ ---                             ┆ --- ┆ --- ┆ --- ┆ ---           │
│ str                             ┆ str ┆ str ┆ str ┆ str           │
╞═════════════════════════════════╪═════╪═════╪═════╪═══════════════╡
│ A bored head among windy space… ┆ No  ┆ No  ┆ No  ┆ no            │
│ A fine and pedantic sun with a… ┆ No  ┆ No  ┆ No  ┆ yes           │
│ A silence permeates the story … ┆ Yes ┆ No  ┆ No  ┆ yes           │
│ And painful must be the storm   ┆ Yes ┆ No  ┆ No  ┆ no            │
│ But through the pale, thin wat… ┆ No  ┆ No  ┆ No  ┆ yes           │
│ …                               ┆ …   ┆ …   ┆ …   ┆ …             │
│ The sessions of sweet silent t… ┆ Yes ┆ Yes ┆ Yes ┆ yes           │
│ The vast world and all its swe… ┆ No  ┆ No  ┆ No  ┆ yes           │
│ Its essence still lives sweetl… ┆ No  ┆ No  ┆ No  ┆ yes           │
│ Lis