In [1]:
# Install any missing third-party libraries into the environment of the running kernel.
import importlib
import subprocess
import sys

libraries = ["polars", "openai", "instructor", "pydantic"]

for library in libraries:
    try:
        # Importing is the cheapest way to find out whether the package is available.
        importlib.import_module(library)
        print(f"{library} is already installed.")
    except ImportError:
        print(f"{library} not found, installing...")
        # An argument list (instead of an interpolated shell line) keeps working when
        # the interpreter path contains spaces, and check_call raises if pip fails
        # instead of silently continuing with a missing dependency.
        subprocess.check_call([sys.executable, "-m", "pip", "install", library])


polars is already installed.
openai is already installed.
instructor is already installed.
pydantic is already installed.


In [2]:
import pandas as pd
from utils_iaa import transpose_dataframe, rename_columns_with_prefix
import polars as pl
from llm_utils import do_translation

# Linguistic Synesthesia - Translation and Backtranslation with LLM

## Reading and preparing the files

In [3]:
# Load the annotated corpus (a comma-separated CSV) into a pandas DataFrame.
filepath = "./final_corpus/final_corpus_synesthesia_yes_no.csv"
df = pd.read_csv(filepath)  # "," is already the default separator

In [4]:
# Keep only the rows whose `label` column is "yes" (instances labelled as synesthesia).
is_synesthesia = df["label"].eq("yes")
df = df.loc[is_synesthesia]

In [6]:
# Convert the filtered pandas frame into a polars DataFrame; the following cells
# work with polars expressions (with_columns / pl.col). Polars is used here to
# mirror the workflow of Enrica's notebook.
data = pl.DataFrame(df)

## Translating with LLM

In [7]:
# Create one integer placeholder column per LLM output. Every column holds the
# row positions 0 .. data.height - 1; later cells call `.replace(...)` on these
# columns to swap the positions for the translated text.
placeholder_names = (
    "ES_translation_LLM",      # Spanish translations
    "DE_translation_LLM",      # German translations
    "ES_backtranslation_LLM",  # Spanish backtranslations
    "DE_backtranslation_LLM",  # German backtranslations
)
data = data.with_columns(
    *[pl.Series(name, range(0, data.height)) for name in placeholder_names]
)
print(data)

shape: (70, 6)
┌──────────────────┬───────┬─────────────────┬─────────────────┬─────────────────┬─────────────────┐
│ instance         ┆ label ┆ ES_translation_ ┆ DE_translation_ ┆ ES_backtranslat ┆ DE_backtranslat │
│ ---              ┆ ---   ┆ LLM             ┆ LLM             ┆ ion_LLM         ┆ ion_LLM         │
│ str              ┆ str   ┆ ---             ┆ ---             ┆ ---             ┆ ---             │
│                  ┆       ┆ i64             ┆ i64             ┆ i64             ┆ i64             │
╞══════════════════╪═══════╪═════════════════╪═════════════════╪═════════════════╪═════════════════╡
│ A dull head      ┆ yes   ┆ 0               ┆ 0               ┆ 0               ┆ 0               │
│ among windy      ┆       ┆                 ┆                 ┆                 ┆                 │
│ spaces…          ┆       ┆                 ┆                 ┆                 ┆                 │
│ A fine, pedantic ┆ yes   ┆ 1               ┆ 1               ┆ 1          

In [8]:
# Translate the corpus once per target language, in the order listed, and keep
# each result dict under its own name for the cell that fills the dataframe.
language = ["Spanish", "German"]
translations_by_language = {}
for item in language:
    my_instructions = f"Translate the following text into {item}"
    translations_by_language[item] = do_translation(data, my_instructions)

es_translation_dict = translations_by_language["Spanish"]
de_translation_dict = translations_by_language["German"]

 --- I HAVE TRANSLATED TEXT 0 ---
Translate the following text into Spanish A dull head among windy spaces.
Una cabeza aburrida entre espacios ventosos.


 --- I HAVE TRANSLATED TEXT 1 ---
Translate the following text into Spanish A fine, pedantic sunshine in a satin vest
Un fino y pedante sol con un chaleco de satén.


 --- I HAVE TRANSLATED TEXT 2 ---
Translate the following text into Spanish A lonely cab-horse steams and stamps.
Un caballo de taxi solitario resopla y patea.


 --- I HAVE TRANSLATED TEXT 3 ---
Translate the following text into Spanish A silence suffuses the story, and a softness the teller’s eye
Un silencio impregna la historia y una suavidad el ojo del narrador.


 --- I HAVE TRANSLATED TEXT 4 ---
Translate the following text into Spanish And sore must be the storm
Y dolorosa debe ser la tormenta


 --- I HAVE TRANSLATED TEXT 5 ---
Translate the following text into Spanish Anecdotes of air in dungeons have sometimes proved deadly sweet!
¡Las anécdotas del aire en la

In [9]:
# Put the translations into ES and DE in the dataframe.
# The placeholder columns hold integer row ids (i64) while the translations are
# strings. Recent polars releases make `replace` keep the column's dtype, so an
# i64 -> str replacement raises there. Casting the ids to strings first and then
# replacing str -> str works on both older and newer polars and yields str columns.
# assumes the dicts returned by do_translation are keyed by those row ids — TODO confirm
data = data.with_columns(
    pl.col("ES_translation_LLM")
    .cast(pl.Utf8)
    .replace({str(row_id): text for row_id, text in es_translation_dict.items()}),
    pl.col("DE_translation_LLM")
    .cast(pl.Utf8)
    .replace({str(row_id): text for row_id, text in de_translation_dict.items()}),
)
print(data)

shape: (70, 6)
┌──────────────────┬───────┬─────────────────┬─────────────────┬─────────────────┬─────────────────┐
│ instance         ┆ label ┆ ES_translation_ ┆ DE_translation_ ┆ ES_backtranslat ┆ DE_backtranslat │
│ ---              ┆ ---   ┆ LLM             ┆ LLM             ┆ ion_LLM         ┆ ion_LLM         │
│ str              ┆ str   ┆ ---             ┆ ---             ┆ ---             ┆ ---             │
│                  ┆       ┆ str             ┆ str             ┆ i64             ┆ i64             │
╞══════════════════╪═══════╪═════════════════╪═════════════════╪═════════════════╪═════════════════╡
│ A dull head      ┆ yes   ┆ Una cabeza      ┆ Ein stumpfer    ┆ 0               ┆ 0               │
│ among windy      ┆       ┆ aburrida entre  ┆ Kopf unter      ┆                 ┆                 │
│ spaces…          ┆       ┆ espa…           ┆ windig…         ┆                 ┆                 │
│ A fine, pedantic ┆ yes   ┆ Un fino y       ┆ Ein feiner,     ┆ 1          

## Backtranslating with LLM

In [12]:
# Translate the Spanish and German columns back into English, one call per
# source column, and expose each result dict under its own name.
my_instructions = "Translate the following text into English"
column = ["ES_translation_LLM", "DE_translation_LLM"]
backtranslations_by_column = {}
for column_name in column:
    backtranslations_by_column[column_name] = do_translation(data, my_instructions, column_name)

es_backtranslation_dict = backtranslations_by_column["ES_translation_LLM"]
de_backtranslation_dict = backtranslations_by_column["DE_translation_LLM"]

 --- I HAVE TRANSLATED TEXT 0 ---
Translate the following text into English Una cabeza aburrida entre espacios ventosos.
A bored head among windy spaces.


 --- I HAVE TRANSLATED TEXT 1 ---
Translate the following text into English Un fino y pedante sol con un chaleco de satén.
A fine and pedantic sun with a satin vest.


 --- I HAVE TRANSLATED TEXT 2 ---
Translate the following text into English Un caballo de taxi solitario resopla y patea.
A lone taxi horse snorts and kicks.


 --- I HAVE TRANSLATED TEXT 3 ---
Translate the following text into English Un silencio impregna la historia y una suavidad el ojo del narrador.
A silence permeates the story and a softness the narrator's eye.


 --- I HAVE TRANSLATED TEXT 4 ---
Translate the following text into English Y dolorosa debe ser la tormenta
And painful must be the storm


 --- I HAVE TRANSLATED TEXT 5 ---
Translate the following text into English ¡Las anécdotas del aire en las mazmorras a veces han resultado dulcemente mortales!
The 

In [13]:
# Put the backtranslations into EN in the dataframe and save the file.
# The placeholder columns hold integer row ids (i64) while the backtranslations
# are strings. Recent polars releases make `replace` keep the column's dtype, so
# an i64 -> str replacement raises there. Casting the ids to strings first and
# then replacing str -> str works on both older and newer polars.
# assumes the dicts returned by do_translation are keyed by those row ids — TODO confirm
data = data.with_columns(
    pl.col("ES_backtranslation_LLM")
    .cast(pl.Utf8)
    .replace({str(row_id): text for row_id, text in es_backtranslation_dict.items()}),
    pl.col("DE_backtranslation_LLM")
    .cast(pl.Utf8)
    .replace({str(row_id): text for row_id, text in de_backtranslation_dict.items()}),
)
data.write_csv("./data/ES_DE_backtranslations_ChatGPT.csv")
print(data)

shape: (70, 6)
┌──────────────────┬───────┬─────────────────┬─────────────────┬─────────────────┬─────────────────┐
│ instance         ┆ label ┆ ES_translation_ ┆ DE_translation_ ┆ ES_backtranslat ┆ DE_backtranslat │
│ ---              ┆ ---   ┆ LLM             ┆ LLM             ┆ ion_LLM         ┆ ion_LLM         │
│ str              ┆ str   ┆ ---             ┆ ---             ┆ ---             ┆ ---             │
│                  ┆       ┆ str             ┆ str             ┆ str             ┆ str             │
╞══════════════════╪═══════╪═════════════════╪═════════════════╪═════════════════╪═════════════════╡
│ A dull head      ┆ yes   ┆ Una cabeza      ┆ Ein stumpfer    ┆ A bored head    ┆ A dull head     │
│ among windy      ┆       ┆ aburrida entre  ┆ Kopf unter      ┆ among windy     ┆ under windy     │
│ spaces…          ┆       ┆ espa…           ┆ windig…         ┆ space…          ┆ rooms.          │
│ A fine, pedantic ┆ yes   ┆ Un fino y       ┆ Ein feiner,     ┆ A fine and 