In [1]:
# Install necessary libraries
import sys
libraries = ["polars", "openai", "instructor", "pydantic"]

for library in libraries:
    try:
        __import__(library)
        print(f"{library} is already installed.")
    except ImportError:
        print(f"{library} not found, installing...")
        !{sys.executable} -m pip install {library}


polars is already installed.
openai is already installed.
instructor is already installed.
pydantic is already installed.


In [2]:
import pandas as pd
from utils_iaa import prepare_df
import polars as pl
from llm_utils import annotate_synesthesia

# Linguistic Synesthesia - yes or no?: Annotating with LLM

## Reading and preparing the files

In [3]:
#reading the file
filepath = "./data/synesthesia-yes-no.csv"
df = pd.read_csv(filepath, sep=",")

In [4]:
df

Unnamed: 0,Timestamp,A dull head among windy spaces.,"A fine, pedantic sunshine in a satin vest",A lonely cab-horse steams and stamps.,"A silence suffuses the story, and a softness the teller’s eye",And sore must be the storm,Anecdotes of air in dungeons have sometimes proved deadly sweet!,Ascending from the damp savannas,But just to hear the grace depart,But through the water pale and thin Still shine the unoffending feet,...,the floors of silent seas.,the perfumed tincture of the roses,the sessions of sweet silent thought,the wide world and all her fading sweets,their substance still lives sweet,through heavy sleep on sightless eyes doth stay,"thy hungry eyes, even till they wink with fulness",to hear with eyes belongs to love’s fine wit,what strained touches rhetoric can lend,your sweet semblance to some other give
0,06/05/2024 21:57:50,yes,yes,no,yes,yes,yes,no,yes,yes,...,yes,no,yes,yes,yes,no,yes,yes,yes,yes
1,07/05/2024 09:24:15,yes,yes,yes,yes,yes,yes,yes,yes,yes,...,yes,yes,yes,yes,yes,yes,yes,yes,no,yes
2,08/05/2024 10:02:43,yes,yes,yes,yes,yes,no,yes,no,yes,...,yes,yes,yes,yes,yes,no,no,yes,no,yes


In [5]:
df = prepare_df(df)

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90 entries, 0 to 89
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   instance    90 non-null     object
 1   annotator1  90 non-null     object
 2   annotator2  90 non-null     object
 3   annotator3  90 non-null     object
dtypes: object(4)
memory usage: 2.9+ KB


## Annotating with LLM

In [9]:
#making a new df with polars for sake of mimicking Enrica's notebook
data = pl.DataFrame(df)

In [10]:
# Adding a placeholder column for the annotation
data = data.with_columns(pl.Series("annotator-llm", range(0, data.height)))
print(data)

shape: (90, 5)
┌─────────────────────────────────┬────────────┬────────────┬────────────┬───────────────┐
│ instance                        ┆ annotator1 ┆ annotator2 ┆ annotator3 ┆ annotator-llm │
│ ---                             ┆ ---        ┆ ---        ┆ ---        ┆ ---           │
│ str                             ┆ str        ┆ str        ┆ str        ┆ i64           │
╞═════════════════════════════════╪════════════╪════════════╪════════════╪═══════════════╡
│ A dull head among windy spaces… ┆ yes        ┆ yes        ┆ yes        ┆ 0             │
│ A fine, pedantic sunshine in a… ┆ yes        ┆ yes        ┆ yes        ┆ 1             │
│ A lonely cab-horse steams and … ┆ no         ┆ yes        ┆ yes        ┆ 2             │
│ A silence suffuses the story, … ┆ yes        ┆ yes        ┆ yes        ┆ 3             │
│ And sore must be the storm      ┆ yes        ┆ yes        ┆ yes        ┆ 4             │
│ …                               ┆ …          ┆ …          ┆ …          ┆ 

In [11]:
my_instructions =  """Linguistic synaesthesia involves conceptual conflicts created by two concepts from two distinct sensory domains. 
In most cases, linguistic synaesthesia is realized when a sensory lexeme from one sensory domain describes another sensory lexeme from a different sensory domain. 
For example, in a phrase sweet voice, sweet is a concept originated in TASTE while voice is a HEARING concept. (Zhong and Ahrens, 2021)
Here we aim to identify the whether instances selected from poetry from different time periods contain synesthesia or not.

Do you idenfity lingustic synesthesia in the following text?"""


In [12]:
annotation_dict = annotate_synesthesia(data, my_instructions)

 --- I HAVE ANNOTATED TEXT 0 ---
Linguistic synaesthesia involves conceptual conflicts created by two concepts from two distinct sensory domains. 
In most cases, linguistic synaesthesia is realized when a sensory lexeme from one sensory domain describes another sensory lexeme from a different sensory domain. 
For example, in a phrase sweet voice, sweet is a concept originated in TASTE while voice is a HEARING concept. (Zhong and Ahrens, 2021)
Here we aim to identify the whether instances selected from poetry from different time periods contain synesthesia or not.

Do you idenfity lingustic synesthesia in the following text? A dull head among windy spaces.
option='no'


 --- I HAVE ANNOTATED TEXT 1 ---
Linguistic synaesthesia involves conceptual conflicts created by two concepts from two distinct sensory domains. 
In most cases, linguistic synaesthesia is realized when a sensory lexeme from one sensory domain describes another sensory lexeme from a different sensory domain. 
For example

In [13]:
# Put the annotation in the dataframe and save
data = data.with_columns(pl.col("annotator-llm").replace(annotation_dict))
data.write_csv("./data/final_synesthesia-yes-no_all-annotators.csv")
print(data)

shape: (90, 5)
┌─────────────────────────────────┬────────────┬────────────┬────────────┬───────────────┐
│ instance                        ┆ annotator1 ┆ annotator2 ┆ annotator3 ┆ annotator-llm │
│ ---                             ┆ ---        ┆ ---        ┆ ---        ┆ ---           │
│ str                             ┆ str        ┆ str        ┆ str        ┆ str           │
╞═════════════════════════════════╪════════════╪════════════╪════════════╪═══════════════╡
│ A dull head among windy spaces… ┆ yes        ┆ yes        ┆ yes        ┆ no            │
│ A fine, pedantic sunshine in a… ┆ yes        ┆ yes        ┆ yes        ┆ no            │
│ A lonely cab-horse steams and … ┆ no         ┆ yes        ┆ yes        ┆ yes           │
│ A silence suffuses the story, … ┆ yes        ┆ yes        ┆ yes        ┆ yes           │
│ And sore must be the storm      ┆ yes        ┆ yes        ┆ yes        ┆ no            │
│ …                               ┆ …          ┆ …          ┆ …          ┆ 