# Spatial relations in Latin preverbed motion verbs using LLMs

## Initialisation

In [None]:
import os
import re

import google.generativeai as genai
import openai
import pandas as pd
from dotenv import load_dotenv
from openai import OpenAI
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split

In [None]:
# Read the API key from the environment rather than hard-coding a secret in
# the notebook. load_dotenv() also picks up a local .env file when one
# exists, so OPENAI_API_KEY can live outside version control.
load_dotenv()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Smoke test: send a one-message chat request and print the reply, so that
# credential or connectivity problems surface before the long runs below.
response = client.chat.completions.create(
    model="gpt-4",
    messages=[
        {"role": "user", "content": "Hello!"}
    ]
)
print(response.choices[0].message.content)

Input files here.

## Read annotated texts

Create a data frame with the annotated texts:

In [None]:
# Load the annotated corpus from the CSV at `annotated_texts`; header=0 takes
# the column names from the first line of the file.
# NOTE(review): `annotated_texts` is not defined in the cells shown here —
# presumably it is set in the input-files cell above; confirm.
annotated = pd.read_csv(annotated_texts, header = 0)
# Preview the first rows of the frame.
annotated.head()

As the `annotated` dataset includes both Ancient Greek and Latin, I keep only the Latin rows whose preverb is one of the selected Latin preverbs.

In [None]:
# Preverbs retained for the Latin data.
selected_Latin_preverbs = ['ad', 'ex', 'per', 'in', 'cum', 'ab', 'trans', 'sub', 'ob', 'pro']

# A row is kept only when its preverb is in the list above AND its language
# is Latin; the two conditions are built separately and then combined.
has_selected_preverb = annotated['PREVERB'].isin(selected_Latin_preverbs)
is_latin = annotated['language'] == 'Latin'
annotated_Latin = annotated[has_selected_preverb & is_latin]
annotated_Latin.head()

I filter out rows where no spatial relations occur.

In [None]:
# Keep only the rows that carry a value in 'SPATIAL RELATION ROLE'.
# The explicit .copy() makes this an independent frame: the later cells add
# prediction columns to it with .loc, and writing into a boolean-indexed
# slice of another frame is what triggers pandas' SettingWithCopyWarning.
annotated_Latin_filtered = annotated_Latin[annotated_Latin['SPATIAL RELATION ROLE'].notna()].copy()
annotated_Latin_filtered.head()

# Task 1: Motion Verb Identification

In [None]:
def make_prompts(df, sentence, verb, preverb, number_of_shots, input_col='SENTENCE', gold_standard_col='VERB TOKEN'):
    """Build the verb-identification prompt, optionally with worked examples.

    Args:
        df: DataFrame the few-shot examples are sampled from; it must contain
            the columns named by ``input_col`` and ``gold_standard_col``.
        sentence: Sentence to analyse; it is placed last in the prompt.
        verb: Verb whose forms the model is asked to identify.
        preverb: Not used in the prompt; kept so existing callers still work.
        number_of_shots: Number of solved examples to include (0 for none).
        input_col: Column holding the example sentences.
        gold_standard_col: Column holding the answer shown for each example.

    Returns:
        The prompt string, ending in ``"Answer:"``.

    Raises:
        ValueError: Propagated from ``DataFrame.sample`` when fewer than
            ``number_of_shots`` candidate rows are available.
    """
    prompt = (
        f"This is a task of Latin linguistics. Given the following Latin sentences, "
        f"identify all the forms of the verb '{verb}' across all sentences. "
        f"Note that verbs may occur more than once and in more than one sentence, "
        f"so PROVIDE ALL THE FORMS YOU DETECT."
    )

    if number_of_shots > 0:
        # Never present the sentence under evaluation as a solved example:
        # that would hand the model the gold answer for its own query.
        candidates = df[df[input_col] != sentence]
        # Fixed seed: the same examples are drawn for a given candidate set.
        shots = candidates.sample(n=number_of_shots, random_state=42)
        examples = [
            f"Sentence: {row[input_col]}\nAnswer: {row[gold_standard_col]}"
            for _, row in shots.iterrows()
        ]
        prompt += "\n\n" + "\n\n".join(examples)

    prompt += f"\n\nSentence: {sentence}\nAnswer:"
    return prompt

In [None]:
# Task 1, zero-shot run: one chat request per annotated row. The reply is
# stored in the 'predicted' column, replacing any value already there.
responses = []
total_rows = len(annotated_Latin_filtered)

for row_num, (idx, row) in enumerate(annotated_Latin_filtered.iterrows(), start=1):
    # Progress message on every tenth row.
    if not row_num % 10:
        print(f"Processing row {row_num} of {total_rows}")

    try:
        prompt = make_prompts(
            annotated_Latin_filtered,
            sentence=row['SENTENCE'],
            verb=row['LEMMA'],
            preverb=row['PREVERB'],
            number_of_shots=0,
            input_col='SENTENCE',
            gold_standard_col='VERB TOKEN',
        )
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="gpt-4",
        )
        content = response.choices[0].message.content.strip()
    except Exception as e:
        # Any failure while building the prompt or querying the model is
        # reported and recorded as "ERROR" so the run can continue.
        print(f"Error at row {row_num}: {e}")
        prediction = "ERROR"
    else:
        # An empty reply is stored as "N/A".
        prediction = content or "N/A"

    responses.append(prediction)
    annotated_Latin_filtered.loc[idx, 'predicted'] = prediction

## One shot

In [None]:
def make_prompts(df, sentence, verb, preverb, number_of_shots, input_col='SENTENCE', gold_standard_col='VERB TOKEN'):
    """Build the verb-identification prompt, optionally with worked examples.

    Args:
        df: DataFrame the few-shot examples are sampled from; it must contain
            the columns named by ``input_col`` and ``gold_standard_col``.
        sentence: Sentence to analyse; it is placed last in the prompt.
        verb: Verb whose forms the model is asked to identify.
        preverb: Not used in the prompt; kept so existing callers still work.
        number_of_shots: Number of solved examples to include (0 for none).
        input_col: Column holding the example sentences.
        gold_standard_col: Column holding the answer shown for each example.

    Returns:
        The prompt string, ending in ``"Answer:"``.

    Raises:
        ValueError: Propagated from ``DataFrame.sample`` when fewer than
            ``number_of_shots`` candidate rows are available.
    """
    prompt = (
        f"This is a task of Latin linguistics. Given the following Latin sentences, "
        f"identify all the forms of the verb '{verb}' across all sentences. "
        f"Note that verbs may occur more than once and in more than one sentence, "
        f"so PROVIDE ALL THE FORMS YOU DETECT."
    )

    if number_of_shots > 0:
        # Never present the sentence under evaluation as a solved example:
        # that would hand the model the gold answer for its own query.
        candidates = df[df[input_col] != sentence]
        # Fixed seed: the same examples are drawn for a given candidate set.
        shots = candidates.sample(n=number_of_shots, random_state=42)
        examples = [
            f"Sentence: {row[input_col]}\nAnswer: {row[gold_standard_col]}"
            for _, row in shots.iterrows()
        ]
        prompt += "\n\n" + "\n\n".join(examples)

    prompt += f"\n\nSentence: {sentence}\nAnswer:"
    return prompt

In [None]:
# Task 1, one-shot run: one chat request per annotated row, with a single
# solved example in the prompt. The reply is stored in the 'predicted'
# column, replacing any value already there.
responses = []
total_rows = len(annotated_Latin_filtered)

for row_num, (idx, row) in enumerate(annotated_Latin_filtered.iterrows(), start=1):
    # Progress message on every tenth row.
    if not row_num % 10:
        print(f"Processing row {row_num} of {total_rows}")

    try:
        prompt = make_prompts(
            annotated_Latin_filtered,
            sentence=row['SENTENCE'],
            verb=row['LEMMA'],
            preverb=row['PREVERB'],
            number_of_shots=1,
            input_col='SENTENCE',
            gold_standard_col='VERB TOKEN',
        )
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="gpt-4",
        )
        content = response.choices[0].message.content.strip()
    except Exception as e:
        # Any failure while building the prompt or querying the model is
        # reported and recorded as "ERROR" so the run can continue.
        print(f"Error at row {row_num}: {e}")
        prediction = "ERROR"
    else:
        # An empty reply is stored as "N/A".
        prediction = content or "N/A"

    responses.append(prediction)
    annotated_Latin_filtered.loc[idx, 'predicted'] = prediction



## Five shots

In [None]:
def make_prompts(df, sentence, verb, preverb, number_of_shots, input_col='SENTENCE', gold_standard_col='VERB TOKEN'):
    """Build the verb-identification prompt, optionally with worked examples.

    Args:
        df: DataFrame the few-shot examples are sampled from; it must contain
            the columns named by ``input_col`` and ``gold_standard_col``.
        sentence: Sentence to analyse; it is placed last in the prompt.
        verb: Verb whose forms the model is asked to identify.
        preverb: Not used in the prompt; kept so existing callers still work.
        number_of_shots: Number of solved examples to include (0 for none).
        input_col: Column holding the example sentences.
        gold_standard_col: Column holding the answer shown for each example.

    Returns:
        The prompt string, ending in ``"Answer:"``.

    Raises:
        ValueError: Propagated from ``DataFrame.sample`` when fewer than
            ``number_of_shots`` candidate rows are available.
    """
    prompt = (
        f"This is a task of Latin linguistics. Given the following Latin sentences, "
        f"identify all the forms of the verb '{verb}' across all sentences. "
        f"Note that verbs may occur more than once and in more than one sentence, "
        f"so PROVIDE ALL THE FORMS YOU DETECT."
    )

    if number_of_shots > 0:
        # Never present the sentence under evaluation as a solved example:
        # that would hand the model the gold answer for its own query.
        candidates = df[df[input_col] != sentence]
        # Fixed seed: the same examples are drawn for a given candidate set.
        shots = candidates.sample(n=number_of_shots, random_state=42)
        examples = [
            f"Sentence: {row[input_col]}\nAnswer: {row[gold_standard_col]}"
            for _, row in shots.iterrows()
        ]
        prompt += "\n\n" + "\n\n".join(examples)

    prompt += f"\n\nSentence: {sentence}\nAnswer:"
    return prompt

In [None]:
# Task 1, five-shot run: one chat request per annotated row, with five
# solved examples in the prompt. The reply is stored in the 'predicted'
# column, replacing any value already there.
responses = []
total_rows = len(annotated_Latin_filtered)

for row_num, (idx, row) in enumerate(annotated_Latin_filtered.iterrows(), start=1):
    # Progress message on every tenth row.
    if not row_num % 10:
        print(f"Processing row {row_num} of {total_rows}")

    try:
        prompt = make_prompts(
            annotated_Latin_filtered,
            sentence=row['SENTENCE'],
            verb=row['LEMMA'],
            preverb=row['PREVERB'],
            number_of_shots=5,
            input_col='SENTENCE',
            gold_standard_col='VERB TOKEN',
        )
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="gpt-4",
        )
        content = response.choices[0].message.content.strip()
    except Exception as e:
        # Any failure while building the prompt or querying the model is
        # reported and recorded as "ERROR" so the run can continue.
        print(f"Error at row {row_num}: {e}")
        prediction = "ERROR"
    else:
        # An empty reply is stored as "N/A".
        prediction = content or "N/A"

    responses.append(prediction)
    annotated_Latin_filtered.loc[idx, 'predicted'] = prediction




# Task 2: Spatial Relation Detection and Classification

In [None]:
def make_prompts(
    df,
    sentence,
    verb,
    preverb,
    number_of_shots,
    input_col='SENTENCE',
    gold_standard_col='VERB TOKEN',
    gold_standard_col_source='source',
    gold_standard_col_goal='goal',
    gold_standard_col_path='path'
):
    """Build the Task 2 prompt: verb forms plus source/goal/path flags.

    Args:
        df: DataFrame the few-shot examples are sampled from; it must contain
            ``input_col`` and the four gold-standard columns.
        sentence: Sentence to analyse; it is placed after the examples.
        verb: Verb whose forms the model is asked to identify.
        preverb: Not used in the prompt; kept so existing callers still work.
        number_of_shots: Number of solved examples to include (0 for none).
        input_col: Column holding the example sentences.
        gold_standard_col: Column holding the verb answer shown in examples.
        gold_standard_col_source: Column whose value is shown as the
            "Source expression" answer of an example.
        gold_standard_col_goal: Same, for "Goal expression".
        gold_standard_col_path: Same, for "Path expression".

    Returns:
        The prompt string, ending with the requested answer template.

    Raises:
        ValueError: Propagated from ``DataFrame.sample`` when fewer than
            ``number_of_shots`` candidate rows are available.
    """
    prompt = (
        f"This is a task of Latin linguistics. Given the following Latin sentences, "
        f"identify all the forms of the verb '{verb}' across all sentences. "
        f"Note that verbs may occur more than once and in more than one sentence, "
        f"so PROVIDE ALL THE FORMS YOU DETECT.\n"
        f"Then, additionally answer:\n"
        f"- Does the sentence contain a **source expression**? True or False\n"
        f"- Does the sentence contain a **goal expression**? True or False\n"
        f"- Does the sentence contain a **path expression**? True or False"
    )

    if number_of_shots > 0:
        # Never present the sentence under evaluation as a solved example:
        # that would hand the model the gold answers for its own query.
        candidates = df[df[input_col] != sentence]
        # Fixed seed: the same examples are drawn for a given candidate set.
        shots = candidates.sample(n=number_of_shots, random_state=42)
        examples = [
            (
                f"\n\nSentence: {row[input_col]}\n"
                f"Answer:\n{row[gold_standard_col]}\n"
                f"- Source expression: {row[gold_standard_col_source]}\n"
                f"- Goal expression: {row[gold_standard_col_goal]}\n"
                f"- Path expression: {row[gold_standard_col_path]}"
            )
            for _, row in shots.iterrows()
        ]
        prompt += "".join(examples)

    prompt += (
        f"\n\nSentence: {sentence}\nAnswer:\n\n"
        f"List all forms of the verb '{verb}' you find in the sentence.\n\n"
        f"Then answer as follows:\n"
        f"- Source expression: True or False\n"
        f"- Goal expression: True or False\n"
        f"- Path expression: True or False\n"
    )

    return prompt


## Zero shot

In [None]:
# Task 2, zero-shot run: query the model once per annotated row and store
# the raw reply together with the three parsed flags.
# NOTE: extract_boolean_flags is defined in a later cell of this notebook;
# that cell has to be executed before this one.
total_rows = len(annotated_Latin_filtered)

for row_num, (idx, row) in enumerate(annotated_Latin_filtered.iterrows(), start=1):
    # Progress message on every tenth row.
    if not row_num % 10:
        print(f"Processing row {row_num} of {total_rows}")

    try:
        prompt = make_prompts(
            annotated_Latin_filtered,
            sentence=row['SENTENCE'],
            verb=row['LEMMA'],
            preverb=row['PREVERB'],
            number_of_shots=0,
            input_col='SENTENCE',
            gold_standard_col='VERB TOKEN',
            gold_standard_col_source='source',
            gold_standard_col_goal='goal',
            gold_standard_col_path='path',
        )

        # temperature=0 is requested for every call of this run.
        response = client.chat.completions.create(
            model="gpt-4",
            temperature=0,
            messages=[
                {"role": "system", "content": "You are a Latin linguistics expert. Always follow the format requested."},
                {"role": "user", "content": prompt},
            ],
        )
        content = response.choices[0].message.content.strip()

        # Raw reply first, then the flags, which are stored as strings.
        annotated_Latin_filtered.loc[idx, 'predicted'] = content
        source_flag, goal_flag, path_flag = extract_boolean_flags(content)
        for column, flag in (
            ('predicted_source', source_flag),
            ('predicted_goal', goal_flag),
            ('predicted_path', path_flag),
        ):
            annotated_Latin_filtered.loc[idx, column] = str(flag)

    except Exception as e:
        # On any failure all four output columns of the row become "ERROR".
        print(f"Error at row {row_num}: {e}")
        for column in ('predicted', 'predicted_source', 'predicted_goal', 'predicted_path'):
            annotated_Latin_filtered.loc[idx, column] = "ERROR"




## One shot

In [None]:
def extract_boolean_flags(response_content):
    """Parse the source/goal/path True-or-False answers out of a model reply.

    For each of "source", "goal" and "path" the reply is searched
    (case-insensitively) for ``"<label> expression"`` followed by ``true`` or
    ``false``. Whitespace, colons and markdown emphasis characters
    (``*``, ``_``, backticks) may sit between the two, so replies such as
    ``**Source expression**: True`` are understood as well as the plain
    ``Source expression: True``.

    Args:
        response_content: Text of the model reply.

    Returns:
        A ``(source, goal, path)`` tuple of bools. A flag is ``False`` both
        when the reply says "false" and when no answer is found for it;
        a non-string input yields ``(False, False, False)``.
    """
    # Explicit guard instead of a bare ``except``: only a non-string input
    # can make the searches below fail.
    if not isinstance(response_content, str):
        return (False, False, False)

    def _flag(label):
        match = re.search(
            rf"{label} expression[\s*_`:]*(true|false)",
            response_content,
            re.IGNORECASE,
        )
        return match is not None and match.group(1).lower() == "true"

    return (_flag("source"), _flag("goal"), _flag("path"))

In [None]:
# Task 2, one-shot run: query the model once per annotated row (one solved
# example in the prompt) and store the raw reply with the three parsed flags.
total_rows = len(annotated_Latin_filtered)

for row_num, (idx, row) in enumerate(annotated_Latin_filtered.iterrows(), start=1):
    # Progress message on every tenth row.
    if not row_num % 10:
        print(f"Processing row {row_num} of {total_rows}")

    try:
        prompt = make_prompts(
            annotated_Latin_filtered,
            sentence=row['SENTENCE'],
            verb=row['LEMMA'],
            preverb=row['PREVERB'],
            number_of_shots=1,
            input_col='SENTENCE',
            gold_standard_col='VERB TOKEN',
            gold_standard_col_source='source',
            gold_standard_col_goal='goal',
            gold_standard_col_path='path',
        )

        # temperature=0 is requested for every call of this run.
        response = client.chat.completions.create(
            model="gpt-4",
            temperature=0,
            messages=[
                {"role": "system", "content": "You are a Latin linguistics expert. Always follow the format requested."},
                {"role": "user", "content": prompt},
            ],
        )
        content = response.choices[0].message.content.strip()

        # Raw reply first, then the flags, which are stored as strings.
        annotated_Latin_filtered.loc[idx, 'predicted'] = content
        source_flag, goal_flag, path_flag = extract_boolean_flags(content)
        for column, flag in (
            ('predicted_source', source_flag),
            ('predicted_goal', goal_flag),
            ('predicted_path', path_flag),
        ):
            annotated_Latin_filtered.loc[idx, column] = str(flag)

    except Exception as e:
        # On any failure all four output columns of the row become "ERROR".
        print(f"Error at row {row_num}: {e}")
        for column in ('predicted', 'predicted_source', 'predicted_goal', 'predicted_path'):
            annotated_Latin_filtered.loc[idx, column] = "ERROR"

## Few shots: 3 (one "True" per spatial relation)

In [None]:
def make_prompts_3_shot(df, sentence, verb, preverb, input_col='SENTENCE', gold_standard_col='VERB TOKEN', gold_standard_col_source='source', gold_standard_col_goal='goal', gold_standard_col_path='path'):
    """Build the Task 2 prompt with three examples, one positive per relation.

    One example row is drawn for each of the source, goal and path columns
    from the rows where that column is True. Rows containing the sentence
    under evaluation are never used, and a row already shown for an earlier
    relation is not repeated while another candidate exists.

    Args:
        df: DataFrame the examples are sampled from; it must contain
            ``input_col`` and the four gold-standard columns.
        sentence: Sentence to analyse; it is placed after the examples.
        verb: Verb whose forms the model is asked to identify.
        preverb: Not used in the prompt; kept so existing callers still work.
        input_col: Column holding the example sentences.
        gold_standard_col: Column holding the verb answer shown in examples.
        gold_standard_col_source: Boolean gold column for source expressions.
        gold_standard_col_goal: Boolean gold column for goal expressions.
        gold_standard_col_path: Boolean gold column for path expressions.

    Returns:
        The prompt string, ending with the requested answer template.

    Raises:
        ValueError: Propagated from ``DataFrame.sample`` when a relation has
            no positive candidate row.
    """
    prompt = (
        f"This is a task of Latin linguistics. Given the following Latin sentences, "
        f"identify all the forms of the verb '{verb}' across all sentences.\n"
        f"Then, additionally answer:\n"
        f"- Does the sentence contain a **source expression**? True or False\n"
        f"- Does the sentence contain a **goal expression**? True or False\n"
        f"- Does the sentence contain a **path expression**? True or False"
    )

    # Showing the evaluated sentence as a solved example would leak its gold
    # answers, so it is removed from the candidates up front.
    candidates = df[df[input_col] != sentence]
    used_index = []

    for col in (gold_standard_col_source, gold_standard_col_goal, gold_standard_col_path):
        # Element-wise comparison on purpose: only rows that are exactly True.
        pool = candidates[candidates[col] == True]
        # Prefer rows not shown yet so the three examples differ.
        unused = pool[~pool.index.isin(used_index)]
        if not unused.empty:
            pool = unused
        ex = pool.sample(n=1, random_state=42)
        used_index.extend(ex.index)
        for _, row in ex.iterrows():
            prompt += (
                f"\n\nSentence: {row[input_col]}\n"
                f"Answer:\n{row[gold_standard_col]}\n"
                f"- Source expression: {row[gold_standard_col_source]}\n"
                f"- Goal expression: {row[gold_standard_col_goal]}\n"
                f"- Path expression: {row[gold_standard_col_path]}"
            )

    prompt += (
        f"\n\nSentence: {sentence}\nAnswer:\n\n"
        f"List all forms of the verb '{verb}' you find in the sentence.\n\n"
        f"Then answer as follows:\n"
        f"- Source expression: True or False\n"
        f"- Goal expression: True or False\n"
        f"- Path expression: True or False\n"
    )
    return prompt

In [None]:
def extract_boolean_flags(response_content):
    """Parse the source/goal/path True-or-False answers out of a model reply.

    For each of "source", "goal" and "path" the reply is searched
    (case-insensitively) for ``"<label> expression"`` followed by ``true`` or
    ``false``. Whitespace, colons and markdown emphasis characters
    (``*``, ``_``, backticks) may sit between the two, so replies such as
    ``**Source expression**: True`` are understood as well as the plain
    ``Source expression: True``.

    Args:
        response_content: Text of the model reply.

    Returns:
        A ``(source, goal, path)`` tuple of bools. A flag is ``False`` both
        when the reply says "false" and when no answer is found for it;
        a non-string input yields ``(False, False, False)``.
    """
    # Explicit guard instead of a bare ``except``: only a non-string input
    # can make the searches below fail.
    if not isinstance(response_content, str):
        return (False, False, False)

    def _flag(label):
        match = re.search(
            rf"{label} expression[\s*_`:]*(true|false)",
            response_content,
            re.IGNORECASE,
        )
        return match is not None and match.group(1).lower() == "true"

    return (_flag("source"), _flag("goal"), _flag("path"))

# Task 2, three-shot run: query the model once per annotated row using the
# three-example prompt and store the raw reply with the three parsed flags.
total_rows = len(annotated_Latin_filtered)

for row_num, (idx, row) in enumerate(annotated_Latin_filtered.iterrows(), start=1):
    # Progress message on every tenth row.
    if not row_num % 10:
        print(f"Processing row {row_num} of {total_rows}")

    try:
        prompt = make_prompts_3_shot(
            annotated_Latin_filtered,
            sentence=row['SENTENCE'],
            verb=row['LEMMA'],
            preverb=row['PREVERB'],
            input_col='SENTENCE',
            gold_standard_col='VERB TOKEN',
            gold_standard_col_source='source',
            gold_standard_col_goal='goal',
            gold_standard_col_path='path',
        )

        # temperature=0 is requested for every call of this run.
        response = client.chat.completions.create(
            model="gpt-4",
            temperature=0,
            messages=[
                {"role": "system", "content": "You are a Latin linguistics expert. Always follow the format requested."},
                {"role": "user", "content": prompt},
            ],
        )
        content = response.choices[0].message.content.strip()

        # Raw reply first, then the flags, which are stored as strings.
        annotated_Latin_filtered.loc[idx, 'predicted'] = content
        source_flag, goal_flag, path_flag = extract_boolean_flags(content)
        for column, flag in (
            ('predicted_source', source_flag),
            ('predicted_goal', goal_flag),
            ('predicted_path', path_flag),
        ):
            annotated_Latin_filtered.loc[idx, column] = str(flag)

    except Exception as e:
        # On any failure all four output columns of the row become "ERROR".
        print(f"Error at row {row_num}: {e}")
        for column in ('predicted', 'predicted_source', 'predicted_goal', 'predicted_path'):
            annotated_Latin_filtered.loc[idx, column] = "ERROR"

## Few shots: 6 (two shots per spatial relation, one "True" and one "False")

In [None]:
def make_prompts_6_shot(
    df,
    sentence,
    verb,
    preverb,
    input_col='SENTENCE',
    gold_standard_col='VERB TOKEN',
    gold_standard_col_source='source',
    gold_standard_col_goal='goal',
    gold_standard_col_path='path'
):
    """Build the Task 2 prompt with six examples, balanced per relation.

    For each of the source, goal and path columns one row where the column
    is True and one where it is False are drawn. Rows containing the
    sentence under evaluation are never used, and a row already shown is not
    repeated while another candidate exists.

    Args:
        df: DataFrame the examples are sampled from; it must contain
            ``input_col`` and the four gold-standard columns.
        sentence: Sentence to analyse; it is placed after the examples.
        verb: Verb whose forms the model is asked to identify.
        preverb: Not used in the prompt; kept so existing callers still work.
        input_col: Column holding the example sentences.
        gold_standard_col: Column holding the verb answer shown in examples.
        gold_standard_col_source: Boolean gold column for source expressions.
        gold_standard_col_goal: Boolean gold column for goal expressions.
        gold_standard_col_path: Boolean gold column for path expressions.

    Returns:
        The prompt string, ending with the requested answer template.

    Raises:
        ValueError: Propagated from ``DataFrame.sample`` when a relation
            lacks a True or a False candidate row.
    """
    prompt = (
        f"This is a task of Latin linguistics. Given the following Latin sentences, "
        f"identify all the forms of the verb '{verb}' across all sentences.\n"
        f"Then, additionally answer:\n"
        f"- Does the sentence contain a **source expression**? True or False\n"
        f"- Does the sentence contain a **goal expression**? True or False\n"
        f"- Does the sentence contain a **path expression**? True or False"
    )

    # Showing the evaluated sentence as a solved example would leak its gold
    # answers, so it is removed from the candidates up front.
    candidates = df[df[input_col] != sentence]
    used_index = []

    # Balanced sampling: 1 True and 1 False example for each relation,
    # each polarity with its own fixed seed.
    for col in (gold_standard_col_source, gold_standard_col_goal, gold_standard_col_path):
        for wanted, seed in ((True, 42), (False, 99)):
            # Element-wise comparison on purpose: exact True / exact False.
            pool = candidates[candidates[col] == wanted]
            # Prefer rows not shown yet so the six examples differ.
            unused = pool[~pool.index.isin(used_index)]
            if not unused.empty:
                pool = unused
            subset = pool.sample(n=1, random_state=seed)
            used_index.extend(subset.index)
            for _, row in subset.iterrows():
                prompt += (
                    f"\n\nSentence: {row[input_col]}\n"
                    f"Answer:\n{row[gold_standard_col]}\n"
                    f"- Source expression: {row[gold_standard_col_source]}\n"
                    f"- Goal expression: {row[gold_standard_col_goal]}\n"
                    f"- Path expression: {row[gold_standard_col_path]}"
                )

    prompt += (
        f"\n\nSentence: {sentence}\nAnswer:\n\n"
        f"List all forms of the verb '{verb}' you find in the sentence.\n\n"
        f"Then answer as follows:\n"
        f"- Source expression: True or False\n"
        f"- Goal expression: True or False\n"
        f"- Path expression: True or False\n"
    )

    return prompt


# Task 3: Spatial Relation Type Disambiguation

## Zero shot

In [None]:
def make_place_disambiguation_prompt(sentence, relation_type, verb):
    """Build the zero-shot prompt asking for the spatial-relation token.

    Args:
        sentence: Latin sentence to analyse.
        relation_type: Relation to look for. Either a plain label
            ("Source", "Goal", "Path", any casing) or a flag column name
            such as ``"predicted_source"``; a leading ``"predicted_"`` is
            stripped so the prompt names the relation, not the column.
        verb: Motion verb the token must depend on.

    Returns:
        The prompt string; it asks for a two-line answer (token, then type).
    """
    # Callers pass the flag column name; the prompt itself promises one of
    # "Source, Goal, or Path", so reduce the value to that label.
    relation_label = str(relation_type)
    column_prefix = "predicted_"
    if relation_label.startswith(column_prefix):
        relation_label = relation_label[len(column_prefix):]
    relation_label = relation_label.capitalize()

    return (
        f"This is a Latin linguistics task.\n"
        f"You are given a sentence and a specific motion verb '{verb}'.\n"
        f"Your job is to identify the single word (token) in '{sentence}' that:\n"
        f"  - Is a direct argument of the verb '{verb}'\n"
        f"  - Expresses the spatial relation '{relation_label}' (Source, Goal, or Path) with respect to this verb\n"
        f"  - And is either an adverb, a common noun referring to a place, or a proper noun referring to a place name\n\n"
        f"Ignore any other place names or nouns that do not have this syntactic and semantic relation with '{verb}'.\n"
        f"Answer with exactly two lines, no extra text:\n"
        f"Token: <token>\n"
        f"adverb | common noun | proper noun"
    )


In [None]:
# Task 3, zero-shot run: for each row, take the first relation flag that is
# True, ask the model for the token expressing it and for the token's type,
# and store both. Only that first relation is queried per row.
total_rows = len(task3_ready_df)
relation_columns = ('predicted_source', 'predicted_goal', 'predicted_path')

for row_num, (idx, row) in enumerate(task3_ready_df.iterrows(), start=1):
    # Progress message on every tenth row.
    if not row_num % 10:
        print(f"Processing row {row_num} of {total_rows}")

    for relation in relation_columns:
        if not (row.get(relation) == True):
            continue

        prompt = make_place_disambiguation_prompt(row['SENTENCE'], relation, row['LEMMA'])

        try:
            response = client.chat.completions.create(
                model="gpt-4",
                temperature=0,
                messages=[
                    {"role": "system", "content": "You are a Latin linguist."},
                    {"role": "user", "content": prompt},
                ],
            )
            content = response.choices[0].message.content.strip()

            # The token follows "Token:"; the type is the first of the three
            # allowed labels found anywhere in the reply.
            token_match = re.search(r"Token:\s*(\S+)", content, re.IGNORECASE)
            type_match = re.search(r"\b(adverb|common noun|proper noun)\b", content, re.IGNORECASE)

            token = None
            if token_match:
                token = token_match.group(1)
            typ = None
            if type_match:
                typ = type_match.group(1).lower()

            task3_ready_df.loc[idx, 'predicted_SR_token'] = token
            task3_ready_df.loc[idx, 'predicted_SR_type'] = typ

        except Exception as e:
            print(f"Error processing row {idx}, relation {relation}: {e}")
            task3_ready_df.loc[idx, 'predicted_SR_type'] = "ERROR"
            task3_ready_df.loc[idx, 'predicted_SR_token'] = None

        # Success or failure, the remaining relations of this row are skipped.
        break



## One shot (proper nouns)

In [None]:
import random
import re

def make_place_disambiguation_prompt(sentence, relation_type, verb, df, random_state=None):
    """Build the one-shot prompt asking for the spatial-relation token.

    The prompt contains one solved example drawn from the rows of ``df``
    whose ``'SR_type'`` is ``'proper noun'``.

    Args:
        sentence: Latin sentence to analyse.
        relation_type: Relation to look for. Either a plain label
            ("Source", "Goal", "Path", any casing) or a flag column name
            such as ``"predicted_source"``; a leading ``"predicted_"`` is
            stripped so the prompt names the relation, not the column.
        verb: Motion verb the token must relate to.
        df: DataFrame providing the example; it needs the columns
            ``'SR_type'``, ``'SENTENCE'`` and ``'LEMMA'``, and optionally
            ``'SR_token'``.
        random_state: Optional seed forwarded to ``DataFrame.sample`` to make
            the choice of example reproducible. ``None`` (the default) draws
            a fresh random example on every call.

    Returns:
        The prompt string.

    Raises:
        ValueError: If ``df`` holds no row with ``SR_type == 'proper noun'``.
    """
    # Select one random example with SR_type == 'proper noun'
    proper_noun_rows = df[df['SR_type'] == 'proper noun']
    if proper_noun_rows.empty:
        raise ValueError("No proper noun examples found in the DataFrame.")

    # Do not show the sentence under evaluation as its own solved example.
    # If it is the only proper-noun row, fall back to it rather than fail.
    other_rows = proper_noun_rows[proper_noun_rows['SENTENCE'] != sentence]
    if not other_rows.empty:
        proper_noun_rows = other_rows

    example_row = proper_noun_rows.sample(1, random_state=random_state).iloc[0]
    example_sentence = example_row['SENTENCE']
    example_verb = example_row['LEMMA']
    example_place_token = example_row.get('SR_token', '<place_token>')

    # Callers pass the flag column name; present the relation itself.
    relation_label = str(relation_type)
    column_prefix = "predicted_"
    if relation_label.startswith(column_prefix):
        relation_label = relation_label[len(column_prefix):]
    relation_label = relation_label.capitalize()

    # NOTE(review): the example is always presented as a Source relation;
    # the relation actually annotated for the sampled row is not checked.
    example = (
        "Example:\n"
        f"Sentence: {example_sentence}\n"
        f"Verb: {example_verb}\n"
        f"Relation: Source\n"
        f"Answer:\n"
        f"Token: {example_place_token}\n"
        f"proper noun\n\n"
    )

    prompt = (
        f"This is a task of Latin linguistics. You are given a Latin sentence and a motion verb.\n"
        f"Focus only on the expression that refers to the spatial relation of type '{relation_label}' "
        f"in relation to the verb '{verb}'.\n"
        f"Identify the word (token) in the sentence that expresses this spatial relation, "
        f"and classify it as:\n"
        f"- adverb\n"
        f"- common noun referring to a place (e.g., 'domus', 'forum')\n"
        f"- proper noun referring to a place (e.g., 'Roma', 'Carthago')\n\n"
        f"{example}"
        f"Now analyze the following sentence.\n"
        f"Sentence: {sentence}\n"
        f"Verb: {verb}\n"
        f"Relation: {relation_label}\n"
        f"Answer:\n"
        f"Token: <token>\n"
        f"[adverb | common noun | proper noun]"
    )

    return prompt


In [None]:

# Task 3, one-shot run: for each row, take the first relation flag that is
# exactly True, ask the model (prompt with one proper-noun example) for the
# token expressing it and for the token's type, and store both.
total_rows = len(task3_ready_df)
relation_columns = ('predicted_source', 'predicted_goal', 'predicted_path')

for row_num, (idx, row) in enumerate(task3_ready_df.iterrows(), start=1):
    # Progress message on every tenth row.
    if not row_num % 10:
        print(f"Processing row {row_num} of {total_rows}")

    for relation in relation_columns:
        if row.get(relation) is not True:
            continue

        # Built outside the try block: a failure here is not caught below.
        prompt = make_place_disambiguation_prompt(
            row['SENTENCE'],
            relation,
            row['LEMMA'],
            task3_ready_df
        )

        try:
            response = client.chat.completions.create(
                model="gpt-4",
                temperature=0,
                messages=[
                    {"role": "system", "content": "You are a Latin linguist."},
                    {"role": "user", "content": prompt},
                ],
            )
            content = response.choices[0].message.content.strip()

            # The token follows "Token:"; the type is the first of the three
            # allowed labels found anywhere in the reply.
            token_match = re.search(r"Token:\s*(\S+)", content, re.IGNORECASE)
            type_match = re.search(r"\b(adverb|common noun|proper noun)\b", content, re.IGNORECASE)

            token = None
            if token_match:
                token = token_match.group(1)
            typ = None
            if type_match:
                typ = type_match.group(1).lower()

            task3_ready_df.loc[idx, 'predicted_SR_token'] = token
            task3_ready_df.loc[idx, 'predicted_SR_type'] = typ

        except Exception as e:
            print(f"Error processing row {idx}, relation {relation}: {e}")
            task3_ready_df.loc[idx, 'predicted_SR_type'] = "ERROR"
            task3_ready_df.loc[idx, 'predicted_SR_token'] = None

        # Stop after the first matching relation, whether or not it succeeded.
        break

