In [2]:
import os
from dotenv import load_dotenv
load_dotenv() # API key comes from .env
import anthropic
from translation_utils import get_system_msg, get_user_prompt, parse_output, find_matching_sentences
import pandas as pd

In [4]:
# Shared API client used by the request cells below. No key is passed here;
# presumably the SDK reads it from the environment populated by load_dotenv() — TODO confirm.
client = anthropic.Anthropic()

In [11]:
# Sample prompt input with four sections: target word, phrase, source, and the
# per-word annotations of the phrase. It is a plain (non-f) string because it
# has no placeholders; an f-prefix would make any future "{" or "}" in the text
# be treated as interpolation.
prompt_test_input = """--- Target word:
πίνει [lemma: πίνω] (verb) | finite; present.active.indicative.third.singular
--- Phrase:
ὅ τε ἵππος αὐτοῖς κριθῆς μὲν οὐδʼ ὅλως γεύεται, ποηφαγῶν ἀεί, πίνει δὲ διὰ πολλοῦ.
--- Source:
Punic Wars by appianus of alexandria
--- Words:
ὅ [lemma: ὁ] (determiner) | singular.masculine.nominative
τε [lemma: τε] (adverb)
ἵππος [lemma: ἵππος] (noun) | singular.feminine.nominative
αὐτοῖς [lemma: αὐτός] (pronoun) | plural.masculine.dative
κριθῆς [lemma: κριθή] (adjective) | singular.neuter.genitive
μὲν [lemma: μέν] (adverb)
οὐδʼ [lemma: οὐδʼ] (adverb)
ὅλως [lemma: ὅλος] (adverb)
γεύεται [lemma: γεύεται] (verb) | finite; present.middle.subjunctive.third.plural
ποηφαγῶν [lemma: ποηφαγέω] (verb) | participle; present.active.singular.nominative
ἀεί [lemma: ἀεί] (adverb)
πίνει [lemma: πίνω] (verb) | finite; past.active.indicative.third.singular
δὲ [lemma: δέ] (particle)
διὰ [lemma: διά] (adposition)
πολλοῦ [lemma: πολύς] (adjective) | singular.neuter.genitive"""



In [None]:
# Source sentences: keep one row per distinct sentence_txt. reset_index() then
# turns the surviving pre-deduplication index labels into a column (named
# "index" for an unnamed index — presumably the case here), which is exposed as
# "grc_sent_idx". "ee_sent_idx" starts out empty.
df_sents = (
    pd.read_parquet("../../data/sentences.parquet")
    .drop_duplicates(subset=["sentence_txt"])
    .reset_index()
    .rename(columns={"index": "grc_sent_idx"})
    .assign(ee_sent_idx=None)
)

# Word table: the column reset_index() emits as "level_0" becomes
# "grc_target_idx". NOTE(review): rename() ignores keys that are absent, so
# confirm the parquet index really yields a "level_0" column.
df_words = (
    pd.read_parquet("../../data/word_to_lemma.parquet")
    .reset_index()
    .rename(columns={"level_0": "grc_target_idx"})
    .assign(ee_sent_idx=None)
)

# Empty table for generated sentences; the last two columns point back at the
# source sentence and target word.
df_sents_ee = pd.DataFrame(
    columns=[
        "ee_sent_idx",
        "ee_phrase",
        "ee_target",
        "ee_words",
        "grc_sent_idx",
        "grc_target_idx",
    ]
)

In [None]:
def get_raw_translation(
    client: "anthropic.Anthropic",
    input_prompt: str,
    model: str = "claude-3-opus-20240229",
    max_tokens: int = 1024,
):
    """Send one prompt through ``client.messages.create`` and return the raw reply.

    Args:
        client: Client object whose ``messages.create`` method issues the request.
        input_prompt: Text passed to ``get_user_prompt`` to build the user turn.
        model: Model identifier for the request. Defaults to the value that
            used to be hard-coded here.
            NOTE(review): check the provider's model deprecation schedule
            before relying on this default.
        max_tokens: Generation limit forwarded to the API. Defaults to the
            previously hard-coded 1024.

    Returns:
        Whatever ``client.messages.create`` returns, unmodified.
    """
    return client.messages.create(
        model=model,
        max_tokens=max_tokens,
        system=get_system_msg(),
        messages=[
            {"role": "user", "content": get_user_prompt(input_prompt)},
        ],
    )


In [None]:
def save_output(df_sents_ee, df_sents_grc, df_words, grc_sent_idx, grc_target_idx, parsed_msg_output):
    """Store one generated sentence and cross-link it to its source rows.

    All three frames are modified in place. The new row's ``ee_sent_idx`` is
    the current length of ``df_sents_ee``, so this assumes rows are only ever
    appended through this function (labels 0..n-1).

    Args:
        df_sents_ee: Table of generated sentences with the columns
            ``ee_sent_idx``, ``ee_phrase``, ``ee_target``, ``ee_words``,
            ``grc_sent_idx`` and ``grc_target_idx``.
        df_sents_grc: Source sentence table; must have ``grc_sent_idx`` and
            ``ee_sent_idx`` columns.
        df_words: Word table; must have ``grc_target_idx`` and
            ``ee_sent_idx`` columns.
        grc_sent_idx: Value identifying the source sentence in
            ``df_sents_grc["grc_sent_idx"]``.
        grc_target_idx: Value identifying the target word in
            ``df_words["grc_target_idx"]``.
        parsed_msg_output: Mapping with the keys ``ee_phrase``, ``ee_target``
            and ``ee_words``.

    Returns:
        The ``ee_sent_idx`` assigned to the stored sentence.

    Raises:
        KeyError: If a required key/column is missing, or if no row matches
            ``grc_sent_idx`` / ``grc_target_idx``. Nothing is modified in
            that case.
    """
    ee_sent_idx = len(df_sents_ee)
    row = {
        "ee_sent_idx": ee_sent_idx,
        "ee_phrase": parsed_msg_output["ee_phrase"],
        "ee_target": parsed_msg_output["ee_target"],
        "ee_words": parsed_msg_output["ee_words"],
        "grc_sent_idx": grc_sent_idx,
        "grc_target_idx": grc_target_idx
    }

    # Resolve both back-references before touching any frame, so a bad id
    # cannot leave the three tables out of sync.
    sent_mask = df_sents_grc["grc_sent_idx"] == grc_sent_idx
    word_mask = df_words["grc_target_idx"] == grc_target_idx
    if not sent_mask.any():
        raise KeyError(f"grc_sent_idx {grc_sent_idx!r} not found in df_sents_grc")
    if not word_mask.any():
        raise KeyError(f"grc_target_idx {grc_target_idx!r} not found in df_words")

    # .loc with a new label enlarges the caller's frame in place. An
    # object-dtype Series keeps list-valued cells (e.g. ee_words) as single
    # cells instead of being unpacked or coerced.
    df_sents_ee.loc[ee_sent_idx] = pd.Series(row, dtype=object)
    df_sents_grc.loc[sent_mask, "ee_sent_idx"] = ee_sent_idx
    df_words.loc[word_mask, "ee_sent_idx"] = ee_sent_idx
    return ee_sent_idx

# Example call: save_output(df_sents_ee, df_sents, df_words, grc_sent_idx, grc_target_idx, parsed_msg_output)


In [None]:
# Run the sample prompt end to end. The sample defined earlier in this notebook
# is named prompt_test_input; the previously referenced `prompt_input` is not
# defined in any visible cell, so this cell raised NameError on a fresh kernel.
msg_output = get_raw_translation(client, prompt_test_input)
# NOTE(review): parse_output is handed the whole response object — confirm it
# extracts the text content itself.
parsed_msg_output = parse_output(msg_output)


In [19]:
# populate df_sents_ee
# first, make an ordering of words in df_words
# then iterate over the ordering. For the first n=100 words:
# get best matching sentence
# from this sentence, generate prompt
# from prompt, generate response
# store response in df_sents_ee with ref to sentence (grc_sent_idx) in df_sents and ref to word (grc_target_idx) in df_words
# update corresponding sentence in df_sents with ee_sent_idx
# update corresponding word in df_words with ee_sent_idx

In [11]:
# Display the sentence table for a visual check (long frames are shown elided).
# NOTE(review): the saved output below has no grc_sent_idx column, so it appears
# to predate the current loading cell — re-run after loading to refresh it.
df_sents

Unnamed: 0,sentence_idx,metadata,sentence_obj,sentence_txt,len_words,len_chars,ee_sent_idx
0,0,"{'author': 'lysias', 'edition': 'Lysias with a...","[{'category': {'F': 'neg', 'N': 'pos', 'V': 'p...","ἴσως τινὲς ὑμῶν, ὦ ἄνδρες δικασταί, διὰ τὸ βού...",23,120,
1,1,"{'author': 'lysias', 'edition': 'Lysias with a...","[{'category': {'F': 'pos', 'N': 'pos', 'V': 'n...","ἐγὼ δὲ τοσούτου δέω περὶ τῶν μὴ, προσηκόντων ἱ...",31,150,
2,2,"{'author': 'lysias', 'edition': 'Lysias with a...","[{'category': {'F': 'neg', 'N': 'neg', 'V': 'p...","οἴμαι μὲν οὖν, ἐάν πάντα διηγήσωμαι τὰ πεπραγμ...",30,172,
3,3,"{'author': 'lysias', 'edition': 'Lysias with a...","[{'category': {'F': 'pos', 'N': 'neg', 'V': 'n...",ἐξ ἀρχῆς οὖν ἀκούσατε.,5,22,
4,4,"{'author': 'lysias', 'edition': 'Lysias with a...","[{'category': {'F': 'neg', 'N': 'neg', 'V': 'p...",ἐράτων ὁ Ἐρασιφῶντος πατὴρ ἐδανείσατο παρὰ τοῦ...,12,71,
...,...,...,...,...,...,...,...
121099,134639,"{'author': 'homeric hymns', 'edition': 'Hymni ...","[{'category': {'F': 'neg', 'N': 'pos', 'V': 'p...","αἰδοίην, χρυσοστέφανον, καλὴν Ἀφροδίτην ᾁσομαι...",30,184,
121100,134640,"{'author': 'homeric hymns', 'edition': 'Hymni ...","[{'category': {'F': 'pos', 'N': 'pos', 'V': 'n...","τὴν δὲ χρυσάμπυκες Ὧραι δέξαντ ̓ ἀσπασίως, περ...",28,140,
121101,134641,"{'author': 'homeric hymns', 'edition': 'Hymni ...","[{'category': {'F': 'pos', 'N': 'neg', 'V': 'n...",ἐν δὲ τρητοῖσι λοβοῖσιν ἄνθεμ ̓ ὀρειχάλκου χρυ...,42,242,
121102,134642,"{'author': 'homeric hymns', 'edition': 'Hymni ...","[{'category': {'F': 'pos', 'N': None, 'V': Non...","αὐτὰρ ἐπειδὴ πάντα περὶ χροῒ κόσμον ἔθηκαν, ἦγ...",37,214,
