In [None]:
from datetime import datetime
import csv
import copy

import pandas as pd
import dspy

from programs import WrapperEnglishSPT, evaluate_answer
from custom_evaluation import custom_evaluate

In [None]:
# Build a DSPy language-model client for the Ollama server on localhost
# and register it as the LM in DSPy's global settings.
lm = dspy.LM(
    model="ollama_chat/deepseek-r1:14b",
    api_base="http://localhost:11434",
)
dspy.settings.configure(lm=lm)

In [None]:
# Load the evaluation set. Later cells read the columns context_x, context_y,
# lemma, judgment, grouping_x, grouping_y, identifier1 and identifier2 from it.
data = pd.read_csv(filepath_or_buffer="test_dwug_en.csv")

In [None]:
# Convert every row of `data` into a dspy.Example. The two contexts and the
# target word are declared as inputs; the judgment is stored as `answer`.
examples = [
    dspy.Example(
        sentence1=row["context_x"],
        sentence2=row["context_y"],
        # Only the part of the lemma before the first underscore is used.
        target_word=row["lemma"].split("_")[0],
        answer=row["judgment"],
    ).with_inputs("sentence1", "sentence2", "target_word")
    for _, row in data.iterrows()
]

In [None]:
# Instantiate the project-defined WrapperEnglishSPT program, call
# activate_assertions() on it, and then load saved state from the path below.
# NOTE(review): going by the file name this is presumably a MIPRO-optimised
# prompt compiled for a quantised DeepSeek model — confirm against the
# compile step that produced it.
program_spt_prompt_en_assertions = WrapperEnglishSPT().activate_assertions()
program_spt_prompt_en_assertions.load(
    "compile-models/sp/en_spt_mipro_optimized_prompt_en_deepseek-q4"
)

In [None]:
# Time the full evaluation run with wall-clock timestamps.
start_time = datetime.now()

# Run the loaded program over all examples, scoring with evaluate_answer.
# `result` is consumed by the next cell, which reads `.reasoning` and
# `.answer` from each (truthy) item.
result = custom_evaluate(
    examples,
    evaluate_answer,
    program_spt_prompt_en_assertions,
    debug=False,
    report_result=True,
)

print(f"Elapsed time: {datetime.now() - start_time}")

In [None]:
# Pull the reasoning text and the predicted answer out of every evaluation
# item; falsy items (e.g. None) are recorded as None in both lists so the
# lists stay aligned with `result`.
reasoning = [None if not item else item.reasoning for item in result]
pred = [None if not item else item.answer for item in result]

In [None]:
# Assemble the output table in one step. Columns are plain lists, so they are
# combined by position; the column order matches the order of the keys below.
annotated_data = pd.DataFrame(
    {
        "sentence1": data["context_x"].tolist(),
        "sentence2": data["context_y"].tolist(),
        # Gold answers as stored on the examples, next to the model outputs.
        "gold_label": [item.answer for item in examples],
        "prediction": pred,
        "reasoning": reasoning,
        "grouping1": data["grouping_x"].tolist(),
        "grouping2": data["grouping_y"].tolist(),
        "identifier1": data["identifier1"].tolist(),
        "identifier2": data["identifier2"].tolist(),
        "word": data["lemma"].tolist(),
        "judgment": data["judgment"].tolist(),
    }
)

In [None]:
# Write the annotated predictions to disk without the DataFrame index column.
annotated_data.to_csv(path_or_buf="sp-dwug-en-deepseek.csv", index=False)