In [205]:
from openai import OpenAI
from dotenv import load_dotenv
import os
import csv
import json
import re
from tqdm import tqdm

In [206]:
# Load variables from a local .env file into the process environment.
load_dotenv()

# Stop here if OPENAI_API_KEY is unset or empty.
assert os.getenv("OPENAI_API_KEY") 

# Shared client used by the request helpers below. It is built with no
# arguments — presumably the SDK reads the key from the environment; confirm.
openai_client = OpenAI()

In [207]:
def generate_response(messages, data_json):
    """Send one chat request and grade the reply against the row's labels.

    Args:
        messages: Chat messages forwarded unchanged to the completions call.
        data_json: Row dict; the keys "original_key", "validator_answer" and
            "original_id" are read.

    Returns:
        A pair ``(is_correct, labelled_reply)``. ``is_correct`` is True only
        when the reply text is exactly equal to both "original_key" and
        "validator_answer"; ``labelled_reply`` is "<original_id>: <reply>".
    """
    completion = openai_client.chat.completions.create(
        model="gpt-4.1",
        messages=messages,
    )
    reply = completion.choices[0].message.content

    # Exact string comparison: no trimming or case folding is applied.
    is_correct = reply == data_json["original_key"] and reply == data_json["validator_answer"]
    return is_correct, f'{data_json["original_id"]}: {reply}'


In [208]:
def csv_converter(csv_file, jsonl_file):
    """Convert a CSV file with a header row into a JSON Lines file.

    Every data row is written as one JSON object per line, keyed by the CSV
    header names. Non-ASCII characters are written as-is (not escaped).

    Args:
        csv_file: Path of the CSV file to read (UTF-8).
        jsonl_file: Path of the JSON Lines file to write (UTF-8). An existing
            file at this path is overwritten.
    """
    # newline='' hands line-ending handling to the csv module, so line breaks
    # inside quoted fields are kept exactly as written in the file.
    with open(csv_file, 'r', encoding='utf-8', newline='') as source, \
            open(jsonl_file, 'w', encoding='utf-8') as sink:
        # Handles get their own names so the path parameters are not rebound.
        for row in csv.DictReader(source):
            sink.write(json.dumps(row, ensure_ascii=False) + '\n')

# Writes "converted_data" relative to the current working directory.
# NOTE(review): the evaluation functions in this notebook open an absolute path
# ending in research_mars/converted_data — confirm the notebook runs from there.
csv_converter("data.csv", "converted_data")

In [209]:
def check_answers(data_path="/Users/tollymon/Desktop/research_mars/converted_data"):
    """Run the direct (no chain-of-thought) evaluation over a JSON Lines file.

    Each non-blank line is parsed as a JSON object, its "prompt" is sent to the
    model through ``generate_response``, and the graded result is collected.

    Args:
        data_path: Path of the JSON Lines file to evaluate. The default is the
            machine-specific location the notebook was written against; pass a
            path explicitly when running elsewhere.

    Returns:
        A pair ``(binary_answer, correct_answers)`` of equal-length lists, in
        file order: the first holds the boolean grade for each row, the second
        the labelled reply string returned by ``generate_response``.
    """
    binary_answer = []
    correct_answers = []

    with open(data_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank line (e.g. a trailing newline) is not a JSON row.
                continue
            data_json = json.loads(line)

            messages = [
                {"role": "system", "content": "You are an expert Lojban speaker"},
                {"role": "user", "content": f"{data_json['prompt']}"},
            ]
            b_ans, corr_ans = generate_response(messages, data_json)
            binary_answer.append(b_ans)
            correct_answers.append(corr_ans)

    return binary_answer, correct_answers


In [None]:
def generate_response_cot(messages, data_json):
    """Send one JSON-mode chat request and grade the "answer" field of the reply.

    The request asks for a JSON object (``response_format`` of type
    "json_object"). The reply is parsed and its "answer" key is compared with
    the row's labels.
    NOTE(review): nothing in this function tells the model which key to use;
    that depends on the prompt built by the caller — confirm it yields "answer".

    Args:
        messages: Chat messages forwarded unchanged to the completions call.
        data_json: Row dict; the keys "original_key" and "validator_answer"
            are read.

    Returns:
        A pair ``(is_correct, raw_content)``. ``is_correct`` is True only when
        the parsed "answer" equals both labels exactly. ``raw_content`` is the
        unparsed reply text. A reply that is missing, not valid JSON, or not a
        JSON object is graded False instead of raising, so one malformed reply
        does not abort a long evaluation run.
    """
    response = openai_client.chat.completions.create(
        model="o4-mini-2025-04-16",
        messages=messages,
        response_format={"type": "json_object"},
    )
    raw_content = response.choices[0].message.content

    try:
        parsed_response_content = json.loads(raw_content)
    except (TypeError, ValueError):
        # TypeError: content was None; ValueError covers json.JSONDecodeError.
        return False, raw_content

    # Valid JSON that is not an object (list, string, number) has no "answer".
    if isinstance(parsed_response_content, dict):
        output_value = parsed_response_content.get("answer")
    else:
        output_value = None

    is_correct = (
        output_value == data_json["original_key"]
        and output_value == data_json["validator_answer"]
    )
    return is_correct, raw_content


In [311]:
def _labelled_answer(data_json, raw_content):
    """Build "<original_id>: <answer>" from a raw JSON reply (answer is None if absent)."""
    try:
        parsed = json.loads(raw_content)
    except (TypeError, ValueError):
        parsed = None
    answer = parsed.get("answer") if isinstance(parsed, dict) else None
    return f'{data_json["original_id"]}: {answer}'


def check_answer_cot(data_path="/Users/tollymon/Desktop/research_mars/converted_data"):
    """Run the chain-of-thought evaluation over a JSON Lines file.

    For each non-blank line, the row's "prompt" is mined for the first
    double-quoted span (used as the English sentence) and for the lines that
    start with "(A)" and "(B)". These are placed into a step-by-step prompt
    and sent through ``generate_response_cot``.

    Args:
        data_path: Path of the JSON Lines file to evaluate. The default is the
            machine-specific location the notebook was written against; pass a
            path explicitly when running elsewhere.

    Returns:
        A triple ``(correct_answers, binary_answer, cot_answers)`` of
        equal-length lists in file order: "<original_id>: <answer>" labels,
        the boolean grades, and the raw reply text for each row.

    Raises:
        IndexError: If a row's prompt contains no double-quoted span.
    """
    correct_answers = []
    cot_answers = []
    binary_answer = []
    pattern = r'"(.*?)"'
    # The prompt text sent to the model has always carried a 12-space indent on
    # every line; it is spelled out here so the request stays byte-identical.
    indent = " " * 12

    with open(data_path, "r", encoding="utf-8") as f:
        for line in tqdm(f, desc="Processing"):
            line = line.strip()
            if not line:
                # A blank line (e.g. a trailing newline) is not a JSON row.
                continue
            data_json = json.loads(line)

            # Reset per row: a prompt lacking an "(A)" or "(B)" line must not
            # inherit the option text of the previous row.
            choice_a = ""
            choice_b = ""
            for prompt_line in data_json["prompt"].splitlines():
                if prompt_line.startswith("(A)"):
                    choice_a = prompt_line
                elif prompt_line.startswith("(B)"):
                    choice_b = prompt_line

            matches = re.findall(pattern, data_json["prompt"])

            prompt = (
                f'\n{indent}Consider the English sentence "{matches[0]}" \n'
                f'{indent}Which of the following is a better translation of this sentence into standard Lojban? \n'
                f'{indent}{choice_a}  \n'
                f'{indent}{choice_b} \n'
                f'{indent}Exactly one of the answers above is correct. Please think aloud and show your work. At the end of your response, please indicate the single letter corresponding to the correct answer (A or B) in JSON format.\n'
                f"{indent}Answer: Let's think step by step.\n"
                f'{indent}'
            )

            messages = [
                {"role": "system", "content": "You are an expert Lojban speaker"},
                {"role": "user", "content": prompt},
            ]
            b_ans, cot_model = generate_response_cot(messages, data_json)
            correct_answers.append(_labelled_answer(data_json, cot_model))
            cot_answers.append(cot_model)
            binary_answer.append(b_ans)

    return correct_answers, binary_answer, cot_answers


In [312]:
# final_answer_cot, final_binary, final_cot = check_answer_cot()

# Run the chain-of-thought evaluation over the converted dataset
# (one model request per row, so a full pass is slow).
check_answer_cot()

Processing: 1it [00:09,  9.21s/it]

B


Processing: 2it [00:19,  9.88s/it]

A


Processing: 2it [00:22, 11.25s/it]


KeyboardInterrupt: 

In [None]:
# Sum of final_binary divided by the number of entries in final_answer_cot —
# presumably the fraction of rows graded correct by the chain-of-thought run.
# NOTE(review): final_binary and final_answer_cot are assigned only by a
# commented-out line earlier in the notebook — confirm they exist on a fresh kernel.
true_answer_cot = sum(final_binary)
true_answer_cot/len(final_answer_cot)

0.4032258064516129

In [257]:
# Print the first element of final_cot for a manual spot check
# (presumably the first chain-of-thought reply — confirm where final_cot is bound).
print(final_cot[0])

To determine which Lojban translation (A or B) best captures the meaning of the English sentence "I think we need to hurry up," we should break down both options and analyze their components.

1. The English sentence implies a cognitive process ("I think") followed by a necessity ("we need to hurry up"). 

2. Now let's analyze the options:
   - **Option A:** "mi pensi lo du'u mi'a nitcu lo ka sutryze'a"
     - "mi" means "I."
     - "pensi" means "to think."
     - "lo du'u" translates to "the thing that is a statement," which is used to introduce what is being thought about.
     - "mi'a" means "we" (the inclusive "we" that includes the speaker).
     - "nitcu" means "to need."
     - "lo ka" signifies a property or quality.
     - "sutryze'a" roughly means "to hurry up" or "to be urgent."
     - So, roughly, this sentence translates to "I think (that) we need the property of hurrying up."

   - **Option B:** "mi jinvi lo du'u mi'a nitcu lo ka sutryze'a"
     - "mi" again means "I."
 

In [None]:
# Run the direct (non-chain-of-thought) evaluation, then display the fraction
# of True entries among the returned per-row grades.
final_binary_non_cot, final_non_cot = check_answers()
sum(final_binary_non_cot)/len(final_binary_non_cot)

0.4838709677419355

In [None]:
# Both methods require cleaning of the generated data, as their outputs are mismatched.

def cleaning():
    """Placeholder for the output-cleaning step; not implemented yet."""
    pass