In [9]:
from openai import OpenAI
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if one exists) into the process
# environment before looking the key up.
load_dotenv()

# Resolves to None when API_KEY is not set.
api_key = os.environ.get('API_KEY')

In [10]:
# Shared OpenAI client; the ask_chatgpt_* helpers defined in later cells read
# this module-level name when they issue chat-completion requests.
client = OpenAI(api_key=api_key)

In [11]:

def ask_chatgpt_4(prompt, model="gpt-4o"):
    """Send ``prompt`` as a single user message and return the raw completion.

    Args:
        prompt: Text placed in the ``content`` field of the one user message.
        model: Model identifier forwarded to the API. Defaults to
            ``"gpt-4o"``, the value this helper previously hard-coded, so
            existing one-argument calls behave exactly as before.

    Returns:
        Whatever ``client.chat.completions.create`` returns, unmodified.
        Callers in this notebook read the reply text from
        ``.choices[0].message.content``.
    """
    # Single-turn request: one user message, no system prompt, no history.
    messages = [{"role": "user", "content": prompt}]
    return client.chat.completions.create(messages=messages, model=model)

In [8]:
# Send one sample prompt through ask_chatgpt_4 and print the raw object it
# returns; `ans` is kept so the next cell can pull out just the reply text.
ans = ask_chatgpt_4("What is the meaning to life")
print(ans)

ChatCompletion(id='chatcmpl-9VXF9n2fFCFrimgSuQQeqBDQQHO5h', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="The question of the meaning of life has been a central philosophical and existential inquiry for centuries, and it often varies depending on cultural, religious, personal, and philosophical perspectives. Here are a few broad perspectives:\n\n1. **Religious Views**:\n   - **Christianity**: The purpose of life is often seen as knowing, loving, and serving God and preparing for an eternal life with Him.\n   - **Islam**: Life is viewed as a test from God, where individuals must live righteously according to the teachings of the Qur'an to earn a place in the afterlife.\n   - **Hinduism**: Life is about fulfilling one’s dharma (duties) and striving towards moksha (liberation from the cycle of rebirth).\n   - **Buddhism**: The goal is to attain enlightenment (Nirvana) through the Eightfold Path, ending the cycle of suffering and rebir

In [11]:
# Show only the message text of the first choice in the completion.
print(ans.choices[0].message.content)

The question of the meaning of life has been a central philosophical and existential inquiry for centuries, and it often varies depending on cultural, religious, personal, and philosophical perspectives. Here are a few broad perspectives:

1. **Religious Views**:
   - **Christianity**: The purpose of life is often seen as knowing, loving, and serving God and preparing for an eternal life with Him.
   - **Islam**: Life is viewed as a test from God, where individuals must live righteously according to the teachings of the Qur'an to earn a place in the afterlife.
   - **Hinduism**: Life is about fulfilling one’s dharma (duties) and striving towards moksha (liberation from the cycle of rebirth).
   - **Buddhism**: The goal is to attain enlightenment (Nirvana) through the Eightfold Path, ending the cycle of suffering and rebirth.

2. **Philosophical Perspectives**:
   - **Existentialism**: Life has no inherent meaning, and it is up to each individual to create their own purpose through acti

In [12]:

def ask_chatgpt_3(prompt, model="gpt-3.5-turbo-0125"):
    """Send ``prompt`` as a single user message and return the raw completion.

    Args:
        prompt: Text placed in the ``content`` field of the one user message.
        model: Model identifier forwarded to the API. Defaults to
            ``"gpt-3.5-turbo-0125"``, the value this helper previously
            hard-coded, so existing one-argument calls behave as before.

    Returns:
        Whatever ``client.chat.completions.create`` returns, unmodified.
        Callers in this notebook read the reply text from
        ``.choices[0].message.content``.
    """
    # Single-turn request: one user message, no system prompt, no history.
    messages = [{"role": "user", "content": prompt}]
    return client.chat.completions.create(messages=messages, model=model)


In [17]:
import json
from tqdm import tqdm

def get_llm_output(prompt: str):
    """Ask both models the same prompt and return their reply texts.

    Returns:
        A ``(gpt4_text, gpt3_text)`` tuple, each taken from
        ``.choices[0].message.content`` of the respective completion.
        ``ask_chatgpt_4`` is called first, then ``ask_chatgpt_3``.
    """
    def _reply_text(completion):
        # Only the first choice of a completion is used.
        return completion.choices[0].message.content

    return _reply_text(ask_chatgpt_4(prompt)), _reply_text(ask_chatgpt_3(prompt))

# Benchmark categories. Each name is both a sibling directory and the stem of
# the JSON file inside it: "../<name>/<name>.json".
prompt_dirs = [
    "spatial-physical",
    "physical-temporal",
    "spatial-temporal",
    "spatial-temporal-physical",
]

# `prompt_dir` rather than `dir`, so the builtin dir() is not shadowed for the
# rest of the kernel session.
for prompt_dir in prompt_dirs:
    prompts_json_filename = f"../{prompt_dir}/{prompt_dir}.json"

    # Read explicitly as UTF-8 so the result does not depend on the platform's
    # default encoding; the handle is closed before any API call is made.
    with open(prompts_json_filename, "r", encoding="utf-8") as prompts_file:
        prompts = json.load(prompts_file)

    for prompt in tqdm(prompts):
        gpt4_out, gpt3_out = get_llm_output(prompt["prompt"])
        # Keep any responses already stored on the entry and add/overwrite
        # only the two model keys produced here.
        responses = prompt.get("responses", {})
        responses["ChatGPT4o"] = gpt4_out
        responses["ChatGPT3.5"] = gpt3_out
        prompt["responses"] = responses

    # ensure_ascii=False emits non-ASCII characters verbatim, so the file must
    # be opened as UTF-8. With the platform default encoding the dump could
    # raise UnicodeEncodeError after "w" had already truncated the input file.
    with open(prompts_json_filename, "w", encoding="utf-8") as new_prompts_file:
        json.dump(prompts, new_prompts_file, indent=2, ensure_ascii=False)


100%|██████████| 14/14 [00:23<00:00,  1.65s/it]
100%|██████████| 13/13 [00:13<00:00,  1.01s/it]
100%|██████████| 14/14 [00:15<00:00,  1.08s/it]
100%|██████████| 15/15 [00:14<00:00,  1.01it/s]


In [14]:
import json
from tqdm import tqdm

def get_llm_output(prompt: str):
    """Ask both models the same prompt and return their reply texts.

    NOTE: this duplicates the definition in the preceding cell; it is kept so
    this cell can be executed on its own.

    Returns:
        A ``(gpt4_text, gpt3_text)`` tuple, each taken from
        ``.choices[0].message.content`` of the respective completion.
        ``ask_chatgpt_4`` is called first, then ``ask_chatgpt_3``.
    """
    def _reply_text(completion):
        # Only the first choice of a completion is used.
        return completion.choices[0].message.content

    return _reply_text(ask_chatgpt_4(prompt)), _reply_text(ask_chatgpt_3(prompt))

# Benchmark categories. Each name is both a sibling directory and the stem of
# the control file inside it: "../<name>/<name>-control.json".
prompt_dirs = [
    "spatial-physical",
    "physical-temporal",
    "spatial-temporal",
    "spatial-temporal-physical",
]

# `prompt_dir` rather than `dir`, so the builtin dir() is not shadowed for the
# rest of the kernel session.
for prompt_dir in prompt_dirs:
    prompts_json_filename = f"../{prompt_dir}/{prompt_dir}-control.json"

    # Read explicitly as UTF-8 so the result does not depend on the platform's
    # default encoding; the handle is closed before any API call is made.
    with open(prompts_json_filename, "r", encoding="utf-8") as prompts_file:
        prompts = json.load(prompts_file)

    for prompt in tqdm(prompts):
        gpt4_out, gpt3_out = get_llm_output(prompt["prompt"])
        # Keep any responses already stored on the entry and add/overwrite
        # only the two model keys produced here.
        responses = prompt.get("responses", {})
        responses["ChatGPT4o"] = gpt4_out
        responses["ChatGPT3.5"] = gpt3_out
        prompt["responses"] = responses

    # ensure_ascii=False emits non-ASCII characters verbatim, so the file must
    # be opened as UTF-8. With the platform default encoding the dump could
    # raise UnicodeEncodeError after "w" had already truncated the input file.
    with open(prompts_json_filename, "w", encoding="utf-8") as new_prompts_file:
        json.dump(prompts, new_prompts_file, indent=2, ensure_ascii=False)


100%|██████████| 14/14 [00:14<00:00,  1.04s/it]
100%|██████████| 13/13 [00:13<00:00,  1.04s/it]
100%|██████████| 14/14 [00:14<00:00,  1.06s/it]
100%|██████████| 15/15 [00:14<00:00,  1.03it/s]


In [20]:
# Calculate, per category, the fraction of control prompts where each model's
# stored response equals the human annotation.

prompt_dirs = [
    "spatial-physical",
    "physical-temporal",
    "spatial-temporal",
    "spatial-temporal-physical",
]

# `prompt_dir` rather than `dir`, so the builtin dir() is not shadowed.
for prompt_dir in prompt_dirs:
    prompts_json_filename = f"../{prompt_dir}/{prompt_dir}-control.json"
    total_responses = 0
    correct_GPT4o_responses = 0
    correct_GPT35_responses = 0

    # The generation cell writes these files with ensure_ascii=False, so read
    # them back as UTF-8 instead of relying on the platform default encoding.
    with open(prompts_json_filename, "r", encoding="utf-8") as prompts_file:
        prompts = json.load(prompts_file)

    for prompt in tqdm(prompts):
        # A missing annotation defaults to 0.
        # NOTE(review): the comparison below is against ints, so this assumes
        # human_annotation is stored as a JSON number, not a string - confirm.
        human_annotation = prompt.get("human_annotation", 0)
        responses = prompt.get("responses", {})
        # int() raises ValueError unless the stored response is a bare integer
        # (surrounding whitespace is tolerated); a missing model key raises
        # KeyError. Both are left to surface rather than be scored silently.
        GPT4o = int(responses["ChatGPT4o"])
        GPT35 = int(responses["ChatGPT3.5"])
        total_responses += 1
        if GPT4o == human_annotation:
            correct_GPT4o_responses += 1
        if GPT35 == human_annotation:
            correct_GPT35_responses += 1

    # An empty prompt list would otherwise end in ZeroDivisionError.
    if total_responses == 0:
        print(f"{prompt_dir}: no prompts found, accuracy undefined")
        continue

    print(f"{prompt_dir}: GPT4o accuracy {correct_GPT4o_responses/total_responses}")
    print(f"{prompt_dir}: GPT35 accuracy {correct_GPT35_responses/total_responses}")

100%|██████████| 14/14 [00:00<00:00, 187007.18it/s]


spatial-physical: GPT4o accuracy 0.5714285714285714
spatial-physical: GPT35 accuracy 0.07142857142857142


100%|██████████| 13/13 [00:00<00:00, 104057.16it/s]


physical-temporal: GPT4o accuracy 0.6923076923076923
physical-temporal: GPT35 accuracy 0.6153846153846154


100%|██████████| 14/14 [00:00<00:00, 269358.97it/s]


spatial-temporal: GPT4o accuracy 0.35714285714285715
spatial-temporal: GPT35 accuracy 0.35714285714285715


100%|██████████| 15/15 [00:00<00:00, 121691.61it/s]

spatial-temporal-physical: GPT4o accuracy 0.26666666666666666
spatial-temporal-physical: GPT35 accuracy 0.2



