In [57]:
import os
from openai import OpenAI
from sklearn.metrics.pairwise import cosine_similarity

import dotenv
# Load environment variables via python-dotenv before the API key lookup below.
dotenv.load_dotenv()

# Module-level OpenAI client used by the embedding and chat helpers in later
# cells. The key is read from OPENAI_API_KEY; the lookup yields None when the
# variable is not set.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [58]:
def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding of ``text`` produced by the OpenAI embeddings API.

    Newline characters in ``text`` are replaced with spaces before the request
    is sent.

    Args:
        text: String to embed.
        model: Name of the embedding model passed to the API.

    Returns:
        The ``embedding`` attribute of the first item in the response's
        ``data`` list.
    """
    flattened = text.replace("\n", " ")
    response = client.embeddings.create(input=[flattened], model=model)
    first_item = response.data[0]
    return first_item.embedding

In [59]:
def get_distance(text1, text2, model="text-embedding-3-small"):
    """Return the cosine similarity between the embeddings of two values.

    Both inputs are converted with ``str()`` and lower-cased before being
    embedded, so non-string values are accepted.

    Note: despite the name, the returned value is a cosine *similarity*
    (larger means more alike), not a distance.

    Args:
        text1: First value to compare.
        text2: Second value to compare.
        model: Name of the embedding model forwarded to ``get_embedding``.

    Returns:
        The single entry of the 1x1 matrix returned by ``cosine_similarity``.
    """
    lowered_a, lowered_b = str(text1).lower(), str(text2).lower()
    vectors = [get_embedding(lowered_a, model), get_embedding(lowered_b, model)]
    # cosine_similarity takes two 2-D inputs; wrap each vector as a single row.
    similarity_matrix = cosine_similarity(vectors[:1], vectors[1:])
    return similarity_matrix[0][0]

In [60]:
def get_cosine_similarity(text1, text2, model="text-embedding-3-small"):
    """Return the cosine similarity between the embeddings of two texts.

    Unlike ``get_distance``, the inputs are passed to ``get_embedding``
    unchanged (no ``str()`` conversion and no lower-casing).

    Args:
        text1: First string to compare.
        text2: Second string to compare.
        model: Name of the embedding model forwarded to ``get_embedding``.

    Returns:
        The single entry of the 1x1 matrix returned by ``cosine_similarity``.
    """
    embedding1 = get_embedding(text1, model)
    embedding2 = get_embedding(text2, model)
    # The OpenAI client's embeddings resource has no cosine_similarity method,
    # so the similarity is computed locally with the sklearn helper imported at
    # the top of the notebook. Each vector is wrapped as a single-row 2-D input.
    return cosine_similarity([embedding1], [embedding2])[0][0]

In [61]:
# Send one user message with empty content to gpt-4o and keep the response
# object in `chat_completion`.
chat_completion = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": ""}],
)

# Last expression of the cell: the text of the first choice, shown as the
# cell's output.
str(chat_completion.choices[0].message.content)


'Hello! How can I assist you today?'

In [62]:

# Compare gpt-4o's answer to each sampled question with its answer to a
# rewritten ("what ...") version of the same question, scoring both against the
# ground-truth answers with get_distance (a cosine similarity of embeddings).
import json
import random

SAMPLE_SIZE = 30  # number of questions drawn from filtered_train.json


def _ask_gpt(prompt, model="gpt-4o"):
    """Send ``prompt`` as a single user message and return the reply text."""
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=model,
    )
    return response.choices[0].message.content


def _concise_answer(question):
    """Ask for one concise answer to ``question`` and return the reply text."""
    return _ask_gpt(
        f"Answer the question as concisely as possible with ONLY one answer without any other text:  {question}"
    )


def _best_score(ground_truths, response_text):
    """Return the largest get_distance score over ``ground_truths`` (0 if none exceed 0)."""
    best = 0
    for truth in ground_truths:
        score = get_distance(truth, response_text)
        if score > best:
            best = score
    return best


# Load the question set; the context manager closes the file handle instead of
# leaving it to the garbage collector. JSON is read as UTF-8 regardless of the
# platform's default encoding.
with open("filtered_train.json", encoding="utf-8") as questions_file:
    all_qs = json.load(questions_file)

# Randomly sample SAMPLE_SIZE questions.
# NOTE(review): this cell does not seed `random`, so each run draws a
# different sample.
sample = random.sample(all_qs, SAMPLE_SIZE)

print(sample)

output = []

sum_1 = 0  # running total of ambig_answer_distance
sum_2 = 0  # running total of disambig_answer_distance

for entry in sample:
    question = entry["nq_question"]

    # Answer to the original question as given.
    ambig_response = _concise_answer(question)

    # Ask the model to rewrite the question, then answer the rewritten form.
    disambig_question = _ask_gpt(
        f"Rewrite this question replacing all questions with a what, but retain the meaning by specifying what entity or what person or what timeframe the \"what\" answering. Also specify the year is 2018 is needed to answer a time-based question. The Question: {question}"
    )

    print(disambig_question)

    disambig_response = _concise_answer(disambig_question)

    # Score each response by its best match against any ground-truth answer.
    ambig_answer_distance = _best_score(entry["nq_answer"], ambig_response)
    disambig_answer_distance = _best_score(entry["nq_answer"], disambig_response)

    curr = {
        "data_id": entry["nq_id"],
        "ambig_question": question,
        "ambig_prompt_response": ambig_response,
        "disambig_question": disambig_question,
        "disambig_prompt_response": disambig_response,
        "ground_truth": entry["nq_answer"],
        "question_distance": get_distance(question, disambig_question),
        "answer_distance": get_distance(ambig_response, disambig_response),
        "ambig_answer_distance": ambig_answer_distance,
        "disambig_answer_distance": disambig_answer_distance,
    }

    sum_1 += curr["ambig_answer_distance"]
    sum_2 += curr["disambig_answer_distance"]

    output.append(curr)

    # Rewrite the results file after every question so partial results survive
    # an interrupted run; the context manager flushes and closes the file.
    with open("disambig_prompt.json", "w") as results_file:
        json.dump(output, results_file)

    print(curr)

print("="*20)
print(f"Average ambig answer distance: {sum_1/len(sample)}")
print(f"Average disambig answer distance: {sum_2/len(sample)}")


[{'nq_id': '-2735858160795120633', 'nq_question': 'Where is the lowest elevation in the world?', 'nq_answer': ['Dead Sea'], 'ambig_questions': [{'question': 'Where is the lowest known point in the world?', 'answer': ['Challenger Deep']}, {'question': 'Where is the lowest elevation below sea level in the world?', 'answer': ['Dead Sea, Jordan']}]}, {'nq_id': '-5398436847463533808', 'nq_question': 'Who voices beast in beauty and the beast 2017?', 'nq_answer': ['Adam Mitchell', 'Dan Stevens'], 'ambig_questions': [{'question': 'Who voices old beast in beauty and the beast 2017?', 'answer': ['Daniel Jonathan Stevens', 'Dan Stevens']}, {'question': 'Who voices young beast in beauty and the beast 2017?', 'answer': ['Adam Mitchell']}]}, {'nq_id': '-2578020881292839975', 'nq_question': 'What season of blue bloods are we in?', 'nq_answer': ['eighth season'], 'ambig_questions': [{'question': 'What season of blue bloods are we in at the end of 2017?', 'answer': ['8']}, {'question': 'What season of 