In [1]:
import os
from openai import OpenAI
import numpy as np

import dotenv
# Let python-dotenv populate the process environment first, so the key lookup
# below can see values that are not exported in the shell.
dotenv.load_dotenv()

# Single module-level OpenAI client shared by every helper in this notebook.
# os.environ.get yields None (rather than raising) when the variable is unset.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

def cosine_similarity(vec1, vec2):
    """Return the cosine of the angle between two vectors.

    Args:
        vec1: First vector; any sequence accepted by ``np.array``.
        vec2: Second vector; must be dot-compatible with ``vec1``.

    Returns:
        The dot product divided by the product of the two norms, or ``0.0``
        when that product of norms is zero (i.e. at least one vector has
        zero length), which avoids dividing by zero.
    """
    lhs, rhs = np.array(vec1), np.array(vec2)

    # Evaluate the dot product first so incompatible shapes are reported
    # even when one of the inputs happens to be a zero vector.
    numerator = np.dot(lhs, rhs)
    denominator = np.linalg.norm(lhs) * np.linalg.norm(rhs)

    # A zero-length vector has no direction; report "no similarity".
    return 0.0 if denominator == 0 else numerator / denominator

def get_embedding(text, model="text-embedding-3-small"):
    """Fetch the embedding of ``text`` through the module-level OpenAI ``client``.

    Args:
        text: String to embed. Newline characters are replaced by single
            spaces before the request is made.
        model: Name of the embedding model passed to the API.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    single_line = text.replace("\n", " ")
    response = client.embeddings.create(input=[single_line], model=model)
    return response.data[0].embedding

def get_distance(text1, text2, model="text-embedding-3-small"):
    """Score how close two texts are via the cosine similarity of their embeddings.

    NOTE: despite the name, the returned value is a *similarity* (the result
    of ``cosine_similarity``), so larger means closer.

    Args:
        text1: First value; converted with ``str`` before normalisation.
        text2: Second value; converted with ``str`` before normalisation.
        model: Embedding model name forwarded to ``get_embedding``.

    Returns:
        Cosine similarity between the two embeddings.
    """
    def _normalize(value):
        # Lower-case and strip every period so trivial formatting
        # differences do not affect the embedding.
        return str(value).lower().replace(".", "")

    first, second = _normalize(text1), _normalize(text2)
    return cosine_similarity(get_embedding(first, model), get_embedding(second, model))

In [2]:
import json

def find_closest_match(i):
    """Find the first disambiguated question whose answers include a ground-truth answer.

    Candidates in ``i["ambig_questions"]`` are scanned in order; for each one,
    the answers in ``i["nq_answer"]`` are tried in order and compared for
    exact equality against the candidate's ``"answer"`` entries.

    Args:
        i: Mapping with ``"ambig_questions"`` (iterable of mappings that carry
            ``"question"`` and ``"answer"``) and ``"nq_answer"`` (iterable of
            ground-truth answers).

    Returns:
        ``(question, answer)`` for the first exact match, or ``("", "")``
        when nothing matches.
    """
    for candidate in i["ambig_questions"]:
        for truth in i["nq_answer"]:
            if any(listed == truth for listed in candidate["answer"]):
                return (candidate["question"], truth)

    return ("", "")

# Evaluation driver: for every training entry that has a disambiguated
# question matching one of its ground-truth answers, ask gpt-4o both the
# ambiguous and the disambiguated question, score the results with
# get_distance (a cosine similarity), checkpoint the per-entry records to
# disk, and finally print the averages.


def _ask_one_answer(question):
    """Send ``question`` to gpt-4o with the concise single-answer prompt and return the raw response."""
    return client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": f"Answer the question as concisely as possible with ONLY one answer without any other text:  {question}",
            }
        ],
        model="gpt-4o",
    )


# Use a context manager so the input file handle is closed after loading.
with open("../Experiments/mini_sample_input_1729208141.json", encoding="utf-8") as train_file:
    train = json.load(train_file)

out = []

# Running totals; divided by len(out) for the summary at the end.
sum_q = 0
sum_a = 0

sum_ambig = 0
sum_disambig = 0

count = 0

for i in train:
    count += 1
    print(f"Processing {count}/{len(train)}")
    provided_disambig_question, provided_disambig_answer = find_closest_match(i)

    # Entries without a matching disambiguated question are skipped entirely.
    if provided_disambig_question == "":
        continue

    ambig = _ask_one_answer(i['nq_question'])
    disambig = _ask_one_answer(provided_disambig_question)
    ambig_llm_answer = ambig.choices[0].message.content
    disambig_llm_answer = disambig.choices[0].message.content

    q_distance = get_distance(i['nq_question'], provided_disambig_question)
    # Compare the two LLM answers with each other. The original compared the
    # ambiguous answer against the disambiguated *question*, which does not
    # match the "answer_distance" key or the summary label below.
    a_distance = get_distance(ambig_llm_answer, disambig_llm_answer)

    # Each score goes into its own total (these were swapped before, so the
    # two printed averages were reported under each other's label).
    sum_q += q_distance
    sum_a += a_distance

    # Best score of each LLM answer against any ground-truth answer.
    # Starts at 0, so negative similarities are floored at 0.
    ambig_answer_distance = 0
    disambig_answer_distance = 0

    for ans in i["nq_answer"]:
        ambig_answer_distance = max(ambig_answer_distance, get_distance(ambig_llm_answer, ans))
        disambig_answer_distance = max(disambig_answer_distance, get_distance(disambig_llm_answer, ans))

    sum_ambig += ambig_answer_distance
    sum_disambig += disambig_answer_distance

    curr = {
        'nq_id': i['nq_id'],
        'ambig_question': i['nq_question'],
        'ambig_llm_answer': ambig_llm_answer,
        'disambig_question': provided_disambig_question,
        'disambig_llm_answer': disambig_llm_answer,
        "question_distance": q_distance,
        "answer_distance": a_distance,
        "ambig_answer_distance": ambig_answer_distance,
        "disambig_answer_distance": disambig_answer_distance,
        'ground_truth': i['nq_answer'],
    }

    out.append(curr)

    # Checkpoint after every processed entry so a failure mid-run keeps the
    # results so far; the context manager closes the handle each time.
    with open('disambig_filter_with_our_sample.json', 'w') as out_file:
        json.dump(out, out_file)

print(f"Number of entries with a clearly diambiguated question: {len(out)}")
print(f"Number of training entries: {len(train)}")
if out:
    print(f"Average distance between questions: {sum_q / len(out)}")
    print(f"Average distance betweemn their LLM responses: {sum_a / len(out)}")
    print(f"Average ambiguous answer distance: {sum_ambig / len(out)}")
    print(f"Average disambiguated answer distance: {sum_disambig / len(out)}")
else:
    # Nothing matched: the averages are undefined, so avoid dividing by zero.
    print("No entries matched; averages are undefined.")

Processing 1/1000
Processing 2/1000
Processing 3/1000
Processing 4/1000
Processing 5/1000
Processing 6/1000
Processing 7/1000
Processing 8/1000
Processing 9/1000
Processing 10/1000
Processing 11/1000
Processing 12/1000
Processing 13/1000
Processing 14/1000
Processing 15/1000
Processing 16/1000
Processing 17/1000
Processing 18/1000
Processing 19/1000
Processing 20/1000
Processing 21/1000
Processing 22/1000
Processing 23/1000
Processing 24/1000
Processing 25/1000
Processing 26/1000
Processing 27/1000
Processing 28/1000
Processing 29/1000
Processing 30/1000
Processing 31/1000
Processing 32/1000
Processing 33/1000
Processing 34/1000
Processing 35/1000
Processing 36/1000
Processing 37/1000
Processing 38/1000
Processing 39/1000
Processing 40/1000
Processing 41/1000
Processing 42/1000
Processing 43/1000
Processing 44/1000
Processing 45/1000
Processing 46/1000
Processing 47/1000
Processing 48/1000
Processing 49/1000
Processing 50/1000
Processing 51/1000
Processing 52/1000
Processing 53/1000
Pr