In [4]:
from openai import OpenAI
import os
import numpy as np

from dotenv import load_dotenv
load_dotenv()

client = OpenAI(
  api_key=os.environ.get("OPENAI_API_KEY"),
)

def cosine_similarity(vec1, vec2):
    # Ensure the vectors are numpy arrays
    vec1 = np.array(vec1)
    vec2 = np.array(vec2)
    
    # Compute the dot product and magnitudes
    dot_product = np.dot(vec1, vec2)
    magnitude = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    
    # Prevent division by zero
    if magnitude == 0:
        return 0.0
    
    return dot_product / magnitude

def get_embedding(text, model="text-embedding-3-large"):
   text = text.replace("\n", " ")
   return client.embeddings.create(input = [text], model=model).data[0].embedding

def get_distance(text1, text2, model="text-embedding-3-large"):
   text1 = str(text1).lower()
   text2 = str(text2).lower()
   embedding1 = get_embedding(text1, model)
   embedding2 = get_embedding(text2, model)
   return cosine_similarity(embedding1, embedding2)

In [11]:
# load /Users/aryank/Developer/dmml-research/off-track exps/disambig_filter_with_our_sample.json into upper
# load /Users/aryank/Developer/dmml-research/Experiments/o_sample_output_1729209119.json into baseline

import json
import numpy as np
import pandas as pd

baseline = json.load(open('/Users/aryank/Developer/dmml-research/Experiments/o_sample_output_1729209119.json'))[:10]
upper = json.load(open('/Users/aryank/Developer/dmml-research/off-track exps/disambig_filter_with_our_sample.json'))[:10]

# sort basline by data_id attribute
baseline = sorted(baseline, key=lambda x: x['data_id'])

# sort upper by nq_id attribute
upper = sorted(upper, key=lambda x: x['nq_id'])

sum_question_distance = 0
sum_answer_distance = 0
sum_ambig_answer_distance = 0
sum_disambig_answer_distance = 0

for i in range(len(baseline)):
    print(f"Processing {i}")
    print(baseline[i]["llm_response"])
    print(upper[i]["disambig_llm_answer"])

    ambig_answer_distance = 0
    disambig_answer_distance = 0
    for ans in upper[i]["ground_truth"]:
        curr_ambig_answer_distance = get_distance(ans, baseline[i]["llm_response"])
        # curr_disambig_answer_distance = get_distance(ans, upper[i]["llm_response"])
        if curr_ambig_answer_distance > ambig_answer_distance:
            ambig_answer_distance = curr_ambig_answer_distance
        # if curr_disambig_answer_distance > disambig_answer_distance:
        #     disambig_answer_distance = curr_disambig_answer_distance

    sum_ambig_answer_distance += curr_ambig_answer_distance
    sum_disambig_answer_distance += upper[i]['disambig_answer_distance']

    sum_question_distance += upper[i]['question_distance']
    sum_answer_distance += get_distance(baseline[i]['llm_response'], upper[i]['disambig_llm_answer'])

print(f"Average question distance: {sum_question_distance / len(baseline)}")
print(f"Average answer distance: {sum_answer_distance / len(baseline)}")
print(f"Average ambig answer distance: {sum_ambig_answer_distance / len(baseline)}")
print(f"Average disambig answer distance: {sum_disambig_answer_distance / len(baseline)}")
    

Processing 0
The Marvelettes
The Marvelettes
Processing 1
Figaro
Figaro
Processing 2
12 sacks.
12 sacks.
Processing 3
New Year's Day
2015
Processing 4
Monument Avenue in Richmond, VA, was previously known for its statues of Confederate figures and Arthur Ashe, though most Confederate statues were removed by 2021.
Selena Gomez.
Processing 5
Mark Ruffalo.
Mark Ruffalo
Processing 6
The moon's gravitational pull.
Aaron Judge
Processing 7
16
2017
Processing 8
"Baby Shark Dance" by Pinkfong.
Atticus Finch
Processing 9
February 5, 2016.
Patrick Bergin
Average question distance: 0.9059289299755575
Average answer distance: 0.49447405057584454
Average ambig answer distance: 0.3811690162115744
Average disambig answer distance: 0.8053376999332093
