In [1]:
from dotenv import load_dotenv
from openai import OpenAI
import numpy as np
import os
from sklearn.metrics.pairwise import cosine_similarity
import json

gen_embeddings_from_scratch = False

# load openAI API key
load_dotenv()
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))


def gen_embedding(text):
    response = client.embeddings.create(
        input=text,
        model="text-embedding-3-large",
    )
    return np.array(response.data[0].embedding)


def load_embeddings_from_file():
    with open("bill_embeddings.jsons", "r") as file:
        bill_embeddings = []
        for line in file:
            json_object = json.loads(line)
            bill_embeddings.append(
                (
                    json_object["billId"],
                    json_object["title"],
                    np.array(json_object["embedding"]),
                )
            )
        return bill_embeddings


# or load them
if not gen_embeddings_from_scratch:
    bill_embeddings = load_embeddings_from_file()
    print("Embeddings loaded! ✅")


# Compute cosine similarity
def cosine_sim(embedding1, embedding2):
    return cosine_similarity([embedding1], [embedding2])[0][0]


# Search function
def search_bills(query, bill_embeddings):
    query_embedding = gen_embedding(query)
    similarities = [
        (bill_id, title, cosine_sim(query_embedding, embedding))
        for bill_id, title, embedding in bill_embeddings
    ]
    # Sort by similarity
    similarities.sort(key=lambda x: x[2], reverse=True)
    # Return the most relevant bill
    return similarities[0:5]


# Interactive search with top 5 ranking
while True:
    query = input("Enter a query: ")
    if query == "exit":
        break
    results = search_bills(query, bill_embeddings)
    print(f"Query: {query}")
    print("Top 5 results:")
    for i, (bill_id, title, similarity) in enumerate(results):
        print(f"{i+1}. {title} (similarity: {similarity:.2f})")

Embeddings loaded! ✅
Query: 
Top 5 results:
1. Condemning antisemitism on college campuses. (similarity: 0.17)
2. Condemning antisemitism on college campuses. (similarity: 0.16)
3. Equality Act (similarity: 0.16)
4. SALT Deductibility Act (similarity: 0.16)
5. Reaffirming the support of the United States to our strongest ally in the region, Israel, and recognizing the authoritarian and extremist regime of the Islamic Republic of Iran as a threat to Israel, the region, the United States, and global stability. (similarity: 0.16)
