In [28]:
# Toy corpus for the retrieval demo: each record has a numeric id, the text that
# gets embedded, and a label naming where the text came from.
# NOTE(review): the second text ends in "oldr" (likely a typo for "old"); it is
# kept verbatim because the similarity scores depend on the exact strings.
documents = [
    dict(id=1, text="Akheel is a Senior Software Engineer", source="blog"),
    dict(id=2, text="Akheel is a 29 year oldr", source="paper"),
    dict(id=3, text="Akheel lives in Dharwad", source="notes"),
]

In [29]:
from dotenv import load_dotenv
import os

# Pull variables from a local .env file (if one exists) into the process
# environment so the key does not have to be hard-coded in the notebook.
load_dotenv()

# Read the key explicitly and fail fast: a missing or empty credential is
# reported here with an actionable message instead of surfacing in a later cell.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your environment or .env file."
    )

In [30]:
from openai import OpenAI

# Shared API client for the notebook.
client = OpenAI()

# Only the text field is embedded; ids and sources stay on the document dicts.
texts = [d["text"] for d in documents]

# One batched request embeds every document text.
embeddings = client.embeddings.create(
    model="text-embedding-3-small",
    input=texts
)

# Each returned item carries the index of the input it was computed from.
# Ordering by that index makes the documents[i] <-> vectors[i] pairing explicit
# instead of relying on the response order.
vectors = [
    item.embedding
    for item in sorted(embeddings.data, key=lambda item: item.index)
]

# The scoring cell pairs documents and vectors with zip(), which would silently
# drop entries if the counts ever differed -- check the invariant here.
assert len(vectors) == len(documents), (
    f"expected {len(documents)} embeddings, got {len(vectors)}"
)


In [31]:
import math

def cosine_similarity(a, b):
    """Return the cosine of the angle between two numeric vectors.

    Args:
        a: Sequence of numbers (e.g. an embedding as a list of floats).
        b: Sequence of numbers with the same length as ``a``.

    Returns:
        ``dot(a, b) / (|a| * |b|)`` as a float. If either vector has zero
        magnitude the cosine is undefined; ``0.0`` is returned in that case
        instead of raising ``ZeroDivisionError``.

    Raises:
        ValueError: If ``a`` and ``b`` have different lengths. ``zip`` would
            otherwise truncate to the shorter one and yield a misleading score.
    """
    if len(a) != len(b):
        raise ValueError(
            f"vectors must have the same length, got {len(a)} and {len(b)}"
        )

    dot = sum(x * y for x, y in zip(a, b))
    mag_a = math.sqrt(sum(x * x for x in a))
    mag_b = math.sqrt(sum(y * y for y in b))

    # Guard the division: covers empty vectors, all-zero vectors, and a product
    # that underflows to zero.
    denom = mag_a * mag_b
    if denom == 0:
        return 0.0
    return dot / denom


In [39]:
# The user question that drives retrieval and is later placed in the prompt.
query = "Where does Akheel live and what does he do?"

# Embed the question with the same model used for the documents so the
# vectors are comparable.
query_response = client.embeddings.create(
    input=query,
    model="text-embedding-3-small",
)

# A single input string was sent, so the first result is read.
query_embedding = query_response.data[0].embedding


In [44]:
# How many of the best-matching documents are passed on to the prompt.
# Kept at 1 to match the original behaviour; raise it when a question spans
# several facts, since only the single best match reaches the model otherwise.
TOP_K = 1

# Score every document against the query and rank from most to least similar.
# Sorting on the score alone (not the whole tuple) avoids comparing dicts when
# two scores tie.
scored_docs = sorted(
    (
        (cosine_similarity(query_embedding, vec), doc)
        for doc, vec in zip(documents, vectors)
    ),
    key=lambda pair: pair[0],
    reverse=True,
)

top_docs = [doc for _, doc in scored_docs[:TOP_K]]

# Display the full ranking (score, document) for inspection.
scored_docs

[(0.6256473817348721,
  {'id': 3, 'text': 'Akheel lives in Dharwad', 'source': 'notes'}),
 (0.5740510920020521,
  {'id': 1, 'text': 'Akheel is a Senior Software Engineer', 'source': 'blog'}),
 (0.5709957298018032,
  {'id': 2, 'text': 'Akheel is a 29 year oldr', 'source': 'paper'})]

In [45]:
# Render each retrieved document as one bullet line of context.
context_lines = [f"- {doc['text']}" for doc in top_docs]
retrieved_context = "\n".join(context_lines)

# Assemble the prompt piece by piece: role line, the retrieved context the
# model is told to restrict itself to, then the user's question. The leading
# and trailing newlines are part of the prompt text.
rag_prompt = (
    "\n"
    "You are an expert AI assistant.\n"
    "\n"
    "Use only the following context to answer:\n"
    f"{retrieved_context}\n"
    "\n"
    "Question:\n"
    f"{query}\n"
)


In [46]:
# Reuse the `client` created in the embedding cell; constructing a second
# OpenAI() instance here was redundant.
response = client.chat.completions.create(
    model="gpt-4o-mini",
    # The whole RAG prompt (instructions + context + question) is sent as a
    # single user message.
    messages=[
        {"role": "user", "content": rag_prompt}
    ],
    temperature=0.2
)

# Text of the first (and only requested) completion choice.
raw_output = response.choices[0].message.content
raw_output


'Akheel lives in Dharwad. The context does not provide information about what he does.'