In [None]:
##### Mini-RAG with REAL embeddings

In [2]:
# Tiny in-memory corpus: each entry has a unique "id" and the raw "text" to embed.
documents = [
    {"id": doc_id, "text": body}
    for doc_id, body in (
        ("doc1", "Python is a programming language."),
        ("doc2", "The sky is blue during the day."),
        ("doc3", "Dogs are loyal animals."),
    )
]


In [22]:
# Question to answer: it is embedded for retrieval and inserted into the generation prompt.
# NOTE(review): this cell's execution count (22) is later than the retrieval cells (11-16),
# so the similarity scores shown further down may come from a different query — confirm
# with Restart Kernel → Run All.
query = "What is A?"


In [5]:
from openai import OpenAI
import math
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if present) into the environment so the
# API key never has to be hard-coded in the notebook.
load_dotenv()

api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    # Stop with an actionable message when the key is missing or empty.
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your environment or .env file."
    )

# Pass the key explicitly so the value read above is the one actually used.
client = OpenAI(api_key=api_key)


In [6]:
# Map each document id to its embedding vector.
# All texts are sent in ONE request (the embeddings endpoint accepts a list of
# inputs) instead of one network round-trip per document.
# NOTE(review): for a large corpus, split the texts into batches to stay within
# the per-request input limits — confirm the limits in the API reference.
doc_embeddings = {}

if documents:  # skip the request entirely when there is nothing to embed
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=[doc["text"] for doc in documents],
    )
    # Each returned item carries the index of the input it belongs to; use it
    # to find the matching document rather than relying on list order.
    for item in response.data:
        doc_embeddings[documents[item.index]["id"]] = item.embedding


In [11]:
# Embed the query with the same model used for the documents so the vectors
# are directly comparable.
query_response = client.embeddings.create(
    input=query,
    model="text-embedding-3-small",
)
query_embedding = query_response.data[0].embedding


In [12]:
def cosine_similarity(a, b):
    """Return the cosine of the angle between vectors ``a`` and ``b``.

    Args:
        a: Sequence of numbers.
        b: Sequence of numbers with the same length as ``a``.

    Returns:
        ``dot(a, b) / (|a| * |b|)``, or ``0.0`` when that denominator is zero
        (e.g. a zero vector), since the angle is undefined in that case.

    Raises:
        ValueError: If the two vectors have different lengths.
    """
    if len(a) != len(b):
        # zip() would silently drop the extra components and skew the score.
        raise ValueError(
            f"Vectors must have the same length, got {len(a)} and {len(b)}"
        )
    dot = sum(x * y for x, y in zip(a, b))
    mag_a = math.sqrt(sum(x * x for x in a))
    mag_b = math.sqrt(sum(y * y for y in b))
    denominator = mag_a * mag_b
    if denominator == 0:
        # Avoid ZeroDivisionError for zero-magnitude (or empty) vectors.
        return 0.0
    return dot / denominator


In [13]:
# Pair every document's text with its similarity to the query, as (score, text).
scored_docs = [
    (
        cosine_similarity(query_embedding, doc_embeddings[entry["id"]]),
        entry["text"],
    )
    for entry in documents
]


In [14]:
# Highest similarity first; only the score (element 0) is used as the sort key.
scored_docs.sort(key=lambda pair: pair[0], reverse=True)


In [15]:
# Show each (score, text) pair with the score rounded to three decimals.
for ranked_pair in scored_docs:
    score, text = ranked_pair
    print(f"{score:.3f} → {text}")


0.653 → Python is a programming language.
0.091 → Dogs are loyal animals.
0.076 → The sky is blue during the day.


In [16]:
# The first entry of the sorted list is the best match; keep only its text.
best_match = scored_docs[0]
top_doc = best_match[1]
print("\nRetrieved document:", top_doc, sep="\n")



Retrieved document:
Python is a programming language.


In [17]:
##### Generation

In [26]:
# Text of the highest-scoring document, used as the context for generation.
# NOTE(review): same expression as the earlier retrieval cell; this re-reads
# scored_docs as it currently exists in the kernel.
top_doc = scored_docs[0][1]


In [27]:
# Grounded prompt: the model is told to answer only from the retrieved context
# and to reply "I don't know" when the context does not contain the answer.
prompt = f"""
You are answering a question using ONLY the context below.
If the answer is not in the context, say "I don't know".

Context:
{top_doc}

Question:
{query}
"""


In [28]:
# Send the grounded prompt as a single user message; temperature=0 keeps the
# output as repeatable as possible.
chat_messages = [{"role": "user", "content": prompt}]
response = client.chat.completions.create(
    model="gpt-4o-mini",
    temperature=0,
    messages=chat_messages,
)


In [29]:
# Extract the text of the first (only) completion choice and display it.
first_choice = response.choices[0]
answer = first_choice.message.content
print("Answer:", answer, sep="\n")


Answer:
I don't know.


In [31]:
# Embeddings
# Embeddings beat keywords because they capture semantic meaning, not exact words.
# Embeddings beat keywords because they retrieve by meaning, not by word match
# Similar meanings → similar vectors
# Zero word overlap still works
# Embeddings matter because they enable semantic search, retrieving text by meaning rather than exact words, 
# even with zero word overlap.



# Cosine understanding
# Cosine similarity measures how aligned two meanings are, ignoring length.
# Cosine similarity measures:
# -- Directional alignment, not magnitude
# -- Whether two vectors point the same way in meaning space
# Why this matters: sentence length by itself doesn’t matter, and extra words don’t distort similarity as long as the meaning stays the same.



# Chunking
# Chunking prevents meaning dilution and allows precise retrieval of facts.
# It preserves local meaning, makes retrieval precise, and enables fact-level search.


# RAG failure
# Common causes: wrong chunks are retrieved, right chunks are split badly, the query is vague, or the prompt allows guessing.
# RAG fails silently when retrieval is wrong and the model is allowed to guess.

# Debugging RAG logically
# Always debug RAG in this order: query → retrieved chunks → prompt constraints → answer.
# Debug order must always be:
# 1. Query (is it clear?)
# 2. Retrieved chunks (are they relevant?)
# 3. Prompt constraints (is guessing allowed?)
# 4. Final answer

# How does retrieval work?
# Retrieval works by chunking documents, embedding them and the query using the same model,
# then ranking chunks by cosine similarity to the query vector.


# More context is not always better.
# Correct context is better.