In [3]:
!pip install openai chromadb python-dotenv

Collecting chromadb
  Downloading chromadb-1.4.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.2 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.4.0-py3-none-any.whl.metadata (5.8 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.3-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl.metadata (8.7 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Downloading posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.23.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_otlp_proto_grpc-1.39.1-py3-none-any.whl.metadata (2.5 kB)
Collecting pypika>=0.48.9 (from chromadb)
  Downloading pypika-0.50.0-py2.py3-none-any.whl.metadata (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [4]:
import os
from dotenv import load_dotenv
from openai import OpenAI
import chromadb

In [6]:
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# google.colab only exists inside Colab; guard the import so the notebook
# also runs on a local kernel instead of failing with ImportError.
try:
    from google.colab import userdata
except ImportError:
    userdata = None

# Inside Colab, read the key from the Colab secrets store (unchanged behavior).
# Elsewhere, fall back to the OPENAI_API_KEY environment variable, which is
# where a key loaded from .env by load_dotenv() ends up.
if userdata is not None:
    openai_api_key = userdata.get("OPENAI_API_KEY")
else:
    openai_api_key = os.getenv("OPENAI_API_KEY")

In [7]:
# --------------------------------------------------
# 1. ENV + CLIENT SETUP
# --------------------------------------------------
# Read a local .env file (if any) into the process environment.
load_dotenv()

# Single OpenAI client shared by the embedding and chat helpers below.
client = OpenAI(
    api_key=openai_api_key,
)

In [8]:

# --------------------------------------------------
# 2. EMBEDDING FUNCTION
# --------------------------------------------------
def get_embedding(text: str) -> list[float]:
    """Embed ``text`` with the module-level OpenAI ``client``.

    Args:
        text: The string to embed.

    Returns:
        The ``embedding`` attribute of the first item in the response's
        ``data`` list, as returned by ``client.embeddings.create`` for the
        ``text-embedding-3-small`` model.
    """
    embeddings_api = client.embeddings
    result = embeddings_api.create(model="text-embedding-3-small", input=text)
    # One input string was sent, so only the first result entry is read.
    first_item = result.data[0]
    return first_item.embedding



In [9]:
# --------------------------------------------------
# 3. VECTOR DATABASE SETUP (Chroma)
# --------------------------------------------------
chroma_client = chromadb.Client()

# get_or_create (rather than create) so re-running this cell reuses the
# existing "documents" collection instead of failing on a name clash.
collection = chroma_client.get_or_create_collection(
    name="documents"
)

# Tiny demo corpus; each entry is stored as one retrievable record.
documents = [
    "LangChain is an orchestration framework for large language models.",
    "RAG stands for Retrieval Augmented Generation.",
    "Vector databases store embeddings for similarity search.",
    "LLMs cannot access private data unless it is provided in the prompt."
]

# Write every record in one call. upsert (instead of add) overwrites records
# whose IDs already exist, so re-running this cell after editing `documents`
# refreshes the stored text and embeddings rather than leaving stale entries.
# IDs are the list positions as strings, matching the original "0".."N-1" scheme.
collection.upsert(
    ids=[str(idx) for idx in range(len(documents))],
    documents=documents,
    embeddings=[get_embedding(doc) for doc in documents],
)

In [11]:
# --------------------------------------------------
# 4. RETRIEVAL FUNCTION
# --------------------------------------------------
def retrieve_context(query: str, k: int = 2) -> str:
    """Fetch the stored documents nearest to ``query`` as one text block.

    Args:
        query: Text to search for; it is embedded with ``get_embedding``.
        k: Number of results requested from the collection (default 2).

    Returns:
        The documents returned for the query, joined with newlines.
    """
    query_vector = get_embedding(query)
    hits = collection.query(query_embeddings=[query_vector], n_results=k)

    # A single query embedding was sent, so only the first result list is read.
    top_documents = hits["documents"][0]
    return "\n".join(top_documents)


# --------------------------------------------------
# 5. LLM ANSWER FUNCTION
# --------------------------------------------------
def answer_question(question: str) -> str:
    """Answer ``question`` using context retrieved from the collection.

    The context comes from ``retrieve_context(question)``. It is placed in a
    single user message together with the question and sent to the
    ``gpt-4o-mini`` chat model through the module-level ``client``.

    Args:
        question: The user's question.

    Returns:
        The message content of the first choice in the chat completion.
    """
    context = retrieve_context(question)

    prompt = f"""
You are an expert assistant.
Answer the question using ONLY the context below.

Context:
{context}

Question:
{question}
"""

    # The whole prompt, instructions included, travels as one user message.
    messages = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages,
        temperature=0.2,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content


# --------------------------------------------------
# 6. RUN
# --------------------------------------------------
if __name__ == "__main__":
    # Demo run: ask one question that the sample corpus can answer.
    question = "What is LangChain?"
    answer = answer_question(question=question)
    print("\nANSWER:\n", answer)



ANSWER:
 LangChain is an orchestration framework for large language models.
