In [1]:
!pip install langchain langchain_openai langchain_chroma




In [2]:
import os
from dotenv import load_dotenv

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate

In [7]:
# --------------------------------------------------
# 1. ENV SETUP
# --------------------------------------------------
# Load variables from a local .env file (if one exists) into os.environ.
load_dotenv()

# The Colab secrets store is only importable inside Google Colab; guard the
# import so the notebook also runs in a plain Jupyter kernel.
try:
    from google.colab import userdata
except ImportError:
    userdata = None

# Prefer the Colab secret; otherwise fall back to the OPENAI_API_KEY
# environment variable (possibly populated by load_dotenv() above).
openai_api_key = userdata.get("OPENAI_API_KEY") if userdata is not None else None
openai_api_key = openai_api_key or os.getenv("OPENAI_API_KEY")

# Fail fast with an actionable message instead of a later client-side error.
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to Colab secrets, export it as an "
        "environment variable, or put it in a .env file."
    )

In [8]:

# --------------------------------------------------
# 2. EMBEDDINGS + VECTOR STORE
# --------------------------------------------------
# Embedding model used to index the documents below and to embed queries.
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
    api_key=openai_api_key
)

# Small demo corpus that the retriever searches over.
documents = [
    "LangChain is an orchestration framework for large language models.",
    "RAG stands for Retrieval Augmented Generation.",
    "Vector databases store embeddings for similarity search.",
    "LLMs cannot access private data unless it is provided in the prompt."
]

# Stable, position-based ids keep this cell idempotent: re-running it in the
# same kernel overwrites the existing entries instead of inserting the same
# texts again under fresh ids (which would let the retriever return
# duplicate hits).
document_ids = [f"doc-{index}" for index in range(len(documents))]

vectorstore = Chroma.from_texts(
    texts=documents,
    embedding=embeddings,
    ids=document_ids,
    collection_name="documents"
)

# Retrieve the 2 most similar documents for each query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})


In [9]:
# --------------------------------------------------
# 3. PROMPT TEMPLATE
# --------------------------------------------------
# Template text with two placeholders: {context} receives the retrieved
# passages and {question} receives the user's question.
RAG_TEMPLATE = """
You are an expert assistant.
Answer the question using ONLY the context below.

Context:
{context}

Question:
{question}
"""

prompt = ChatPromptTemplate.from_template(RAG_TEMPLATE)

# --------------------------------------------------
# 4. LLM
# --------------------------------------------------
# Chat model that produces the final answer from the filled-in prompt.
llm = ChatOpenAI(model="gpt-4o-mini", api_key=openai_api_key)

# --------------------------------------------------
# 5. RAG FUNCTION
# --------------------------------------------------
def answer_question(question: str) -> str:
    """Answer ``question`` using documents fetched by the module-level retriever.

    The retrieved documents' ``page_content`` values are joined with newlines
    to form the context, which is passed together with the question through
    ``prompt | llm``.

    Args:
        question: The user's question; must be a non-blank string.

    Returns:
        The ``content`` attribute of the model's response.

    Raises:
        ValueError: If ``question`` is not a string or contains only whitespace.
    """
    # Reject blank input up front so no embedding/LLM request is made for it.
    if not isinstance(question, str) or not question.strip():
        raise ValueError("question must be a non-empty string")

    docs = retriever.invoke(question)
    context = "\n".join(doc.page_content for doc in docs)

    # Built per call so edits to `prompt` or `llm` in other cells take effect
    # without redefining this function.
    chain = prompt | llm
    response = chain.invoke({
        "context": context,
        "question": question
    })

    return response.content


# --------------------------------------------------
# 6. RUN
# --------------------------------------------------
# Demo: ask one sample question and print the model's answer.
if __name__ == "__main__":
    question = "What is LangChain?"
    answer = answer_question(question=question)
    print("\nANSWER:\n", answer)



ANSWER:
 LangChain is an orchestration framework for large language models.
