# Basic RAG
A retrieval-augmented generation (RAG) app built with LangChain and Chroma, following the [LangChain RAG tutorial](https://python.langchain.com/v0.2/docs/tutorials/rag/).

In [None]:
%pip install langchain langchain_community langchain_chroma
%pip install -qU langchain-openai
%pip install python-dotenv

In [1]:
import os
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv

# Load environment variables from the .env file so the API key never has to be
# hard-coded in the notebook.
load_dotenv()

# Fail fast with an actionable message when the key is absent: os.getenv returns
# None for an unset variable, which would otherwise only surface later as a less
# obvious error from the OpenAI client.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

# Chat model shared by the rest of the notebook.
llm = ChatOpenAI(model="gpt-4o-mini", api_key=openai_api_key)

# Indexing
The indexing phase of a RAG application loads the source material, splits it into chunks, and stores those chunks together with their embeddings in a vector store so they can be searched later.

In [None]:
import bs4
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain.chains import create_retrieval_chain
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain

# Settings a reader is most likely to tune, gathered in one place.
SOURCE_URL = "https://lilianweng.github.io/posts/2023-06-23-agent/"
CHUNK_SIZE = 1000     # characters per chunk
CHUNK_OVERLAP = 200   # characters shared by neighbouring chunks

# 1. Load
# WebBaseLoader is a Document Loader that fetches HTML from web URLs and uses
# BeautifulSoup to parse it to text. BS keyword arguments are passed in to limit the
# classes loaded: only keep post title, headers, and content from the full HTML.
loader = WebBaseLoader(
    web_paths=(SOURCE_URL,),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

# 2. Split
# Split the document into chunks of CHUNK_SIZE characters with CHUNK_OVERLAP
# characters of overlap between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
splits = text_splitter.split_documents(docs)

# Stop here with a clear message if nothing was extracted (for example because the
# page no longer uses the CSS classes filtered on above) instead of handing an
# empty list to the vector store.
if not splits:
    raise ValueError(
        f"No text chunks were produced from {SOURCE_URL}; "
        "check the URL and the SoupStrainer class filter."
    )

# 3. Store
# Store splits and their embeddings in a vector store.
# NOTE(review): re-running this cell in the same kernel may add the same chunks to
# the existing in-memory collection a second time -- confirm against Chroma's behaviour.
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())

# Retrieval & Generation
For this exercise, create a simple application that takes a user question, searches for documents relevant to that question, passes the retrieved documents and initial question to a model, and returns an answer.

In [None]:
# 4. Retrieve
# Wrap the vector store in a retriever: an object that, given a text query,
# returns the stored Documents that best match it.
retrieval_options = {"k": 6}  # number of documents to return per query
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs=retrieval_options,
)

# 5. Generate
# Generate a response to the question using the retrieved documents.
#
# The chat model is the `llm` created in the setup cell at the top of the notebook.
# It is reused here rather than constructed a second time, so the model name and
# API key are configured in exactly one place.

# System instructions for the model; the {context} placeholder is filled with the
# retrieved documents.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

# The human turn carries the user's question under the {input} key.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

# create_stuff_documents_chain:
# specifies how retrieved context is fed into a prompt and LLM.
# In this case, we will "stuff" the contents into the prompt -- i.e., we will include all
# retrieved context without any summarization or other processing. It has input keys
# context and input, and generates an answer from the retrieved context and the query.
question_answer_chain = create_stuff_documents_chain(llm, prompt)

# create_retrieval_chain:
# adds the retrieval step and propagates the retrieved context through
# the chain, providing it alongside the final answer. It has input key input, and includes input,
# context, and answer in its output.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# Ask a question interactively. A blank (or whitespace-only) entry falls back to an
# example question so an empty string is never sent to the retriever.
DEFAULT_QUERY = "What is Task Decomposition?"
# Another question that suits this source: "How are tools used in LLMs?"
query = input("> ").strip() or DEFAULT_QUERY

# The chain takes the question under "input"; the response read below exposes the
# generated "answer" and the retrieved "context" documents.
response = rag_chain.invoke({"input": query})

print(response["answer"])
print()

# List the retrieved documents that were supplied to the model as context.
for counter, document in enumerate(response["context"], start=1):
    print(f"Document {counter}:")
    print(document)
    print()
