# Adaptive RAG

This notebook aims to combine query analysis with active / self-corrective RAG.

In [None]:
%pip install -U langchain_community tiktoken langchain-google-genai langchain-huggingface langchainhub chromadb langchain langgraph tavily-python sentence-transformers

In [1]:
import getpass
import os

def _set_env(var: str):
    if not os.environ.get(var):
        os.environ[var] = input(f"{var}: ")

# Make sure both keys are present in the environment, prompting for whichever
# one is missing (GEMINI_API_KEY first, then TAVILY_API_KEY).
for _required_key in ("GEMINI_API_KEY", "TAVILY_API_KEY"):
    _set_env(_required_key)


### Create Index

Set up a vector database using **HuggingFace** embeddings (free; the model is downloaded once and cached on your machine) and the **Chroma** vector database. The documents are retrieved directly from the URLs specified below.


In [6]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model: identifier of the sentence-transformers model to load
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Pages to index
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# Fetch every page. Each loader call returns a list of documents, so `docs`
# is a list of lists: [[doc1], [doc2], [doc3]]
docs = [WebBaseLoader(page_url).load() for page_url in urls]

# Flatten `docs` into one list: [doc1, doc2, doc3]
docs_list = []
for page_docs in docs:
    docs_list.extend(page_docs)

# Break the documents into manageable chunks (size 500, no overlap),
# with the splitter built from a tiktoken encoder
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=0,
    chunk_size=500,
)
doc_splits = text_splitter.split_documents(docs_list)

# Create vectorstore
# Without explicit ids, every run of this cell stores the chunks under fresh
# random ids, so re-running it in the same kernel fills the "rag-chroma"
# collection with duplicates. Deterministic ids (source + chunk position) turn
# a re-run into an overwrite of the same entries instead.
# NOTE(review): if a page changes and yields fewer chunks than before, the
# surplus entries from the earlier run stay in the collection until the kernel
# is restarted.
chunk_ids = [
    f"{chunk.metadata.get('source', 'doc')}#{position}"
    for position, chunk in enumerate(doc_splits)
]

vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma",
    embedding=embeddings,
    ids=chunk_ids,
)

# Retriever interface over the vector store, used to fetch relevant chunks
retriever = vectorstore.as_retriever()