# RAG

## Loading packages

In [2]:
import os

from langchain import hub
from langchain_community.document_loaders import UnstructuredPDFLoader, DirectoryLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

In [4]:
# Store Access Token as an environment variable
import getpass
import os

if not os.environ.get("GROQ_API_KEY"):
  os.environ["GROQ_API_KEY"] = getpass.getpass("Enter API key for Groq: ")

## Loading documents

 We first load sample document from the blog post contents by BeautifulSoup parser. Then, we will split the documents.

In [5]:
# Load documents
import bs4
from langchain_community.document_loaders import WebBaseLoader

# Only keep post title, headers, and content from the full HTML.
bs4_strainer = bs4.SoupStrainer(class_=("post-title", "post-header", "post-content"))
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": bs4_strainer},
)
docs = loader.load()

assert len(docs) == 1
print(f"Total characters: {len(docs[0].page_content)}")



Total characters: 43130


In [6]:
# Splitting documents
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # chunk size (characters)
    chunk_overlap=200,  # chunk overlap (characters)
    add_start_index=True,  # track index in original document
)
all_splits = text_splitter.split_documents(docs)

print(f"Split blog post into {len(all_splits)} sub-documents.")

Split blog post into 66 sub-documents.


## Setting up vector store

We use Chroma as vectorstore and Groq as LLM. Since Groq does not provide embeddings, we will use HuggingFaceEmbeddings instead.

In [7]:
# Location of the database
persist_directory = "doc_db"

In [8]:
embedding = HuggingFaceEmbeddings()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [9]:
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=embedding,
    persist_directory=persist_directory
)

## Create retrieval chain

In [10]:
retriever = vectorstore.as_retriever()

In [11]:
llm = ChatGroq(
    model="llama-3.1-8b-instant",
    temperature=0
)

In [12]:
# Define prompt for question-answering
# The template is: "Answer any use questions based solely on the context below:"
retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")

combine_docs_chain = create_stuff_documents_chain(
    llm, retrieval_qa_chat_prompt
)



In [13]:
retrieval_chain = create_retrieval_chain(retriever, combine_docs_chain)

## Run query

In [14]:
query = "What does the document say about LLM?"
response = retrieval_chain.invoke({"input":query})

In [15]:
response

{'input': 'What does the document say about LLM?',
 'context': [Document(id='a329fd0a-b5f8-46d7-b526-7df16c47e95f', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'start_index': 20470}, page_content='API-Bank (Li et al. 2023) is a benchmark for evaluating the performance of tool-augmented LLMs. It contains 53 commonly used API tools, a complete tool-augmented LLM workflow, and 264 annotated dialogues that involve 568 API calls. The selection of APIs is quite diverse, including search engines, calculator, calendar queries, smart home control, schedule management, health data management, account authentication workflow and more. Because there are a large number of APIs, LLM first has access to API search engine to find the right API to call and then uses the corresponding documentation to make a call.'),
  Document(id='ca89ce20-7767-4965-989b-c4f3dd189eea', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'start_index': 20470}, page

It runs successfully with relevent answers.  
Check response['answer'] for response from the rag application.

## Set permission restriction

This part is an extra demo showing how to add access restriction to the rag application.

In [46]:
from langchain_core.documents import Document

# Add sample documents with permission control
document1 = Document(
    page_content="This is a report for finance department. The revenue of 2024 financial year is 100 million.",
    metadata={"allowed_users": "user1",
              "departments": "finance"}
)

document2 = Document(
    page_content="This is a report for HR department. The total employees at the end of 2024 is 10 thousands.",
    metadata={"allowed_users": "user1",
              "departments": "HR"}
)

In [47]:
vectorstore.add_documents([document1, document2])

['2bb2cd0d-9ace-4347-99ee-86a89b103c11',
 '4c464c98-affc-44d7-a0ad-136737f50060']

In [62]:
# Permission of current user
user_permissions = {
    "user_id": ["user1"],
    "departments": ["finance"],
}

In [66]:
# Build filter for Chroma, loaded from current user permissions
access_filter = {
    "$and": [
        {"allowed_users": {"$in": user_permissions["user_id"]}},
        {"department": {"$in": user_permissions["departments"]}},
    ]
}

retriever = vectorstore.as_retriever(
    search_kwargs={
        "k": 10,
        "filter": access_filter  # Enforces access control
    }
)

# Create a new retrieval chain with restriction to current user
retrieval_chain_with_restriction = create_retrieval_chain(retriever, combine_docs_chain)

In [58]:
query = "What is the revenue in 2024?"
response = retrieval_chain_with_restriction.invoke({"input":query})

In [59]:
response['answer']

'The revenue of 2024 financial year is 100 million.'

In [77]:
query = "How many employees in 2024?"
response = retrieval_chain_with_restriction.invoke({"input":query})

In [78]:
response

{'input': 'How many employees in 2024?',
 'context': [],
 'answer': "I don't have any information about the number of employees in 2024. The provided context is empty."}

The response showing the search is correctly limited due to departments restriction.  
Let's compare with rag search without restriction below.

In [79]:
query = "How many employees in 2024?"
response = retrieval_chain.invoke({"input":query})

In [81]:
response['answer']

'There are 10,000 employees at the end of 2024.'