This notebook is a basic example of Retrieval-Augmented Generation (RAG), based on the following LangChain tutorial: https://python.langchain.com/v0.2/docs/tutorials/qa_chat_history/ — it was created for an ARLIS RISC Demonstration in June 2024 by Alan McMillan, PhD.

In [None]:
! pip install langchain langchain-openai langchain-community langchain-chroma

In [None]:
# Provide the OpenAI API key without hard-coding it in the notebook.
# A key typed into a cell is easily leaked when the notebook is shared or
# committed, and assigning unconditionally would overwrite a valid key that is
# already configured. An existing OPENAI_API_KEY is therefore reused, and the
# user is prompted (with hidden input) only when none is set.
import getpass
import os

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

In [None]:
# WebBaseLoader (from langchain_community.document_loaders) loads documents
# from a specified web page
from langchain_community.document_loaders import WebBaseLoader

# Point the loader at the web page we want to load; the URL is its argument
loader = WebBaseLoader(
    "https://today.umd.edu/umd-awarded-record-setting-research-contract-worth-up-to-500m"
)

# Fetch the page from that URL and keep the loaded content in 'data',
# which is what the following cells work with
data = loader.load()

In [None]:
# RecursiveCharacterTextSplitter (from langchain_text_splitters) splits
# documents into smaller chunks of text
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Configure the splitter:
# - chunk_size=1000: the maximum size of each text chunk, in characters
# - chunk_overlap=200: the number of characters that overlap between
#   consecutive chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)

# Split the loaded document ('data') and keep the resulting text chunks
# in 'splits'
splits = text_splitter.split_documents(data)

In [None]:
# Chroma (from langchain_chroma) is the interface to the vector database;
# OpenAIEmbeddings (from langchain_openai) computes embeddings for the documents
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# Load the document chunks into the vector database
# - documents: the chunks of text to store (here, 'splits')
# - embedding: the embedding model applied to the documents
#   (here, OpenAIEmbeddings with the "text-embedding-3-small" model)
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(model="text-embedding-3-small"),
)

# Obtain a retriever from the vector store; it is the interface used to
# search for and retrieve relevant documents based on a query
retriever = vectorstore.as_retriever()

In [None]:
# ChatPromptTemplate (from langchain_core.prompts) builds chat-based prompt
# templates
from langchain_core.prompts import ChatPromptTemplate

# System prompt for the retrieval-augmented generation (RAG) task
# - Tells the assistant to answer concisely from the retrieved context
# - '{context}' is the placeholder where the retrieved text is inserted
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n{context}"
)

# Chat prompt template for the RAG task, made of two messages:
# - a system message carrying 'system_prompt'
# - a human message whose '{input}' placeholder receives the user's question
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

In [None]:
# Imports for this cell:
# - create_retrieval_chain: builds the retrieval-augmented generation (RAG) chain
# - create_stuff_documents_chain: builds the chain that combines document
#   chunks and answers questions
# - ChatOpenAI: defines the language model (LLM) for the task
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_openai import ChatOpenAI

# The language model that generates the answers
# - model: the specific model to use (here, "gpt-3.5-turbo-0125")
llm = ChatOpenAI(
    model="gpt-3.5-turbo-0125",
)

# Question-answering chain: the LLM guided by the prompt template
question_answer_chain = create_stuff_documents_chain(
    llm,
    prompt,
)

# RAG chain: the retriever finds relevant documents and passes them to the
# question-answering chain, which produces the answer
rag_chain = create_retrieval_chain(
    retriever,
    question_answer_chain,
)

In [None]:
# Utility function that prints text wrapped to a fixed width for display in Colab

# textwrap (standard library) wraps text to a specified width
import textwrap


def print_wrapped(text: str, width: int = 50) -> None:
    """Print ``text`` wrapped to at most ``width`` characters per line.

    Each line of ``text`` is wrapped on its own so that line breaks and blank
    lines in the input (for example paragraphs or bullet lists in a model
    response) are kept. Passing the whole string to ``textwrap.fill`` would
    replace every newline with a space and merge everything into a single
    paragraph.

    Args:
        text: The string to print.
        width: Maximum line width in characters (50 by default).
    """
    # textwrap.fill("") returns "", so blank lines survive as paragraph breaks.
    wrapped_lines = [
        textwrap.fill(line, width=width) for line in text.splitlines()
    ]
    print("\n".join(wrapped_lines))

In [None]:
# Baseline: ask the LLM the question directly, without retrieval-augmented
# generation (RAG)

# The conversation passed to the LLM, as a list of messages:
# - a system message setting the context for the assistant
# - a human message asking the specific question
direct_messages = [
    ("system", "You are an assistant for question-answering tasks."),
    ("human", "What did ARLIS announce on 5/28/2024?"),
]
no_rag_response = llm.invoke(direct_messages)

# 'no_rag_response.content' holds the answer generated by the LLM;
# print it with text wrapping
print_wrapped(no_rag_response.content)

In [None]:
# Ask the question through the retrieval-augmented generation (RAG) chain

# The chain is invoked with a dictionary whose "input" key holds the question
rag_question = {"input": "What did ARLIS announce on 5/28/2024?"}
response = rag_chain.invoke(rag_question)

# The answer generated by the RAG chain is stored under response["answer"];
# print it with text wrapping
print_wrapped(response["answer"])