# Simple Agentic RAG Workflow
This agentic workflow retrieves relevant information from a PDF, generates an answer with APA-style citations, and runs it through a quality inspector loop that iteratively refines the answer until it meets the user's requirements or reaches a maximum number of iterations.

![ACSHistorianAgentWorkflow.png](attachment:0bedb51d-d66a-44ca-81bb-d23148ca5f77.png)


# The example below builds a basic Q&A interface
- Built as an example of creating agentic workflows
- Allows a comparison of output against the ACS Historian GPT in the GPT Store for the same question.

In [None]:
# Import necessary packages
import os
from dotenv import load_dotenv
from pathlib import Path
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings  # Updated import
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQAWithSourcesChain
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.schema import HumanMessage

# Pull settings from a ".env" file located in the current working directory.
# Create that file with a single line: OPENAI_API_KEY=pasteyourkeyhere
env_path = Path('.', '.env')
load_dotenv(dotenv_path=env_path)

# Look up the OpenAI API key and stop immediately if it is missing or empty.
openai_api_key = os.environ.get('OPENAI_API_KEY')
if not openai_api_key:
    raise ValueError("No OpenAI API key found. Please set the OPENAI_API_KEY environment variable.")

In [None]:
# Step 1: Load and Split PDF Text
loader = PyPDFLoader('./history-of-the-acs.pdf')  # Update with your PDF file path
documents = loader.load()

# Add citation metadata (page numbers).
# PyPDFLoader reports 'page' as a zero-based index, so shift it by one to get
# page numbers that match what a PDF viewer displays ("Page 1" for the first
# page instead of "Page 0"). Non-integer or missing values are kept as-is.
for doc in documents:
    page_index = doc.metadata.get('page')
    if isinstance(page_index, int):
        doc.metadata['page_number'] = page_index + 1
    else:
        doc.metadata['page_number'] = page_index
    # 'source' is the label the QA chain shows next to each retrieved passage.
    doc.metadata['source'] = f"Page {doc.metadata['page_number']}"

# Split the pages into chunks of up to 1000 characters with 100 characters of overlap
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
texts = text_splitter.split_documents(documents)

# Step 2: Generate Embeddings and Create Vector Store
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(texts, embeddings)

In [None]:
# Step 3: Set Up the Language Model and Prompts

# Main QA prompt
# Template used to generate the answer. It has two placeholders:
#   {context}  - the retrieved document text (answer_question() passes
#                "context" as the chain's document_variable_name)
#   {question} - the question being asked
# The closing paragraph asks the model to cite pages in the "(Page N)" form.
qa_prompt_template = """
You are an AI assistant tasked with answering questions based on the provided context. Provide detailed, comprehensive, and accurate answers that thoroughly explain the topic. Use clear and accessible language, avoiding jargon unless necessary. Include relevant dates, background information, examples, and additional context where appropriate. Pay special attention to chronological details and distinctions between planning, creation, and implementation dates.

Context:
{context}

Question: {question}

Provide a detailed and informative answer with in-text citations based on the context. Cite sources by mentioning the page numbers (e.g., (Page 3)). Aim for a response that fully covers the topic, ensuring all key dates and events are accurately represented. If the information is not found or if there's any ambiguity, clearly state this in your response.
"""

# Compile the raw template string into a reusable PromptTemplate.
QA_PROMPT = PromptTemplate.from_template(qa_prompt_template)

# Quality Inspector prompt
# Template used to review a generated answer. quality_inspect() fills the three
# placeholders: {question}, {context_summary} and {answer}.
# The instructions ask the model to reply with the word "Approved" when the
# answer is acceptable; answer_question() looks for that word to decide
# whether to stop iterating.
inspector_prompt_template = """
You are a Quality Inspector Copy Editor with expertise in historical accuracy. Your job is to review the following answer for correctness, completeness, verbosity, and adherence to the request. Pay special attention to dates, chronological order of events, distinctions between planning, creation, and implementation, and the inclusion of sources.

Question: {question}

Context Summary:
{context_summary}

Answer to Review:
{answer}

Instructions:
- Ensure the answer is comprehensive and provides detailed information.
- Verify that all dates and chronological information are accurate and clearly presented.
- Check that in-text citations are included correctly (e.g., (Page 3)).
- Ensure key events and details are fully explained.
- If the answer is satisfactory, accurate, and sufficiently verbose, respond with "Approved".
- If the answer contains any inaccuracies or lacks detail, provide specific feedback on what needs to be corrected or expanded upon.

Your Response:
"""

# Compile the raw template string into a reusable PromptTemplate.
INSPECTOR_PROMPT = PromptTemplate.from_template(inspector_prompt_template)

In [None]:
# Initialize the LLM
# One chat model instance is shared by the retriever's extractor, the QA chain
# and the quality inspector.
llm = ChatOpenAI(
    max_tokens=1000,  # roughly 750 words; raise this for more verbose answers
    temperature=0,
    model_name='gpt-4o',  # or whichever model you want to use
)

In [None]:
# Step 4: Implement Contextual Compression for Better Document Retrieval
def get_compressed_retriever(vectorstore):
    """Build a contextual-compression retriever on top of *vectorstore*.

    The base retriever is ``vectorstore.as_retriever`` configured for an
    "mmr" search with ``k`` set to 10. An ``LLMChainExtractor`` created from
    the module-level ``llm`` is attached as the compressor.

    Returns:
        The configured ``ContextualCompressionRetriever``.
    """
    llm_extractor = LLMChainExtractor.from_llm(llm)
    mmr_retriever = vectorstore.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 10},
    )
    return ContextualCompressionRetriever(
        base_compressor=llm_extractor,
        base_retriever=mmr_retriever,
    )

# Step 5: Define the Quality Inspection Function
def quality_inspect(question, context_summary, answer):
    """Have the LLM review *answer* and return its reply as stripped text.

    The three arguments are substituted into ``INSPECTOR_PROMPT``; the
    resulting prompt is sent to the module-level ``llm``.

    Args:
        question: The question the answer is supposed to address.
        context_summary: Text of the source material the answer was based on.
        answer: The generated answer to review.

    Returns:
        The content of the model's reply with surrounding whitespace removed.
    """
    prompt_fields = {
        "question": question,
        "context_summary": context_summary,
        "answer": answer,
    }
    review = llm.invoke(INSPECTOR_PROMPT.format(**prompt_fields))
    return review.content.strip()

# Step 6: Implement the Answer Function with Quality Inspection Loop
def _is_approved(inspection: str) -> bool:
    """Return True only when the inspector's reply opens with "Approved"."""
    # A plain substring test would also accept "Not Approved" or feedback that
    # merely mentions the word. Leading quotes / markdown emphasis are dropped
    # because the model may wrap its verdict in them.
    verdict = inspection.strip().lstrip("\"'*_ ")
    return verdict.lower().startswith("approved")


def _format_sources(source_documents) -> str:
    """Return the distinct "Page N" labels of the documents, in first-seen order."""
    labels = (
        f"Page {doc.metadata.get('page_number', 'Unknown')}"
        for doc in source_documents
    )
    # dict.fromkeys de-duplicates while preserving insertion order.
    return ', '.join(dict.fromkeys(labels))


def answer_question(question: str, max_iterations: int = 3) -> str:
    """Answer *question* from the indexed PDF, refining it via the quality inspector.

    Each round runs the retrieval QA chain, then passes the answer and the
    text of the returned source documents to ``quality_inspect``. The loop
    stops as soon as the inspector approves or after ``max_iterations``
    rounds, whichever comes first; the most recent answer is returned either
    way.

    Args:
        question: The user's question.
        max_iterations: Maximum number of generate/inspect rounds (>= 1).

    Returns:
        The last generated answer followed by a "Sources: ..." line listing
        the distinct pages of the source documents used for that answer.

    Raises:
        ValueError: If ``max_iterations`` is less than 1, since no answer
            could be produced.
    """
    if max_iterations < 1:
        raise ValueError("max_iterations must be at least 1.")

    compressed_retriever = get_compressed_retriever(vectorstore)

    qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=compressed_retriever,
        return_source_documents=True,
        chain_type_kwargs={
            "prompt": QA_PROMPT,
            # QA_PROMPT names its document placeholder "context".
            "document_variable_name": "context"
        }
    )

    # `query` is what the chain receives; `question` stays untouched so the
    # inspector always judges against what the user actually asked.
    query = question
    answer = ""
    source_documents = []

    for _ in range(max_iterations):
        result = qa_chain.invoke({"question": query})
        answer = result['answer']
        source_documents = result['source_documents']
        context_summary = "\n".join(doc.page_content for doc in source_documents)

        inspection = quality_inspect(question, context_summary, answer)
        if _is_approved(inspection):
            break

        # Rebuild the query from the ORIGINAL question plus only the latest
        # feedback, so revision notes do not pile up round after round.
        feedback = f"\n\nRevision Notes: {inspection}"
        query = f"{question}\n\nPlease revise the answer, addressing the following feedback: {feedback}"

    # Extract and format sources
    sources_str = _format_sources(source_documents)
    return f"{answer}\n\nSources: {sources_str}"

In [None]:
# Example usage: change the question below and run this cell
initial_question = "When and why was the American Community Survey created?"
final_answer = answer_question(initial_question)
print(f"Answer:\n{final_answer}")