In [1]:
import os
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Set up OpenAI API key.
# Prompt only when the key is not already present in the environment, so that
# re-running this cell does not discard a key configured outside the notebook.
# getpass keeps the typed key from being echoed into the cell output.
import getpass
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

In [2]:
# Source PDF and the chunking parameters handed to the text splitter.
file_path = "Resilient_Los_Angeles_Plan.pdf"
chunk_settings = {"chunk_size": 1000, "chunk_overlap": 200}

# Read the PDF into LangChain documents.
loader = PyPDFLoader(file_path)
docs = loader.load()

# Cut the loaded documents into overlapping chunks ahead of embedding.
text_splitter = RecursiveCharacterTextSplitter(**chunk_settings)
splits = text_splitter.split_documents(docs)

# Embed every chunk with OpenAI embeddings into an in-memory vector store,
# then expose that store through a retriever.
embedding_model = OpenAIEmbeddings()
vectorstore = InMemoryVectorStore.from_documents(
    documents=splits,
    embedding=embedding_model,
)
retriever = vectorstore.as_retriever()

# Read the system prompt from a Markdown (.md) file.
# The file is opened directly instead of being checked with os.path.exists
# first, which removes the gap between the check and the read. UTF-8 is
# requested explicitly so the prompt decodes the same way regardless of the
# platform's default encoding.
prompt_file_path = "summary_tool_system_prompt.md"
try:
    with open(prompt_file_path, "r", encoding="utf-8") as file:
        system_prompt = file.read()
except FileNotFoundError:
    # Re-raise with the notebook's own message; the exception type is unchanged.
    raise FileNotFoundError(
        f"The specified file was not found: {prompt_file_path}"
    ) from None

# Build the chat prompt: the system message read from the prompt file,
# followed by a human message holding the {input} placeholder.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

# Actually ensure the system prompt includes {context} for document input.
# Without that placeholder there is nowhere for the retrieved documents to go,
# so fail here with a message that names the prompt file.
if "context" not in prompt.input_variables:
    raise ValueError(
        f"The system prompt in {prompt_file_path} must contain a "
        "{context} placeholder for the retrieved documents."
    )

# Assemble the pipeline: a gpt-4o chat model, a chain that feeds documents to
# the prompt through its "context" variable, and a retrieval chain that wraps
# the retriever around it.
llm = ChatOpenAI(model="gpt-4o")
question_answer_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=prompt,
    document_variable_name="context",
)
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

# Load questions from a Markdown file: one question per non-blank line, with
# surrounding whitespace removed.
# The file is opened directly (no separate existence check) and decoded as
# UTF-8 explicitly so the result does not depend on the platform's default
# encoding.
questions_file_path = "summary_tool_questions.md"
try:
    with open(questions_file_path, "r", encoding="utf-8") as file:
        # Iterate the file object line by line instead of materializing
        # readlines() first; each line is stripped once and blanks dropped.
        questions = [text for text in map(str.strip, file) if text]
except FileNotFoundError:
    # Re-raise with the notebook's own message; the exception type is unchanged.
    raise FileNotFoundError(
        f"The specified file was not found: {questions_file_path}"
    ) from None

# Ask each question in turn and print only the generated answer text.
for question in questions:
    chain_input = {"input": question}
    result = rag_chain.invoke(chain_input)
    # Of everything in the chain's result, only the "answer" entry is shown.
    answer = result["answer"]
    print(f"Answer: {answer}\n")


Answer: The plan provides information about the city of Los Angeles. Los Angeles is described as having over four million residents, with over one million buildings, and 500,000 active businesses. The city spans nearly 500 square miles, indicating its vast geographical area *(Los Angeles’ Health Atlas and other City resources; Page number not provided)*. However, specific details about major industries or per capita income are not included in the provided excerpts from the plan.

Answer: The plan addresses resilience, mitigation, and adaptation through various initiatives and strategies, as outlined below:

### **Resilience:**
- The plan emphasizes incorporating resilience as a guiding principle into government planning documents, specifically the General Plan and Zoning Code. This is to ensure that land-use decisions enhance the ability of individuals, neighborhoods, and economic systems to recover from disasters, climate change, and economic shifts *(Goal 8, Page 44)*.
- The plan out