## RAG System Implementation with Hurricane Helene Report (September 2024)

In [None]:
# === Import libraries ===
import getpass
import os

from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.retrievers import MultiQueryRetriever
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langsmith import Client, trace


  from .autonotebook import tqdm as notebook_tqdm


## 1. Document Indexing with ChromaDB

In [None]:
# === Configuration ===
# The API key is taken from the environment, or prompted for once, so that a
# real key is never written into the notebook and an already-exported key is
# not overwritten by a placeholder string.
_gemini_key = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")
if not _gemini_key or _gemini_key == "GEMINI_API_KEY":  # unset, or the old placeholder value
    _gemini_key = getpass.getpass("Gemini API key: ")
os.environ["GEMINI_API_KEY"] = _gemini_key
# langchain_google_genai documents GOOGLE_API_KEY as the variable it reads,
# so the same key is exported under that name as well.
os.environ["GOOGLE_API_KEY"] = _gemini_key

# Directory in which the Chroma index is persisted.
PERSIST_DIR = "chroma_db_hurricane"

In [None]:
# === Load Hurricane Helene report ===
# The loader is pointed at the report's URL; `documents` holds whatever
# loader.load() returns and is the input to the splitting step.
REPORT_URL = "https://www.nhc.noaa.gov/data/tcr/AL092024_Helene.pdf"
loader = PyPDFLoader(REPORT_URL)
documents = loader.load()


In [None]:
# === Split document into chunks ===
# Splitter settings are collected in one place so they are easy to tune:
# chunk_size / chunk_overlap are measured with `len`.
splitter_options = {
    "chunk_size": 1000,
    "chunk_overlap": 200,
    "length_function": len,
    "separators": ["\n\n", "\n", " ", ""],
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
chunks = text_splitter.split_documents(documents)

chunk_total = len(chunks)
print(f"Split document into {chunk_total} chunks")


Split document into 233 chunks


In [None]:
# === Create vector store ===
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Re-running this cell used to embed every chunk again and append the results
# to the same persist directory, leaving duplicate entries in the index.
# An index that already exists on disk is therefore reused instead.
# Delete PERSIST_DIR to force a rebuild (e.g. after changing the chunking).
if os.path.isdir(PERSIST_DIR) and os.listdir(PERSIST_DIR):
    vector_store = Chroma(
        persist_directory=PERSIST_DIR,
        embedding_function=embedding,
    )
else:
    vector_store = Chroma.from_documents(
        documents=chunks,
        embedding=embedding,
        persist_directory=PERSIST_DIR,
    )
# No explicit vector_store.persist() call: the method is deprecated (it emitted
# a deprecation warning when this notebook was run) because the store persists
# automatically when a persist_directory is given.

  vector_store.persist()


## 2. System Architecture with Gemini 1.5 Flash

In [None]:
# === Initialize LLM ===
# Single chat model instance shared by the baseline test and by both chains.
llm = ChatGoogleGenerativeAI(
    temperature=0.2,
    model="gemini-1.5-flash",
)

In [None]:
# === Create conversation chain with memory ===
# Conversation history is kept under "chat_history" as message objects;
# output_key names the chain output ("answer") that is recorded.
memory_settings = dict(
    memory_key="chat_history",
    return_messages=True,
    output_key="answer",
)
memory = ConversationBufferMemory(**memory_settings)

  memory = ConversationBufferMemory(


In [None]:
# === Custom prompt template ===
# Retriever over the vector store using search_type "mmr" with k=5, fetch_k=10.
retriever = vector_store.as_retriever(
    search_kwargs={"k": 5, "fetch_k": 10},
    search_type="mmr",
)

# Prompt for the answering step; {context} and {question} are its placeholders.
template = (
    "You are a hurricane response assistant. "
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
    "Answer:"
)
QA_PROMPT = PromptTemplate.from_template(template)

# Conversational chain wiring the LLM, retriever, memory and custom prompt
# together; source documents are returned alongside the answer.
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    return_source_documents=True,
    combine_docs_chain_kwargs={"prompt": QA_PROMPT},
)

## 3. Experimentation & Testing

In [None]:
# === Questions that require the report ===
questions = [
    "What was the maximum wind speed recorded for Hurricane Helene?",
    "Which areas were most affected by the storm surge?",
    "What date did Hurricane Helene make landfall in Florida?",
    "How many emergency shelters were opened in response to the hurricane?",
    "What was the estimated economic damage from Hurricane Helene?",
]

# === Test without retrieval ===
# Baseline: each question is sent straight to the LLM, with no retriever involved.
print("Testing without retrieval:")
for question in questions:
    response = llm.invoke(question)
    print(f"Q: {question}", f"A: {response.content}", "---", sep="\n")


Testing without retrieval:
Q: What was the maximum wind speed recorded for Hurricane Helene?
A: The maximum sustained wind speed recorded for Hurricane Helene (2018) was **115 mph (185 km/h)**.
---
Q: Which areas were most affected by the storm surge?
A: To answer your question accurately, I need to know *which* storm surge you're referring to.  There have been countless storm surges throughout history.  Please specify the storm (e.g., Hurricane Sandy, Hurricane Katrina, Cyclone Idai) and its location, and I can tell you which areas were most affected by its surge.
---
Q: What date did Hurricane Helene make landfall in Florida?
A: Hurricane Helene did not make landfall in Florida.  It stayed out over the Atlantic Ocean.
---
Q: How many emergency shelters were opened in response to the hurricane?
A: I do not have access to real-time information, including up-to-the-minute data on emergency shelters opened in response to a specific hurricane.  To find that information, you will need to s

In [None]:
# === Test with retrieval ===
# Same questions as the baseline, now answered through the retrieval chain.
# The first two source documents of each answer are listed for inspection.
print("\nTesting with retrieval:")
for question in questions:
    # .invoke() replaces the deprecated Chain.__call__ form, which emitted a
    # deprecation warning when this notebook was run.
    result = qa_chain.invoke({"question": question})
    print(f"Q: {question}\nA: {result['answer']}\nSources:")
    for doc in result["source_documents"][:2]:
        # .get() for both fields so a document lacking 'source' cannot raise
        # KeyError; this matches the lookup style used in chat().
        print(f"- {doc.metadata.get('source', 'N/A')}, page {doc.metadata.get('page', 'N/A')}")
    print("---")



Testing with retrieval:


  result = qa_chain({"question": question})


Q: What was the maximum wind speed recorded for Hurricane Helene?
A: The provided text mentions a wind gust of 87 kt at Bacon County Airport in Alma, Georgia.  Another source mentions a maximum surface wind speed of 48 kt with a 67 kt gust in Laurens.  There are also numerous other wind gusts reported throughout the text, but no single maximum wind speed is explicitly stated.
Sources:
- https://www.nhc.noaa.gov/data/tcr/AL092024_Helene.pdf, page 8
- https://www.nhc.noaa.gov/data/tcr/AL092024_Helene.pdf, page 5
---
Q: Which areas were most affected by the storm surge?
A: Areas south of Englewood to Bonita Beach, including Charlotte Harbor, experienced the most significant storm surge, with maximum inundation of 3 to 5 feet above ground level (AGL).  South of Bonita Beach, areas such as Naples, Marco Island, and Everglades City saw 2 to 4 feet AGL.
Sources:
- https://www.nhc.noaa.gov/data/tcr/AL092024_Helene.pdf, page 11
- https://www.nhc.noaa.gov/data/tcr/AL092024_Helene.pdf, page 8
---

## 4. (Bonus) Metadata Enrichment and Multi-Query Retrieval

In [None]:
# === Metadata Filtering ===
# Tag every chunk with its position in `chunks` plus fixed event/date labels.
# NOTE(review): the vector store is created from `chunks` earlier in the
# notebook, so these fields presumably exist only on the in-memory objects and
# not in the index - confirm whether the store must be rebuilt before any
# retriever can filter on them.
for chunk_index, chunk in enumerate(chunks):
    chunk.metadata["chunk_id"] = chunk_index
    chunk.metadata.update(
        event="Hurricane Helene 2024",
        date="September 2024",
    )


In [None]:
# === Multi-Query Retrieval ===
# Wrap a default retriever over the vector store in a MultiQueryRetriever
# driven by the shared LLM.
base_retriever = vector_store.as_retriever()
multi_query_retriever = MultiQueryRetriever.from_llm(
    llm=llm,
    retriever=base_retriever,
)

In [None]:
# === Test advanced retrieval ===
# This chain gets its own memory. Sharing the `memory` object with qa_chain
# would feed the chat history of the earlier test questions into this chain's
# question-rephrasing step and mix the two conversations.
advanced_memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
    output_key="answer",
)

advanced_qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=multi_query_retriever,
    memory=advanced_memory,
    combine_docs_chain_kwargs={"prompt": QA_PROMPT},
)

complex_question = "Compare the preparedness measures for Hurricane Helene with previous major hurricanes in Florida"
# .invoke() replaces the deprecated Chain.__call__ form.
result = advanced_qa.invoke({"question": complex_question})
print(f"Q: {complex_question}\nA: {result['answer']}")

Q: Compare the preparedness measures for Hurricane Helene with previous major hurricanes in Florida
A: This document does not provide information on how preparedness measures for Hurricane Helene compared to those for previous major hurricanes in Florida.


## 5. Q&A System - Interactive Console

In [31]:
def chat():
    """Run an interactive question/answer loop against ``qa_chain``.

    Prompts for questions until the user types ``exit`` or ``quit``
    (case-insensitive) or input ends (EOF / keyboard interrupt). Blank input
    is ignored and the prompt is shown again. For each question the answer
    is printed followed by up to two source documents (source and page).
    An error raised while answering is reported and the loop continues.
    """
    print("Hurricane Analysis System (type 'exit' to quit)")
    while True:
        try:
            q = input("Your question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted kernel ends the session cleanly.
            break
        if not q:
            # An accidental empty line should not terminate the session.
            continue
        if q.lower() in ("exit", "quit"):
            break
        try:
            # .invoke() replaces the deprecated Chain.__call__ form.
            res = qa_chain.invoke({"question": q})
            sources = "\n".join(
                f"- {d.metadata.get('source', '?')}, pg {d.metadata.get('page', '?')}"
                for d in res.get("source_documents", [])[:2]
            )
            # Newline after "Sources:" so the first source starts on its own
            # line (it used to be glued on as "Sources:- ...").
            print(f"\nAnswer: {res['answer']}\nSources:\n{sources}")
        except Exception as e:
            # Best effort: report the failure and keep the console alive.
            print(f"Error: {e}")
        print("\n" + "-" * 50)


if __name__ == "__main__":
    chat()

Hurricane Analysis System (type 'exit' to quit)

Answer: The provided text does not contain information about an evacuation plan for Miami-Dade County.
Sources:- https://www.nhc.noaa.gov/data/tcr/AL092024_Helene.pdf, pg 52
- https://www.nhc.noaa.gov/data/tcr/AL092024_Helene.pdf, pg 91

--------------------------------------------------


## 6. Observability with LangSmith

In [None]:
# === Configure LangSmith ===
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "hurricane-rag-system"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"

# The key is taken from the environment, or prompted for once, instead of
# overwriting an already-exported key with a placeholder string.
_langsmith_key = os.environ.get("LANGSMITH_API_KEY")
if not _langsmith_key or _langsmith_key == "LANGSMITH_API_KEY":  # unset, or the old placeholder value
    _langsmith_key = getpass.getpass("LangSmith API key: ")
os.environ["LANGSMITH_API_KEY"] = _langsmith_key

In [None]:
# === Test with tracing ===
# Runs one question inside an explicit LangSmith trace named "Basic QA Test".
# The question and the answer are attached to the traced run so the trace
# shows what was asked and returned.
tracing_question = "What was the evacuation plan for Miami-Dade county?"
client = Client()
with trace("Basic QA Test", inputs={"question": tracing_question}, client=client) as run:
    # .invoke() replaces the deprecated Chain.__call__ form.
    result = qa_chain.invoke({"question": tracing_question})
    run.end(outputs={"answer": result["answer"]})
    print(result["answer"])

This document does not contain information about Miami-Dade County's evacuation plan during Hurricane Helene.
