In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input directory and print the full path of every file in it.
for root_dir, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(root_dir, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Capstone Project - SimEQ: Earthquake Crisis Response Simulation
This notebook simulates a GenAI-powered assistant that can:
1. Generate realistic earthquake scenarios using a disaster report (Document understanding)
2. Evaluate simulated responses (GenAI Evaluation)
3. Simulate social media crisis messages (Few-shot prompting)
4. Analyze uploaded reports (Document Understanding, RAG, Embeddings, Vector Database)
5. Generate structured crisis response plans (Structured output/JSON mode)

In [2]:
!pip uninstall -qqy jupyterlab kfp  # Remove unused conflicting packages
!pip install -qU "google-genai==1.7.0" "chromadb==0.6.3"

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m144.7/144.7 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m611.1/611.1 kB[0m [31m19.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m56.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.9/100.9 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m284.2/284.2 kB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.2/95.2 kB[0m [31m5.9 MB/s[0

In [3]:
from google import genai
from google.genai import types

from IPython.display import Markdown

genai.__version__

'1.7.0'

In [4]:
# for kaggle notebook submission later 
from kaggle_secrets import UserSecretsClient

# Read the Gemini API key from Kaggle's user secrets instead of hardcoding it.
secrets_client = UserSecretsClient()
GOOGLE_API_KEY = secrets_client.get_secret("GOOGLE_API_KEY")

In [5]:
import os

# Gemini API client; later cells reuse this `client` object.
client = genai.Client(api_key=GOOGLE_API_KEY)

# Print the name of every model that lists "embedContent" among its supported actions.
embedding_model_names = [
    model.name
    for model in client.models.list()
    if "embedContent" in model.supported_actions
]
for model_name in embedding_model_names:
    print(model_name)

models/embedding-001
models/text-embedding-004
models/gemini-embedding-exp-03-07
models/gemini-embedding-exp


In [6]:
# Download a sample PDF (saved as gemini.pdf) and upload it with client.files.upload.
# It is later passed to answer_question as the alternative input document
# for the pairwise evaluation.
!wget -nv -O gemini.pdf https://storage.googleapis.com/cloud-samples-data/generative-ai/pdf/2403.05530.pdf

document_eval_file = client.files.upload(file='gemini.pdf')

2025-04-19 15:45:23 URL:https://storage.googleapis.com/cloud-samples-data/generative-ai/pdf/2403.05530.pdf [7228817/7228817] -> "gemini.pdf" [1]


In [7]:
!pip install PyMuPDF  # Gemini do not have direct access to the file path, need to manually use python to pass contents

Collecting PyMuPDF
  Downloading pymupdf-1.25.5-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Downloading pymupdf-1.25.5-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.0/20.0 MB[0m [31m82.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyMuPDF
Successfully installed PyMuPDF-1.25.5


## 1. Generate realistic earthquake scenarios using a disaster report (Document understanding)
In this section, we evaluate document understanding by providing a disaster report as input to the model for scenario generation. The inclusion of the document helps guide the response toward more grounded and structured outputs.

For comparison, scenarios generated without the document tend to be less detailed and more generic.

We configure the generation parameters with temperature = 0.1 and top_p = 0.95. A lower temperature reduces randomness, so the model produces more consistent and focused responses, which suits the simulation of real-world disaster situations.

In [8]:
# Example scenario parameters; they are interpolated into the prompts built in later cells.
location = "Los Angeles"
magnitude = 6.8
population_density = "high"
# NOTE(review): this name would be rebound if the stdlib `time` module were imported later.
time = "03:45 AM"

In [9]:
# Download the IFRC Myanmar earthquake disaster brief (saved as Myanmar.pdf) and upload it
# with client.files.upload; it is attached to the scenario-generation request.
!wget -nv -O Myanmar.pdf https://prddsgofilestorage.blob.core.windows.net/api/event-featured-documents/file/2025_IFRC_MYANMAR_EQ_DISASTER_BRIEF_24H_Low_Res.pdf
document_sim_file = client.files.upload(file='Myanmar.pdf')

2025-04-19 15:45:31 URL:https://prddsgofilestorage.blob.core.windows.net/api/event-featured-documents/file/2025_IFRC_MYANMAR_EQ_DISASTER_BRIEF_24H_Low_Res.pdf [529736/529736] -> "Myanmar.pdf" [1]


In [10]:
# Scenario prompt built from the example parameters defined above.
request = f"""
You're a crisis simulator. Create a realistic earthquake scenario based on:
Location: {location}
Magnitude: {magnitude}
Population Density: {population_density}
Time: {time}
"""

# Low temperature keeps the output close to deterministic (see the section text above).
model_config = types.GenerateContentConfig(temperature=0.1, top_p=0.95)

# The uploaded disaster brief is sent alongside the text prompt.
# To run without the document, use contents=[request] instead.
response = client.models.generate_content(
    model='gemini-2.0-flash',
    config=model_config,
    contents=[request, document_sim_file])

final_resp = response.text
# Render the generated scenario as Markdown in the cell output.
Markdown(final_resp)

Okay, here's a crisis simulation based on your parameters:

**Scenario: Los Angeles Earthquake**

**Event:** A magnitude 6.8 earthquake strikes Los Angeles, California.

**Date & Time:** October 26, 2024, 03:45 AM PST

**Epicenter:**  Located approximately 8 miles (13 km) northeast of downtown Los Angeles, near the city of Pasadena. This places it directly under a densely populated area.

**Geological Context:** The earthquake occurs on a previously unknown fault line, branching off the Sierra Madre Fault Zone. This unexpected rupture contributes to the severity of the damage.

**Immediate Impacts:**

*   **Shaking Intensity:**  Widespread "violent" shaking (MMI VIII-IX) across the Los Angeles Basin, with "severe" shaking (MMI VII) felt as far as San Bernardino and Ventura counties.
*   **Infrastructure Damage:**
    *   **Buildings:** Widespread damage to older, unreinforced masonry buildings.  Modern buildings fare better but still experience significant structural and non-structural damage (broken windows, fallen facades, internal damage).  Some collapses occur, particularly in older residential areas and commercial districts.
    *   **Transportation:**  Major freeway overpasses and bridges sustain damage, leading to closures on key arteries like the I-5, I-10, and 101.  Surface streets are blocked by debris and fallen power lines.  Los Angeles International Airport (LAX) experiences runway damage and is temporarily closed.  The Metro Rail system suffers damage, with some sections collapsing.
    *   **Utilities:**  Widespread power outages across the city.  Water mains rupture, leading to water shortages and contamination concerns.  Natural gas lines break, causing fires in several neighborhoods.  Cell phone service is overwhelmed and unreliable.
*   **Casualties:**
    *   **Immediate:**  Significant injuries and fatalities are reported.  Hospitals are overwhelmed with patients.  Many people are trapped in collapsed buildings.
    *   **Estimates:** Initial estimates suggest hundreds of fatalities and thousands of injuries, but these numbers are expected to rise as search and rescue efforts progress.
*   **Emergency Response:**
    *   **Initial Chaos:**  The 911 system is overloaded.  First responders are hampered by damaged infrastructure and blocked roads.
    *   **Activation:**  The Los Angeles Fire Department (LAFD) and Los Angeles Police Department (LAPD) activate their emergency response plans.  The National Guard is mobilized.  FEMA is notified and begins preparing to deploy resources.
*   **Social Impacts:**
    *   **Panic and Fear:**  Widespread panic and fear among the population.  People are trapped in their homes, offices, and vehicles.
    *   **Displacement:**  Thousands of people are displaced from their homes due to damage or fear of aftershocks.  Shelters are quickly established but are soon overwhelmed.
    *   **Looting and Civil Unrest:**  Reports of looting and civil unrest begin to surface in some areas.

**Secondary Impacts:**

*   **Aftershocks:**  Numerous aftershocks, some of significant magnitude (5.0+), continue to rattle the region, causing further damage and hindering rescue efforts.
*   **Fires:**  Multiple fires break out due to ruptured gas lines and downed power lines.  The LAFD struggles to contain the blazes due to water shortages and blocked roads.
*   **Landslides:**  Heavy shaking triggers landslides in the Hollywood Hills and other hillside areas, damaging homes and blocking roads.
*   **Tsunami Threat:**  While the epicenter is inland, there is a concern about potential landslides into the ocean triggering localized tsunamis.  Coastal areas are put on alert.
*   **Economic Impact:**  The earthquake causes billions of dollars in damage.  Businesses are disrupted, and the tourism industry suffers a major blow.

**Challenges:**

*   **High Population Density:**  The high population density of Los Angeles makes rescue and relief efforts extremely challenging.
*   **Infrastructure Vulnerability:**  The aging infrastructure of Los Angeles is particularly vulnerable to earthquakes.
*   **Communication Breakdown:**  Communication systems are overwhelmed, making it difficult to coordinate rescue and relief efforts.
*   **Resource Scarcity:**  Resources such as water, food, and medical supplies are quickly depleted.
*   **Public Health Concerns:**  Concerns about the spread of disease due to water contamination and overcrowding in shelters.

**Potential Escalation:**

*   **Major Aftershock:**  A large aftershock could cause further collapses and increase casualties.
*   **Widespread Civil Unrest:**  If looting and civil unrest escalate, it could overwhelm law enforcement and hinder rescue efforts.
*   **Long-Term Displacement:**  Thousands of people could be permanently displaced from their homes, creating a long-term housing crisis.

**This scenario is designed to be a starting point.  Further details and complexities can be added to make the simulation even more realistic.**


## 2. Evaluate simulated responses (GenAI Evaluation)
In this section, we leverage GenAI-based evaluation to assess the quality of the simulated earthquake response outputs generated earlier.

We employ both pointwise and pairwise evaluation methods using Gemini to analyze the Accuracy and Groundedness, Relevance and Usefulness, Clarity and Structure, and Completeness of each response. This helps us identify which outputs are most effective for supporting disaster planning and decision-making.

#### Pointwise Evaluation

In [11]:
# Evaluate 
import enum

# Pointwise evaluation prompt. The `{prompt}` and `{response}` placeholders are
# filled in with str.format() inside eval_sim.
SIMU_PROMPT = """\
You are an expert evaluator in natural disaster response and AI systems. Your task is to evaluate the quality of AI-generated responses related to earthquake simulation scenarios.

You will be provided with a user input describing a simulation scenario or goal, and a response generated by an AI model. Your job is to analyze how well the AI's response meets the expectations of a useful, actionable, and accurate simulation output in the context of earthquake preparedness or emergency response.

Follow the evaluation criteria and assign a rating based on the rubric. Provide step-by-step reasoning for your score, and choose only from the defined rating scale.

# Evaluation
## Metric Definition
You are evaluating the **simulation quality and response utility** in the context of earthquake disaster scenarios. A strong response should provide accurate, grounded, and actionable insights or outputs based on the scenario described. The simulation should not introduce speculative or irrelevant data.

## Criteria
1. **Accuracy and Groundedness**: The response is factually correct and reflects the information contained in the prompt or assumed reasonable context (e.g., known seismic behavior, typical emergency protocols). It does not hallucinate or invent unsupported claims.
2. **Relevance and Usefulness**: The response provides relevant insights or actions that would be helpful in earthquake response, such as resource planning, risk zones, evacuation paths, or damage estimates.
3. **Clarity and Structure**: The information is clearly presented and logically organized, allowing emergency teams or planners to understand and use it quickly.
4. **Completeness**: The response addresses all aspects of the simulation request or scenario, without leaving critical details out.

## Rating Rubric
5: (Excellent). Fully accurate, highly relevant, clearly written, and complete.
4: (Good). Mostly accurate and relevant, clearly written, but may miss one minor detail.
3: (Fair). Partially accurate or relevant; some gaps in clarity or completeness.
2: (Poor). Lacks accuracy or misses critical elements; hard to use in a real-world scenario.
1: (Very Poor). Inaccurate or irrelevant; not suitable for real disaster response usage.

## Evaluation Steps
STEP 1: Read the scenario prompt and AI response.
STEP 2: Evaluate it using the four criteria: accuracy, relevance, clarity, and completeness.
STEP 3: Provide detailed reasoning.
STEP 4: Assign a rating from the rubric above.

# User Inputs and AI-generated Response
## User Inputs

### Prompt
{prompt}

## AI-generated Response
{response}
"""

# Enum used as the response schema when the pointwise evaluation is converted to a
# structured result. Each member's value is the rubric score (5 = best, 1 = worst) as a string.
class SummaryRating(enum.Enum):
    VERY_GOOD = "5"
    GOOD = "4"
    OK = "3"
    BAD = "2"
    VERY_BAD = "1"


def eval_sim(prompt, ai_response):
    """Score a generated earthquake scenario with a two-turn Gemini chat.

    The first turn sends SIMU_PROMPT with `prompt` and `ai_response` interpolated
    and collects the model's free-text evaluation. The second turn asks the same
    chat to convert its final score, constrained to the SummaryRating enum.

    Args:
        prompt: The request the scenario was generated from; inserted into
            SIMU_PROMPT with str.format().
        ai_response: The generated scenario text to evaluate.

    Returns:
        A `(verbose_eval, structured_eval)` tuple: the text of the first reply and
        the `.parsed` value of the second reply (expected to be a SummaryRating member).
    """
    judge_chat = client.chats.create(model='gemini-2.0-flash')

    # Turn 1: free-form, step-by-step evaluation.
    filled_prompt = SIMU_PROMPT.format(prompt=prompt, response=ai_response)
    verbose_eval = judge_chat.send_message(message=filled_prompt).text

    # Turn 2: same chat (so the judge still has its reasoning in context),
    # with the reply restricted to one of the enum values.
    rating_reply = judge_chat.send_message(
        message="Convert the final score.",
        config=types.GenerateContentConfig(
            response_mime_type="text/x.enum",
            response_schema=SummaryRating,
        ),
    )

    return verbose_eval, rating_reply.parsed


# Pass only the text prompt. eval_sim interpolates `prompt` into SIMU_PROMPT with
# str.format(), so a list such as [request, document_sim_file] would be embedded as its
# Python repr (escaped newlines plus the File object's metadata) instead of readable text,
# and the document's contents would not reach the evaluator either way.
text_eval, struct_eval = eval_sim(prompt=request, ai_response=final_resp)
Markdown(text_eval)

STEP 1: The prompt requests a realistic earthquake scenario in Los Angeles with specific parameters. The AI provides a detailed simulation including the event, location, impacts, challenges, and potential escalations.

STEP 2:
*   **Accuracy and Groundedness:** The AI response uses realistic seismic terms (MMI scale), considers the geological context of the location (Sierra Madre Fault Zone), and factors in typical earthquake consequences (infrastructure damage, casualties, utility failures). The response logically connects the earthquake's magnitude and location to the expected damage and aftereffects.
*   **Relevance and Usefulness:** The AI provides highly relevant information for emergency response. It identifies key areas of concern (infrastructure, casualties, social impacts) and outlines the challenges and potential escalations that response teams should consider.
*   **Clarity and Structure:** The response is well-structured, using clear headings and bullet points to organize information. The writing is easy to understand, making it accessible for emergency planners and responders.
*   **Completeness:** The response covers all the requested parameters and provides a comprehensive overview of the earthquake scenario, including immediate impacts, secondary impacts, challenges, and potential escalations.

STEP 3: The response is very good and provides a comprehensive and realistic earthquake scenario. The detailed information about infrastructure damage, casualties, and potential escalations is highly valuable for emergency planning and response. It is also well-structured and easy to understand.

STEP 4: Rating: 5


#### Pairwise Evaluation
When simulating crisis responses using different source documents, the generated outputs can vary significantly. To assess the quality of these variations, pairwise evaluation is used.

In the code below, we compare two responses generated from different input documents. The evaluation process determines which response is more realistic, relevant, and useful for crisis response planning, based on predefined criteria.

In [12]:
# Wrap the model generation for easier implementation for pairwise evaluation

import functools

# Scenario prompt built from the example parameters
# (location, magnitude, population_density, time).
request = f"""
You're a crisis simulator. Create a realistic earthquake scenario based on:
Location: {location}
Magnitude: {magnitude}
Population Density: {population_density}
Time: {time}
"""

# NOTE(review): functools.cache is left disabled here, presumably because the
# uploaded-file argument is not hashable; confirm before re-enabling.
def answer_question(prompt, doc_file):
    """Generate a scenario from a text prompt and an uploaded document.

    Args:
        prompt: Text request sent to the model.
        doc_file: Uploaded file object sent alongside the prompt.

    Returns:
        The text of the model's response.
    """
    generation = client.models.generate_content(
        model='gemini-2.0-flash',
        config=types.GenerateContentConfig(temperature=0.1, top_p=0.95),
        contents=[prompt, doc_file],
    )
    return generation.text

# Try the wrapper with the Myanmar disaster brief (document_sim_file).
# Note: this rebinds `final_resp`, which an earlier cell already assigned.
final_resp = answer_question(request, document_sim_file)
Markdown(final_resp)

Okay, here's a crisis simulation based on your parameters:

**Scenario: Los Angeles Earthquake**

*   **Location:** Los Angeles, California, USA
*   **Magnitude:** 6.8
*   **Population Density:** High (especially in affected areas)
*   **Time:** 03:45 AM (Pacific Standard Time)
*   **Date:** October 26, 2024

**Initial Situation Report:**

**The Event:** At 03:45 AM PST, a magnitude 6.8 earthquake struck the Los Angeles metropolitan area. The epicenter is located approximately 10 miles (16 km) southwest of downtown Los Angeles, near Inglewood. The shallow depth of the quake (estimated at 7 miles/11 km) amplified the shaking intensity.

**Immediate Impacts:**

*   **Widespread Shaking:** Strong to very strong shaking (MMI VII-VIII) was felt across Los Angeles County, Orange County, and parts of San Bernardino and Riverside Counties.
*   **Infrastructure Damage:**
    *   **Transportation:** Reports of collapsed overpasses on major freeways (I-10, I-405) near the epicenter. Significant damage to surface streets, including cracks, buckling, and sinkholes. Los Angeles International Airport (LAX) reports runway damage and is temporarily closed.
    *   **Utilities:** Widespread power outages across the region. Natural gas leaks reported in multiple locations, leading to fires. Water main breaks are causing flooding in some areas. Cell phone service is disrupted due to overloaded networks and damaged infrastructure.
    *   **Buildings:** Older, unreinforced masonry buildings in downtown Los Angeles and older residential areas have suffered significant damage or collapse. Newer buildings, while generally structurally sound, have experienced non-structural damage (broken windows, fallen debris).
*   **Casualties:** Initial reports indicate a high number of injuries and fatalities. Hospitals are overwhelmed with patients. Search and rescue operations are underway to locate trapped individuals in collapsed buildings.
*   **Population Displacement:** Thousands of people are displaced from their homes due to damage or fear of aftershocks. Emergency shelters are being set up in schools, community centers, and parks.
*   **Emergency Services:** Fire departments, police, and paramedics are stretched thin responding to calls for help. Communication difficulties are hampering coordination efforts.
*   **Aftershocks:** Numerous aftershocks are being felt, including several above magnitude 4.0, causing further panic and hindering rescue efforts.

**Key Challenges:**

*   **Search and Rescue:** Accessing and rescuing trapped individuals in collapsed buildings is the top priority. Specialized urban search and rescue teams are needed.
*   **Medical Surge Capacity:** Hospitals are overwhelmed. Triage and medical care are needed at the scene of collapses and in temporary shelters.
*   **Shelter and Basic Needs:** Providing shelter, food, water, and sanitation to displaced populations is critical.
*   **Communication:** Restoring communication networks is essential for coordinating the response.
*   **Traffic Congestion:** Damaged roads and freeways are causing massive traffic jams, hindering emergency vehicle access.
*   **Fire Suppression:** Controlling fires caused by natural gas leaks is a major concern.
*   **Public Panic:** Fear and anxiety are widespread, leading to panic and hindering orderly evacuation.
*   **Resource Allocation:** Prioritizing and allocating limited resources (personnel, equipment, supplies) effectively is crucial.

**Potential Escalation Factors:**

*   **Major Aftershock:** A large aftershock could cause further building collapses and casualties.
*   **Tsunami Threat:** Although the epicenter is inland, a large earthquake could trigger a local tsunami, impacting coastal areas.
*   **Hazardous Materials Release:** Damage to industrial facilities could lead to the release of hazardous materials.
*   **Civil Unrest:** As resources become scarce, there is a risk of looting and civil unrest.

**Immediate Actions Required:**

1.  **Activate Emergency Operations Center (EOC):** Coordinate the response efforts of all agencies.
2.  **Deploy Search and Rescue Teams:** Focus on areas with collapsed buildings.
3.  **Establish Medical Triage Centers:** Set up temporary medical facilities to treat the injured.
4.  **Open Emergency Shelters:** Provide shelter and basic needs to displaced populations.
5.  **Restore Communication Networks:** Prioritize restoring communication for emergency responders and the public.
6.  **Assess Infrastructure Damage:** Conduct a rapid assessment of damage to critical infrastructure.
7.  **Request Mutual Aid:** Request assistance from neighboring jurisdictions and state/federal agencies.
8.  **Issue Public Safety Announcements:** Provide clear and accurate information to the public about the situation and safety precautions.

**This is a dynamic situation. The information above represents the initial assessment and is subject to change as more information becomes available.**

**Your Role:** You are the Director of the Los Angeles County Office of Emergency Management. What are your next steps? What resources do you prioritize? How do you communicate with the public?


In [13]:
# Pairwise evaluation prompt. `{prompt}`, `{baseline_model_response}` (Response A) and
# `{response}` (Response B) are filled in with str.format() inside eval_pairwise.
# NOTE(review): STEP 4 and STEP 5 refer to `pairwise_choice` and `explanation` fields, but
# eval_pairwise sends this prompt without a JSON response schema; confirm that wording is intended.
EARTHQUAKE_PAIRWISE_PROMPT = """\
# Instruction
You are an expert evaluator in earthquake disaster preparedness and AI systems. Your task is to evaluate the quality of simulated earthquake scenario responses generated by two different AI models.

We will provide you with the user input describing a simulation goal or scenario, along with two AI-generated responses (Response A and Response B). First, analyze each response individually based on the Evaluation Criteria. Then, compare both and determine which one provides a more realistic, useful, and well-structured earthquake simulation.

Give a detailed explanation for your judgment, then clearly state your preferred response ("A", "B", or "SAME").

# Evaluation
## Metric Definition
You are assessing the quality of earthquake simulation responses, focusing on how well each output meets the scenario requirements and delivers realistic, grounded, actionable information that would assist in crisis planning or response.

## Criteria
1. **Realism and Accuracy**: The response reflects plausible earthquake outcomes based on known seismic patterns, realistic damage, and expected effects. It avoids hallucinated data or exaggerated assumptions.
2. **Relevance and Usefulness**: The response aligns with the intended use (e.g., planning, evacuation, damage forecasting) and contains relevant content such as affected zones, infrastructure impact, population risk, etc.
3. **Clarity and Structure**: The response is well-organized and easy to interpret, especially for responders or planners in time-sensitive situations.
4. **Completeness**: The response fully addresses the prompt without omitting critical details necessary for understanding or action.

## Rating Rubric
"A": Response A simulates the scenario more effectively than Response B, based on the criteria.
"SAME": Both responses are equally effective.
"B": Response B simulates the scenario more effectively than Response A, based on the criteria.

## Evaluation Steps
STEP 1: Analyze Response A based on the four evaluation criteria.
STEP 2: Analyze Response B based on the same criteria.
STEP 3: Compare the overall performance of both responses.
STEP 4: Output your preference of "A", "SAME", or "B" to the `pairwise_choice` field according to the rubric.
STEP 5: Output your reasoning in the `explanation` field.

# User Inputs and AI-generated Responses
## User Inputs
### Prompt
{prompt}

# AI-generated Response

### Response A
{baseline_model_response}

### Response B
{response}
"""


# Possible verdicts of the pairwise comparison; used as the enum response schema
# when the evaluation is converted to a structured result.
class AnswerComparison(enum.Enum):
    A = "A"
    SAME = "SAME"
    B = "B"


@functools.cache
def eval_pairwise(prompt, response_a, response_b, n=1):
    """Ask Gemini which of two responses to the same prompt is better.

    Runs a two-turn chat: the first turn sends EARTHQUAKE_PAIRWISE_PROMPT with the
    prompt and both responses interpolated and collects the free-text comparison;
    the second turn asks the same chat to convert its verdict, constrained to the
    AnswerComparison enum.

    Results are memoised with functools.cache, so all arguments must be hashable.

    Args:
        prompt: Text of the request both responses were generated from.
        response_a: Text of the baseline response ("Response A" in the prompt).
        response_b: Text of the compared response ("Response B" in the prompt).
        n: Not used by the evaluation; kept for backward compatibility. It is still
            part of the cache key, so a different value triggers a fresh evaluation.

    Returns:
        A `(verbose_eval, structured_eval)` tuple: the text of the first reply and
        the `.parsed` value of the second reply (expected to be an AnswerComparison member).
    """
    chat = client.chats.create(model='gemini-2.0-flash')

    # Interpolate the prompt text itself. The previous code formatted the list
    # [prompt, document_eval_file] into the template, which embedded the list's Python
    # repr (escaped newlines plus File metadata) in the evaluation prompt and made this
    # cached function depend on a notebook global.
    response = chat.send_message(
        message=EARTHQUAKE_PAIRWISE_PROMPT.format(
            prompt=prompt,
            baseline_model_response=response_a,
            response=response_b,
        )
    )
    verbose_eval = response.text

    # Second turn in the same chat: restrict the reply to one of the enum values.
    structured_output_config = types.GenerateContentConfig(
        response_mime_type="text/x.enum",
        response_schema=AnswerComparison,
    )
    response = chat.send_message(
        message="Convert the final score.",
        config=structured_output_config,
    )
    structured_eval = response.parsed

    return verbose_eval, structured_eval

# Response A: generated with gemini.pdf (document_eval_file) as the attached document.
answer_A002 = answer_question(request, document_eval_file)
# Response B: generated with the Myanmar disaster brief (document_sim_file).
answer_A003 = answer_question(request, document_sim_file)

# Compare the two scenarios generated from the same request.
text_eval, struct_eval = eval_pairwise(
    prompt=request,
    response_a=answer_A002,
    response_b=answer_A003,
)

# Show the written comparison, then the structured verdict.
display(Markdown(text_eval))
print(struct_eval)

STEP 1: Analyze Response A
Response A is well-organized and provides a comprehensive overview of the immediate, short-term, mid-term, and long-term impacts of a 6.8 magnitude earthquake in Los Angeles. It realistically portrays the expected damage to buildings and infrastructure, the challenges faced by emergency responders, and the potential for psychological trauma among residents. The inclusion of "Key Challenges" is useful for focusing simulation efforts. It covers important aspects like communication breakdowns, resource allocation and public safety.

STEP 2: Analyze Response B
Response B offers a detailed initial situation report. It includes a specific epicenter location, which enhances the realism. The report format with numbered sections is structured and easy to follow. B highlights specific areas of concern, such as Downtown Los Angeles, Hollywood, and low-income communities, making the simulation more targeted. The inclusion of possible escalation factors adds depth. It also offers relevant questions to consider for simulation, which could prompt deeper analysis.

STEP 3: Compare the overall performance of both responses
Both responses are strong. Response B is slightly better because it gives an example date for the earthquake, specific location for the epicenter, and includes possible escalation factors. All of these things make the simulation more realistic than A.

STEP 4: Output your preference
B

STEP 5: Output your reasoning
Response B is preferred for its more realistic and actionable simulation of an earthquake scenario in Los Angeles. While Response A provides a good overview, Response B offers more specific details, such as the epicenter location and potential escalation factors (e.g., dam failure, civil unrest), which enhance the realism and usefulness of the simulation. The structured "Initial Situation Report" format in Response B, coupled with relevant questions for consideration, also contributes to a more effective simulation.


AnswerComparison.B


## 3. Simulate social media crisis messages (Few-shot prompting)
In this part, few-shot prompting is used to simulate social media posts (e.g., messages on X) that might appear during an earthquake scenario. Given a few example messages, the model picks up the pattern and generates new, contextually similar outputs.

The temperature parameter is set to 0.9 to increase diversity and creativity in the generated messages, reflecting the variety of real-world social media expressions during a crisis.

In [14]:
# Few-shot examples showing the tone and length of the posts we want.
tweet_examples = [
    "I'm stuck near downtown! Buildings collapsed!",
    "No power or water since 2am!",
    "Can someone send help to East Valley shelter?",
]

# A hashtag cannot contain spaces: interpolating the raw location would ask for the
# broken tag "#Los Angeles", so strip whitespace first ("Los Angeles" -> "LosAngeles").
location_hashtag = "".join(location.split())

user_prompt = f'''
Generate 5 realistic tweets from residents in {location} during a {magnitude} magnitude earthquake at {time}. 
The area has {population_density} population density. Tweets should reflect panic, help requests, and status updates.
Include relevant hashtags like #earthquake and #{location_hashtag}.

Tweets:
'''

# The examples come first, one per line, followed by the instruction block.
tw_prompt =  "\n".join(tweet_examples) + user_prompt
# High temperature for more diverse, less repetitive posts.
model_config = types.GenerateContentConfig(temperature=0.9)

tweet_response = client.models.generate_content(
    model='gemini-2.0-flash',
    config=model_config,
    contents=[tw_prompt])

tweet_final_resp = tweet_response.text
Markdown(tweet_final_resp)

Okay, here are 5 realistic tweets reflecting the situation you described, with a 6.8 magnitude earthquake hitting Los Angeles at 3:45 AM, focusing on downtown and the East Valley:

1.  **@SoCalMomOf2:** HUGE earthquake just woke us up! Shaking hasn't stopped! Kids are terrified. We're near Pershing Square and heard some crashes outside. Anyone else okay downtown? #earthquake #LosAngeles #LAquake

2.  **@DTLA_Foodie:** Holy crap that was the big one! My apartment building downtown is swaying like crazy. Power's out. Anyone know if the 110 is still open? Need to get out of here ASAP. #earthquake #LosAngeles #DTLA

3.  **@EastValley_Hope:** We're at the East Valley shelter and it's overflowing. Buildings collapsed nearby. No power or water since 2AM! Need immediate assistance! Can someone send help to East Valley shelter? #earthquake #LosAngeles #EastValley #Help

4.  **@TechBroLA:** Felt like the world ended. Just checked on my neighbors in Koreatown, everyone seems shaken but okay. Power outage across the whole block. Waiting for aftershocks... this is insane. #earthquake #LosAngeles #Koreatown

5.  **@FilmStudent88:** My building near USC just partially collapsed. I'm trapped but okay for now, on the second floor. Need rescue! Corner of Figueroa and Exposition. Please send help! #earthquake #LosAngeles #USC #RescueMe


## 4. Analyze uploaded reports (RAG, Embeddings, Vector Database)
In this section, we use Chroma to store document chunks as embeddings in a vector database. This enables a Retrieval-Augmented Generation (RAG) approach, where relevant document sections are retrieved and incorporated into the prompt. This method yields more accurate and context-aware responses compared to simply feeding entire documents directly into the model.

In [15]:
!pip install PyMuPDF  # Gemini do not have direct access to the file path, need to manually use python to pass contents



In [16]:
from chromadb import Documents, EmbeddingFunction, Embeddings
from google.api_core import retry

from google.genai import types

In [17]:
# Download the Myanmar earthquake situation report from unocha.org into
# Myanmar_SitRep.pdf (-nv keeps wget's output short), then upload the saved file
# with client.files.upload and keep the returned handle in document_sitrep_file.
!wget -nv -O Myanmar_SitRep.pdf https://www.unocha.org/attachments/2c9df378-1961-4841-bfea-2a37c31a5c9d/Myanmar%20Earthquake%20Situation%20Report%201_final.pdf 
document_sitrep_file = client.files.upload(file='Myanmar_SitRep.pdf')

2025-04-19 15:46:28 URL:https://www.unocha.org/attachments/2c9df378-1961-4841-bfea-2a37c31a5c9d/Myanmar%20Earthquake%20Situation%20Report%201_final.pdf [570579/570579] -> "Myanmar_SitRep.pdf" [1]


In [18]:
# Download the earthquake guidelines PDF from urban-response.org into
# EQ_Guidelines.pdf, then upload the saved file with client.files.upload and
# keep the returned handle in document_guide_file.
!wget -nv -O EQ_Guidelines.pdf https://www.urban-response.org/system/files/content/resource/files/main/26164-earthquakeguidelinesenweb.pdf
document_guide_file = client.files.upload(file='EQ_Guidelines.pdf')

2025-04-19 15:46:30 URL:https://www.urban-response.org/system/files/content/resource/files/main/26164-earthquakeguidelinesenweb.pdf [3002002/3002002] -> "EQ_Guidelines.pdf" [1]


In [19]:
# Show where the notebook is running and which files sit there, so the PDF
# paths used by the later cells can be checked against this listing.
import os

# Entries of the current working directory (no argument means "here").
files = os.listdir()

# First line: the working directory; second line: the entry names.
print(os.getcwd(), files, sep="\n")

/kaggle/working
['__notebook__.ipynb', 'gemini.pdf', 'EQ_Guidelines.pdf', 'Myanmar.pdf', 'Myanmar_SitRep.pdf']


In [20]:
import fitz  # PyMuPDF
from typing import List

def extract_text_from_pdf(pdf_path: str) -> str:
    """Return the text of every page of the PDF at ``pdf_path`` as one string.

    The document is opened with ``fitz.open`` (PyMuPDF) inside a ``with``
    block, and each page's ``get_text()`` result is concatenated in page
    order with no separator added between pages.
    """
    with fitz.open(pdf_path) as pdf_doc:
        # The join is evaluated before the ``with`` block exits.
        return "".join(pdf_page.get_text() for pdf_page in pdf_doc)

#TEST: Load and preview
#kaggle_path = "/kaggle/working/"
#pdf_path = kaggle_path+"Myanmar_SitRep.pdf"
#raw_text = extract_text_from_pdf(pdf_path)
#print("Preview:", raw_text[:1000])

In [21]:
# chunk size is set to 1300 to avoid resource exhaustion errors
def chunk_text(text: str, chunk_size: int = 1300, overlap: int = 200) -> List[str]:
    """Split ``text`` into overlapping, fixed-size character chunks.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters shared between consecutive chunks;
            must satisfy ``0 <= overlap < chunk_size``.

    Returns:
        The chunks in order. Empty input yields an empty list. The last chunk
        may be shorter than ``chunk_size``.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is outside
            ``[0, chunk_size)``. With ``overlap >= chunk_size`` the window
            would never advance and the loop would not terminate.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    step = chunk_size - overlap  # how far the window moves each iteration
    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end])
        if end >= len(text):
            # This chunk already reaches the end of the text. Advancing by
            # ``step`` would only produce a tail that is fully contained in
            # the chunk just emitted, so stop here.
            break
        start += step
    return chunks

#TEST
#doc_chunks = chunk_text(combined_text)
#print(f"Generated {len(doc_chunks)} chunks.")

In [22]:
import time
def embed(text, task_type='RETRIEVAL_DOCUMENT'):
    """Embed ``text`` with the ``text-embedding-004`` model.

    Args:
        text: The content to embed; passed to ``embed_content`` as ``contents``.
        task_type: Embedding task type forwarded to ``EmbedContentConfig``.
            Defaults to ``'RETRIEVAL_DOCUMENT'`` (the value this function
            always used), which suits text being indexed. Pass
            ``'RETRIEVAL_QUERY'`` when embedding a search query.

    Returns:
        The ``values`` of the first embedding in the response.
    """
    emb_response = client.models.embed_content(
        model='models/text-embedding-004',
        contents=text,
        config=types.EmbedContentConfig(task_type=task_type))
    time.sleep(1)  # Add a delay to avoid exceeding the rate limit
    return emb_response.embeddings[0].values

In [23]:
# Preprocess the documents: pull the text out of the three PDFs in the working
# directory and merge it into one string, with a blank line between documents.
kaggle_path = "/kaggle/working/"
overview_text, sitrep_plan_text, guide_text = (
    extract_text_from_pdf(kaggle_path + pdf_name)
    for pdf_name in ("Myanmar.pdf", "Myanmar_SitRep.pdf", "EQ_Guidelines.pdf")
)
combined_text = "\n\n".join([overview_text, sitrep_plan_text, guide_text])

In [24]:
# Chunk the documents, embed each chunk, and store the result in the vector database.
import chromadb

DB_NAME = "earthquake_docs"

chroma_client = chromadb.Client()
db = chroma_client.get_or_create_collection(name=DB_NAME)

# Split the combined PDF text into overlapping chunks.
doc_chunks = chunk_text(combined_text)
print(f"Generated {len(doc_chunks)} chunks.")

# Convert text chunks to embeddings and store them.
for i, chunk in enumerate(doc_chunks):
    emb = embed(chunk)
    # upsert (not add): the ids are deterministic ("chunk_<i>") and the
    # collection is fetched with get_or_create, so re-running this cell in the
    # same kernel would otherwise try to add ids that already exist. Upsert
    # replaces the stored entry for an existing id instead.
    # NOTE(review): ids beyond the new chunk count from an earlier run are not
    # removed here; delete the collection first if the chunking changes.
    db.upsert(documents=[chunk], embeddings=[emb], ids=[f"chunk_{i}"])

print(f"Stored {len(doc_chunks)} chunks in Chroma vector DB. ")

Generated 183 chunks.
Stored 183 chunks in Chroma vector DB. 


In [25]:
def retrieve_relevant_chunks(query: str, top_k=12):
    """Return the stored document chunks most similar to ``query``.

    The query is embedded with the ``RETRIEVAL_QUERY`` task type. The stored
    chunks are embedded as ``RETRIEVAL_DOCUMENT``, and the embedding API
    distinguishes the two sides of a retrieval pair, so the query must not be
    embedded as a document.

    Args:
        query: The natural-language question to search for.
        top_k: Number of chunks to request from the collection (default 12).
            TODO: the original note on why 12 was picked is incomplete;
            document the rationale.

    Returns:
        The list of matching chunk texts for this single query.
    """
    query_response = client.models.embed_content(
        model='models/text-embedding-004',
        contents=query,
        config=types.EmbedContentConfig(task_type='RETRIEVAL_QUERY'))
    time.sleep(1)  # same post-request delay the document embedding uses for the rate limit
    query_emb = query_response.embeddings[0].values

    results = db.query(
        query_embeddings=[query_emb],
        n_results=top_k,
        include=["documents"]
    )
    # One query embedding was sent, so the first (only) result list is ours.
    return results["documents"][0]

In [26]:
def ask_rag(query):
    """Answer ``query`` from the indexed documents and return it as Markdown.

    The chunks returned by ``retrieve_relevant_chunks`` are joined with blank
    lines, placed between the context markers of an analyst prompt, and sent
    to ``gemini-2.0-flash``. The reply text is wrapped in ``Markdown`` so the
    notebook renders it.
    """
    excerpts = "\n\n".join(retrieve_relevant_chunks(query))

    # The prompt is sent verbatim, indentation included.
    analyst_prompt = f"""
    You are a disaster response analyst. Use the following retrieved document excerpts to answer the query:
    --- DOCUMENT CONTEXT START ---
    {excerpts}
    --- DOCUMENT CONTEXT END ---
    Answer this query in a clear and concise way: {query}
    """
    answer = client.models.generate_content(
        contents=[analyst_prompt],
        model='gemini-2.0-flash',
    )
    return Markdown(answer.text)

# Smoke test: check whether the model answers a question correctly using RAG.
# The call is the cell's last expression, so the returned Markdown is rendered.
ask_rag("What areas are affected by the earthquake? ")

The areas severely affected by the earthquake are Mandalay Region, Nay Pyi Taw Union Territory, Sagaing Region, and southern Shan State. To a lesser extent, Bago Region, Magway Region, and eastern Shan State were also affected.


## 5. Generate structured crisis response plans (RAG, Structured output/JSON mode)
We use a RAG approach to generate an earthquake response plan based on three uploaded documents:
- The 2025 Myanmar Earthquake Disaster Brief from IFRC
- The Myanmar Earthquake Situation Report and Response Plan from the UN
- An Earthquake Response Guideline from IFRC

The function below not only generates a detailed response plan but also outputs a structured summary in JSON format.

In [27]:
def generate_response_plan_rag(user_query):
    """Generate a RAG-grounded earthquake response plan for ``user_query``.

    The chunks returned by ``retrieve_relevant_chunks`` are joined with blank
    lines and inserted as the context section of an expert prompt. The prompt
    asks for a structured plan followed by a JSON summary, and is sent to
    ``gemini-2.0-flash``.

    Returns:
        The model's reply text as returned by the API; the JSON summary is
        not parsed here.
    """
    # Similarity search first; the hits become the prompt's context section.
    context_block = "\n\n".join(retrieve_relevant_chunks(user_query))

    # Doubled braces below are literal braces in the f-string (JSON template).
    plan_prompt = f"""
    You are a disaster response expert.
    Using the retrieved Disaster overview, SitRep and Response Plan 
    and Earthquake response guideline context below, answer the query in a 
    structured and clear format. Then output a structured JSON summary.

    ### Context:
    {context_block}

    ### User Query:
    {user_query}

    ### Instructions:
    1. Address:
      - Affected areas
      - Rescue & response priorities
      - Coordination roles
      - Resource needs
      - Gaps & recommendations

    2. Then give a summary in the following JSON format:
    {{
      "areas_affected": [],
      "injuries": [],
      "priority_actions": [],
      "resources_needed": [],
      "coordination_roles": [],
      "gaps": []
    }}

    ### Response:
    """

    plan_reply = client.models.generate_content(
        contents=[plan_prompt],
        model='gemini-2.0-flash',
    )
    return plan_reply.text

# Test: example query. Generate a plan, print a heading, then render the reply.
user_query = "Generate an earthquake response plan for affected regions in Myanmar based on the documents."
response_text = generate_response_plan_rag(user_query)
print("Full Response Plan:")
# Last expression of the cell, so the notebook displays the Markdown object.
Markdown(response_text)

Full Response Plan:


## Earthquake Response Plan for Myanmar

This plan outlines the immediate response to the earthquake in Myanmar, focusing on affected areas, rescue priorities, coordination, resource needs, and identified gaps.

**Affected Areas:**

*   Sagaing
*   Mandalay
*   Nay Pyi Taw Union Territory
*   Bago
*   Pyay
*   Yedashe
*   Taungoo
    These regions have experienced widespread destruction, including collapsed buildings and damaged infrastructure.

**Rescue & Response Priorities:**

1.  **Search and Rescue:** Although the response is shifting to recovery, continue to identify anyone who remains in the rubble.
2.  **Medical Assistance:**
    *   Address critical shortages of life- and limb-saving medical supplies in affected areas.
    *   Support hospitals in Mandalay and Naypyidaw, which are overwhelmed with injured people and lack resources.
3.  **Emergency Shelter:**
    *   Provide safe emergency shelter, considering the pre-crisis shortage and the additional needs created by the earthquake.
    *   Address the needs of those sheltering in public buildings, where hygiene facilities are inadequate.
4.  **Food and Water:**
    *   Ensure access to food and drinking water, which are urgent priorities.
    *   Protect food stocks from heavy rain and flooding.
5.  **WASH (Water, Sanitation, and Hygiene):**
    *   Address WASH needs, especially in displacement settings.
    *   Ensure safe and gender-sensitive WASH facilities to mitigate risks to women and girls.
6.  **Protection:**
    *   Scale up prevention and response activities, including the distribution of dignity kits.
    *   Establish safe spaces.
    *   Deploy mobile psychosocial support teams.
    *   Disseminate community-based messaging on protection, including explosive ordnance risks, child protection, and GBV mitigation.
7.  **Mental Health and Psychosocial Support (MHPSS):**
    *   Provide MHPSS services through online support, helplines, trained professionals, and field interventions.
8.  **Education in Emergencies:**
    *   Distribute critical education-in-emergency supplies, such as roofing sheets and learning materials.

**Coordination Roles:**

*   **Logistics Cluster:** Coordinate with humanitarian partners to assess capacity and requirements for transportation and storage.
*   **Nutrition Cluster:** Coordinate with partners on the ground to assess and respond to emerging needs.
*   **Protection Cluster:** Reinforce referral pathways through coordinated efforts with local actors and service providers.
*   **Mine Action Area of Responsibility:** Develop earthquake-specific risk education messages for broad dissemination.
*   **Humanitarian partners:** All partners should participate in needs assessments and provide assistance in their respective areas of expertise.
*   **Local governments, councils, and authorities:** Coordination with local entities should be strengthened to support their response.

**Resource Needs:**

*   **Financial Assistance:** Pledges from donors are needed to fund the response.
*   **Medical Supplies:** Critical shortages of life- and limb-saving medical supplies need to be addressed.
*   **Shelter Materials:** Roofing sheets and other materials are needed to repair damaged schools and homes.
*   **Food and Water:** Food and drinking water are urgent priorities.
*   **Dignity Kits and Women’s Essential Kits:** These kits are needed to support affected women and girls.
*   **Transportation and Storage:** Capacity for transportation and storage is needed, especially in Mandalay, Nay Pyi Taw, and Yangon.

**Gaps & Recommendations:**

*   **Unmet Needs:** 47% of those assessed have yet to receive any form of assistance.
*   **Access Challenges:** Damaged road infrastructure, movement restrictions, and lengthy import procedures are hindering response efforts.
*   **Hygiene Facilities:** Inadequate hygiene facilities in displacement sites pose risks to women and children.
*   **Information Dissemination:** Ensure affected people receive timely and life-saving information.
*   **Funding Shortages:** Persistent underfunding limits the ability of development actors to implement disaster risk reduction (DRR) and community resilience work.
*   **Coordination with Local Governments:** Strengthen coordination with local governments, councils, and authorities.
*   **Prioritize Restoration:** Restore existing infrastructure, systems, and services on a priority basis to support the emergency response.
*   **Advocacy:** Advocate with local governments and other actors on common approaches, guidelines, and policies for the recovery process.

## JSON Summary:

```json
{
  "areas_affected": [
    "Sagaing",
    "Mandalay",
    "Nay Pyi Taw Union Territory",
    "Bago",
    "Pyay",
    "Yedashe",
    "Taungoo"
  ],
  "injuries": [
    "Trauma injuries",
    "Illnesses due to lack of sanitation and clean water",
    "Psychological distress"
  ],
  "priority_actions": [
    "Search and rescue",
    "Provide medical assistance",
    "Establish emergency shelter",
    "Ensure access to food and water",
    "Address WASH needs",
    "Provide protection",
    "Provide mental health and psychosocial support",
    "Address education in emergencies"
  ],
  "resources_needed": [
    "Financial assistance",
    "Medical supplies",
    "Shelter materials",
    "Food and water",
    "Dignity kits and women’s essential kits",
    "Transportation and storage capacity"
  ],
  "coordination_roles": [
    "Logistics Cluster",
    "Nutrition Cluster",
    "Protection Cluster",
    "Mine Action Area of Responsibility",
    "Humanitarian partners",
    "Local governments, councils, and authorities"
  ],
  "gaps": [
    "Unmet needs of affected population",
    "Access challenges due to damaged infrastructure and restrictions",
    "Inadequate hygiene facilities in displacement sites",
    "Insufficient information dissemination",
    "Funding shortages",
    "Coordination gaps with local governments"
  ]
}
```


## Future Work
- Enhance the file upload feature to allow users to upload various disaster-related documents, enabling the generation of crisis response plans beyond the 2025 Myanmar Earthquake scenario—making the tool adaptable to any disaster context.
- Extend the generate_response_plan_rag(user_query) function to also generate response plans from the simulated disaster scenarios created in Section 1, using the social media simulation from Section 3 as an additional input.
- Introduce user role selection (e.g., policymaker, field responder, NGO planner) with a simple UI, so the generated plan can be tailored based on the role's perspective and needs.
- Experiment with alternative prompting strategies and few-shot examples to further improve response relevance, accuracy, and structure.
- Integrate GenAI evaluation techniques to automatically assess the quality and usefulness of the generated plans based on predefined criteria.
- Set up continuous evaluation and improvement pipelines using MLOps tools to monitor performance, update embeddings, and track document relevance over time.
- (Nice to have) Generate visual displays such as interactive response timelines or views of the affected regions.