In [10]:
import sqlite3

import numpy as np
import pandas as pd

In [102]:
# Load the flood incident records; later cells read the columns
# avg_flood_depth, area_sqm, start_time and county from this frame.
flood_incidents = pd.read_csv('./flood_incidents.csv')

In [73]:
# Depth-damage curve: one (flood depth, damage factor) pair per row.
# The factor rises from 0 at depth 0 to 1 at depth 6.
damage_function = np.array([
    (0.0, 0.00),
    (0.5, 0.38),
    (1.0, 0.54),
    (1.5, 0.66),
    (2.0, 0.76),
    (3.0, 0.88),
    (4.0, 0.94),
    (5.0, 0.98),
    (6.0, 1.00),
])

# Split the curve into its depth column and its damage-factor column
depth_values, damage_factors = damage_function.T

In [74]:
# Define function to estimate flood damage

def estimate_damage(flood_depth, flood_area, max_damage_per_sqm):
    """
    Estimate the damage of a single flood incident.

    The depth is divided by 100 and looked up on the depth-damage curve
    (module-level ``depth_values`` / ``damage_factors``) using linear
    interpolation. The resulting factor scales
    ``max_damage_per_sqm * flood_area``.

    Parameters:
        flood_depth (float): Average flood depth. It is divided by 100
            before the curve lookup, so it is presumably supplied in
            centimetres -- TODO confirm against the data source.
        flood_area (float): Flooded area (sqm).
        max_damage_per_sqm (float): Maximum damage per sqm.

    Returns:
        float: Estimated flood damage; 0 when the depth or area is
        missing, or when the area is not positive.
    """
    # Missing inputs mean no damage can be estimated
    if pd.isna(flood_depth) or pd.isna(flood_area):
        return 0
    # A zero or negative area is treated as invalid
    if flood_area <= 0:
        return 0

    # Look up the damage factor for the rescaled depth on the curve
    scaled_depth = flood_depth / 100
    factor = np.interp(scaled_depth, depth_values, damage_factors)

    return factor * max_damage_per_sqm * flood_area

In [75]:
# Calculate flood damage for each incident using the average flood depth

MAX_DAMAGE = 309  # Maximum damage per sqm passed to estimate_damage (predetermined hyperparameter)

# Work on a copy so the loaded flood_incidents frame is left untouched and
# re-running this cell always starts from the same input.
damage = flood_incidents.copy()

# Row-wise estimate from each incident's average depth and flooded area
damage['estimated_damage'] = damage.apply(
    lambda row: estimate_damage(row['avg_flood_depth'], row['area_sqm'], MAX_DAMAGE),
    axis=1,
)

In [76]:
# Convert damage to NTD in 2025

EURO_TO_NTD = 34.01   # EUR -> NTD exchange rate applied to the estimate
TW_CPI_2010 = 82.5    # CPI value used for 2010
TW_CPI_2025 = 109.43  # CPI value used for 2025

# Convert estimated damage from EUR (2010) to NTD (2025):
# first apply the exchange rate, then scale by the 2025/2010 CPI ratio.
damage['estimated_damage_adjusted'] = (
    damage['estimated_damage']
    .mul(EURO_TO_NTD)
    .mul(TW_CPI_2025 / TW_CPI_2010)
)

In [77]:
# Export the per-incident damage records as csv (the row index is not written)
damage.to_csv("damage.csv", index=False)

In [98]:
# Prepare file for tableau: total adjusted damage per county and year
damage['start_time'] = pd.to_datetime(
    damage['start_time'],
    errors='coerce',  # unparseable timestamps become NaT instead of raising
)
damage['year'] = damage['start_time'].dt.year

# Sum the adjusted damage within each (county, year) group, keeping the keys as columns
damage_per_county = damage.groupby(
    ['county', 'year'], as_index=False
)['estimated_damage_adjusted'].sum()

damage_per_county.to_csv("flood_damage_by_county.csv", index=False)

In [101]:
# Display the per-county, per-year adjusted damage totals
damage_per_county

Unnamed: 0,county,year,estimated_damage_adjusted
0,嘉義市,2021,20403000000.0
1,嘉義市,2022,6668380000.0
2,嘉義縣,2020,2802593000000.0
3,嘉義縣,2021,64203010000.0
4,嘉義縣,2022,45661410000.0
5,基隆市,2022,16564840000.0
6,屏東縣,2021,217139300000.0
7,新北市,2021,38457210000.0
8,新北市,2022,12561900000.0
9,新竹市,2020,27245020000.0


In [93]:
# Cross-check with SQL: total unadjusted damage for incidents in 2021-2022

# Connect to an in-memory SQLite database
conn = sqlite3.connect(":memory:")
try:
    # Load the DataFrame into a SQL table
    damage.to_sql("d", conn, if_exists="replace", index=False)

    # Sum estimated_damage over incidents whose start_time year is 2021 or 2022
    query = "SELECT sum(estimated_damage) from d WHERE strftime('%Y', start_time) BETWEEN '2021' AND '2022'"
    damage_filtered = pd.read_sql(query, conn)
finally:
    # Close the connection even if loading or querying fails
    conn.close()

# Display results
print(damage_filtered)


   sum(estimated_damage)
0           5.305544e+10
