# Import Libraries & Data

In [5]:
# Import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import os
import folium
import json

In [6]:
# Import Urban Flood Risk data
# The project root defaults to the original local folder but can be overridden
# through the FLOOD_RISK_PROJECT_DIR environment variable, so the notebook can
# run on another machine without editing this cell.
path = os.environ.get(
    "FLOOD_RISK_PROJECT_DIR",
    "/Users/charlottelin/Documents/Achievement 6 Urban Flood Risk",
)
file_path = os.path.join(path, '02 Data', 'Prepared Data', 'urban_pluvial_flood_risk_clean.csv')
df = pd.read_csv(file_path)

# Create new column for mapping

### The `historical_rainfall_intensity_mm_hr` column is divided by the `return_period_years` column to create a combined index, `rainfall_intensity_per_year`. This gives a single, more robust value per segment that is easier to plot on a world map.

In [7]:
# Create the combined column: rainfall intensity divided by return period.
# Return periods of 0 are masked to NaN first so the division cannot produce inf.
safe_return_period = df['return_period_years'].replace(0, np.nan)
intensity_ratio = df['historical_rainfall_intensity_mm_hr'].div(safe_return_period)

# Any NaN in the result (masked zeros or missing inputs) is stored as 0
df['rainfall_intensity_per_year'] = intensity_ratio.fillna(0)

In [8]:
# Preview the first five rows to confirm the new column is present
df.head(n=5)

Unnamed: 0,segment_id,city_name,admin_ward,latitude,longitude,catchment_id,elevation_m,dem_source,land_use,soil_group,drainage_density_km_per_km2,storm_drain_proximity_m,storm_drain_type,rainfall_source,historical_rainfall_intensity_mm_hr,return_period_years,risk_labels,rainfall_intensity_per_year
0,SEG-00001,"Colombo, Sri Lanka",Borough East,6.920633,79.9126,CAT-136,9.17,Copernicus_EEA-10_v5,Institutional,B,4.27,160.5,CurbInlet,ERA5,39.4,50,monitor,0.788
1,SEG-00002,"Chennai, India",Ward D,13.076487,80.281774,CAT-049,-2.19,Copernicus_EEA-10_v5,Residential,D,7.54,105.316404,OpenChannel,ERA5,56.8,25,ponding_hotspot|low_lying|event_2025-05-02,2.272
2,SEG-00003,"Ahmedabad, India",Sector 12,23.019473,72.638578,CAT-023,30.88,SRTM_3arc,Industrial,B,11.0,152.5,OpenChannel,IMD,16.3,5,monitor,3.26
3,SEG-00004,"Hong Kong, China",Sector 14,22.302602,114.078673,CAT-168,24.28,SRTM_3arc,Residential,B,7.32,37.0,Manhole,ERA5,77.0,10,monitor,7.7
4,SEG-00005,"Durban, South Africa",Sector 5,-29.887602,30.911008,CAT-171,35.7,SRTM_3arc,Industrial,C,4.5,292.4,OpenChannel,ERA5,20.8,5,monitor,4.16


In [9]:
# Write the dataframe, including the new column, to the Prepared Data folder
export_path = os.path.join(path, '02 Data', 'Prepared Data', 'urban_pluvial_flood_risk_clean2.csv')
df.to_csv(export_path, index=False)

# Create Choropleth-Style Point Map with `rainfall_intensity_per_year`

In [10]:
# Initialize the base map, centered on the mean latitude/longitude of the rows in df
map_center = [df['latitude'].mean(), df['longitude'].mean()]
m = folium.Map(location=map_center, zoom_start=2, tiles='cartodb positron')

In [11]:
# Add a color scale legend using branca
import branca.colormap as cm

In [12]:
# Smallest and largest index values; these become the endpoints of the color scale
intensity = df['rainfall_intensity_per_year']
min_val, max_val = intensity.min(), intensity.max()

In [14]:
# Build a linear YlOrRd color scale spanning [min_val, max_val], caption it,
# and add it to the map so it appears as the legend
colormap = cm.linear.YlOrRd_09.scale(vmin=min_val, vmax=max_val)
colormap.caption = 'Rainfall Intensity per Year (mm/hr per year)'
colormap.add_to(m)

In [15]:
# Add points to the map: one circle per row of df, filled with the color
# that the colormap assigns to that row's index value
point_columns = zip(
    df['latitude'],
    df['longitude'],
    df['city_name'],
    df['rainfall_intensity_per_year'],
)
for lat, lon, city, intensity_value in point_columns:
    popup_html = (
        f"<b>City:</b> {city}<br>"
        f"<b>Rainfall Intensity per Year:</b> {intensity_value:.2f}"
    )
    marker = folium.CircleMarker(
        location=[lat, lon],
        radius=5,  # constant radius; only the fill color varies with the index
        color=None,
        fill=True,
        fill_opacity=0.7,
        fill_color=colormap(intensity_value),
        popup=folium.Popup(popup_html, max_width=250),
    )
    marker.add_to(m)

In [18]:
# Show Map: the bare `m` as the cell's last expression displays the map as the cell output
m

In [19]:
# Export map as HTML
# Build the output location from the shared project root (`path`) with os.path.join,
# matching how the CSV paths are built, instead of repeating the absolute path.
map_output_path = os.path.join(
    path, '04 Analysis', 'Visualizations', 'rainfall_intensity_world_map.html'
)
m.save(map_output_path)

## Observations: Rainfall Intensity Per Year Visualization

#### The choropleth-style point map provides a global overview of the newly created column `rainfall_intensity_per_year` across urban catchments in the dataset. Each point represents a specific urban drainage segment, colored by its calculated annualized rainfall intensity. The color gradient from yellow (lower intensity) to red (higher intensity) visually highlights regions most exposed to heavy and frequent rainfall, which are often correlated with increased urban flood risk.

- Regional Patterns: South and Southeast Asia display the highest rainfall intensities, with notable hotspots around Chennai, Colombo, and Hong Kong. These align with known monsoonal climates and coastal exposure, validating the dataset’s spatial realism.
In contrast, many urban areas in southern Africa and parts of South America show moderate-to-high intensity but lower overall spatial clustering, reflecting more localized storm events rather than broad climatic systems.

- Data Integrity and Mapping Accuracy: The point-based visualization effectively substitutes for a country-level choropleth. Each observation’s latitude and longitude correspond well with expected city locations, confirming data quality and geographic coherence.
The single shared color-scale legend ensures consistency in interpretation, and the intensity values (generally between 0 and 8 mm/hr per year) fall within plausible hydrological ranges for short-duration rainfall events normalized by recurrence periods.

- Implications for Urban Flood Risk: Cities with higher rainfall_intensity_per_year values face more frequent and concentrated rainfall loads, increasing pressure on drainage systems. Combined with attributes like soil group and land use, this index could inform priority areas for flood mitigation or climate-resilient infrastructure investment.

#### Connection to research questions: Because the `rainfall_intensity_per_year` column was newly created, it does not directly address any existing question; it relates only loosely to Question 3, regarding topography and rainfall. The new column adds to that relationship by accounting for temporal recurrence, which offers a more standardized way to compare risk across climates, something not explicitly captured before.

#### Therefore, I'm adding a new Question 9: <b>How does normalized rainfall intensity per year correlate with flood-related infrastructure characteristics and land use categories, and can it serve as a proxy indicator for climate-adjusted flood exposure across urban segments?</b>