In [24]:
# ---------------------------------------------------------
# CELL 0: Mount Google Drive
# ---------------------------------------------------------
# Attach Google Drive to the Colab filesystem so that later cells can read
# files stored under the mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)
print("Google Drive mounted. Ready to access interim files.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Google Drive mounted. Ready to access interim files.


In [None]:
!pip install geopandas rioxarray pyogrio



# Explanation for CELL 1
This cell imports the necessary geospatial libraries and establishes the Target Projected CRS as UTM Zone 51 North (EPSG:32651). It then loads the vector data (aoi, barangays, facilities, flood, and the optional roads layer) directly from the permanent Google Drive interim folder. Reading from Drive ensures the data is loaded reliably, bypassing the unstable temporary local storage of the Colab runtime.

In [26]:
# ---------------------------------------------------------
# CELL 1: Imports, Setup, and Load Interim Data
# ---------------------------------------------------------
# Standard library (os and Path are required below and were previously
# missing, which made this cell fail on a fresh kernel).
import os
import shutil
from pathlib import Path

# Third-party
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import rioxarray as rxr
from shapely.geometry import mapping
import numpy as np

# --- CONFIGURATION ---
TARGET_CRS_EPSG = 32651 # UTM Zone 51 North for Bulacan
TARGET_CRS_NAME = "UTM Zone 51 North (EPSG:32651)"

# --- DRIVE PATH SETUP (FIXED FOR STABILITY) ---
DRIVE_BASE_PATH = '/content/drive/MyDrive/Data Viz Case Study/'
DRIVE_INTERIM_PATH = Path(DRIVE_BASE_PATH) / 'interim'

# Ensure the local temporary folder for outputting intermediate results exists
os.makedirs("data/interim", exist_ok=True)


# --- LOAD INTERIM DATA DIRECTLY FROM GOOGLE DRIVE ---
print("\n--- LOADING INTERIM FILES DIRECTLY FROM GOOGLE DRIVE ---")
try:
    # Load the required layers using the absolute Drive path defined above
    aoi = gpd.read_file(DRIVE_INTERIM_PATH / "aoi.gpkg")
    gdf_brgy = gpd.read_file(DRIVE_INTERIM_PATH / "barangays.gpkg")
    gdf_facilities = gpd.read_file(DRIVE_INTERIM_PATH / "facilities.gpkg")
    gdf_flood_raw = gpd.read_file(DRIVE_INTERIM_PATH / "flood.gpkg")

    # The roads layer is optional: fall back to an empty placeholder when it
    # cannot be read, but say so instead of hiding the failure.
    try:
        gdf_roads = gpd.read_file(DRIVE_INTERIM_PATH / "roads.gpkg")
    except Exception as roads_error:
        print(f"[WARNING] Roads layer not loaded; using an empty placeholder. Reason: {roads_error}")
        gdf_roads = gpd.GeoDataFrame()

    print("✅ All interim vector layers loaded successfully from Google Drive.")

except Exception as e:
    print(f"[FATAL ERROR] Failed to load interim files from Drive. Check Drive Mount and folder path. Error: {e}")
    # Stop here: every later cell needs these layers, and continuing would
    # either raise NameError or silently reuse stale variables from an
    # earlier kernel run.
    raise

print(f"Target Projected CRS set to: {TARGET_CRS_NAME}")


--- LOADING INTERIM FILES DIRECTLY FROM GOOGLE DRIVE ---
✅ All interim vector layers loaded successfully from Google Drive.
Target Projected CRS set to: UTM Zone 51 North (EPSG:32651)


# Explanation for CELL 2
This cell executes Task 2: Reprojection. It uses the .to_crs() method to bring every GeoDataFrame into the chosen Projected CRS, UTM Zone 51 North (EPSG:32651): layers still stored in the Geographic CRS (WGS84) are converted, while layers already in EPSG:32651 (as the AOI is in the output below) are left unchanged. This step is critical because WGS84 uses angular units (degrees), which are unsuitable for measuring distances or areas. By projecting the data to UTM, all subsequent operations, such as buffering, will be calculated in meters, ensuring metric accuracy for the final analysis.

In [27]:
# ---------------------------------------------------------
# CELL 2: Task 2 - Reproject All Vector Layers
# ---------------------------------------------------------
print("\n--- TASK 2: REPROJECTING VECTOR LAYERS ---")

# AOI: transform to the target CRS, persist a local copy, and report the
# EPSG codes before and after the transformation.
aoi_proj = aoi.to_crs(TARGET_CRS_EPSG)
aoi_proj.to_file("data/interim/aoi_proj.gpkg", driver="GPKG")
print(f"AOI CRS changed from {aoi.crs.to_epsg()} to {aoi_proj.crs.to_epsg()}.")

# Barangays: transformed in memory only.
gdf_brgy_proj = gdf_brgy.to_crs(TARGET_CRS_EPSG)

# Facilities: transformed and written to the local interim folder.
gdf_facilities_proj = gdf_facilities.to_crs(TARGET_CRS_EPSG)
gdf_facilities_proj.to_file("data/interim/facilities_proj.gpkg", driver="GPKG")

# Roads: an empty layer is passed through untouched.
gdf_roads_proj = gdf_roads if gdf_roads.empty else gdf_roads.to_crs(TARGET_CRS_EPSG)

# Flood: same pass-through rule as roads, with a status message for each case.
if gdf_flood_raw.empty:
    gdf_flood_proj = gdf_flood_raw
    print("Flood layer empty, skipping reprojection.")
else:
    gdf_flood_proj = gdf_flood_raw.to_crs(TARGET_CRS_EPSG)
    print("Flood raw layer reprojected.")

print("Reprojection to metric CRS complete.")


--- TASK 2: REPROJECTING VECTOR LAYERS ---
AOI CRS changed from 32651 to 32651.
Flood raw layer reprojected.
Reprojection to metric CRS complete.


In [32]:
# ---------------------------------------------------------
# DIAGNOSTIC CELL: Find the Municipality Column
# ---------------------------------------------------------
# Lists the column names of the projected barangay layer so the column that
# identifies the city/municipality can be picked by eye.
print("\n--- DIAGNOSTIC CHECK: BARANGAY COLUMNS ---")
if gdf_brgy_proj.empty:
    print("gdf_brgy_proj is EMPTY. Please go back and ensure Notebook 01 ran correctly and clipped the barangays.")
else:
    print("Available Columns in gdf_brgy_proj (Look for City/Municipality Name):")
    print(list(gdf_brgy_proj.columns))


--- DIAGNOSTIC CHECK: BARANGAY COLUMNS ---
Available Columns in gdf_brgy_proj (Look for City/Municipality Name):
['adm1_psgc', 'adm2_psgc', 'adm3_psgc', 'adm4_psgc', 'adm4_en', 'geo_level', 'len_crs', 'area_crs', 'len_km', 'area_km2', 'geometry']


# Explanation for CELL 3
This cell is the core of Task 3: Buffering. It first uses the .dissolve() function on the flood GeoDataFrame to merge all overlapping or adjacent flood zones into a single, clean geometric feature, which is necessary for creating an accurate boundary. It then uses the .buffer(100) method to create a polygon that extends 100 meters outward from the clean flood hazard area. This resulting gdf_flood_buffer defines the final hazard exposure zone used to query population data.

In [29]:
# ---------------------------------------------------------
# CELL 3: Task 3 - Geometry Hygiene (Create Flood Buffer)
# ---------------------------------------------------------
# Buffer distance, expressed in the units of the layer's CRS (the notebook
# targets a UTM CRS, so metres). Defined before the branch so the name exists
# whether or not the flood layer is empty.
FLOOD_BUFFER_DISTANCE_M = 100

print(f"\n--- TASK 3: CREATING {FLOOD_BUFFER_DISTANCE_M}M FLOOD BUFFER ---")

if not gdf_flood_proj.empty:
    # 1. Geometry Hygiene: dissolve all flood polygons into a single feature,
    #    merging any overlaps.
    print("Dissolving flood polygons...")
    gdf_flood_dissolved = gdf_flood_proj.dissolve(by=None)

    # 2. Buffer Creation: .buffer() returns a GeoSeries, so wrap it in a new
    #    GeoDataFrame whose geometry column is the buffered shape.
    buffered_geometry = gdf_flood_dissolved.geometry.buffer(FLOOD_BUFFER_DISTANCE_M)
    gdf_flood_buffer = gpd.GeoDataFrame(geometry=buffered_geometry, crs=TARGET_CRS_EPSG)

    print(f"Flood hazard buffered by {FLOOD_BUFFER_DISTANCE_M} meters.")
else:
    # Nothing to buffer: hand downstream cells an empty placeholder.
    print("Skipping buffer: Flood layer is empty.")
    gdf_flood_buffer = gpd.GeoDataFrame()


--- TASK 3: CREATING 100M FLOOD BUFFER ---
Dissolving flood polygons...
Flood hazard buffered by 100 meters.


# Explanation for CELL 4
This cell performs a preparatory administrative step. It uses the .dissolve() function to aggregate the smaller Barangay polygons into larger Municipality polygons, grouping them by the municipality PSGC code in the 'adm3_psgc' column (the barangay layer has no municipality-name column). This creates a simplified municipal boundary layer (gdf_municipality_proj), which is required for later analysis steps (or reporting) where results need to be summarized at the municipal level instead of the barangay level.

In [33]:
# ---------------------------------------------------------
# CELL 4: Task 3 - Admin Clean-up (Dissolving Barangays)
# ---------------------------------------------------------
print("\n--- TASK 3: ADMIN CLEAN-UP (Dissolving Barangays) ---")

# Grouping key identified in the diagnostic check of the barangay columns.
MUNICIPALITY_COLUMN = 'adm3_psgc'

if gdf_brgy_proj.empty:
    print("❌ Skipping admin clean-up: Barangay layer is empty.")
    gdf_municipality_proj = gpd.GeoDataFrame()

elif MUNICIPALITY_COLUMN not in gdf_brgy_proj.columns:
    # Report the real cause instead of claiming the layer is empty.
    print(
        f"❌ Skipping admin clean-up: column '{MUNICIPALITY_COLUMN}' not found in the Barangay layer. "
        f"Available columns: {gdf_brgy_proj.columns.tolist()}"
    )
    gdf_municipality_proj = gpd.GeoDataFrame()

else:
    # Merge every barangay that shares the same municipality code into one
    # polygon, then turn the grouping key back into a regular column.
    # NOTE: dissolve keeps the first value per group for the remaining
    # columns, so barangay-level attributes in the result describe a single
    # barangay, not the whole municipality.
    gdf_municipality_proj = gdf_brgy_proj.dissolve(by=MUNICIPALITY_COLUMN).reset_index()

    print(f"Barangay layer dissolved into {len(gdf_municipality_proj)} Municipalities using column '{MUNICIPALITY_COLUMN}'.")

    # Save the result locally
    gdf_municipality_proj.to_file("data/interim/municipality_proj.gpkg", driver="GPKG")
    print("Municipal boundaries saved to interim folder.")


--- TASK 3: ADMIN CLEAN-UP (Dissolving Barangays) ---
Barangay layer dissolved into 39 Municipalities using column 'adm3_psgc'.
Municipal boundaries saved to interim folder.


# Explanation for CELL 5
This cell calculates the Population Exposure metric. It combines the raster data (population density) with the vector data (100m flood buffer) through a raster clip (mask) operation rather than a table-style spatial join. The rioxarray.rio.clip() method uses the gdf_flood_buffer to cut the reprojected WorldPop raster, isolating only the population pixels that fall within the hazard zone. The final metric—Total Estimated Exposed Population—is derived by summing all the remaining pixel values in the clipped raster (pop_in_hazard.sum().item()).