### Workbook for extracting WorldPop population estimates for varying bridge catchment distances
Week of May 12, 2025
Author: Adele Birkenes

In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, LineString, Polygon
import rasterio
from rasterio.mask import mask
import os
import numpy as np
import matplotlib.pyplot as plt
import folium
from folium import GeoJson

Task 1: Read in data on bridge sites, population (WorldPop), and village boundaries

Note: The village boundaries are not used in the analysis itself, but they carry the custom Rwanda Transverse Mercator projection that the bridge sites are reprojected to (the WorldPop raster was already projected to this CRS beforehand)

In [None]:
# Base directories for the input data
synced_path = "../../synced-data/population-exploration/"
unsynced_path = "../../unsynced-data"

# Village boundaries shapefile -> GeoDataFrame
# (only needed as the source of the custom Rwanda TM CRS)
village_boundaries_fp = os.path.join(
    synced_path,
    "Rwanda Village Boundaries/Village.shp",
)
Rwanda_village_boundaries = gpd.read_file(village_boundaries_fp)

# Bridge sites CSV -> plain DataFrame
# ISO-8859-1 is used so that special characters in the export can be read
bridge_sites_fp = os.path.join(
    synced_path,
    "Rwanda Sites with All Population Fields_Exported 2025.04.11.csv",
)
bridge_sites = pd.read_csv(bridge_sites_fp, encoding="ISO-8859-1")

In [None]:
def map_bridges(bridges, bridges_lat, bridges_lon, village_boundaries):
    """Turn a bridge-site table into a point GeoDataFrame in the village CRS.

    Parameters
    ----------
    bridges : DataFrame holding one row per bridge site.
    bridges_lat : name of the column in `bridges` with latitude values.
    bridges_lon : name of the column in `bridges` with longitude values.
    village_boundaries : GeoDataFrame whose `.crs` is the target projection.

    Returns
    -------
    GeoDataFrame with the columns of `bridges` plus a point geometry,
    reprojected from EPSG:4326 to `village_boundaries.crs`.
    """
    # Report the target CRS up front
    print(f'The CRS of the village boundaries gdf is: {village_boundaries.crs}')

    # Longitude is the x coordinate and latitude the y coordinate
    site_points = gpd.points_from_xy(x=bridges[bridges_lon], y=bridges[bridges_lat])

    # The coordinates are interpreted as EPSG:4326 before reprojection
    bridges_wgs84 = gpd.GeoDataFrame(bridges, geometry=site_points, crs='EPSG:4326')

    # Reproject into the CRS carried by the village boundaries
    bridge_points = bridges_wgs84.to_crs(village_boundaries.crs)

    # Report the resulting CRS so the reprojection can be checked by eye
    print(f'The CRS of the bridges gdf is: {bridge_points.crs}')

    return bridge_points

# Build the projected bridge-site points from the GPS columns of the CSV
bridge_points = map_bridges(
    bridges=bridge_sites,
    bridges_lat="Bridge Opportunity: GPS (Latitude)",
    bridges_lon="Bridge Opportunity: GPS (Longitude)",
    village_boundaries=Rwanda_village_boundaries,
)

In [None]:
# WorldPop population raster
# Note: the file was projected to the Rwanda TM CRS before this workbook
worldpop_fp = os.path.join(
    unsynced_path,
    "reprojected_population_raster.tif",
)

# Open the raster only to report its CRS
with rasterio.open(worldpop_fp) as src:
    print(src.crs)

Task 2: Create 1 km, 1.5 km, and 2 km buffers around each bridge site

In [None]:
def create_bridge_buffers(bridges, buffer_distance):
    """Return a copy of `bridges` whose geometry is a buffer around each site.

    Parameters
    ----------
    bridges : GeoDataFrame of bridge sites.
    buffer_distance : buffer radius, passed straight to `geometry.buffer`,
        so it is expressed in the units of the CRS of `bridges`.

    Returns
    -------
    A new GeoDataFrame with the attribute columns of `bridges` and the
    buffers as its geometry. The input is not modified.
    """
    # Create buffers around bridges according to user's distance input
    buf = bridges.geometry.buffer(distance=buffer_distance)

    # Copy the attributes from the argument itself (not from a notebook
    # global) so the function works for any GeoDataFrame passed in
    bridge_buffers = bridges.copy()
    bridge_buffers['geometry'] = buf

    return bridge_buffers

# Buffer radii of 1000, 1500 and 2000, in the units of the bridge points' CRS
bridge_buffers_1km = create_bridge_buffers(bridge_points, 1000)
bridge_buffers_1_5km = create_bridge_buffers(bridge_points, 1500)
bridge_buffers_2km = create_bridge_buffers(bridge_points, 2000)

In [None]:
# Base map centred at latitude -2.0, longitude 30.0
buffer_map = folium.Map(location=[-2.0, 30.0], zoom_start=8)

# One toggleable layer per buffer distance (only the buffers are drawn)
folium.GeoJson(bridge_buffers_1km, name="1 km Buffers").add_to(buffer_map)
folium.GeoJson(bridge_buffers_1_5km, name="1.5 km Buffers").add_to(buffer_map)
folium.GeoJson(bridge_buffers_2km, name="2 km Buffers").add_to(buffer_map)

# Layer control lets the viewer switch individual buffer layers on and off
folium.LayerControl().add_to(buffer_map)

# Leaving the map as the last expression renders it in the notebook
buffer_map

Task 3: Extract population estimates from WorldPop raster for each buffer

In [None]:
def extract_pop(reprojected_pop_raster, population_raster_name, villages_near_bridges):
    """Sum the raster values that fall inside each polygon of a GeoDataFrame.

    Parameters
    ----------
    reprojected_pop_raster : path to the population raster. It is expected
        to be in the same CRS as `villages_near_bridges`; no reprojection
        is done here.
    population_raster_name : name of the column that receives the totals.
    villages_near_bridges : GeoDataFrame of polygons to summarise (in this
        workbook, the bridge buffers).

    Returns
    -------
    A copy of `villages_near_bridges` with one extra column,
    `population_raster_name`, holding the summed raster values per polygon.
    Polygons for which extraction fails are reported and given a total of 0.

    Notes
    -----
    All bands returned by `mask` are summed together.
    """
    # Copy input geodataframe to avoid modifying the original
    zones = villages_near_bridges.copy()

    with rasterio.open(reprojected_pop_raster) as src:
        # Print information from raster profile
        print("CRS:", src.crs)
        print("Raster shape:", src.shape)
        print("Number of Bands:", src.count)

        results = []
        for idx, geom in zones.geometry.items():
            try:
                # filled=False makes mask() return a masked array in which
                # both out-of-polygon pixels and the raster's nodata pixels
                # are masked, so they can be zeroed whatever the nodata
                # value is (the default filled=True returns a plain array)
                clipped, _ = mask(src, [geom], crop=True, filled=False)
                values = np.ma.filled(clipped, 0)

                # Remove negative values (e.g., undeclared nodata placeholders)
                values = np.where(values < 0, 0, values)

                # nansum so that stray NaN cells do not turn the total into NaN
                total_population = np.nansum(values)

            except Exception as e:
                # Best effort: report the failure (e.g. a polygon that does
                # not overlap the raster) and continue with the next one
                print(f"Error at village {idx}: {e}")
                total_population = 0  # or np.nan

            results.append(total_population)

        # Add population data to the gdf
        zones[population_raster_name] = results

    return zones

# Population totals within the 1 km buffers
pop_estimates_1km = extract_pop(
    reprojected_pop_raster=worldpop_fp,
    population_raster_name="WorldPop_1km",
    villages_near_bridges=bridge_buffers_1km,
)

# Population totals within the 1.5 km buffers
pop_estimates_1_5km = extract_pop(
    reprojected_pop_raster=worldpop_fp,
    population_raster_name="WorldPop_1_5km",
    villages_near_bridges=bridge_buffers_1_5km,
)

# Population totals within the 2 km buffers
pop_estimates_2km = extract_pop(
    reprojected_pop_raster=worldpop_fp,
    population_raster_name="WorldPop_2km",
    villages_near_bridges=bridge_buffers_2km,
)