### Workbook for extracting WorldPop population estimates for Uganda bridge catchment areas
Week of August 4, 2025
<br>
Author: Adele Birkenes

In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, LineString, Polygon
import rasterio
from rasterio.mask import mask
from rasterio.merge import merge
from rasterio.plot import show
from rasterio.warp import calculate_default_transform, reproject, Resampling
import os
import numpy as np
import matplotlib.pyplot as plt
import folium
from folium import GeoJson

**Task 1: Read in data on bridge sites and village boundaries**

Note: The village boundaries are not used in this analysis; they are loaded only to check whether they are stored in a projected CRS specific to Uganda.

In [6]:
# Base directories for the input data
synced_path = "../../synced-data/population-exploration/Uganda"
unsynced_path = "../../unsynced-data"

# Locations of the two input files
village_boundaries_fp = os.path.join(
    synced_path,
    "uga_admbnda_ubos_20200824_shp/uga_admbnda_adm4_ubos_20200824.shp",
)
bridge_sites_fp = os.path.join(synced_path, "All B2P Uganda Sites_2025.07.14.csv")

# Village boundaries are read as a geodataframe; only their CRS is inspected here
Uganda_village_boundaries = gpd.read_file(village_boundaries_fp)
print("Uganda village boundaries CRS:", Uganda_village_boundaries.crs)

# Bridge sites are read as a plain dataframe (coordinates are still ordinary columns)
Uganda_bridge_sites = pd.read_csv(bridge_sites_fp)

Uganda village boundaries CRS: EPSG:4326


Conclusion: The village boundaries are unprojected (WGS84). However, it is appropriate to use a projected CRS for distance calculations such as buffers. Therefore, I will use UTM zone 36N (EPSG:32636), which covers most of Uganda.

**Task 2: Check for rows with missing/invalid coordinates, which can create invalid geometries and cause problems. Create a dataframe of invalid bridge sites for review**

In [25]:
# Check for missing or obviously invalid coordinates
coord_cols = ["GPS (Latitude)", "GPS (Longitude)"]

# Number of missing values per coordinate column
print(Uganda_bridge_sites[coord_cols].isnull().sum())

# Value ranges, to spot coordinates that fall far outside the expected extent
print(Uganda_bridge_sites[coord_cols].describe())

# Count the rows that can (True) / cannot (False) be turned into a point geometry.
# This is derived from the raw table instead of `bridge_points`, because
# `bridge_points` is only created in Task 3 and therefore does not exist yet
# when the notebook is run top to bottom on a fresh kernel.
print(Uganda_bridge_sites[coord_cols].notnull().all(axis=1).value_counts())

GPS (Latitude)     12
GPS (Longitude)    12
dtype: int64
       GPS (Latitude)  GPS (Longitude)
count      598.000000       598.000000
mean         0.641824        31.522621
std          0.913415         1.925981
min         -1.459017        29.630709
25%          0.098429        30.071547
50%          0.674117        30.303297
75%          1.064221        34.289442
max          3.750561        35.355083
True     598
False     12
Name: count, dtype: int64


In [27]:
# Keep every site that lacks a latitude, a longitude, or both
Uganda_bridge_sites_missing_coords = Uganda_bridge_sites.loc[
    Uganda_bridge_sites[["GPS (Latitude)", "GPS (Longitude)"]].isnull().any(axis=1)
]

# Save those sites next to the other synced inputs so they can be reviewed
Uganda_bridge_sites_missing_coords_fp = os.path.join(
    synced_path, "Uganda_bridge_sites_missing_coords.csv"
)
Uganda_bridge_sites_missing_coords.to_csv(
    Uganda_bridge_sites_missing_coords_fp,
    index=False,
)

**Task 3: Create a geodataframe of bridge sites with valid coordinates, reprojected to UTM zone 36N (EPSG:32636)**

In [33]:
# Turn the bridge sites table into a point geodataframe in a projected CRS
def map_bridges(bridges, bridges_lat, bridges_lon, projection='EPSG:32636'):
    """Build a projected point geodataframe from a table of bridge sites.

    Parameters
    ----------
    bridges : DataFrame
        Table of bridge sites with one latitude and one longitude column.
    bridges_lat : str
        Name of the latitude column.
    bridges_lon : str
        Name of the longitude column.
    projection : str, optional
        CRS the points are reprojected to (default 'EPSG:32636').

    Returns
    -------
    GeoDataFrame
        The rows of `bridges` that have both coordinates, with a point
        geometry column expressed in `projection`.
    """
    # Rows without both coordinates cannot become points, so they are dropped
    located = bridges.dropna(subset=[bridges_lat, bridges_lon])

    # Coordinates are read as lon/lat in EPSG:4326 (x = longitude, y = latitude)
    site_geometries = gpd.points_from_xy(x=located[bridges_lon], y=located[bridges_lat])
    points_wgs84 = gpd.GeoDataFrame(located, geometry=site_geometries, crs='EPSG:4326')

    # Reproject to the requested CRS and report it so the result can be checked
    bridge_points = points_wgs84.to_crs(projection)
    print(f'CRS of bridge sites: {bridge_points.crs}')

    return bridge_points

# Build the projected bridge points from the raw sites table (default CRS: EPSG:32636)
bridge_points = map_bridges(
    bridges=Uganda_bridge_sites,
    bridges_lat="GPS (Latitude)",
    bridges_lon="GPS (Longitude)",
)

#bridge_points.explore()

CRS of bridge sites: EPSG:32636


**Task 4: Create 3 km buffers around bridge sites**

In [48]:
def create_bridge_buffers(bridges, buffer_distance):
    """Return a copy of `bridges` whose geometries are replaced by buffers.

    Parameters
    ----------
    bridges : GeoDataFrame
        Bridge sites; every attribute column is carried over to the result.
    buffer_distance : number
        Buffer distance passed to `buffer`, in the units of the CRS of `bridges`.

    Returns
    -------
    GeoDataFrame
        Same rows and attributes as `bridges`, with the 'geometry' column
        holding the buffer of each site. `bridges` itself is not modified.
    """
    # Work on a copy so the input points keep their original geometry
    bridge_buffers = bridges.copy()

    # Swap each site's geometry for its buffer
    bridge_buffers['geometry'] = bridges.geometry.buffer(distance=buffer_distance)

    return bridge_buffers

# 3000 is in the units of the bridge points' CRS (metres for a UTM projection), i.e. 3 km
bridge_buffers_3km = create_bridge_buffers(
    bridges=bridge_points,
    buffer_distance=3000,
)

#bridge_buffers_3km.explore()

**Task 5: Read in WorldPop rasters for Uganda and surrounding countries that overlap with bridge buffers: DRC, South Sudan, Rwanda, and Kenya**

In [36]:
# Create list of WorldPop raster files for Uganda and surrounding countries: DRC, South Sudan, Rwanda, and Kenya
countries = ["uga","cod","ssd","rwa","ken"]
WorldPop_2020 = [os.path.join(unsynced_path, f"WorldPop_Rasters_2020.nosync/{country}_ppp_2020_UNadj_constrained.tif") for country in countries]

# Specify output file path for the mosaic
mosaic_fp = os.path.join(unsynced_path, "WorldPop_Rasters_2020.nosync/worldpop_2020_mosaic_for_uganda.tif")

# Open the country rasters, mosaic them, and always close the file handles afterwards
# (datasets opened inline inside the merge() call would never be closed)
open_rasters = []
try:
    for raster in WorldPop_2020:
        open_rasters.append(rasterio.open(raster))

    # Mosaic WorldPop rasters in list
    WorldPop_2020_mosaic, mosaic_transform = merge(open_rasters)

    # Take the georeferencing metadata from the inputs instead of hard-coding it,
    # so the written file describes the data it actually contains
    mosaic_crs = open_rasters[0].crs
    mosaic_nodata = open_rasters[0].nodata
finally:
    for open_raster in open_rasters:
        open_raster.close()

# Calculate width and height from the shape of the mosaic array (bands, rows, columns)
height, width = WorldPop_2020_mosaic.shape[1], WorldPop_2020_mosaic.shape[2]

# Save the first band of the mosaic to a single-band GeoTIFF file
with rasterio.open(
    mosaic_fp,
    'w',
    driver='GTiff',
    height=height,
    width=width,
    count=1,
    dtype=WorldPop_2020_mosaic.dtype,
    crs=mosaic_crs,
    nodata=mosaic_nodata,  # keep the nodata marker so empty cells stay recognisable
    transform=mosaic_transform
) as dst:
    dst.write(WorldPop_2020_mosaic[0], 1)

**Task 6: Extract population estimates from raster mosaic for bridge buffers**

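Note: To avoid reprojecting the raster mosaic, the bridge buffers (built in metres in UTM zone 36N) are reprojected to WGS84 (EPSG:4326) to match the raster. Because Uganda straddles the equator, where a degree of longitude and a degree of latitude cover nearly the same ground distance, the reprojected buffers stay close to circular and the distortion introduced is minimal.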

In [38]:
# Extract population totals from an unprojected raster (the buffers are converted to the raster CRS)
def extract_pop_unproj(pop_raster, population_field_name, buffers):
    """Sum the raster values that fall inside each buffer polygon.

    The raster is left as it is; the buffers are reprojected to the raster's
    CRS before the extraction.

    Parameters
    ----------
    pop_raster : str
        Path of the population raster to read.
    population_field_name : str
        Name of the column that receives the extracted totals.
    buffers : GeoDataFrame
        Buffer polygons. Not modified.

    Returns
    -------
    GeoDataFrame
        A reprojected copy of `buffers` (in the raster CRS) with one extra
        column, `population_field_name`. A buffer whose extraction failed
        holds NaN, so it cannot be mistaken for a buffer with no population.
    """
    with rasterio.open(pop_raster) as src:
        # Print information from raster profile
        print("Raster CRS:", src.crs)
        print("Raster shape:", src.shape)
        print("Number of Bands:", src.count)

        # Reproject buffers to match raster CRS; to_crs returns a new
        # geodataframe, so the caller's buffers are left untouched
        buffers_copy = buffers.to_crs(src.crs)
        print("Buffer CRS:", buffers_copy.crs)

        # Iterate over each buffer and extract population data
        results = []
        for idx, geometry in buffers_copy.geometry.items():
            try:
                # filled=False returns a masked array: cells outside the buffer
                # and nodata cells are masked instead of being overwritten
                out_image, _ = mask(src, [geometry], crop=True, filled=False)

                # Masked cells contribute nothing to the total
                pixels = out_image.filled(0)

                # Remove negative values (e.g., nodata placeholders stored as data)
                pixels = np.where(pixels < 0, 0, pixels)

                # Accumulate in float64 so that adding many float32 cells
                # does not lose precision
                total_population = float(np.sum(pixels, dtype=np.float64))

            except Exception as e:
                # Best effort: report the failing buffer and keep going, but
                # record "no estimate" rather than a population of zero
                print(f"Error at buffer {idx}: {e}")
                total_population = np.nan

            results.append(total_population)

        # Add population data to the gdf
        buffers_copy[population_field_name] = results

    return buffers_copy

# Location of the WorldPop mosaic written in Task 5
WorldPop_raster_fp = os.path.join(
    unsynced_path,
    "WorldPop_Rasters_2020.nosync/worldpop_2020_mosaic_for_uganda.tif",
)

# Population within each 3 km bridge buffer
pop_estimates_3km_unproj = extract_pop_unproj(
    pop_raster=WorldPop_raster_fp,
    population_field_name="WorldPop_3km",
    buffers=bridge_buffers_3km,
)

Raster CRS: EPSG:4326
Raster shape: (30809, 35616)
Number of Bands: 1
Buffer CRS: GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]]


In [43]:
# List the buffers whose extracted population is exactly 0 so they can be reviewed
zero_population_buffers = pop_estimates_3km_unproj.loc[
    pop_estimates_3km_unproj["WorldPop_3km"].eq(0)
]
zero_population_buffers

Unnamed: 0,Opportunity Name,Project Code,Level 1 Government,Level 2 Government,Level 3 Government,B2P Fiscal Year,Stage,Sub-Stage,Close Date,Bridge Type,Span (m),GPS (Latitude),GPS (Longitude),Population Estimate 2000m,Population Estimate 5000m,Individuals Directly Served,geometry,WorldPop_3km
4,Uganda - Tongole - 1008762,1008762,Eastern Region,Manafwa,Manafwa Town Council,2019.0,Complete,In Service,43518.0,Suspension Bridge,74.8,1.937883,34.2691,0.0,2578.0,3000.0,"POLYGON ((34.29607 1.93786, 34.29594 1.9352, 3...",0.0


In [46]:
# Summary statistics (count, mean, std, min, quartiles, max) of the 3 km
# population estimates across all bridge buffers
pop_estimates_3km_unproj["WorldPop_3km"].describe()

count       598.000000
mean      20023.343750
std       14373.556641
min           0.000000
25%        9545.085449
50%       16778.311523
75%       26516.054688
max      131532.703125
Name: WorldPop_3km, dtype: float64

In [47]:
# Average 3 km population estimate for each distinct value of the "Stage" column
mean_population_by_stage = (
    pop_estimates_3km_unproj
    .groupby("Stage")["WorldPop_3km"]
    .mean()
    .rename("Mean_Population")
    .reset_index()
)
mean_population_by_stage

Unnamed: 0,Stage,Mean_Population
0,Complete,14335.8125
1,Identified,19934.001953
2,Ready to Confirm,27971.414062
3,Rejected,24074.998047
4,Under Construction,6370.564453
