### Workbook for comparing population counts for villages surrounding Rwanda bridges
Week of April 21, 2025
Author: Adele Birkenes

In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, LineString, Polygon
import rasterio
import os
import numpy as np

Task 1: Read in needs assessment data as dataframe and Rwanda village boundaries as geodataframe

In [None]:
# Build the path to the needs assessment export (.csv) inside the synced data folder
needs_assessment_fp = os.path.join(
    "../../synced-data/population-exploration/",
    "Rwanda Sites with All Population Fields_Exported 2025.04.11.csv",
)

# Load the export into a dataframe; ISO-8859-1 is used so special characters in the file can be decoded
needs_assessment = pd.read_csv(needs_assessment_fp, encoding='ISO-8859-1')

# Preview the first 3 rows of the needs assessment data
needs_assessment_preview = needs_assessment.head(3)
print(needs_assessment_preview)

In [None]:
# Build the path to the Rwanda village boundaries shapefile
village_boundaries_dir = "../../synced-data/population-exploration/Rwanda Village Boundaries"
Rwanda_village_boundaries_fp = os.path.join(village_boundaries_dir, "Village.shp")

# Load the shapefile into a geodataframe
Rwanda_village_boundaries = gpd.read_file(Rwanda_village_boundaries_fp)

# Report the coordinate reference system the boundaries were read with
print(f'The CRS of Rwanda_village_boundaries is {Rwanda_village_boundaries.crs}\n')

# Preview the first 3 rows of the village boundaries data
village_boundaries_preview = Rwanda_village_boundaries.head(3)
print(village_boundaries_preview)

Task 2: Filter bridges to those that have been completed and create new fields to list the villages associated with each bridge and their summed population counts (according to the needs assessments)

In [None]:
# Columns naming the (up to six) villages served by each bridge
village_columns = [
    'Community Served 1', 'Community Served 2', 'Community Served 3',
    'Community Served 4', 'Community Served 5', 'Community Served 6',
]

# Columns holding the matching population counts
# (note: only the Community 6 column name capitalises "Served")
population_columns = [
    'Individuals served in Community 1', 'Individuals served in Community 2',
    'Individuals served in Community 3', 'Individuals served in Community 4',
    'Individuals served in Community 5', 'Individuals Served in Community 6',
]


def _non_missing(values):
    """Return the non-null entries of ``values`` as a list, in their original order."""
    return [value for value in values if pd.notna(value)]


# Keep only the bridges whose stage is 'Complete'; .copy() gives an independent dataframe
# so the new columns below are not written onto a slice of needs_assessment
is_complete = needs_assessment['Bridge Opportunity: Stage'] == 'Complete'
completed_bridges = needs_assessment[is_complete].copy()

# Collapse the six village name columns into one list per bridge, leaving out missing names
completed_bridges['Village List'] = completed_bridges[village_columns].apply(_non_missing, axis=1)

# Total the population counts across all villages served by each bridge
population_counts = completed_bridges[population_columns]
completed_bridges['Total Needs Assessment Pop'] = population_counts.sum(axis=1)

# Show the new fields for the first few bridges
print(completed_bridges[['Village List', 'Total Needs Assessment Pop']].head())

# Cross-check the totals: recompute each one row by row and list any bridge where the two disagree
row_by_row_totals = population_counts.apply(lambda row: row.sum(), axis=1)
mismatched_rows = completed_bridges[completed_bridges['Total Needs Assessment Pop'] != row_by_row_totals]
print(f'\nMismatched rows: {mismatched_rows}')

Task 3: Create dataframe of communities included in needs assessments, where each row is a community (admin 5) and contains the community's associated population

In [None]:
# Tag every needs assessment row with its index value so that communities and their population
# counts can be matched back up after reshaping
# Note: CaseSafeID cannot serve as the key, as some CaseSafeIDs correspond to multiple rows in the dataset
needs_assessment['Row ID'] = needs_assessment.index

# The six community name columns and the six matching individuals-served count columns
community_columns = [
    'Community Served 1', 'Community Served 2', 'Community Served 3',
    'Community Served 4', 'Community Served 5', 'Community Served 6',
]
individuals_served_columns = [
    'Individuals served in Community 1', 'Individuals served in Community 2',
    'Individuals served in Community 3', 'Individuals served in Community 4',
    'Individuals served in Community 5', 'Individuals Served in Community 6',
]

# Identifier columns carried through the reshape: the first 21 fields
# (up to "Bridge Opportunity: Population Estimate 5000m") plus "Row ID"
community_id_columns = list(needs_assessment.columns[:21]) + ['Row ID']

# Reshape to long format: one row per (bridge row, community column) pair
communities_served = pd.melt(
    needs_assessment,
    id_vars=community_id_columns,
    value_vars=community_columns,
    var_name='Community Column',
    value_name='Community Name',
)

# Preview the long-format community table
print(communities_served.head())

# Reshape the individuals-served counts the same way, keeping only the identifiers needed for matching
individuals_served = pd.melt(
    needs_assessment,
    id_vars=['Bridge Opportunity: CaseSafeID', 'Row ID'],
    value_vars=individuals_served_columns,
    var_name='Individuals Served Column',
    value_name='Individuals Served',
)

# Preview the long-format individuals served table
print(individuals_served.head())

In [None]:
# Extract the community number (the digits in each source column name, e.g. "Community Served 3" -> "3")
# in both dataframes so that community names and population counts can be paired up.
# expand=False makes str.extract return a Series rather than a one-column dataframe.
communities_served['Community Number'] = (
    communities_served['Community Column'].str.extract(r'(\d+)', expand=False).astype(str))
individuals_served['Community Number'] = (
    individuals_served['Individuals Served Column'].str.extract(r'(\d+)', expand=False).astype(str))

# Attach each community's population count by matching on row ID and community number.
# This cell overwrites communities_served, so the merge is skipped when the counts are already
# attached; re-running the cell would otherwise merge a second time and add suffixed duplicate columns.
# validate='one_to_one' raises a MergeError if a (Row ID, Community Number) pair is duplicated on
# either side, instead of silently multiplying rows.
if 'Individuals Served Column' not in communities_served.columns:
    communities_served = communities_served.merge(
        individuals_served,
        on=['Row ID', 'Community Number'],
        how='left',
        validate='one_to_one')

# Drop the rows that have no community name (unused community slots for a bridge)
communities_served = communities_served.dropna(subset=['Community Name'])

# Display the resulting table
print(communities_served.head())

Task 4: Join each community with level 5 admin boundaries, also checking that admin 3 & 4 match