### Workbook for comparing population counts for villages surrounding Rwanda bridges
Week of April 21, 2025
Author: Adele Birkenes

In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, LineString, Polygon
import rasterio
import os
import numpy as np

Step 1: Read in needs assessment data

In [None]:
# Folder holding the population-exploration data, given relative to this notebook
path = "../../synced-data/population-exploration/"

# Build the full path to the needs assessment export
needs_assessment_fp = os.path.join(
    path,
    "Rwanda Sites with All Population Fields_Exported 2025.04.11.csv",
)

# Load the export into a dataframe; the ISO-8859-1 encoding accommodates
# the special characters present in the file
needs_assessment = pd.read_csv(
    needs_assessment_fp,
    encoding='ISO-8859-1',
)

# Preview the first rows to confirm the file loaded
print(needs_assessment.head())

Step 2: Filter to completed bridges, then add two new fields for each bridge: a list of the villages it serves, and the summed population of those villages (according to the needs assessments)

In [None]:
# Column groups used below: village names and the matching population counts
village_columns = [
    'Community Served 1',
    'Community Served 2',
    'Community Served 3',
    'Community Served 4',
    'Community Served 5',
    'Community Served 6',
]
population_columns = [
    'Individuals served in Community 1',
    'Individuals served in Community 2',
    'Individuals served in Community 3',
    'Individuals served in Community 4',
    'Individuals served in Community 5',
    'Individuals Served in Community 6',  # note: "Served" is capitalized for community 6 only
]


def _non_missing_villages(row):
    """Return the row's village names as a list, leaving out missing entries."""
    return [name for name in row if pd.notna(name)]


def _row_total(row):
    """Return the sum of one row's population counts (pandas' default sum skips missing values)."""
    return row.sum()


# Keep only the bridges whose stage is 'Complete'; copy so that new columns
# are added to an independent dataframe rather than a slice of the original
is_complete = needs_assessment['Bridge Opportunity: Stage'] == 'Complete'
completed_bridges = needs_assessment[is_complete].copy()

# One list of served villages per bridge, built from the six village columns
village_names = completed_bridges[village_columns]
completed_bridges['Village List'] = village_names.apply(_non_missing_villages, axis=1)

# Total population served per bridge, summed row by row across the six population columns
population_counts = completed_bridges[population_columns]
completed_bridges['Total Needs Assessment Pop'] = population_counts.apply(_row_total, axis=1)

# Show the two new fields for the first few bridges
new_fields = ['Village List', 'Total Needs Assessment Pop']
print(completed_bridges[new_fields].head())

# Sanity check: recompute the totals with the vectorized column-wise sum and
# list any bridges where the stored total differs from it
vectorized_totals = completed_bridges[population_columns].sum(axis=1)
totals_disagree = completed_bridges['Total Needs Assessment Pop'] != vectorized_totals
mismatched_rows = completed_bridges[totals_disagree]
print(mismatched_rows)