### Workbook for calculating journey distances for Uganda catchment survey respondents
Week of September 8, 2025
<br>
Author: Adele Birkenes

In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, LineString, Polygon
import os
import matplotlib.pyplot as plt
import numpy as np

Task 1: Read in Uganda bridge sites (df), catchment survey data (df), and village boundaries (gdf)

In [2]:
# Base directories used throughout the notebook
synced_catchment_path = "../../synced-data/catchment-analysis"
synced_population_path = "../../synced-data/population-exploration/Uganda"
graphics_path = "../../notebooks/catchment-analysis/graphics"

# Locations of the three input files
bridge_sites_fp = os.path.join(synced_population_path, "All B2P Uganda Sites_2025.07.14.csv")
catchment_surveys_fp = os.path.join(synced_catchment_path, "All Catchment Surveys - Uganda-2025 update.csv")
village_boundaries_fp = os.path.join(
    synced_population_path,
    "uga_admbnda_ubos_20200824_shp/uga_admbnda_adm4_ubos_20200824.shp",
)

# Load the bridge sites and catchment surveys as dataframes,
# and the village boundaries shapefile as a geodataframe
Uganda_bridge_sites = pd.read_csv(bridge_sites_fp)
Uganda_catchment_surveys = pd.read_csv(catchment_surveys_fp)
Uganda_village_boundaries = gpd.read_file(village_boundaries_fp)

Task 2: Explore admin codes of catchment survey data

In [3]:
# Pull out the letter prefix of each "Origin - Admin Code" value,
# i.e. the one or two letters that precede the hyphen and the digits
origin_code_prefixes = Uganda_catchment_surveys["Origin - Admin Code"].str.split('-').str[0]

# Distinct prefixes (missing codes show up as nan) and how many there are
unique_origin_admin_codes = origin_code_prefixes.unique()
print(f"Unique origin admin code letters: {unique_origin_admin_codes}")
print(f"Number of unique origin admin code letters: {len(unique_origin_admin_codes)}")

# Number of responses carrying each prefix
origin_admin_code_counts = origin_code_prefixes.value_counts()
print(f"Count of responses with each origin admin code letter: {origin_admin_code_counts}")

Unique origin admin code letters: ['V' nan 'D' 'SC' 'P' 'C']
Number of unique origin admin code letters: 6
Count of responses with each origin admin code letter: Origin - Admin Code
V     6395
SC     151
D      139
P       76
C        2
Name: count, dtype: int64


In [4]:
# Pull out the letter prefix (text before the hyphen) of each "Home Village - Admin Code" value
home_code_prefixes = Uganda_catchment_surveys["Home Village - Admin Code"].str.split('-').str[0]

# Distinct prefixes (missing codes show up as nan) and how many there are
unique_home_admin_codes = home_code_prefixes.unique()
print(f"Unique home admin code letters: {unique_home_admin_codes}")
print(f"Number of unique home admin code letters: {len(unique_home_admin_codes)}")

# Number of responses carrying each prefix
home_admin_code_counts = home_code_prefixes.value_counts()
print(f"Count of responses with each home admin code letter: {home_admin_code_counts}")

Unique home admin code letters: ['V' nan 'D' 'C' 'SC' 'P']
Number of unique home admin code letters: 6
Count of responses with each home admin code letter: Home Village - Admin Code
V     6482
D      119
SC      73
P       72
C        7
Name: count, dtype: int64


In [5]:
# Pull out the prefix (text before the hyphen) of each "Destination - Admin Code" value
destination_code_prefixes = Uganda_catchment_surveys["Destination - Admin Code"].str.split('-').str[0]

# Distinct prefixes (missing codes show up as nan) and how many there are
unique_destination_admin_codes = destination_code_prefixes.unique()
print(f"Unique destination admin code letters: {unique_destination_admin_codes}")
print(f"Number of unique destination admin code letters: {len(unique_destination_admin_codes)}")

# Number of responses carrying each prefix
destination_admin_code_counts = destination_code_prefixes.value_counts()
print(f"Count of responses with each destination admin code letter: {destination_admin_code_counts}")

Unique destination admin code letters: ['V' 'D' nan 'SC' 'C' 'P' 'Kasangali A' 'CT']
Number of unique destination admin code letters: 8
Count of responses with each destination admin code letter: Destination - Admin Code
V              6373
D               178
SC              123
P                79
C                 4
CT                2
Kasangali A       1
Name: count, dtype: int64


In [6]:
# The digit strings after the hyphen vary in length, which likely reflects different admin levels.
# Isolate the part of each origin admin code that follows the hyphen ...
origin_code_digits = Uganda_catchment_surveys["Origin - Admin Code"].str.split('-').str[1]

# ... measure its length, and tally responses per length (ordered by length)
origin_admin_code_lengths = origin_code_digits.str.len()
origin_admin_code_length_counts = origin_admin_code_lengths.value_counts().sort_index()

print("Origin admin code lengths (excluding letters) and their response counts:")
print(origin_admin_code_length_counts)

Origin admin code lengths (excluding letters) and their response counts:
Origin - Admin Code
1.0      26
2.0      99
3.0     248
4.0    1831
5.0    4559
Name: count, dtype: int64


In [7]:
# Isolate the part of each home village admin code that follows the hyphen
home_code_digits = Uganda_catchment_surveys["Home Village - Admin Code"].str.split('-').str[1]

# Measure its length and tally responses per length (ordered by length)
home_admin_code_lengths = home_code_digits.str.len()
home_admin_code_length_counts = home_admin_code_lengths.value_counts().sort_index()

print("Home admin code lengths (excluding letters) and their response counts:")
print(home_admin_code_length_counts)

Home admin code lengths (excluding letters) and their response counts:
Home Village - Admin Code
1.0      20
2.0      78
3.0     461
4.0    1773
5.0    4421
Name: count, dtype: int64


In [8]:
# Isolate the part of each destination admin code that follows the hyphen
destination_code_digits = Uganda_catchment_surveys["Destination - Admin Code"].str.split('-').str[1]

# Measure its length and tally responses per length (ordered by length)
destination_admin_code_lengths = destination_code_digits.str.len()
destination_admin_code_length_counts = destination_admin_code_lengths.value_counts().sort_index()

print("Destination admin code lengths (excluding letters) and their response counts:")
print(destination_admin_code_length_counts)

Destination admin code lengths (excluding letters) and their response counts:
Destination - Admin Code
1.0      36
2.0     113
3.0     429
4.0    1682
5.0    4499
Name: count, dtype: int64


In [9]:
# Average number of characters after the hyphen, per letter prefix, for "Origin - Admin Code"
origin_codes = Uganda_catchment_surveys["Origin - Admin Code"]

# Grouping key: the text before the hyphen (rows with a missing code fall out of the groupby)
origin_prefix_key = origin_codes.str.split('-').str[0]

avg_origin_code_lengths = origin_codes.groupby(origin_prefix_key).apply(
    lambda codes: codes.str.split('-').str[1].str.len().mean()
)

print("Average number of digits for each origin admin code letter:")
print(avg_origin_code_lengths)

Average number of digits for each origin admin code letter:
Origin - Admin Code
C     2.000000
D     2.086331
P     3.973684
SC    3.317881
V     4.689601
Name: Origin - Admin Code, dtype: float64


In [10]:
# Average number of characters after the hyphen, per letter prefix, for "Home Village - Admin Code"
home_codes = Uganda_catchment_surveys["Home Village - Admin Code"]

# Grouping key: the text before the hyphen (rows with a missing code fall out of the groupby)
home_prefix_key = home_codes.str.split('-').str[0]

avg_home_code_lengths = home_codes.groupby(home_prefix_key).apply(
    lambda codes: codes.str.split('-').str[1].str.len().mean()
)

print("Average number of digits for each home village admin code letter:")
print(avg_home_code_lengths)

Average number of digits for each home village admin code letter:
Home Village - Admin Code
C     2.428571
D     2.067227
P     3.930556
SC    3.315068
V     4.623264
Name: Home Village - Admin Code, dtype: float64


In [11]:
# For each unique letter combination used in the "Destination - Admin Code" column, calculate the average number of digits following the hyphen
def get_digits_after_hyphen(val):
    """Return the segment of an admin code that follows its first hyphen.

    The value is converted to a string and split on '-'. The second piece is
    returned, so for a value with several hyphens only the text between the
    first and second hyphen is kept.

    Parameters
    ----------
    val : Any
        Admin code value; non-string values (e.g. NaN) are stringified first.

    Returns
    -------
    str or None
        The segment after the first hyphen, or None when the stringified
        value contains no hyphen (e.g. a missing code or a bare place name).
    """
    pieces = str(val).split('-')
    # Check the length explicitly instead of catching every Exception,
    # so that unrelated errors are not silently swallowed.
    if len(pieces) < 2:
        return None
    return pieces[1]

# Store the post-hyphen segment of every destination code in a helper column
# (None where the code has no hyphen)
Uganda_catchment_surveys["DestinationDigits"] = Uganda_catchment_surveys["Destination - Admin Code"].apply(get_digits_after_hyphen)

# Grouping key: the text before the hyphen. Casting to str first means
# missing codes end up in a literal "nan" group instead of being dropped.
destination_prefix_key = Uganda_catchment_surveys["Destination - Admin Code"].astype(str).str.split('-').str[0]


def _mean_destination_digit_count(digits):
    """Mean string length of the non-null entries of `digits` (NaN if none remain)."""
    present = digits.dropna()
    return present.map(lambda segment: len(str(segment))).mean()


avg_destination_code_lengths = (
    Uganda_catchment_surveys
    .groupby(destination_prefix_key)["DestinationDigits"]
    .apply(_mean_destination_digit_count)
)

print("Average number of digits for each destination admin code letter:")
print(avg_destination_code_lengths)

Average number of digits for each destination admin code letter:
Destination - Admin Code
C              3.000000
CT             2.000000
D              2.028090
Kasangali A         NaN
P              3.987342
SC             3.455285
V              4.653225
nan                 NaN
Name: DestinationDigits, dtype: float64


In [14]:
# Keep only the admin name (`_EN`) and admin code (`_PCODE`) columns for admin levels 1-4.
# The geometry column is not selected, so the result carries attribute data only.
admin_columns = [
    "ADM4_EN", "ADM4_PCODE", "ADM3_EN", "ADM3_PCODE", "ADM2_EN", "ADM2_PCODE", "ADM1_EN", "ADM1_PCODE"
]
village_boundaries_filtered = Uganda_village_boundaries[admin_columns]

# Export filtered village boundaries to csv
village_boundaries_csv_fp = os.path.join(synced_population_path,"uga_admin_boundaries.csv")
village_boundaries_filtered.to_csv(village_boundaries_csv_fp, index=False)

# Preview the first rows. A notebook cell only displays its final expression,
# so the preview has to come last (mid-cell it was evaluated and discarded).
village_boundaries_filtered.head()

Admin levels in village boundaries gdf:

| Admin level number | Admin level name | Number of digits |
|-------------|--------------------------|-------------------------------|
| 4           | Sub-county/Town Council/Division                     | 8                       |
| 3           | County/Municipality/City Division                   | 6                     |
| 2           | District                 | 4                      |
| 1           | Region                 | 1                       |

Problem statement: The village boundaries shapefile uses admin codes with a fixed number of digits at each admin level (see the table above). The catchment survey data, by contrast, uses admin codes made up of a letter prefix (likely corresponding to an admin level) followed by a hyphen and a variable number of digits. Most letter prefixes do not map to a single digit count: for example, `V` codes average about 4.6–4.7 digits and `SC` codes about 3.3–3.5 digits, rather than a whole number. Therefore, there does not appear to be a straightforward way to match the admin codes in the catchment survey data to the admin codes in the village boundaries shapefile.