# Merging Subcounty-Level Data on PREP Services with Subcounty Population Data from the Uganda Census

In [1]:
import pandas as pd

In [2]:
pop = pd.read_csv('subcounty_population_adjusted.csv')

In [3]:
pop.head()

Unnamed: 0,PIRS District,PIRS Subcounty,subcounty_pop_adjusted
0,Katakwi District,Usuk Subcounty,21860.72
1,Serere District,Labori Subcounty,20473.31
2,Amuria District,Abarilela Subcounty,25018.7
3,Amuria District,Orungo Subcounty,15200.74
4,Kampala District,Central Division,154846.08


In [4]:
pop = pop.rename(columns={'PIRS District':'District','PIRS Subcounty':'Subcounty'})

In [5]:
pop.head()

Unnamed: 0,District,Subcounty,subcounty_pop_adjusted
0,Katakwi District,Usuk Subcounty,21860.72
1,Serere District,Labori Subcounty,20473.31
2,Amuria District,Abarilela Subcounty,25018.7
3,Amuria District,Orungo Subcounty,15200.74
4,Kampala District,Central Division,154846.08


In [6]:
pop.dtypes

District                   object
Subcounty                  object
subcounty_pop_adjusted    float64
dtype: object

In [7]:
pop.shape

(2179, 3)

In [8]:
dup_counts = pop.groupby(['District', 'Subcounty']).size()
print(dup_counts[dup_counts > 1])

District           Subcounty            
Adjumani District  Adjumani Town Council    2
                   Adropi Subcounty         2
                   Dzaipi Subcounty         4
                   Pakele Subcounty         3
                   Ukusijoni Subcounty      2
                                           ..
Yumbe District     Ariwa Subcounty          2
                   Kochi Subcounty          2
                   Kululu Subcounty         4
                   Odravu West Subcounty    3
                   Romogi Subcounty         4
Length: 394, dtype: int64


In [9]:
dup_rows = pop[pop.duplicated(subset=['District', 'Subcounty'], keep=False)]
print(dup_rows.sort_values(by=['District', 'Subcounty']))

               District              Subcounty  subcounty_pop_adjusted
1246  Adjumani District  Adjumani Town Council                36495.99
1247  Adjumani District  Adjumani Town Council                36495.99
1248  Adjumani District       Adropi Subcounty                12065.42
1249  Adjumani District       Adropi Subcounty                12065.42
1405  Adjumani District       Dzaipi Subcounty                44174.64
...                 ...                    ...                     ...
2174                NaN                    NaN                     NaN
2175                NaN                    NaN                     NaN
2176                NaN                    NaN                     NaN
2177                NaN                    NaN                     NaN
2178                NaN                    NaN                     NaN

[1210 rows x 3 columns]


In [10]:
pop = pop.drop_duplicates()

In [11]:
pop.shape

(1364, 3)

In [12]:
pp = pd.read_csv('uganda_prep_revised.csv')

In [13]:
pp.head()

Unnamed: 0,DHIS2 District,DHIS2 Subcounty,DHIS2 HF Name,Agency,"MSM, Male",Transgender,Sex Workers,PWID,People in prisons and other enclosed settings (Incarcerated Population),Pregnant,...,"Mobile Populations (e.g., migrant workers, truck drivers)",Non-injecting drug users,Discordant Couples,usaid,cdc,dod,total_facility,total_subcounty,pct_of_subcounty_reached,pct_usaid_subcounty
0,Adjumani District,Adjumani Town Council,Adjumani Hospital,CDC,0.0,0.0,533.0,0.0,0.0,0.0,...,226.0,1.0,22.0,0,1,0,1075.0,1094.0,0.982633,0.0
1,Adjumani District,Adjumani Town Council,Adjumani Mission HC III,CDC,0.0,0.0,11.0,0.0,0.0,0.0,...,2.0,0.0,0.0,0,1,0,19.0,1094.0,0.017367,0.0
2,Agago District,Adilang Town Council,Adilang HC III,USAID,0.0,0.0,220.0,0.0,0.0,3.0,...,12.0,0.0,17.0,1,0,0,507.0,507.0,1.0,1.0
3,Agago District,Kalongo Town Council,Kalongo Ambrosoli Memorial Hospital,USAID,0.0,0.0,2.0,0.0,0.0,0.0,...,18.0,0.0,5.0,1,0,0,27.0,27.0,1.0,1.0
4,Agago District,Patongo Town Council,Patongo HC IV,USAID,0.0,0.0,191.0,1.0,0.0,1.0,...,14.0,1.0,33.0,1,0,0,390.0,390.0,1.0,1.0


In [14]:
pp.shape

(677, 28)

In [15]:
pp.columns = pp.columns.str.strip()

In [16]:
pp.dtypes

DHIS2 District                                                                                           object
DHIS2 Subcounty                                                                                          object
DHIS2 HF Name                                                                                            object
Agency                                                                                                   object
MSM, Male                                                                                               float64
Transgender                                                                                             float64
Sex Workers                                                                                             float64
PWID                                                                                                    float64
People in prisons and other enclosed settings (Incarcerated Population)                                 

In [19]:
pp['District'] = pp['DHIS2 District']

In [21]:
pp['Subcounty'] = pp['DHIS2 Subcounty']

In [22]:
pp_pop = pd.merge(pp, pop, on=['District', 'Subcounty'], how='left')

In [23]:
pp_pop

Unnamed: 0,DHIS2 District,DHIS2 Subcounty,DHIS2 HF Name,Agency,"MSM, Male",Transgender,Sex Workers,PWID,People in prisons and other enclosed settings (Incarcerated Population),Pregnant,...,usaid,cdc,dod,total_facility,total_subcounty,pct_of_subcounty_reached,pct_usaid_subcounty,District,Subcounty,subcounty_pop_adjusted
0,Adjumani District,Adjumani Town Council,Adjumani Hospital,CDC,0.0,0.0,533.0,0.0,0.0,0.0,...,0,1,0,1075.0,1094.0,0.982633,0.0,Adjumani District,Adjumani Town Council,36495.99
1,Adjumani District,Adjumani Town Council,Adjumani Mission HC III,CDC,0.0,0.0,11.0,0.0,0.0,0.0,...,0,1,0,19.0,1094.0,0.017367,0.0,Adjumani District,Adjumani Town Council,36495.99
2,Agago District,Adilang Town Council,Adilang HC III,USAID,0.0,0.0,220.0,0.0,0.0,3.0,...,1,0,0,507.0,507.0,1.000000,1.0,Agago District,Adilang Town Council,
3,Agago District,Kalongo Town Council,Kalongo Ambrosoli Memorial Hospital,USAID,0.0,0.0,2.0,0.0,0.0,0.0,...,1,0,0,27.0,27.0,1.000000,1.0,Agago District,Kalongo Town Council,20056.16
4,Agago District,Patongo Town Council,Patongo HC IV,USAID,0.0,0.0,191.0,1.0,0.0,1.0,...,1,0,0,390.0,390.0,1.000000,1.0,Agago District,Patongo Town Council,11873.84
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
672,Wakiso District,Wakiso Town Council,Wakiso HC IV,CDC,156.0,22.0,709.0,0.0,0.0,2.0,...,0,1,0,1457.0,1457.0,1.000000,0.0,Wakiso District,Wakiso Town Council,124032.60
673,Yumbe District,Lodonga Town Council,Lodonga HC IV,CDC,0.0,0.0,4.0,0.0,0.0,0.0,...,0,1,0,11.0,11.0,1.000000,0.0,Yumbe District,Lodonga Town Council,33578.00
674,Yumbe District,Yumbe Town Council,Yumbe HC IV,CDC,1.0,0.0,598.0,1.0,0.0,9.0,...,0,1,0,971.0,971.0,1.000000,0.0,Yumbe District,Yumbe Town Council,35850.18
675,Zombo District,Nyapea Subcounty,Nyapea Hospital,CDC,0.0,0.0,12.0,0.0,0.0,0.0,...,0,1,0,60.0,60.0,1.000000,0.0,Zombo District,Nyapea Subcounty,21824.67


In [13]:
#print(pp.columns.tolist())

In [24]:
pp_pop.groupby('Subcounty')['subcounty_pop_adjusted'].apply(lambda x: x.isna().sum()).sum()

68

In [25]:
pp_pop.groupby('Subcounty')['subcounty_pop_adjusted'].apply(lambda x: x.notna().sum()).sum()

609

In [26]:
pp_pop.shape

(677, 31)

In [27]:
pp_pop['Subcounty'].value_counts()

Subcounty
Central Division               25
Southern Division               9
Nakawa Division                 8
Eastern Division                7
Northern Division               7
                               ..
Kitswamba Town Council          1
Kitswamba Subcounty             1
Kisinga Town Council            1
Katwe Kabatoro Town Council     1
Paidha Town Council             1
Name: count, Length: 475, dtype: int64

In [28]:
pp_pop.shape

(677, 31)

In [29]:
pp_pop.to_csv('uganda_prep_with_pop.csv', index=False)