# DRC MICS6 SPSS Data 2017-18

In [144]:
import pyreadstat
import warnings
warnings.filterwarnings("ignore")
from googletrans import Translator
from googletrans import Translator
import pandas as pd
import numpy as np

In [145]:
def calculate_weighted_percentage(df, column_name):
    """Print the mean of ``column_name``, as a percentage, over rows with ``UB2 == 1``.

    The value is the plain column mean times 100, rounded to one decimal.
    Nothing is returned; the figure is only printed.

    NOTE(review): the callers pass pre-multiplied ``*_sampled`` columns, so the
    result is mean(indicator * weight), not sum(indicator * weight) / sum(weight).
    Confirm that this is the intended estimator.
    """
    is_selected = df['UB2'] == 1
    column_mean = df.loc[is_selected, column_name].mean()
    pct = round(column_mean * 100, 1)
    print("Weighted percentage of '{}' if 'UB2' is 1: {}".format(column_name, pct))

In [146]:
# Coverage of one vaccine column for a single residence category
def calculate_vaccination_coverage_child_residence(df, residence, vaccine_column):
    """Return the mean of ``vaccine_column`` as a percentage for one residence value.

    Only rows with ``UB2 == 1`` and ``residence`` equal to the given value are
    used. The mean skips missing values and is rounded to one decimal.
    """
    in_scope = (df['UB2'] == 1) & (df['residence'] == residence)
    column_mean = df.loc[in_scope, vaccine_column].mean()
    return round(column_mean * 100, 1)


In [147]:
# Coverage of one vaccine column for a single region
def calculate_vaccination_coverage_region(df, region, vaccine_column):
    """Return the mean of ``vaccine_column`` as a percentage for one region.

    Only rows with ``UB2 == 1`` and ``region`` equal to the given value are used.
    When that subset has no non-null value in ``vaccine_column``, a message is
    printed and 0 is returned; otherwise the mean is rounded to one decimal.
    """
    in_scope = (df['UB2'] == 1) & (df['region'] == region)
    doses = df.loc[in_scope, vaccine_column]

    # count() ignores missing values, so this also catches an all-NaN subset
    if doses.count() == 0:
        print("No children found for the specified age range and region: {}.".format(region))
        return 0

    return round(doses.mean() * 100, 1)

In [148]:
# Coverage of one vaccine column for a single gender category
def calculate_vaccination_coverage_child_gender(df, gender, vaccine_column):
    """Return the mean of ``vaccine_column`` as a percentage for one gender value.

    Only rows with ``UB2 == 1`` and ``gender`` equal to the given value are
    used. The mean skips missing values and is rounded to one decimal.
    """
    in_scope = (df['UB2'] == 1) & (df['gender'] == gender)
    column_mean = df.loc[in_scope, vaccine_column].mean()
    return round(column_mean * 100, 1)


# Reading the dataframe

In [149]:
# Load the SPSS file: `df` receives the data, `meta` the file metadata
# (labels are read from it in later cells).
# NOTE(review): absolute, machine-specific path; it must be adjusted to run elsewhere.
df, meta = pyreadstat.read_sav("C:/Users/swalke/Desktop/RISP/DRCongo MICS6 SPSS Datafiles/ch.sav")

In [150]:
# Column labels as reported by the file metadata
variable_labels = meta.column_labels

# Value-label dictionary (code -> label) for IM6BD; .get() yields None when
# the metadata has no entry for that variable
value_labels = meta.variable_value_labels.get("IM6BD")
value_labels

{0.0: 'NON SIGNALÉ',
 44.0: 'ENREGSTRÉ SUR LA CARTE',
 66.0: 'DECLARÉ PAR LA MAMAN',
 99.0: 'NON REPONSE'}

In [151]:
# Frequency of each IM11 code (IM11 is the variable recoded into 'recall')
df['IM11'].value_counts()

IM11
1.0    6046
2.0    3586
8.0      69
9.0       3
Name: count, dtype: int64

In [152]:
# Recoding dictionaries for the card date fields, applied with Series.replace.
# Day and month fields: codes 1-66 -> True, 0 -> False, 99 -> missing. The
# 1-66 span covers real day/month numbers plus the 44 and 66 marker codes
# listed in the IM6BD value labels.
# The keys are spelled out as scalars: a `range` object used as a single dict
# key is not a documented form of `to_replace`.
day_mapping = {**{code: True for code in range(1, 67)}, 99: np.nan, 0: False}
month_mapping = dict(day_mapping)
# Year fields: the listed years and the 4444 / 6666 codes -> True, 9999 -> missing.
# NOTE(review): 4444 / 6666 / 9999 are presumably the year-field analogues of
# the 44 / 66 / 99 codes; confirm against the value labels.
year_mapping = {2015:True, 2016:True, 2017:True, 2018:True, 2019:True, 4444:True, 6666:True, 9999:np.nan}

In [153]:
# 'card' flag from IM2: codes 1 and 2 -> True, 3 and 4 -> False, 9 -> missing.
# NOTE(review): presumably IM2 records whether a vaccination card exists or was seen; confirm against the questionnaire.
df['card'] = df['IM2'].replace({1:True, 2:True, 3:False, 4:False, 9:np.nan})

In [154]:
# 'recall' flag from IM11: 1 -> True, 2 -> False, 8 and 9 -> missing.
# NOTE(review): presumably IM11 asks whether the child ever received any vaccination; confirm against the questionnaire.
df['recall'] = df['IM11'].replace({1:True, 2: False, 8: np.nan, 9: np.nan})

In [155]:
# Recode the card date fields of every antigen into flag columns.
# For each entry, the source variables are <stem>D / <stem>M / <stem>Y and the
# new columns are <prefix>day / <prefix>month / <prefix>yr. The entry order
# fixes the order in which the columns are added to df.
card_date_stems = {
    'bcg': 'IM6B',
    'polio0': 'IM6P0',
    'polio1': 'IM6P1',
    'polio2': 'IM6P2',
    'polio3': 'IM6P3',
    'ipv': 'IM6I',
    'penta1': 'IM6PENTA1',
    'penta2': 'IM6PENTA2',
    'penta3': 'IM6PENTA3',
    'mcv': 'IM6M',
    'yf': 'IM6Y',
    'pcv1': 'IM6PCV1',
    'pcv2': 'IM6PCV2',
    'pcv3': 'IM6PCV3',
}

for prefix, stem in card_date_stems.items():
    df[prefix + 'day'] = df[stem + 'D'].replace(day_mapping)
    df[prefix + 'month'] = df[stem + 'M'].replace(month_mapping)
    df[prefix + 'yr'] = df[stem + 'Y'].replace(year_mapping)

# BCG

In [156]:
# Card evidence: row-wise maximum of the three recoded BCG date flags
df['bcg_card'] = df[['bcgday', 'bcgmonth','bcgyr']].max(axis=1)
# Rows whose 'card' flag compares equal to 0 (i.e. False) are forced to 0
df.loc[df['card'] == 0, 'bcg_card'] = 0

# Recall evidence from IM14: 1 -> 1, 2 -> 0, 8 and 9 -> missing
df['bcg_recall'] = df['IM14'].replace({1: 1, 2: 0,8: np.nan, 9: np.nan})
# Rows whose 'recall' flag compares equal to 0 (i.e. False) are forced to 0
df.loc[df['recall'] == 0, 'bcg_recall'] = 0

# BCG indicator: row-wise maximum of the card and recall evidence
df['bcg'] = df[['bcg_card', 'bcg_recall']].max(axis=1)
# Indicator multiplied by 'chweight' (presumably the child sample weight; confirm)
df['bcg_sampled']=df['bcg']*df['chweight']

In [157]:
# Column labels as reported by the file metadata
variable_labels = meta.column_labels

# Value-label dictionary (code -> label) for IM17; .get() yields None when
# the metadata has no entry for that variable
value_labels = meta.variable_value_labels.get("IM17")
value_labels

{1.0: 'OUI', 2.0: 'NON', 8.0: 'NSP', 9.0: 'NON REPONSE'}

# Polio

In [158]:
# polio0
# Card evidence: row-wise maximum of the recoded polio0 date flags; rows whose
# 'card' flag is False are forced to False
df['polio0_card'] = df[['polio0day', 'polio0month','polio0yr']].max(axis=1)
df.loc[df['card'] == False, 'polio0_card'] = False

# Recall evidence from IM17: 1 -> 1, codes 2-7 -> 0, 8 and 9 -> missing; rows
# whose 'recall' flag is False are forced to False
df['polio0_recall1'] = df['IM17'].replace({1: 1, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'polio0_recall1'] = False

# Indicator = maximum of card and recall evidence, then multiplied by 'chweight'
df['polio0'] = df[['polio0_card','polio0_recall1']].max(axis=1)
df['polio0_sampled']=df['polio0']*df['chweight']

In [159]:
# polio1
# Card evidence: row-wise maximum of the recoded polio1 date flags; rows whose
# 'card' flag is False are forced to False
df['polio1_card'] = df[['polio1day', 'polio1month','polio1yr']].max(axis=1)
df.loc[df['card'] == False, 'polio1_card'] = False

# Recall evidence from IM18: codes 1-7 -> 1, 8 and 9 -> missing; rows whose
# 'recall' flag is False are forced to False.
# NOTE(review): presumably IM18 is the reported number of polio doses; confirm.
df['polio1_recall1'] = df['IM18'].replace({1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'polio1_recall1'] = False

# Indicator = maximum of card and recall evidence, then multiplied by 'chweight'
df['polio1'] = df[['polio1_card','polio1_recall1']].max(axis=1)
df['polio1_sampled']=df['polio1']*df['chweight']

In [160]:
# polio2
# Card evidence: row-wise maximum of the recoded polio2 date flags; rows whose
# 'card' flag is False are forced to False
df['polio2_card'] = df[['polio2day', 'polio2month','polio2yr']].max(axis=1)
df.loc[df['card'] == False, 'polio2_card'] = False

# Recall evidence from IM18: 1 -> 0, codes 2-7 -> 1, 8 and 9 -> missing; rows
# whose 'recall' flag is False are forced to False
df['polio2_recall'] = df['IM18'].replace({1: 0, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'polio2_recall'] = False

# Indicator = maximum of card and recall evidence, then multiplied by 'chweight'
df['polio2'] = df[['polio2_card','polio2_recall']].max(axis=1)
df['polio2_sampled']=df['polio2']*df['chweight']

In [161]:
# polio3
# Card evidence: row-wise maximum of the recoded polio3 date flags; rows whose
# 'card' flag is False are forced to False
df['polio3_card'] = df[['polio3day', 'polio3month','polio3yr']].max(axis=1)
df.loc[df['card'] == False, 'polio3_card'] = False

# Recall evidence from IM18: 1 and 2 -> 0, codes 3-7 -> 1, 8 and 9 -> missing;
# rows whose 'recall' flag is False are forced to False
df['polio3_recall'] = df['IM18'].replace({1: 0, 2: 0, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'polio3_recall'] = False

# Indicator = maximum of card and recall evidence, then multiplied by 'chweight'
df['polio3'] = df[['polio3_card','polio3_recall']].max(axis=1)
df['polio3_sampled']=df['polio3']*df['chweight']

# Penta

In [162]:
# penta1
# Card evidence: row-wise maximum of the recoded penta1 date flags; rows whose
# 'card' flag is False are forced to False
df['penta1_card'] = df[['penta1day', 'penta1month','penta1yr']].max(axis=1)
df.loc[df['card'] == False, 'penta1_card'] = False

# Recall evidence from IM21: codes 1-7 -> 1, 8 and 9 -> missing
df['penta1_recall'] = df['IM21'].replace({1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
# Rows whose 'recall' flag is False get no recall evidence. The override must
# target 'penta1_recall'; writing to 'polio1_recall' created an unrelated
# column and left this one untouched.
df.loc[df['recall'] == False, 'penta1_recall'] = False

# Indicator = maximum of card and recall evidence, then multiplied by 'chweight'
df['penta1'] = df[['penta1_card', 'penta1_recall']].max(axis=1)
df['penta1_sampled']=df['penta1']*df['chweight']

In [163]:
# penta2
# Card evidence: row-wise maximum of the recoded penta2 date flags; rows whose
# 'card' flag is False are forced to False
df['penta2_card'] = df[['penta2day', 'penta2month','penta2yr']].max(axis=1)
df.loc[df['card'] == False, 'penta2_card'] = False

# Recall evidence from IM21: 1 -> 0, codes 2-7 -> 1, 8 and 9 -> missing
df['penta2_recall'] = df['IM21'].replace({1: 0, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
# Rows whose 'recall' flag is False get no recall evidence. The override must
# target 'penta2_recall'; writing to 'polio1_recall' created an unrelated
# column and left this one untouched.
df.loc[df['recall'] == False, 'penta2_recall'] = False

# Indicator = maximum of card and recall evidence, then multiplied by 'chweight'
df['penta2'] = df[['penta2_card', 'penta2_recall']].max(axis=1)
df['penta2_sampled']=df['penta2']*df['chweight']

In [164]:
# penta3
# Card evidence: row-wise maximum of the recoded penta3 date flags; rows whose
# 'card' flag is False are forced to False
df['penta3_card'] = df[['penta3day', 'penta3month','penta3yr']].max(axis=1)
df.loc[df['card'] == False, 'penta3_card'] = False

# Recall evidence from IM21: 1 and 2 -> 0, codes 3-7 -> 1, 8 and 9 -> missing
df['penta3_recall'] = df['IM21'].replace({1: 0, 2: 0, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
# Rows whose 'recall' flag is False get no recall evidence. The override must
# target 'penta3_recall'; writing to 'polio1_recall' created an unrelated
# column and left this one untouched.
df.loc[df['recall'] == False, 'penta3_recall'] = False

# Indicator = maximum of card and recall evidence, then multiplied by 'chweight'
df['penta3'] = df[['penta3_card', 'penta3_recall']].max(axis=1)
df['penta3_sampled']=df['penta3']*df['chweight']

# Yellow Fever

In [165]:
# Card evidence: row-wise maximum of the recoded yellow-fever date flags; rows
# whose 'card' flag is False are forced to False
df['yf_card'] = df[['yfday', 'yfmonth','yfyr']].max(axis=1)
df.loc[df['card'] == False, 'yf_card'] = False

# Recall evidence from IM27: 1 -> True, 2 -> False, 8 and 9 -> missing; rows
# whose 'recall' flag is False are forced to False
df['yf_recall'] = df['IM27'].replace({1: True, 2: False, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'yf_recall'] = False

# Indicator = maximum of card and recall evidence, then multiplied by 'chweight'
df['yf'] = df[['yf_card', 'yf_recall']].max(axis=1)
df['yf_sampled']=df['yf']*df['chweight']

# PCV

In [166]:
# pcv1
# Card evidence: row-wise maximum of the recoded pcv1 date flags; rows whose
# 'card' flag is False are forced to False
df['pcv1_card'] = df[['pcv1day', 'pcv1month','pcv1yr']].max(axis=1)
df.loc[df['card'] == False, 'pcv1_card'] = False

# Recall evidence from IM23: codes 1-7 -> 1, 8 and 9 -> missing; rows whose
# 'recall' flag is False are forced to False
df['pcv1_recall1'] = df['IM23'].replace({1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'pcv1_recall1'] = False

# Indicator = maximum of card and recall evidence, then multiplied by 'chweight'
df['pcv1'] = df[['pcv1_card','pcv1_recall1']].max(axis=1)
df['pcv1_sampled']=df['pcv1']*df['chweight']




In [167]:
# pcv2
# Card evidence: row-wise maximum of the recoded pcv2 date flags; rows whose
# 'card' flag is False are forced to False
df['pcv2_card'] = df[['pcv2day', 'pcv2month','pcv2yr']].max(axis=1)
df.loc[df['card'] == False, 'pcv2_card'] = False

# Recall evidence from IM23: 1 -> 0, codes 2-7 -> 1, 8 and 9 -> missing; rows
# whose 'recall' flag is False are forced to False
df['pcv2_recall'] = df['IM23'].replace({1: 0, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'pcv2_recall'] = False

# Indicator = maximum of card and recall evidence, then multiplied by 'chweight'
df['pcv2'] = df[['pcv2_card','pcv2_recall']].max(axis=1)
df['pcv2_sampled']=df['pcv2']*df['chweight']

In [168]:

# pcv3
# Card evidence: row-wise maximum of the recoded pcv3 date flags; rows whose
# 'card' flag is False are forced to False
df['pcv3_card'] = df[['pcv3day', 'pcv3month','pcv3yr']].max(axis=1)
df.loc[df['card'] == False, 'pcv3_card'] = False

# Recall evidence from IM23: 1 and 2 -> 0, codes 3-7 -> 1, 8 and 9 -> missing;
# rows whose 'recall' flag is False are forced to False
df['pcv3_recall'] = df['IM23'].replace({1: 0, 2: 0, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'pcv3_recall'] = False

# Indicator = maximum of card and recall evidence, then multiplied by 'chweight'
df['pcv3'] = df[['pcv3_card','pcv3_recall']].max(axis=1)
df['pcv3_sampled']=df['pcv3']*df['chweight']

# IPV

In [169]:
# IPV indicator from card evidence only: row-wise maximum of the recoded IPV
# date flags; rows whose 'card' flag is False are forced to False
df['ipv'] = df[['ipvday', 'ipvmonth','ipvyr']].max(axis=1)
df.loc[df['card'] == False, 'ipv'] = False

# Disabled recall branch, kept for reference. It reads IM23, the variable used
# for the PCV recall above.
# NOTE(review): confirm the correct recall variable before re-enabling.
#df['ipv_recall'] = df['IM23'].replace({1: 0, 2: 0, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
#df.loc[df['recall'] == False, 'ipv_recall'] = False

#df['ipv'] = df[['ipv_card','ipv_recall']].max(axis=1)
# Indicator multiplied by 'chweight'
df['ipv_sampled']=df['ipv']*df['chweight']

# mcv

In [170]:
# Card evidence: row-wise maximum of the recoded MCV date flags; rows whose
# 'card' flag is False are forced to False
df['mcv_card'] = df[['mcvday', 'mcvmonth','mcvyr']].max(axis=1)
df.loc[df['card'] == False, 'mcv_card'] = False

# Recall evidence from IM26: 1 -> 1, 2 -> 0, 8 and 9 -> missing; rows whose
# 'recall' flag is False are forced to False
df['mcv_recall'] = df['IM26'].replace({1: 1, 2: 0, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'mcv_recall'] = False

# Indicator = maximum of card and recall evidence, then multiplied by 'chweight'
df['mcv'] = df[['mcv_card','mcv_recall']].max(axis=1)
df['mcv_sampled']=df['mcv']*df['chweight']

# Vaccination Calculations

In [171]:
# Print the national figure for each weighted indicator column, in report order
for sampled_column in (
    'bcg_sampled',
    'polio0_sampled',
    'polio1_sampled',
    'polio3_sampled',
    'penta1_sampled',
    'penta3_sampled',
    'yf_sampled',
    'pcv1_sampled',
    'pcv3_sampled',
    'mcv_sampled',
    'ipv_sampled',
):
    calculate_weighted_percentage(df, sampled_column)

Weighted percentage of 'bcg_sampled' if 'UB2' is 1: 73.3
Weighted percentage of 'polio0_sampled' if 'UB2' is 1: 69.1
Weighted percentage of 'polio1_sampled' if 'UB2' is 1: 73.1
Weighted percentage of 'polio3_sampled' if 'UB2' is 1: 48.2
Weighted percentage of 'penta1_sampled' if 'UB2' is 1: 65.3
Weighted percentage of 'penta3_sampled' if 'UB2' is 1: 46.6
Weighted percentage of 'yf_sampled' if 'UB2' is 1: 56.4
Weighted percentage of 'pcv1_sampled' if 'UB2' is 1: 63.1
Weighted percentage of 'pcv3_sampled' if 'UB2' is 1: 44.8
Weighted percentage of 'mcv_sampled' if 'UB2' is 1: 57.4
Weighted percentage of 'ipv_sampled' if 'UB2' is 1: 17.5


In [172]:
# Give the coded survey columns readable names:
# HL4 -> gender, HH6 -> residence, HH7 -> region
df = df.rename(columns={'HL4': 'gender','HH6':'residence','HH7':'region'})

In [173]:
# Numeric survey codes -> display labels for the renamed columns.
# Regions are listed in code order, starting at code 1.0.
region_mapping = {
    float(code): label
    for code, label in enumerate(
        [
            'Kinshasa',
            'Kongo Central',
            'Kwango',
            'Kwilu',
            'Maindombe',
            'Equateur',
            'Sud Ubangi',
            'Nord Ubangi',
            'Mongala',
            'Tshuapa',
            'Tshopo',
            'Bas Uele',
            'Haut Uele',
            'Ituri',
            'Nord Kivu',
            'Sud Kivu',
            'Maniema',
            'Haut Katanga',
            'Lualaba',
            'Haut Lomami',
            'Tanganyika',
            'Lomami',
            'Kasai Oriental',
            'Sankuru',
            'Kasai Central',
            'Kasai',
        ],
        start=1,
    )
}
residence_mapping = {
    1.0: 'URBAIN',
    2.0: 'RURAL',
}
gender_mapping = {
    1.0: 'MASC',
    2.0: 'FEM',
}

In [174]:
# Replace the numeric codes in the renamed columns with their display labels;
# values without an entry in the mapping are left unchanged by replace()
df['gender'] = df['gender'].replace(gender_mapping)
df['region'] = df['region'].replace(region_mapping)
df['residence'] = df['residence'].replace(residence_mapping)

In [175]:
# Weighted indicator columns (indicator multiplied by chweight) to tabulate
vaccines = [
    'bcg_sampled',
    'polio0_sampled',
    'polio1_sampled',
    'polio3_sampled',
    'penta1_sampled',
    'penta3_sampled',
    'yf_sampled',
    'pcv1_sampled',
    'pcv3_sampled',
    'mcv_sampled',
    'ipv_sampled',
]

In [176]:
# Unweighted indicator columns, in the same order as the weighted list
vaccines1 = [
    'bcg',
    'polio0',
    'polio1',
    'polio3',
    'penta1',
    'penta3',
    'yf',
    'pcv1',
    'pcv3',
    'mcv',
    'ipv',
]

In [177]:
# Region labels to tabulate, in survey-code order
regions = [
    'Kinshasa',
    'Kongo Central',
    'Kwango',
    'Kwilu',
    'Maindombe',
    'Equateur',
    'Sud Ubangi',
    'Nord Ubangi',
    'Mongala',
    'Tshuapa',
    'Tshopo',
    'Bas Uele',
    'Haut Uele',
    'Ituri',
    'Nord Kivu',
    'Sud Kivu',
    'Maniema',
    'Haut Katanga',
    'Lualaba',
    'Haut Lomami',
    'Tanganyika',
    'Lomami',
    'Kasai Oriental',
    'Sankuru',
    'Kasai Central',
    'Kasai',
]

In [178]:
# Gender labels to tabulate (the values of gender_mapping)
genders = ['MASC','FEM']

In [179]:
# Residence labels to tabulate (the values of residence_mapping)
residences=['URBAIN','RURAL']

In [180]:
# Check the relabelled gender column: number of rows per label
df['gender'].value_counts()

gender
FEM     10844
MASC    10633
Name: count, dtype: int64

In [181]:
# One record per (weighted vaccine column, gender) pair with its coverage figure
results = [
    {
        'Vaccine': vaccine,
        'gender': gender,
        'Coverage': calculate_vaccination_coverage_child_gender(df, gender, vaccine),
    }
    for vaccine in vaccines
    for gender in genders
]

# Reshape the long records into a Vaccine x gender table and display it
results_df = pd.DataFrame(results)
pivot_table1 = results_df.pivot_table(index='Vaccine', columns='gender', values='Coverage')
pivot_table1

gender,FEM,MASC
Vaccine,Unnamed: 1_level_1,Unnamed: 2_level_1
bcg_sampled,73.6,73.0
ipv_sampled,16.9,18.0
mcv_sampled,58.4,56.3
pcv1_sampled,63.0,63.2
pcv3_sampled,43.6,45.9
penta1_sampled,65.1,65.5
penta3_sampled,44.9,48.3
polio0_sampled,69.3,68.8
polio1_sampled,72.1,74.2
polio3_sampled,47.3,49.2


In [182]:
# One record per (vaccine column, residence) pair with its coverage figure.
# NOTE(review): this table iterates `vaccines1` (unweighted columns) while the
# gender table iterates `vaccines` (weighted columns); confirm this is intended.
results1 = [
    {
        'Vaccine': vaccine,
        'residence': residence,
        'Coverage': calculate_vaccination_coverage_child_residence(df, residence, vaccine),
    }
    for vaccine in vaccines1
    for residence in residences
]

# Reshape the long records into a Vaccine x residence table and display it
# (this rebinds pivot_table1)
results1_df = pd.DataFrame(results1)
pivot_table1 = results1_df.pivot_table(index='Vaccine', columns='residence', values='Coverage')
pivot_table1

residence,RURAL,URBAIN
Vaccine,Unnamed: 1_level_1,Unnamed: 2_level_1
bcg,57.7,76.9
ipv,11.2,23.7
mcv,42.7,58.4
pcv1,46.5,66.5
pcv3,29.3,46.6
penta1,49.4,69.7
penta3,31.1,49.6
polio0,51.5,72.4
polio1,61.0,75.1
polio3,36.4,51.7


In [183]:
# One record per (vaccine column, region) pair with its coverage figure.
# NOTE(review): this table iterates `vaccines1` (unweighted columns) while the
# gender table iterates `vaccines` (weighted columns); confirm this is intended.
results2 = [
    {
        'Vaccine': vaccine,
        'region': region,
        'Coverage': calculate_vaccination_coverage_region(df, region, vaccine),
    }
    for vaccine in vaccines1
    for region in regions
]

# Reshape the long records into a Vaccine x region table and display it.
# Use pivot_table2.to_excel(...) if a file export is needed.
results2_df = pd.DataFrame(results2)
pivot_table2 = results2_df.pivot_table(index='Vaccine', columns='region', values='Coverage')
pivot_table2

region,Bas Uele,Equateur,Haut Katanga,Haut Lomami,Haut Uele,Ituri,Kasai,Kasai Central,Kasai Oriental,Kinshasa,...,Maniema,Mongala,Nord Kivu,Nord Ubangi,Sankuru,Sud Kivu,Sud Ubangi,Tanganyika,Tshopo,Tshuapa
Vaccine,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
bcg,68.2,55.9,83.6,65.0,72.9,85.9,56.2,81.2,74.3,92.1,...,37.7,37.7,85.1,55.0,19.8,89.1,65.3,45.6,30.3,57.3
ipv,17.6,9.2,20.4,20.6,7.8,16.7,16.2,22.2,25.3,31.1,...,1.3,5.7,32.1,17.2,1.6,11.8,21.4,8.9,5.2,0.0
mcv,46.2,40.8,63.2,49.5,48.3,74.6,33.1,66.3,43.2,81.0,...,27.8,23.4,73.0,40.4,7.8,68.2,42.2,33.1,21.3,36.3
pcv1,52.7,50.7,69.7,57.3,68.1,81.6,34.8,77.9,61.2,86.4,...,26.9,22.7,82.2,31.1,14.1,79.5,48.8,33.1,22.6,36.7
pcv3,26.0,32.9,50.7,39.5,31.0,53.9,24.2,51.9,42.6,68.8,...,9.4,12.1,80.1,19.2,4.2,60.0,32.3,18.5,13.5,19.5
penta1,60.3,54.6,71.9,56.8,74.1,85.0,41.6,80.1,63.9,85.2,...,29.6,22.7,84.2,37.7,18.2,83.2,53.0,33.6,24.7,38.0
penta3,30.5,34.2,52.3,43.6,34.5,55.7,27.0,56.4,43.7,69.7,...,11.9,13.5,81.5,21.2,5.2,61.6,32.3,19.2,14.3,19.9
polio0,53.8,46.1,78.0,59.5,66.9,79.4,43.3,77.3,70.5,90.4,...,34.4,28.0,83.0,46.4,14.1,84.4,63.5,38.7,27.7,43.5
polio1,71.2,59.9,75.1,73.6,75.4,83.7,61.2,84.5,69.9,87.1,...,45.0,44.4,83.7,51.0,32.3,88.0,64.1,46.8,38.7,62.9
polio3,36.4,35.5,55.7,56.8,39.8,50.4,32.0,56.4,41.0,66.9,...,24.4,27.5,78.9,26.5,13.0,58.9,39.5,26.6,21.3,37.1


In [184]:
# Copy the Vaccine x region table, with row and column labels, to the system clipboard
pivot_table2.to_clipboard(index=True, header=True)

# retention rate

In [185]:
def calculate_retention_rate(df, group_col, dose1_col, dose2_col):
    """
    Compute, per group, the later-dose total as a percentage of the first-dose total.

    Parameters:
    df (pandas.DataFrame): Vaccination data.
    group_col (str): Column whose values define the groups.
    dose1_col (str): Column summed as the first-dose total (the denominator).
    dose2_col (str): Column summed as the later-dose total.

    Returns:
    pandas.DataFrame: A one-row table (row label ``retention_rate``) with one
    column per group value, holding the retention rate in percent.
    """
    # Per-group totals of both dose columns
    totals = (
        df.groupby([group_col])
        .agg(sum_dose1=(dose1_col, 'sum'), sum_dose2=(dose2_col, 'sum'))
        .reset_index()
    )

    # Retention = 100 * (1 - dropout share), where dropout is dose1 minus dose2
    first_dose = totals['sum_dose1']
    dropped = first_dose - totals['sum_dose2']
    totals['retention_rate'] = (1 - (dropped / first_dose)) * 100

    # Lay the groups out as columns
    return totals.pivot_table(columns=group_col, values='retention_rate')
  

In [186]:
 filtered_df = df[(df['UB2']==1)]

In [187]:
# Retention from penta1 to penta3 by region, using the weighted columns
result_df = calculate_retention_rate(filtered_df, 'region', 'penta1_sampled', 'penta3_sampled')

# Copy the table, with row and column labels, to the system clipboard
result_df.to_clipboard(index=True, header=True)


In [188]:
# Retention from polio1 to polio3 by region, using the weighted columns
# (this rebinds result_df)
result_df = calculate_retention_rate(filtered_df, 'region', 'polio1_sampled', 'polio3_sampled')

# Copy the table, with row and column labels, to the system clipboard
result_df.to_clipboard(index=True, header=True)


In [189]:
# Retention from penta1 to penta3 by residence, using the weighted columns
calculate_retention_rate(filtered_df, 'residence', 'penta1_sampled', 'penta3_sampled')

residence,RURAL,URBAIN
retention_rate,68.358132,74.947694


In [190]:
# Retention from polio1 to polio3 by residence, using the weighted columns
calculate_retention_rate(filtered_df, 'residence', 'polio1_sampled', 'polio3_sampled')

residence,RURAL,URBAIN
retention_rate,62.402506,70.540995


In [191]:
# Retention from penta1 to penta3 by gender, using the weighted columns
calculate_retention_rate(filtered_df, 'gender', 'penta1_sampled', 'penta3_sampled')

gender,FEM,MASC
retention_rate,68.981816,73.71197


In [192]:
# Retention from polio1 to polio3 by gender, using the weighted columns
calculate_retention_rate(filtered_df, 'gender', 'polio1_sampled', 'polio3_sampled')

gender,FEM,MASC
retention_rate,65.564219,66.281813


# National retention Rate

In [241]:
# Overall polio1 -> polio3 retention: 100 * (1 - (sum(polio1) - sum(polio3)) / sum(polio1)).
# This uses the unweighted 'polio1' / 'polio3' columns, unlike the grouped
# calls that use the '*_sampled' columns.
# NOTE(review): the cell numbers suggest this ran after filtered_df was
# narrowed by dropna; confirm which filtered_df is intended.
polio_retention_rate = ((1 - ((filtered_df['polio1'].sum() - filtered_df['polio3'].sum()) / filtered_df['polio1'].sum())) * 100)
polio_retention_rate

62.82682834406972

In [242]:
# Overall penta1 -> penta3 retention: 100 * (1 - (sum(penta1) - sum(penta3)) / sum(penta1)).
# This uses the unweighted 'penta1' / 'penta3' columns, unlike the grouped
# calls that use the '*_sampled' columns.
# NOTE(review): the cell numbers suggest this ran after filtered_df was
# narrowed by dropna; confirm which filtered_df is intended.
penta_retention_rate = ((1 - ((filtered_df['penta1'].sum() - filtered_df['penta3'].sum()) / filtered_df['penta1'].sum())) * 100)
penta_retention_rate

65.84821428571428

# Fully Vaccinated

In [238]:
# Drop rows where any of bcg, polio3, mcv or penta3 is missing.
# This rebinds filtered_df, so every cell executed afterwards sees the
# narrowed frame.
filtered_df = filtered_df.dropna(subset=['bcg', 'polio3', 'mcv', 'penta3'])

In [239]:
def calculate_percentage_fully_vaccinated(df):
    """Return the percentage of rows flagged for BCG, polio3, MCV and penta3 together.

    A row counts as fully vaccinated when ``bcg``, ``polio3``, ``mcv`` and
    ``penta3`` all equal 1. The denominator is the number of rows in ``df``;
    no weighting is applied.

    Returns:
        float: Percentage in the range 0-100, or NaN when ``df`` has no rows
        (the share is undefined, and dividing would raise ZeroDivisionError).
    """
    total_kids = len(df)
    if total_kids == 0:
        return float('nan')

    # Boolean mask of rows with all four indicators equal to 1
    is_fully_vaccinated = (
        (df['bcg'] == 1)
        & (df['polio3'] == 1)
        & (df['mcv'] == 1)
        & (df['penta3'] == 1)
    )
    num_fully_vaccinated = int(is_fully_vaccinated.sum())

    return (num_fully_vaccinated / total_kids) * 100

In [240]:
# Share of rows in filtered_df that are flagged for all four antigens
calculate_percentage_fully_vaccinated(filtered_df)

27.5615763546798

# Zero-dose Children

In [200]:
def calculate_zero_dose_percentage_by_group(df, vaccine_column, group_column):
    """Return, per group, the percentage of rows whose ``vaccine_column`` equals 0.

    Parameters:
        df (pandas.DataFrame): Input data.
        vaccine_column (str): Indicator column; a value of 0 marks a zero-dose row.
        group_column (str): Column whose values define the groups.

    Returns:
        pandas.Series: Percentage per group, rounded to one decimal and indexed
        by group value. The denominator is every row in the group, including
        rows where ``vaccine_column`` is missing.
    """
    # Number of rows in each group
    total_children_by_group = df.groupby(group_column).size()

    # Number of zero-dose rows in each group. Groups without any zero-dose row
    # are absent from the filtered count, so align to the full group index and
    # fill with 0; otherwise they would come out as NaN instead of 0.0.
    zero_dose_children_by_group = (
        df[df[vaccine_column] == 0]
        .groupby(group_column)
        .size()
        .reindex(total_children_by_group.index, fill_value=0)
    )

    zero_dose_percentage_by_group = (zero_dose_children_by_group / total_children_by_group) * 100

    return zero_dose_percentage_by_group.round(1)

In [234]:
def calculate_zero_dose_proportion(df, vaccine_column):
    """Return the percentage of rows whose ``vaccine_column`` equals 0.

    Parameters:
        df (pandas.DataFrame): Input data.
        vaccine_column (str): Indicator column; a value of 0 marks a zero-dose row.

    Returns:
        float: 100 * (rows equal to 0) / (rows with a non-missing value), or
        NaN when the column has no non-missing value.
    """
    # Rows explicitly recorded as not having received the vaccine
    zero_dose_children = (df[vaccine_column] == 0).sum()

    # Denominator: rows with a non-missing value in the same column. It was
    # previously hard-coded to 'penta1', which mismatched the numerator for
    # any other vaccine.
    total_children = df[vaccine_column].count()
    if total_children == 0:
        return float('nan')

    proportion_zero_dose = (zero_dose_children / total_children) * 100

    return proportion_zero_dose

In [235]:
# Overall share of rows with penta1 == 0 in filtered_df, printed with two decimals
zero_dose_proportion = calculate_zero_dose_proportion(filtered_df, 'penta1')
print(f"Proportion of zero dose children: {zero_dose_proportion:.2f}%")

Proportion of zero dose children: 44.77%


In [212]:
# Share of rows with penta1 == 0 within each region of filtered_df
zero_dose_percentage_by_group = calculate_zero_dose_percentage_by_group(filtered_df, 'penta1', 'region')

print(zero_dose_percentage_by_group)

region
Bas Uele          37.1
Equateur          42.9
Haut Katanga      27.5
Haut Lomami       42.4
Haut Uele         25.0
Ituri             13.7
Kasai             58.4
Kasai Central     19.8
Kasai Oriental    36.1
Kinshasa          13.8
Kongo Central     25.2
Kwango            49.4
Kwilu             30.2
Lomami            37.5
Lualaba           46.8
Maindombe         46.0
Maniema           67.9
Mongala           73.6
Nord Kivu         14.0
Nord Ubangi       61.4
Sankuru           81.3
Sud Kivu          16.5
Sud Ubangi        41.4
Tanganyika        66.4
Tshopo            72.5
Tshuapa           60.2
dtype: float64
