# Pakistan Khyber Pakhtunkhwa 2019 MICS6 Datasets

In [1]:
import pyreadstat
import warnings
warnings.filterwarnings("ignore")
from googletrans import Translator
from googletrans import Translator
import pandas as pd
import numpy as np

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
def calculate_weighted_percentage(df, column_name):
    filtered_df = df[df['UB2'] == 1]
    weighted_percentage = round(filtered_df[column_name].mean() * 100,1)
    print("Weighted percentage of '{}' if 'UB2' is 1: {}".format(column_name, weighted_percentage))

In [3]:
# Function to calculate vaccination coverage for each child_residence between specific age group
def calculate_vaccination_coverage_child_residence(df, residence, vaccine_column):
    filtered_df = df[(df['UB2']==1) & (df['residence']== residence)]

    # Calculate the percentage of children vaccinated with the specified vaccine
    # Count non-null values in the vaccine_column
    weighted_percentage = round(filtered_df[vaccine_column].mean() * 100,1)
    #print("Weighted percentage of '{}' if 'UB2' is 1: {}".format(vaccine_column, weighted_percentage))                  
    return weighted_percentage


In [4]:
# Function to calculate vaccination coverage for each region between specific age group
def calculate_vaccination_coverage_region(df, region, vaccine_column):
    # Filter rows where child_age is between min_age and max_age and region matches
    filtered_df = df[(df['UB2']==1) & (df['region'] == region)]

    # Calculate the percentage of children vaccinated with the specified vaccine
    total_children = filtered_df[vaccine_column].count()  # Count non-null values in the vaccine_column
    if total_children == 0:
        print("No children found for the specified age range and region: {}.".format(region))
        return 0
    percentage_vaccinated = round(filtered_df[vaccine_column].mean() * 100,1)
    #print("{} Vaccination Coverage for children in {} between {} to {} months using dataset: {:.2f}%".format(vaccine_column, region, min_age, max_age, percentage_vaccinated))
    return percentage_vaccinated

In [5]:
# Function to calculate vaccination coverage for each child_gender between specific age group
def calculate_vaccination_coverage_child_gender(df, gender, vaccine_column):
    filtered_df = df[(df['UB2']==1) & (df['gender']== gender)]

    # Calculate the percentage of children vaccinated with the specified vaccine
    # Count non-null values in the vaccine_column
    weighted_percentage = round(filtered_df[vaccine_column].mean() * 100,1)
    #print("Weighted percentage of '{}' if 'UB2' is 1: {}".format(vaccine_column, weighted_percentage))                  
    return weighted_percentage


In [85]:
def calculate_vaccination_coverage_district(df, district, vaccine_column):
    # Filter rows where district matches
    filtered_df = df[(df['UB2']==1) & (df['district'] == district)]
    total_children = filtered_df[vaccine_column].count()  # Count non-null values in the vaccine_column
    if total_children == 0:
        print("No children found for the specified district: {}.".format(district))
        return 0
    percentage_vaccinated = round(filtered_df[vaccine_column].mean() * 100, 1)
    return percentage_vaccinated

# Reading the dataframe

In [6]:
df, meta = pyreadstat.read_sav("C:/Users/swalke/Desktop/RISP/PAK Khyber 2019.sav")

In [7]:
# Access variable labels
variable_labels = meta.column_labels

# Access value labels for a specific variable
value_labels = meta.variable_value_labels.get("IM6BD")
value_labels

{0.0: 'NOT GIVEN',
 44.0: 'MARKED ON CARD',
 66.0: 'MOTHER REPORTED',
 97.0: 'Inconsistent',
 98.0: 'DK',
 99.0: 'NO RESPONSE'}

In [8]:
df['IM6BY'].value_counts()

IM6BY
2018.0    2444
2017.0    1792
2019.0    1362
2016.0     602
9999.0      19
Name: count, dtype: int64

In [9]:
# Access variable labels
variable_labels = meta.column_labels

# Access value labels for a specific variable
value_labels = meta.variable_value_labels.get("IM2")
value_labels

{1.0: 'YES, HAS ONLY CARD(S)',
 2.0: 'YES, HAS ONLY OTHER DOCUMENT',
 3.0: 'YES, HAS CARD(S) AND OTHER DOCUMENT',
 4.0: 'NO, HAS NO CARDS AND NO OTHER DOCUMENT',
 9.0: 'NO RESPONSE'}

In [10]:
# Define mapping variables
day_mapping = {range(1, 67): True,97.0:True, 99: np.nan, 0.0:False}
month_mapping ={range(1, 67): True, 97.07: True, 99: np.nan, 0.0:False}
year_mapping = {2014:True, 2015:True, 2016:True, 2017:True, 2018:True, 2019:True, 9997.0:True, 4444:True, 6666:True, 9999:np.nan}

In [11]:
df['card'] = df['IM2'].replace({1:True, 2:True, 3:False, 4:False, 9:np.nan})

In [12]:
df['recall'] = df['IM11'].replace({1:True, 2: False, 8: np.nan, 9: np.nan})

In [54]:
df['bcgday'] = df['IM6BD'].replace(day_mapping)
df['bcgmonth'] = df['IM6BM'].replace(month_mapping)
df['bcgyr'] = df['IM6BY'].replace(year_mapping)

df['polio0day'] = df['IM6P0D'].replace(day_mapping)
df['polio0month'] = df['IM6P0M'].replace(month_mapping)
df['polio0yr'] = df['IM6P0Y'].replace(year_mapping)


df['polio1day'] = df['IM6P1D'].replace(day_mapping)
df['polio1month'] = df['IM6P1M'].replace(month_mapping)
df['polio1yr'] = df['IM6P1Y'].replace(year_mapping)

df['polio2day'] = df['IM6P2D'].replace(day_mapping)
df['polio2month'] = df['IM6P2M'].replace(month_mapping)
df['polio2yr'] = df['IM6P2Y'].replace(year_mapping)


df['polio3day'] = df['IM6P3D'].replace(day_mapping)
df['polio3month'] = df['IM6P3M'].replace(month_mapping)
df['polio3yr'] = df['IM6P3Y'].replace(year_mapping)


df['ipvday'] = df['IM6ID'].replace(day_mapping)
df['ipvmonth'] = df['IM6IM'].replace(month_mapping)
df['ipvyr'] = df['IM6IY'].replace(year_mapping)


df['penta1day'] = df['IM6PENTA1D'].replace(day_mapping)
df['penta1month'] = df['IM6PENTA1M'].replace(month_mapping)
df['penta1yr'] = df['IM6PENTA1Y'].replace(year_mapping)


df['penta2day'] = df['IM6PENTA2D'].replace(day_mapping)
df['penta2month'] = df['IM6PENTA2M'].replace(month_mapping)
df['penta2yr'] = df['IM6PENTA2Y'].replace(year_mapping)


df['penta3day'] = df['IM6PENTA3D'].replace(day_mapping)
df['penta3month'] = df['IM6PENTA3M'].replace(month_mapping)
df['penta3yr'] = df['IM6PENTA3Y'].replace(year_mapping)

df['measlesday'] = df['IM6M1D'].replace(day_mapping)
df['measlesmonth'] = df['IM6M1M'].replace(month_mapping)
df['measlesyr'] = df['IM6M1Y'].replace(year_mapping)


#df['yfday'] = df['IM6YD'].replace(day_mapping)
#df['yfmonth'] = df['IM6YM'].replace(month_mapping)
#df['yfyr'] = df['IM6YY'].replace(year_mapping)

df['pcv1day'] = df['IM6PCV1D'].replace(day_mapping)
df['pcv1month'] = df['IM6PCV1M'].replace(month_mapping)
df['pcv1yr'] = df['IM6PCV1Y'].replace(year_mapping)

df['pcv2day'] = df['IM6PCV2D'].replace(day_mapping)
df['pcv2month'] = df['IM6PCV2M'].replace(month_mapping)
df['pcv2yr'] = df['IM6PCV2Y'].replace(year_mapping)

df['pcv3day'] = df['IM6PCV3D'].replace(day_mapping)
df['pcv3month'] = df['IM6PCV3M'].replace(month_mapping)
df['pcv3yr'] = df['IM6PCV3Y'].replace(year_mapping)

# BCG

In [14]:
df['bcg_card'] = df[['bcgday', 'bcgmonth','bcgyr']].max(axis=1)
df.loc[df['card'] == 0, 'bcg_card'] = 0

df['bcg_recall'] = df['IM14'].replace({1: 1, 2: 0,8: np.nan, 9: np.nan})
df.loc[df['recall'] == 0, 'bcg_recall'] = 0

df['bcg'] = df[['bcg_card', 'bcg_recall']].max(axis=1)
df['bcg_sampled']=df['bcg']*df['chweight']

In [15]:
# Access variable labels
variable_labels = meta.column_labels

# Access value labels for a specific variable
value_labels = meta.variable_value_labels.get("hh7")
value_labels

In [16]:
calculate_weighted_percentage(df, 'bcg')

Weighted percentage of 'bcg' if 'UB2' is 1: 77.2


In [17]:
calculate_weighted_percentage(df, 'bcg_sampled')

Weighted percentage of 'bcg_sampled' if 'UB2' is 1: 82.0


# Polio

In [55]:
#polio0
df['polio0_card'] = df[['polio0day', 'polio0month','polio0yr']].max(axis=1)
df.loc[df['card'] == False, 'polio0_card'] = False

df['polio0_recall1'] = df['IM17'].replace({1: 1, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'polio0_recall1'] = False

df['polio0'] = df[['polio0_card','polio0_recall1']].max(axis=1)
df['polio0_sampled']=df['polio0']*df['chweight']

In [18]:
#Polio1
df['polio1_card'] = df[['polio1day', 'polio1month','polio1yr']].max(axis=1)
df.loc[df['card'] == False, 'polio1_card'] = False

df['polio1_recall1'] = df['IM18'].replace({1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'polio1_recall1'] = False

df['polio1'] = df[['polio1_card','polio1_recall1']].max(axis=1)
df['polio1_sampled']=df['polio1']*df['chweight']

In [19]:
#Polio2
df['polio2_card'] = df[['polio2day', 'polio2month','polio2yr']].max(axis=1)
df.loc[df['card'] == False, 'polio2_card'] = False

df['polio2_recall'] = df['IM18'].replace({1: 0, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'polio2_recall'] = False

df['polio2'] = df[['polio2_card','polio2_recall']].max(axis=1)
df['polio2_sampled']=df['polio2']*df['chweight']

In [20]:
#Polio3
df['polio3_card'] = df[['polio3day', 'polio3month','polio3yr']].max(axis=1)
df.loc[df['card'] == False, 'polio3_card'] = False

df['polio3_recall'] = df['IM18'].replace({1: 0, 2: 0, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'polio3_recall'] = False

df['polio3'] = df[['polio3_card','polio3_recall']].max(axis=1)
df['polio3_sampled']=df['polio3']*df['chweight']

# Penta

In [21]:
#penta1

df['penta1_card'] = df[['penta1day', 'penta1month','penta1yr']].max(axis=1)
df.loc[df['card'] == False, 'penta1_card'] = False

df['penta1_recall'] = df['IM21'].replace({1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'polio1_recall'] = False

df['penta1'] = df[['penta1_card', 'penta1_recall']].max(axis=1)
df['penta1_sampled']=df['penta1']*df['chweight']

In [22]:
# penta2
df['penta2_card'] = df[['penta2day', 'penta2month','penta2yr']].max(axis=1)
df.loc[df['card'] == False, 'penta2_card'] = False

df['penta2_recall'] = df['IM21'].replace({1: 0, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'polio1_recall'] = False

df['penta2'] = df[['penta2_card', 'penta2_recall']].max(axis=1)
df['penta2_sampled']=df['penta2']*df['chweight']

In [23]:
# penta3
df['penta3_card'] = df[['penta3day', 'penta3month','penta3yr']].max(axis=1)
df.loc[df['card'] == False, 'penta3_card'] = False

df['penta3_recall'] = df['IM21'].replace({1: 0, 2: 0, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'polio1_recall'] = False

df['penta3'] = df[['penta3_card', 'penta3_recall']].max(axis=1)
df['penta3_sampled']=df['penta3']*df['chweight']

# Yellow Fever

# PCV

In [24]:
#pcv1
df['pcv1_card'] = df[['pcv1day', 'pcv1month','pcv1yr']].max(axis=1)
df.loc[df['card'] == False, 'pcv1_card'] = False

df['pcv1_recall1'] = df['IM23'].replace({1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'pcv1_recall1'] = False

df['pcv1'] = df[['pcv1_card','pcv1_recall1']].max(axis=1)
df['pcv1_sampled']=df['pcv1']*df['chweight']




In [25]:
#pcv2
df['pcv2_card'] = df[['pcv2day', 'pcv2month','pcv2yr']].max(axis=1)
df.loc[df['card'] == False, 'pcv2_card'] = False

df['pcv2_recall'] = df['IM23'].replace({1: 0, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'pcv2_recall'] = False

df['pcv2'] = df[['pcv2_card','pcv2_recall']].max(axis=1)
df['pcv2_sampled']=df['pcv2']*df['chweight']

In [26]:

#pcv3
df['pcv3_card'] = df[['pcv3day', 'pcv3month','pcv3yr']].max(axis=1)
df.loc[df['card'] == False, 'pcv3_card'] = False

df['pcv3_recall'] = df['IM23'].replace({1: 0, 2: 0, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'pcv3_recall'] = False

df['pcv3'] = df[['pcv3_card','pcv3_recall']].max(axis=1)
df['pcv3_sampled']=df['pcv3']*df['chweight']

# IPV

In [27]:
df['ipv'] = df[['ipvday', 'ipvmonth','ipvyr']].max(axis=1)
df.loc[df['card'] == False, 'ipv'] = False

#df['ipv_recall'] = df['IM23'].replace({1: 0, 2: 0, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
#df.loc[df['recall'] == False, 'ipv_recall'] = False

#df['ipv'] = df[['ipv_card','ipv_recall']].max(axis=1)
df['ipv_sampled']=df['ipv']*df['chweight']

# measles

In [28]:
df['measles_card'] = df[['measlesday', 'measlesmonth','measlesyr']].max(axis=1)
df.loc[df['card'] == False, 'measles_card'] = False

df['measles_recall'] = df['IM26'].replace({1: 1, 2: 0, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'measles_recall'] = False

df['measles'] = df[['measles_card','measles_recall']].max(axis=1)
df['measles_sampled']=df['measles']*df['chweight']

# Vaccination Calulations

In [56]:
calculate_weighted_percentage(df, 'bcg_sampled')
calculate_weighted_percentage(df, 'polio0_sampled')
calculate_weighted_percentage(df, 'polio1_sampled')
calculate_weighted_percentage(df, 'polio3_sampled')
calculate_weighted_percentage(df, 'penta1_sampled')
calculate_weighted_percentage(df, 'penta3_sampled')
#calculate_weighted_percentage(df, 'yf_sampled')
calculate_weighted_percentage(df, 'pcv1_sampled')
calculate_weighted_percentage(df, 'pcv3_sampled')
calculate_weighted_percentage(df, 'measles_sampled')
calculate_weighted_percentage(df, 'ipv_sampled')

Weighted percentage of 'bcg_sampled' if 'UB2' is 1: 82.0
Weighted percentage of 'polio0_sampled' if 'UB2' is 1: 87.9
Weighted percentage of 'polio1_sampled' if 'UB2' is 1: 71.9
Weighted percentage of 'polio3_sampled' if 'UB2' is 1: 59.8
Weighted percentage of 'penta1_sampled' if 'UB2' is 1: 76.6
Weighted percentage of 'penta3_sampled' if 'UB2' is 1: 67.8
Weighted percentage of 'pcv1_sampled' if 'UB2' is 1: 73.5
Weighted percentage of 'pcv3_sampled' if 'UB2' is 1: 64.5
Weighted percentage of 'measles_sampled' if 'UB2' is 1: 65.3
Weighted percentage of 'ipv_sampled' if 'UB2' is 1: 53.3


In [57]:
calculate_weighted_percentage(df, 'bcg')
calculate_weighted_percentage(df, 'polio0')
calculate_weighted_percentage(df, 'polio1')
calculate_weighted_percentage(df, 'polio3')
calculate_weighted_percentage(df, 'penta1')
calculate_weighted_percentage(df, 'penta3')
#calculate_weighted_percentage(df, 'yf_sampled')
calculate_weighted_percentage(df, 'pcv1')
calculate_weighted_percentage(df, 'pcv3')
calculate_weighted_percentage(df, 'measles')
calculate_weighted_percentage(df, 'ipv')

Weighted percentage of 'bcg' if 'UB2' is 1: 77.2
Weighted percentage of 'polio0' if 'UB2' is 1: 79.3
Weighted percentage of 'polio1' if 'UB2' is 1: 64.9
Weighted percentage of 'polio3' if 'UB2' is 1: 50.9
Weighted percentage of 'penta1' if 'UB2' is 1: 69.2
Weighted percentage of 'penta3' if 'UB2' is 1: 57.6
Weighted percentage of 'pcv1' if 'UB2' is 1: 66.4
Weighted percentage of 'pcv3' if 'UB2' is 1: 54.7
Weighted percentage of 'measles' if 'UB2' is 1: 56.7
Weighted percentage of 'ipv' if 'UB2' is 1: 44.2


In [88]:
# rename columns in dataframe
df = df.rename(columns={'HL4': 'gender','HH6':'residence','division':'district','HH7':'region'})

In [89]:
# Access variable labels
variable_labels = meta.column_labels

# Access value labels for a specific variable
value_labels = meta.variable_value_labels.get("division")
value_labels

{1.0: 'Bannu',
 2.0: 'D.I. Khan',
 3.0: 'Hazara',
 4.0: 'Kohat',
 5.0: 'Malakand',
 6.0: 'Mardan',
 7.0: 'Peshawar'}

In [90]:
district_mapping={1.0: 'Bannu',
 2.0: 'D.I. Khan',
 3.0: 'Hazara',
 4.0: 'Kohat',
 5.0: 'Malakand',
 6.0: 'Mardan',
 7.0: 'Peshawar'}

In [91]:
df['gender'].value_counts()

gender
Male      12558
Female    11787
Name: count, dtype: int64

In [92]:
# List of Region
region_mapping = {1.0: 'Abbotabad',
 2.0: 'Bannu',
 3.0: 'Batagram',
 4.0: 'Buner',
 5.0: 'Charsadda',
 6.0: 'Chitral',
 7.0: 'Dera Ismail Khan',
 8.0: 'Hangu',
 9.0: 'Hari Pur',
 10.0: 'Karak',
 11.0: 'Kohat',
 12.0: 'Kohistan',
 13.0: 'Laki Marwat',
 14.0: 'Lower Dir',
 15.0: 'Malakand',
 16.0: 'Mansehra',
 17.0: 'Mardan',
 18.0: 'Nowshehra',
 19.0: 'Peshawar',
 20.0: 'Shangla',
 21.0: 'Swabi',
 22.0: 'Swat',
 23.0: 'Tank',
 24.0: 'Torghar',
 25.0: 'Upper Dir',
 26.0: 'Bajor',
 27.0: 'Khyber',
 28.0: 'Kuram',
 29.0: 'Mohmind',
 30.0: 'North Waziristan',
 31.0: 'Orakzai',
 32.0: 'South Waziristan'}
residence_mapping = {1.0: 'Urban', 2.0: 'Rural'}
gender_mapping = {1.0: 'Male', 2.0: 'Female'}

In [93]:
# Apply mappings to DataFrame columns
df['gender'] = df['gender'].replace(gender_mapping)
df['region'] = df['region'].replace(region_mapping)
df['residence'] = df['residence'].replace(residence_mapping)
df['district']=df['district'].replace(district_mapping)

In [94]:
df['district'].value_counts()

district
Malakand     6665
Peshawar     4810
Hazara       4162
Kohat        3387
Bannu        1907
Mardan       1708
D.I. Khan    1706
Name: count, dtype: int64

In [65]:
# List of vaccines
vaccines = ['bcg_sampled','polio0_sampled','polio1_sampled','polio3_sampled','penta1_sampled','penta3_sampled','pcv1_sampled','pcv3_sampled','ipv_sampled','measles_sampled']

In [73]:
# List of vaccines
vaccines1 = ['bcg','polio0','polio1','polio3','penta1','penta3','pcv1','pcv3','ipv','measles']

In [79]:
districts=['Bannu','D.I. Khan','Hazara','Kohat','Mardan','Peshawar','Malakand']

In [67]:
regions=['Abbotabad',
 'Bannu',
'Batagram',
'Buner',
'Charsadda',
 'Chitral',
'Dera Ismail Khan',
'Hangu',
'Hari Pur',
'Karak',
'Kohat',
'Kohistan',
'Laki Marwat',
'Lower Dir',
'Malakand',
 'Mansehra',
'Mardan',
'Nowshehra',
'Peshawar',
'Shangla',
'Swabi',
 'Swat',
 'Tank',
 'Torghar',
'Upper Dir',
 'Bajor',
 'Khyber',
 'Kuram',
'Mohmind',
'North Waziristan',
 'Orakzai',
'South Waziristan']

In [68]:
# List of genders
genders = ['Female','Male']

In [69]:
residences=['Urban','Rural']

In [70]:
df['gender'].value_counts()

gender
Male      12558
Female    11787
Name: count, dtype: int64

In [99]:
# Create an empty list to store the results
results = []

# Loop through each vaccine and child_gender
for vaccine in vaccines:
    for gender in genders:
        coverage = calculate_vaccination_coverage_child_gender(df, gender, vaccine)
        results.append({'Vaccine': vaccine, 'gender': gender, 'Coverage': coverage})

# Convert the list of dictionaries into a DataFrame
results_df = pd.DataFrame(results)
#results_df['Coverage'] = results_df['Coverage'].round(2)
# Display the results as a table
pivot_table1 = results_df.pivot_table(index='Vaccine', columns='gender', values='Coverage')
pivot_table1

gender,Female,Male
Vaccine,Unnamed: 1_level_1,Unnamed: 2_level_1
bcg_sampled,84.6,79.4
ipv_sampled,48.0,58.3
measles_sampled,63.1,67.4
pcv1_sampled,75.7,71.4
pcv3_sampled,60.0,68.7
penta1_sampled,79.2,74.2
penta3_sampled,63.4,72.0
polio0_sampled,91.8,84.2
polio1_sampled,74.5,69.4
polio3_sampled,55.2,64.3


In [76]:
# Create an empty list to store the results
results1 = []

# Loop through each vaccine and child_residence
for vaccine in vaccines1:
    for residence in residences:
        coverage = calculate_vaccination_coverage_child_residence(df, residence, vaccine)
        results1.append({'Vaccine': vaccine, 'residence': residence, 'Coverage': coverage})

# Convert the list of dictionaries into a DataFrame
results1_df = pd.DataFrame(results1)
#results_df['Coverage'] = results_df['Coverage'].round(2)
# Display the results as a table
pivot_table1 = results1_df.pivot_table(index='Vaccine', columns='residence', values='Coverage')
pivot_table1

residence,Rural,Urban
Vaccine,Unnamed: 1_level_1,Unnamed: 2_level_1
bcg,77.0,78.4
ipv,42.4,56.8
measles,55.7,63.1
pcv1,65.5,72.9
pcv3,53.1,66.1
penta1,68.2,76.2
penta3,56.2,67.9
polio0,79.5,77.5
polio1,63.8,72.7
polio3,49.3,61.5


In [43]:
# Create an empty list to store the results
results2 = []

# Loop through each vaccine and child_region
for vaccine in vaccines1:
    for region in regions:
        coverage = calculate_vaccination_coverage_region(df, region, vaccine)
        results2.append({'Vaccine': vaccine, 'region': region, 'Coverage': coverage})

# Convert the list of dictionaries into a DataFrame
results2_df = pd.DataFrame(results2)
#results_df['Coverage'] = results_df['Coverage'].round(2)
# Display the results as a table
pivot_table2 = results2_df.pivot_table(index='Vaccine', columns='region', values='Coverage')
pivot_table2
#pivot_table2.to_excel('/path/to/directory/pivot_table2_sorted.xlsx')
#pivot_table2.to_excel('Vaccinebyregion.xlsx', index=False)

region,Abbotabad,Bajor,Bannu,Batagram,Buner,Charsadda,Chitral,Dera Ismail Khan,Hangu,Hari Pur,...,Nowshehra,Orakzai,Peshawar,Shangla,South Waziristan,Swabi,Swat,Tank,Torghar,Upper Dir
Vaccine,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
bcg,73.9,71.3,45.6,43.8,80.6,90.0,96.2,49.0,122.6,88.9,...,81.1,37.5,88.7,152.8,30.5,92.7,86.9,47.3,20.2,62.8
ipv,54.7,43.5,15.4,21.2,60.4,68.5,64.1,30.4,23.9,61.2,...,61.1,20.0,103.5,22.2,2.6,64.6,56.8,17.8,5.2,31.0
measles,67.4,52.2,25.7,22.5,69.1,71.3,96.2,37.3,21.7,86.1,...,61.7,32.5,106.8,53.7,25.4,81.6,64.2,33.3,15.1,46.8
pcv1,72.8,61.7,36.0,39.3,77.0,87.8,94.9,34.3,25.4,82.4,...,76.6,32.5,85.3,60.2,24.6,90.1,80.1,36.5,13.6,57.8
pcv3,68.5,47.0,17.6,24.7,73.4,81.1,93.6,31.4,23.7,78.7,...,68.0,32.5,115.4,55.6,18.6,83.3,69.3,28.4,12.7,37.2
penta1,73.3,62.6,39.0,43.5,77.7,91.1,96.8,45.1,26.5,86.0,...,79.4,32.5,88.6,62.0,35.6,90.7,81.8,45.2,14.7,62.7
penta3,70.0,54.8,17.6,24.7,76.3,84.2,95.5,34.3,23.9,84.1,...,69.1,32.5,118.6,56.5,27.1,83.3,71.6,31.5,13.8,51.2
polio1,72.5,57.4,28.7,41.6,71.2,85.2,94.9,37.3,33.3,75.7,...,76.6,35.0,85.7,61.1,17.8,91.4,76.1,28.4,16.9,52.8
polio3,65.9,44.3,20.6,28.1,62.6,68.5,86.0,31.4,29.8,72.9,...,61.7,30.0,109.4,50.9,5.1,77.9,64.2,18.9,5.1,34.9


In [44]:
pivot_table2.to_clipboard(index=True, header=True)

In [96]:
# Create an empty list to store the results
results2 = []

# Loop through each vaccine and child_district
for vaccine in vaccines1:
    for district in districts:
        coverage = calculate_vaccination_coverage_district(df, district, vaccine)
        results2.append({'Vaccine': vaccine, 'district': district, 'Coverage': coverage})

# Convert the list of dictionaries into a DataFrame
results2_df = pd.DataFrame(results2)
results2_df
# Display the results as a table
pivot_table2 = results2_df.pivot_table(index='Vaccine', columns='district', values='Coverage')
pivot_table2

district,Bannu,D.I. Khan,Hazara,Kohat,Malakand,Mardan,Peshawar
Vaccine,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
bcg,42.6,41.2,73.2,70.1,86.7,92.5,88.9
ipv,13.0,16.2,34.0,34.4,50.6,58.1,63.3
measles,27.1,31.5,46.4,53.0,64.1,75.1,67.2
pcv1,31.8,31.0,63.7,47.9,81.7,86.4,75.1
pcv3,18.7,25.5,42.1,39.9,64.5,74.7,73.7
penta1,34.6,41.3,66.2,49.1,83.8,88.9,77.5
penta3,19.8,30.7,43.5,40.3,69.2,78.3,76.3
polio0,32.8,36.4,70.9,68.2,93.5,91.4,97.8
polio1,26.8,27.2,64.4,49.7,79.1,85.3,72.9
polio3,19.2,17.7,40.5,39.3,60.0,68.1,67.7


In [97]:
pivot_table2.to_clipboard(index=True, header=True)

### Retention rate

In [100]:
def calculate_retention_rate(df, group_col, dose1_col, dose2_col):
    """
    Calculate retention rate from dose2 to dose1 for each group.
    
    Parameters:
    df (pandas.DataFrame): Input DataFrame containing vaccination data.
    group_col (str): Column name for the grouping variable.
    dose1_col (str): Column name representing the first dose.
    dose2_col (str): Column name representing the second dose.
    
    Returns:
    pandas.DataFrame: DataFrame with retention rates calculated and added as a new column.
    """
    # Group by the specified column and aggregate sum of dose1 and dose2
    df1 = df.groupby([group_col]).agg(
        sum_dose1=(dose1_col, 'sum'),
        sum_dose2=(dose2_col, 'sum')
    ).reset_index()
    
    # Calculate retention rate
    df1['retention_rate'] = ((1 - ((df1['sum_dose1'] - df1['sum_dose2']) / df1['sum_dose1'])) * 100)
    pivot_table2 = df1.pivot_table(columns=group_col, values='retention_rate')
    return pivot_table2
  

In [101]:
 filtered_df = df[(df['UB2']==1)]

In [104]:
# Call the calculate_retention_rate function
result_df = calculate_retention_rate(filtered_df, 'district', 'penta1', 'penta3')

# Copy the result to the clipboard
#result_df.to_clipboard(index=True, header=True)
result_df

district,Bannu,D.I. Khan,Hazara,Kohat,Malakand,Mardan,Peshawar
retention_rate,57.142857,74.380165,65.647059,82.191781,82.625118,88.087774,98.486933


In [106]:
# Call the calculate_retention_rate function
result_df = calculate_retention_rate(filtered_df, 'district', 'polio1', 'polio3')

# Copy the result to the clipboard
#result_df.to_clipboard(index=True, header=True)
result_df

district,Bannu,D.I. Khan,Hazara,Kohat,Malakand,Mardan,Peshawar
retention_rate,71.590909,65.0,62.91866,79.054054,75.9,79.87013,92.86754


In [107]:
calculate_retention_rate(filtered_df, 'residence', 'penta1', 'penta3')

residence,Rural,Urban
retention_rate,82.370821,89.125296


In [108]:
# Call the calculate_retention_rate function
calculate_retention_rate(filtered_df, 'residence', 'polio1', 'polio3')

residence,Rural,Urban
retention_rate,77.355439,84.653465


In [109]:
calculate_retention_rate(filtered_df, 'gender', 'penta1', 'penta3')

gender,Female,Male
retention_rate,74.539683,92.635135


In [110]:
calculate_retention_rate(filtered_df, 'gender', 'polio1', 'polio3')

gender,Female,Male
retention_rate,69.111709,88.281812


## National retention rate

In [111]:
polio_retention_rate = ((1 - ((filtered_df['polio1'].sum() - filtered_df['polio3'].sum()) / filtered_df['polio1'].sum())) * 100)
polio_retention_rate

78.380257212374

In [112]:
penta_retention_rate = ((1 - ((filtered_df['penta1'].sum() - filtered_df['penta3'].sum()) / filtered_df['penta1'].sum())) * 100)
penta_retention_rate

83.30605564648118

# Fully vaccinated

In [125]:
# Calculate the number of fully vaccinated kids
fully_vaccinated = filtered_df[(filtered_df['bcg']==1) & (filtered_df['polio3']==1) & (filtered_df['measles']==1) & (filtered_df['penta3']==1)]
num_fully_vaccinated = len(fully_vaccinated)

# Calculate the total number of kids
total_kids = len(filtered_df)

# Calculate the percentage of fully vaccinated kids
percentage_fully_vaccinated = (num_fully_vaccinated / total_kids) * 100
percentage_fully_vaccinated

42.076626615279984

##  Zero dose childern Penta1

In [116]:
# Filter out rows with null values in relevant columns
filtered_df = filtered_df.dropna(subset=['bcg', 'polio3', 'measles', 'penta3','polio1','penta1'])

In [118]:
def calculate_zero_dose_proportion(df, vaccine_column):
    # Count the number of children who never received the Penta vaccination
    zero_dose_children = df[df[vaccine_column] == 0][vaccine_column].count()

    # Count the total number of children in the dataset
    total_children = df['penta1'].count()

    # Calculate the proportion of zero dose children
    proportion_zero_dose = (zero_dose_children / total_children) * 100

    return proportion_zero_dose

In [119]:
zero_dose_proportion = calculate_zero_dose_proportion(filtered_df, 'penta1')
print(f"Proportion of zero dose children: {zero_dose_proportion:.2f}%")


Proportion of zero dose children: 35.23%


In [120]:
def calculate_zero_dose_percentage_by_group(df, vaccine_column, group_column):
    # Group the DataFrame by group and count the total number of children in each group
    total_children_by_group = df.groupby(group_column).size()

    # Group the DataFrame by group and count the number of zero-dose children in each group
    zero_dose_children_by_group = df[df[vaccine_column] == 0].groupby(group_column).size()

    # Calculate the percentage of zero-dose children for each group
    zero_dose_percentage_by_group = round((zero_dose_children_by_group / total_children_by_group) * 100,1)

    return zero_dose_percentage_by_group

In [121]:
zero_dose_percentage_by_group = calculate_zero_dose_percentage_by_group(df, 'penta1', 'district')

print(zero_dose_percentage_by_group)

district
Bannu        40.6
D.I. Khan    35.7
Hazara       28.2
Kohat        31.7
Malakand     18.9
Mardan       10.7
Peshawar     19.1
dtype: float64


In [122]:
zero_dose_percentage_by_group = calculate_zero_dose_percentage_by_group(df, 'penta1', 'residence')

print(zero_dose_percentage_by_group)

residence
Rural    25.4
Urban    18.2
dtype: float64


In [123]:
zero_dose_percentage_by_group = calculate_zero_dose_percentage_by_group(df, 'penta1', 'gender')

print(zero_dose_percentage_by_group)

gender
Female    25.5
Male      23.8
dtype: float64


# Zero Dose Childern Non-vaccinated at all

In [127]:
# Calculate the number of no vaccinated kids
no_vaccinated = filtered_df[(filtered_df['bcg']==0) & (filtered_df['polio1']==0) & (filtered_df['measles']==0) & (filtered_df['penta1']==0)]
num_no_vaccinated = len(no_vaccinated)

# Calculate the total number of kids
total_kids =len(filtered_df)


# Calculate the percentage of no vaccinated kids
percentage_no_vaccinated = (num_no_vaccinated / total_kids) * 100
percentage_no_vaccinated

28.905010201768306