# Pakistan Sindh Province MICS 2018-19

In [1]:
import pyreadstat
import warnings
warnings.filterwarnings("ignore")
from googletrans import Translator
from googletrans import Translator
import pandas as pd
import numpy as np

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
def calculate_weighted_percentage(df, column_name, weight_column=None):
    """Print the percentage of ``column_name`` among rows where ``UB2 == 1``.

    Parameters:
    df (pandas.DataFrame): Data containing ``UB2``, ``column_name`` and, when
        given, ``weight_column``.
    column_name (str): Indicator column to summarise.
    weight_column (str, optional): Column holding the weights. When omitted
        the plain (unweighted) mean of ``column_name`` is reported, so a
        weighted figure is only obtained if the caller pre-multiplied the
        indicator. When given, sum(value * weight) / sum(weight) is reported,
        using only rows where both value and weight are present.

    Returns:
    None. The percentage is printed, rounded to one decimal place.
    """
    filtered_df = df[df['UB2'] == 1]
    if weight_column is None:
        mean_value = filtered_df[column_name].mean()
    else:
        values = filtered_df[column_name].astype(float)
        weights = filtered_df[weight_column].astype(float)
        # A missing value or weight must not contribute to either sum,
        # otherwise the denominator would be inflated.
        valid = values.notna() & weights.notna()
        total_weight = weights[valid].sum()
        if total_weight:
            mean_value = (values[valid] * weights[valid]).sum() / total_weight
        else:
            mean_value = float('nan')
    weighted_percentage = round(mean_value * 100, 1)
    print("Weighted percentage of '{}' if 'UB2' is 1: {}".format(column_name, weighted_percentage))

In [3]:
# Coverage of one vaccine among UB2 == 1 children of a given residence type
def calculate_vaccination_coverage_child_residence(df, residence, vaccine_column):
    """Return the mean of ``vaccine_column`` as a percentage for one residence.

    Only rows with ``UB2 == 1`` and ``residence`` equal to the given value are
    used. The result is rounded to one decimal place; with no matching
    non-missing values the mean (and therefore the result) is NaN.
    """
    in_scope = (df['UB2'] == 1) & (df['residence'] == residence)
    doses = df.loc[in_scope, vaccine_column]
    return round(doses.mean() * 100, 1)


In [4]:
# Coverage of one vaccine among UB2 == 1 children of a given region
def calculate_vaccination_coverage_region(df, region, vaccine_column):
    """Return the mean of ``vaccine_column`` as a percentage for one region.

    Only rows with ``UB2 == 1`` and ``region`` equal to the given value are
    used. If that selection has no non-missing value, a message is printed
    and 0 is returned; otherwise the percentage rounded to one decimal.
    """
    selected = df.loc[(df['UB2'] == 1) & (df['region'] == region), vaccine_column]
    # count() ignores missing values, so an all-NaN selection is "no children"
    if selected.count() == 0:
        print("No children found for the specified age range and region: {}.".format(region))
        return 0
    return round(selected.mean() * 100, 1)

In [5]:
# Coverage of one vaccine among UB2 == 1 children of a given gender
def calculate_vaccination_coverage_child_gender(df, gender, vaccine_column):
    """Return the mean of ``vaccine_column`` as a percentage for one gender.

    Only rows with ``UB2 == 1`` and ``gender`` equal to the given value are
    used. The result is rounded to one decimal place; with no matching
    non-missing values the mean (and therefore the result) is NaN.
    """
    in_scope = (df['UB2'] == 1) & (df['gender'] == gender)
    doses = df.loc[in_scope, vaccine_column]
    return round(doses.mean() * 100, 1)


In [6]:
def calculate_vaccination_coverage_district(df, district, vaccine_column):
    """Return the mean of ``vaccine_column`` as a percentage for one district.

    Only rows with ``UB2 == 1`` and ``district`` equal to the given value are
    used. If that selection has no non-missing value, a message is printed
    and 0 is returned; otherwise the percentage rounded to one decimal.
    """
    selected = df.loc[(df['UB2'] == 1) & (df['district'] == district), vaccine_column]
    # count() ignores missing values, so an all-NaN selection is "no children"
    if selected.count() == 0:
        print("No children found for the specified district: {}.".format(district))
        return 0
    return round(selected.mean() * 100, 1)

# Reading the dataframe

In [56]:
# Load the SPSS (.sav) survey file; `meta` carries the column and value labels
# inspected in the cells below.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
df, meta = pyreadstat.read_sav("C:/Users/swalke/Desktop/RISP/PAK Sindh 2018-19.sav")

In [8]:
# Column labels for every variable, as stored in the file metadata
variable_labels = meta.column_labels

# Code -> text labels for IM6BD (the source of the 'bcgday' column built later)
value_labels = meta.variable_value_labels.get("IM6BD")
value_labels

{0.0: 'NOT GIVEN',
 44.0: 'MARKED ON CARD',
 66.0: 'MOTHER REPORTED',
 97.0: 'Inconsistent',
 98.0: 'DK',
 99.0: 'NO RESPONSE'}

In [9]:
# Frequency of each raw IM6BY value, to see which codes year_mapping must cover
df['IM6BY'].value_counts()

IM6BY
2018.0    1743
2017.0    1301
2016.0     856
2019.0     105
2015.0       5
9999.0       2
Name: count, dtype: int64

In [10]:
# Column labels for every variable, as stored in the file metadata
variable_labels = meta.column_labels

# Code -> text labels for IM6P1D (the source of the 'polio1day' column built later)
value_labels = meta.variable_value_labels.get("IM6P1D")
value_labels

{0.0: 'NOT GIVEN',
 44.0: 'MARKED ON CARD',
 66.0: 'MOTHER REPORTED',
 97.0: 'Inconsistent',
 98.0: 'DK',
 99.0: 'NO RESPONSE'}

In [57]:
# Define mapping variables
# Recode the card date fields into "dose recorded" flags:
# True = recorded, False = not given (0), NaN = DK / no response.
# Codes 1-66 are listed one key per code. A `range` object used as a dict key
# is a single key, and `replace` is not documented to treat it as a set of
# values, so the explicit form does not depend on pandas internals.
day_mapping = {**{code: True for code in range(1, 67)}, 97.0: True, 98.0: np.nan, 99: np.nan, 0.0: False}
# NOTE(review): 97.07 looks like a typo for 97.0 (already present) - confirm.
month_mapping = {**{code: True for code in range(1, 67)}, 97.07: True, 97.0: True, 98.0: np.nan, 99: np.nan, 0.0: False}
year_mapping = {2014:True, 2015:True, 2016:True, 2017:True, 2018:True, 2019:True, 4444:True, 6666:True, 9997.0:True, 9999:np.nan,0.0:False}

In [58]:
# Card indicator from IM2: codes 1-3 -> True, 4 -> False, 9 -> missing.
# NOTE(review): presumably IM2 is the "has a vaccination card" item - confirm
# the code meanings against the questionnaire.
df['card'] = df['IM2'].replace({1:True, 2:True, 3:True, 4:False, 9:np.nan})

In [59]:
# Recall indicator from IM11: 1 -> True, 2 -> False, 8/9 -> missing.
# NOTE(review): presumably IM11 asks whether the child ever received any
# vaccination - confirm against the questionnaire.
df['recall'] = df['IM11'].replace({1:True, 2: False, 8: np.nan, 9: np.nan})

In [65]:
# Build <vaccine>day / <vaccine>month / <vaccine>yr flag columns from the raw
# card fields. Each raw field is named <prefix>D, <prefix>M or <prefix>Y and is
# recoded with day_mapping, month_mapping or year_mapping respectively.
card_date_prefixes = {
    'bcg': 'IM6B',
    'polio0': 'IM6P0',
    'polio1': 'IM6P1',
    'polio2': 'IM6P2',
    'polio3': 'IM6P3',
    'ipv': 'IM6I',
    'penta1': 'IM6PENTA1',
    'penta2': 'IM6PENTA2',
    'penta3': 'IM6PENTA3',
    'mcv': 'IM6M1',
    # 'yf': 'IM6Y',  # yellow fever is left disabled
    'pcv1': 'IM6PCV1',
    'pcv2': 'IM6PCV2',
    'pcv3': 'IM6PCV3',
}

for vaccine_name, source_prefix in card_date_prefixes.items():
    df[vaccine_name + 'day'] = df[source_prefix + 'D'].replace(day_mapping)
    df[vaccine_name + 'month'] = df[source_prefix + 'M'].replace(month_mapping)
    df[vaccine_name + 'yr'] = df[source_prefix + 'Y'].replace(year_mapping)

# BCG

In [61]:
# Card evidence: row-wise max of the three recoded date flags, so any True wins
df['bcg_card'] = df[['bcgday', 'bcgmonth','bcgyr']].max(axis=1)
# Rows whose 'card' flag equals 0/False are forced to False
df.loc[df['card'] == 0, 'bcg_card'] = False

# Recall evidence from IM14 (1 -> True, 2 -> False, 8/9 -> missing)
df['bcg_recall'] = df['IM14'].replace({1.0: True, 2.0: False,8: np.nan, 9: np.nan})
# Rows whose 'recall' flag equals 0/False are forced to False
df.loc[df['recall'] == 0, 'bcg_recall'] = False

# Final indicator: received according to either card or recall
df['bcg'] = df[['bcg_card', 'bcg_recall']].max(axis=1)
# Indicator multiplied by 'chweight' (presumably the child sample weight - confirm)
df['bcg_sampled']=df['bcg']*df['chweight']

In [62]:
# Column labels for every variable, as stored in the file metadata
variable_labels = meta.column_labels

# Code -> text labels for IM14 (the source of the 'bcg_recall' column)
value_labels = meta.variable_value_labels.get("IM14")
value_labels

{1.0: 'YES', 2.0: 'NO', 8.0: 'DK', 9.0: 'NO RESPONSE'}

In [63]:
# Distribution of the combined BCG indicator over all rows (missing values excluded)
df['bcg'].value_counts()

bcg
True     7435
False    2642
Name: count, dtype: int64

In [18]:
# BCG coverage from the raw indicator (no weighting applied)
calculate_weighted_percentage(df, 'bcg')

Weighted percentage of 'bcg' if 'UB2' is 1: 78.0


In [19]:
# BCG coverage from the indicator pre-multiplied by 'chweight'
calculate_weighted_percentage(df, 'bcg_sampled')

Weighted percentage of 'bcg_sampled' if 'UB2' is 1: 79.5


# Polio

In [77]:
#polio0
# Card evidence: row-wise max of the recoded day/month/year flags;
# rows with card == False are forced to False
df['polio0_card'] = df[['polio0day', 'polio0month','polio0yr']].max(axis=1)
df.loc[df['card'] == False, 'polio0_card'] = False

# Recall evidence from IM17: code 1 counts as received, 2-7 as not, 8/9 as missing
# NOTE(review): confirm the IM17 code meanings against the questionnaire
df['polio0_recall1'] = df['IM17'].replace({1: 1, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'polio0_recall1'] = False

# Received according to either source, then multiplied by 'chweight'
df['polio0'] = df[['polio0_card','polio0_recall1']].max(axis=1)
df['polio0_sampled']=df['polio0']*df['chweight']

In [78]:
#Polio1
# Card evidence: row-wise max of the recoded day/month/year flags;
# rows with card == False are forced to False
df['polio1_card'] = df[['polio1day', 'polio1month','polio1yr']].max(axis=1)
df.loc[df['card'] == False, 'polio1_card'] = False

# Recall evidence from IM18: any code 1-7 counts as received, 8/9 as missing
# NOTE(review): presumably IM18 is the number of polio doses recalled - confirm
df['polio1_recall1'] = df['IM18'].replace({1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'polio1_recall1'] = False

# Received according to either source, then multiplied by 'chweight'
df['polio1'] = df[['polio1_card','polio1_recall1']].max(axis=1)
df['polio1_sampled']=df['polio1']*df['chweight']

In [79]:
#Polio2
# Card evidence: row-wise max of the recoded day/month/year flags;
# rows with card == False are forced to False
df['polio2_card'] = df[['polio2day', 'polio2month','polio2yr']].max(axis=1)
df.loc[df['card'] == False, 'polio2_card'] = False

# Recall evidence from IM18: code 1 -> 0, codes 2-7 -> 1, 8/9 -> missing
# NOTE(review): presumably IM18 is the number of polio doses recalled - confirm
df['polio2_recall'] = df['IM18'].replace({1: 0, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'polio2_recall'] = False

# Received according to either source, then multiplied by 'chweight'
df['polio2'] = df[['polio2_card','polio2_recall']].max(axis=1)
df['polio2_sampled']=df['polio2']*df['chweight']

In [80]:
#Polio3
# Card evidence: row-wise max of the recoded day/month/year flags;
# rows with card == False are forced to False
df['polio3_card'] = df[['polio3day', 'polio3month','polio3yr']].max(axis=1)
df.loc[df['card'] == False, 'polio3_card'] = False

# Recall evidence from IM18: codes 1-2 -> 0, codes 3-7 -> 1, 8/9 -> missing
# NOTE(review): presumably IM18 is the number of polio doses recalled - confirm
df['polio3_recall'] = df['IM18'].replace({1: 0, 2: 0, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'polio3_recall'] = False

# Received according to either source, then multiplied by 'chweight'
df['polio3'] = df[['polio3_card','polio3_recall']].max(axis=1)
df['polio3_sampled']=df['polio3']*df['chweight']

# Penta

In [68]:
#penta1

# Card evidence: row-wise max of the recoded day/month/year flags;
# rows with card == False are forced to False
df['penta1_card'] = df[['penta1day', 'penta1month','penta1yr']].max(axis=1)
df.loc[df['card'] == False, 'penta1_card'] = False

# Recall evidence from IM21: any code 1-7 counts as received, 8/9 as missing
# NOTE(review): presumably IM21 is the number of penta doses recalled - confirm
df['penta1_recall'] = df['IM21'].replace({1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
# The override must target 'penta1_recall', the column combined below;
# writing to any other column leaves the recall flag untouched.
df.loc[df['recall'] == False, 'penta1_recall'] = False

# Received according to either source, then multiplied by 'chweight'
df['penta1'] = df[['penta1_card', 'penta1_recall']].max(axis=1)
df['penta1_sampled']=df['penta1']*df['chweight']

In [69]:
# penta2
# Card evidence: row-wise max of the recoded day/month/year flags;
# rows with card == False are forced to False
df['penta2_card'] = df[['penta2day', 'penta2month','penta2yr']].max(axis=1)
df.loc[df['card'] == False, 'penta2_card'] = False

# Recall evidence from IM21: code 1 -> 0, codes 2-7 -> 1, 8/9 -> missing
# NOTE(review): presumably IM21 is the number of penta doses recalled - confirm
df['penta2_recall'] = df['IM21'].replace({1: 0, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
# The override must target 'penta2_recall', the column combined below;
# writing to any other column leaves the recall flag untouched.
df.loc[df['recall'] == False, 'penta2_recall'] = False

# Received according to either source, then multiplied by 'chweight'
df['penta2'] = df[['penta2_card', 'penta2_recall']].max(axis=1)
df['penta2_sampled']=df['penta2']*df['chweight']

In [70]:
# penta3
# Card evidence: row-wise max of the recoded day/month/year flags;
# rows with card == False are forced to False
df['penta3_card'] = df[['penta3day', 'penta3month','penta3yr']].max(axis=1)
df.loc[df['card'] == False, 'penta3_card'] = False

# Recall evidence from IM21: codes 1-2 -> 0, codes 3-7 -> 1, 8/9 -> missing
# NOTE(review): presumably IM21 is the number of penta doses recalled - confirm
df['penta3_recall'] = df['IM21'].replace({1: 0, 2: 0, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
# The override must target 'penta3_recall', the column combined below;
# writing to any other column leaves the recall flag untouched.
df.loc[df['recall'] == False, 'penta3_recall'] = False

# Received according to either source, then multiplied by 'chweight'
df['penta3'] = df[['penta3_card', 'penta3_recall']].max(axis=1)
df['penta3_sampled']=df['penta3']*df['chweight']

# Yellow Fever

# PCV

In [71]:
#pcv1
# Card evidence: row-wise max of the recoded day/month/year flags;
# rows with card == False are forced to False
df['pcv1_card'] = df[['pcv1day', 'pcv1month','pcv1yr']].max(axis=1)
df.loc[df['card'] == False, 'pcv1_card'] = False

# Recall evidence from IM23: any code 1-7 counts as received, 8/9 as missing
# NOTE(review): presumably IM23 is the number of PCV doses recalled - confirm
df['pcv1_recall1'] = df['IM23'].replace({1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'pcv1_recall1'] = False

# Received according to either source, then multiplied by 'chweight'
df['pcv1'] = df[['pcv1_card','pcv1_recall1']].max(axis=1)
df['pcv1_sampled']=df['pcv1']*df['chweight']




In [72]:
#pcv2
# Card evidence: row-wise max of the recoded day/month/year flags;
# rows with card == False are forced to False
df['pcv2_card'] = df[['pcv2day', 'pcv2month','pcv2yr']].max(axis=1)
df.loc[df['card'] == False, 'pcv2_card'] = False

# Recall evidence from IM23: code 1 -> 0, codes 2-7 -> 1, 8/9 -> missing
# NOTE(review): presumably IM23 is the number of PCV doses recalled - confirm
df['pcv2_recall'] = df['IM23'].replace({1: 0, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'pcv2_recall'] = False

# Received according to either source, then multiplied by 'chweight'
df['pcv2'] = df[['pcv2_card','pcv2_recall']].max(axis=1)
df['pcv2_sampled']=df['pcv2']*df['chweight']

In [73]:

#pcv3
# Card evidence: row-wise max of the recoded day/month/year flags;
# rows with card == False are forced to False
df['pcv3_card'] = df[['pcv3day', 'pcv3month','pcv3yr']].max(axis=1)
df.loc[df['card'] == False, 'pcv3_card'] = False

# Recall evidence from IM23: codes 1-2 -> 0, codes 3-7 -> 1, 8/9 -> missing
# NOTE(review): presumably IM23 is the number of PCV doses recalled - confirm
df['pcv3_recall'] = df['IM23'].replace({1: 0, 2: 0, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'pcv3_recall'] = False

# Received according to either source, then multiplied by 'chweight'
df['pcv3'] = df[['pcv3_card','pcv3_recall']].max(axis=1)
df['pcv3_sampled']=df['pcv3']*df['chweight']

# IPV

In [74]:
# IPV is derived from card evidence only: row-wise max of the recoded
# day/month/year flags, with card == False rows forced to False
df['ipv'] = df[['ipvday', 'ipvmonth','ipvyr']].max(axis=1)
df.loc[df['card'] == False, 'ipv'] = False

# Disabled recall branch. NOTE(review): it reads IM23, the same variable the
# PCV cells use - confirm the correct recall variable before enabling.
#df['ipv_recall'] = df['IM23'].replace({1: 0, 2: 0, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
#df.loc[df['recall'] == False, 'ipv_recall'] = False

#df['ipv'] = df[['ipv_card','ipv_recall']].max(axis=1)
# Indicator multiplied by 'chweight'
df['ipv_sampled']=df['ipv']*df['chweight']

# mcv

In [75]:
# Card evidence: row-wise max of the recoded day/month/year flags;
# rows with card == False are forced to False
df['mcv_card'] = df[['mcvday', 'mcvmonth','mcvyr']].max(axis=1)
df.loc[df['card'] == False, 'mcv_card'] = False

# Recall evidence from IM26: 1 -> 1, 2 -> 0, 8/9 -> missing
df['mcv_recall'] = df['IM26'].replace({1: 1, 2: 0, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'mcv_recall'] = False

# Received according to either source, then multiplied by 'chweight'
df['mcv'] = df[['mcv_card','mcv_recall']].max(axis=1)
df['mcv_sampled']=df['mcv']*df['chweight']

# Vaccination Calculations

In [81]:
# Coverage per vaccine from the columns pre-multiplied by 'chweight'
calculate_weighted_percentage(df, 'bcg_sampled')
calculate_weighted_percentage(df, 'penta1_sampled')
calculate_weighted_percentage(df, 'penta3_sampled')
calculate_weighted_percentage(df, 'mcv_sampled')
calculate_weighted_percentage(df, 'polio0_sampled')
calculate_weighted_percentage(df, 'polio1_sampled')
calculate_weighted_percentage(df, 'polio3_sampled')

# Yellow fever is disabled: no 'yf_sampled' column is built in this notebook
#calculate_weighted_percentage(df, 'yf_sampled')
calculate_weighted_percentage(df, 'pcv1_sampled')
calculate_weighted_percentage(df, 'pcv3_sampled')

calculate_weighted_percentage(df, 'ipv_sampled')

Weighted percentage of 'bcg_sampled' if 'UB2' is 1: 79.5
Weighted percentage of 'penta1_sampled' if 'UB2' is 1: 72.8
Weighted percentage of 'penta3_sampled' if 'UB2' is 1: 54.8
Weighted percentage of 'mcv_sampled' if 'UB2' is 1: 60.6
Weighted percentage of 'polio0_sampled' if 'UB2' is 1: 77.3
Weighted percentage of 'polio1_sampled' if 'UB2' is 1: 68.3
Weighted percentage of 'polio3_sampled' if 'UB2' is 1: 51.3
Weighted percentage of 'pcv1_sampled' if 'UB2' is 1: 67.7
Weighted percentage of 'pcv3_sampled' if 'UB2' is 1: 51.2
Weighted percentage of 'ipv_sampled' if 'UB2' is 1: 38.5


In [32]:
# Coverage per vaccine from the raw indicator columns (no weighting applied)
calculate_weighted_percentage(df, 'bcg')
calculate_weighted_percentage(df, 'polio1')
calculate_weighted_percentage(df, 'polio3')
calculate_weighted_percentage(df, 'penta1')
calculate_weighted_percentage(df, 'penta3')
# Yellow fever is disabled: no yellow fever column is built in this notebook
#calculate_weighted_percentage(df, 'yf_sampled')
calculate_weighted_percentage(df, 'pcv1')
calculate_weighted_percentage(df, 'pcv3')
calculate_weighted_percentage(df, 'mcv')
calculate_weighted_percentage(df, 'ipv')

Weighted percentage of 'bcg' if 'UB2' is 1: 78.0
Weighted percentage of 'polio1' if 'UB2' is 1: 66.2
Weighted percentage of 'polio3' if 'UB2' is 1: 49.1
Weighted percentage of 'penta1' if 'UB2' is 1: 70.9
Weighted percentage of 'penta3' if 'UB2' is 1: 52.6
Weighted percentage of 'pcv1' if 'UB2' is 1: 66.0
Weighted percentage of 'pcv3' if 'UB2' is 1: 49.4
Weighted percentage of 'measles' if 'UB2' is 1: 59.2
Weighted percentage of 'ipv' if 'UB2' is 1: 37.8


In [82]:
# Rename the stratifier columns to the names the coverage helpers filter on
# ('gender', 'residence', 'district', 'region').
# NOTE(review): 'division' becomes 'district' and 'HH7' becomes 'region'; the
# labels used below suggest 'division' holds divisions and HH7 presumably
# holds districts - confirm this naming is intended.
df = df.rename(columns={'HL4': 'gender','HH6':'residence','division':'district','HH7':'region',})

In [85]:
# Column labels for every variable, as stored in the file metadata
variable_labels = meta.column_labels

# Code -> text labels for "division" (looked up under its original name in
# the metadata, which the DataFrame rename does not touch)
value_labels = meta.variable_value_labels.get("division")
value_labels

{1.0: 'Hyderabad',
 2.0: 'Karachi',
 3.0: 'Larkana',
 4.0: 'Mirpurkhas',
 5.0: 'Shaheed Benazirabad',
 6.0: 'Sukkur'}

In [84]:
# Frequency of the raw residence codes before they are mapped to labels
df['residence'].value_counts()

residence
2.0    10480
1.0     7832
Name: count, dtype: int64

In [86]:
# Code -> name lookup applied to the 'district' column; the entries match the
# value labels stored for "division" in the file metadata
district_mapping={1.0: 'Hyderabad',
 2.0: 'Karachi',
 3.0: 'Larkana',
 4.0: 'Mirpurkhas',
 5.0: 'Shaheed Benazirabad',
 6.0: 'Sukkur'}

In [36]:
# Code -> name lookup applied to the 'region' column.
# The names must stay in sync with the `regions` list used for tabulation,
# so any spelling change has to be made in both places.
region_mapping = {1.0: 'Kashmore',
 2.0: 'Jacobabad',
 3.0: 'Shahdad Kot',
 4.0: 'Larkana',
 5.0: 'Shikarpur',
 6.0: 'Ghotki',
 7.0: 'Sukkur',
 8.0: 'Khairpur',
 9.0: 'Naushahro Feroze',
 10.0: 'Shaheed Benazirabad',
 11.0: 'Dadu',
 12.0: 'Jamshoro',
 13.0: 'Hyderabad',
 14.0: 'Matiari',
 15.0: 'Tando Allahyar',
 16.0: 'Tando Muhmmad Khan',
 17.0: 'Badin',
 18.0: 'Sujawal',
 19.0: 'Thatta',
 20.0: 'Sanghar',
 21.0: 'MirpurK Khas',
 22.0: 'Umer Kot',
 23.0: 'Tharparkar',
 24.0: 'Karachi Malir',
 25.0: 'Karachi East',
 26.0: 'Karachi Central',
 27.0: 'Karachi West',
 28.0: 'Karachi South',
 29.0: 'Karachi Korangi'}
# Code -> label lookups for the 'residence' and 'gender' columns
residence_mapping = {1.0: 'Urban', 2.0: 'Rural'}
gender_mapping = {1.0: 'Male', 2.0: 'Female'}

In [87]:
# Replace the numeric codes in the four stratifier columns with their labels;
# values that have no entry in a mapping are left unchanged by replace()
df['gender'] = df['gender'].replace(gender_mapping)
df['region'] = df['region'].replace(region_mapping)
df['residence'] = df['residence'].replace(residence_mapping)
df['district']=df['district'].replace(district_mapping)

In [88]:
# Check that every district code was replaced by a label
df['district'].value_counts()

district
Hyderabad              5255
Larkana                3679
Karachi                3642
Sukkur                 2129
Mirpurkhas             1940
Shaheed Benazirabad    1667
Name: count, dtype: int64

In [91]:
# Indicator columns pre-multiplied by 'chweight' (the *_sampled variants)
vaccines = ['bcg_sampled','polio0_sampled','polio1_sampled','polio3_sampled','penta1_sampled','penta3_sampled','pcv1_sampled','pcv3_sampled','mcv_sampled','ipv_sampled']

In [92]:
# Raw indicator columns for the same vaccines (not multiplied by 'chweight')
vaccines1 = ['bcg','polio0','polio1','polio3','penta1','penta3','pcv1','pcv3','mcv','ipv']

In [93]:
# Region labels to tabulate; spelled exactly as the values of region_mapping
regions=['Kashmore','Jacobabad','Shahdad Kot','Larkana','Shikarpur','Ghotki','Sukkur','Khairpur','Naushahro Feroze',
'Shaheed Benazirabad','Dadu','Jamshoro','Hyderabad','Matiari','Tando Allahyar','Tando Muhmmad Khan','Badin',
'Sujawal','Thatta','Sanghar','MirpurK Khas','Umer Kot','Tharparkar','Karachi Malir','Karachi East','Karachi Central','Karachi West',
'Karachi South','Karachi Korangi']

In [94]:
# Gender labels to tabulate; spelled exactly as the values of gender_mapping
genders = ['Female','Male']

In [95]:
# Residence labels to tabulate; spelled exactly as the values of residence_mapping
residences=['Urban','Rural']

In [96]:
# District labels to tabulate; spelled exactly as the values of district_mapping
districts=['Hyderabad','Karachi','Larkana','Mirpurkhas','Shaheed Benazirabad','Sukkur']

In [97]:
# Check that every gender code was replaced by a label
df['gender'].value_counts()

gender
Male      9471
Female    8841
Name: count, dtype: int64

In [98]:
# One record per (vaccine, gender) pair, using the chweight-multiplied columns
results = [
    {
        'Vaccine': vaccine,
        'gender': gender,
        'Coverage': calculate_vaccination_coverage_child_gender(df, gender, vaccine),
    }
    for vaccine in vaccines
    for gender in genders
]

# Tabulate with vaccines as rows and genders as columns, then display
results_df = pd.DataFrame(results)
pivot_table1 = results_df.pivot_table(index='Vaccine', columns='gender', values='Coverage')
pivot_table1

gender,Female,Male
Vaccine,Unnamed: 1_level_1,Unnamed: 2_level_1
bcg_sampled,79.3,79.6
ipv_sampled,37.7,39.2
mcv_sampled,60.1,61.1
pcv1_sampled,67.6,67.7
pcv3_sampled,50.5,51.7
penta1_sampled,73.1,72.5
penta3_sampled,54.1,55.5
polio0_sampled,78.3,76.5
polio1_sampled,68.3,68.3
polio3_sampled,51.7,50.9


In [99]:
# One record per (vaccine, residence) pair, using the chweight-multiplied columns
results1 = [
    {
        'Vaccine': vaccine,
        'residence': residence,
        'Coverage': calculate_vaccination_coverage_child_residence(df, residence, vaccine),
    }
    for vaccine in vaccines
    for residence in residences
]

# Tabulate with vaccines as rows and residence types as columns, then display
results1_df = pd.DataFrame(results1)
pivot_table1 = results1_df.pivot_table(index='Vaccine', columns='residence', values='Coverage')
pivot_table1

residence,Rural,Urban
Vaccine,Unnamed: 1_level_1,Unnamed: 2_level_1
bcg_sampled,70.5,90.8
ipv_sampled,29.8,49.8
mcv_sampled,52.9,70.3
pcv1_sampled,56.4,81.9
pcv3_sampled,41.1,63.9
penta1_sampled,62.5,85.7
penta3_sampled,44.4,67.9
polio0_sampled,68.2,88.8
polio1_sampled,58.1,81.0
polio3_sampled,42.6,62.1


In [46]:
# One record per (vaccine, region) pair, using the raw indicator columns in vaccines1
results2 = [
    {
        'Vaccine': vaccine,
        'region': region,
        'Coverage': calculate_vaccination_coverage_region(df, region, vaccine),
    }
    for vaccine in vaccines1
    for region in regions
]

# Tabulate with vaccines as rows and regions as columns, then display
results2_df = pd.DataFrame(results2)
pivot_table2 = results2_df.pivot_table(index='Vaccine', columns='region', values='Coverage')
pivot_table2
# Optional Excel exports, left disabled:
#pivot_table2.to_excel('/path/to/directory/pivot_table2_sorted.xlsx')
#pivot_table2.to_excel('Vaccinebyregion.xlsx', index=False)

region,Badin,Dadu,Ghotki,Hyderabad,Jacobabad,Jamshoro,Karachi Central,Karachi East,Karachi Korangi,Karachi Malir,...,Shahdad Kot,Shaheed Benazirabad,Shikarpur,Sujawal,Sukkur,Tando Allahyar,Tando Muhmmad Khan,Tharparkar,Thatta,Umer Kot
Vaccine,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
bcg,62.0,80.2,78.4,88.3,56.6,62.0,96.9,90.8,77.4,79.5,...,78.2,72.7,83.5,38.0,74.6,83.0,76.4,87.2,77.9,84.4
ipv,0.0,46.2,39.4,54.5,12.2,18.9,52.3,52.3,48.5,49.1,...,33.9,34.9,20.7,14.0,31.8,42.3,33.0,65.2,35.6,22.1
measles,29.3,65.4,61.9,81.1,35.5,40.5,78.1,67.1,61.3,63.2,...,75.0,61.4,62.6,34.0,60.1,62.0,59.6,87.2,69.7,50.0
pcv1,40.7,58.8,62.0,81.7,48.7,51.3,92.6,79.5,69.5,73.9,...,72.4,52.3,72.2,30.0,68.6,50.0,66.3,89.4,50.0,77.5
pcv3,11.0,47.5,44.5,71.6,31.6,25.6,74.7,60.3,61.0,58.3,...,57.7,44.3,50.4,20.0,45.3,42.9,38.2,74.5,42.5,40.4
penta1,54.9,66.7,69.6,82.7,50.0,57.9,91.7,82.7,72.8,79.1,...,79.7,54.0,75.7,34.0,71.6,49.5,72.7,93.5,56.3,85.6
penta3,18.7,54.3,47.8,74.5,32.9,28.9,78.1,62.0,63.1,64.3,...,56.9,44.8,49.6,20.0,47.8,42.3,43.2,79.3,42.9,37.8
polio1,53.8,63.0,59.0,85.6,32.4,54.4,92.7,77.7,67.6,69.6,...,77.2,46.6,72.2,32.0,70.6,49.5,64.0,75.4,43.3,78.9
polio3,31.9,49.4,48.2,71.2,21.6,35.4,77.1,60.1,51.4,58.3,...,43.1,36.4,40.0,14.0,41.9,43.4,48.3,69.6,39.2,43.3


In [100]:
# One record per (vaccine, district) pair, using the raw indicator columns in vaccines1
results2 = [
    {
        'Vaccine': vaccine,
        'district': district,
        'Coverage': calculate_vaccination_coverage_district(df, district, vaccine),
    }
    for vaccine in vaccines1
    for district in districts
]

# Tabulate with vaccines as rows and districts as columns, then display
results2_df = pd.DataFrame(results2)
pivot_table2 = results2_df.pivot_table(index='Vaccine', columns='district', values='Coverage')
pivot_table2

district,Hyderabad,Karachi,Larkana,Mirpurkhas,Shaheed Benazirabad,Sukkur
Vaccine,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
bcg,74.3,86.7,72.7,81.0,77.1,78.2
ipv,34.4,50.4,27.8,43.6,40.2,35.5
mcv,58.4,65.9,48.5,70.4,63.6,55.5
pcv1,56.6,78.8,61.6,74.7,63.8,66.4
pcv3,40.8,62.7,44.3,57.5,51.4,45.7
penta1,62.0,81.4,65.9,81.2,70.7,72.8
penta3,43.7,65.8,47.4,59.9,54.9,50.6
polio0,72.2,85.1,71.7,79.6,72.8,77.4
polio1,58.6,77.4,62.0,72.0,63.1,68.3
polio3,46.2,62.1,36.8,57.5,48.4,48.5


In [102]:
# Copy the most recently built pivot_table2, with row and column labels, to
# the system clipboard for pasting elsewhere
pivot_table2.to_clipboard(index=True, header=True)

## Retention Rates

In [103]:
def calculate_retention_rate(df, group_col, dose1_col, dose2_col):
    """
    Calculate, per group, the later-dose total as a percentage of the first-dose total.

    Parameters:
    df (pandas.DataFrame): Input DataFrame containing vaccination data.
    group_col (str): Column name for the grouping variable.
    dose1_col (str): Column name of the earlier dose (the denominator).
    dose2_col (str): Column name of the later dose (the numerator).

    Returns:
    pandas.DataFrame: A pivot table with a single 'retention_rate' row and one
    column per value of group_col.
    """
    # Per-group totals of both dose columns
    dose_totals = (
        df.groupby([group_col])
        .agg(
            sum_dose1=(dose1_col, 'sum'),
            sum_dose2=(dose2_col, 'sum'),
        )
        .reset_index()
    )

    # Retention = 100 * (1 - share of first-dose recipients lost before the later dose)
    lost = dose_totals['sum_dose1'] - dose_totals['sum_dose2']
    dose_totals['retention_rate'] = (1 - (lost / dose_totals['sum_dose1'])) * 100

    return dose_totals.pivot_table(columns=group_col, values='retention_rate')
  

In [104]:
 # Subset of rows with UB2 == 1, the same filter the coverage helpers apply
 filtered_df = df[(df['UB2']==1)]

In [107]:
# penta1 -> penta3 retention per district, from the raw indicator columns
result_df = calculate_retention_rate(filtered_df, 'district', 'penta1', 'penta3')

# Copy the result to the clipboard
result_df.to_clipboard(index=True, header=True)


In [109]:
# polio1 -> polio3 retention per district, from the raw indicator columns
result_df = calculate_retention_rate(filtered_df, 'district', 'polio1', 'polio3')

# Copy the result to the clipboard
result_df.to_clipboard(index=True, header=True)


In [52]:
# penta1 -> penta3 retention per residence type, from the chweight-multiplied columns
calculate_retention_rate(filtered_df, 'residence', 'penta1_sampled', 'penta3_sampled')

residence,Rural,Urban
retention_rate,70.982448,79.205873


In [53]:
# polio1 -> polio3 retention per residence type, from the chweight-multiplied columns
calculate_retention_rate(filtered_df, 'residence', 'polio1_sampled', 'polio3_sampled')

residence,Rural,Urban
retention_rate,73.351087,76.686548


In [54]:
# penta1 -> penta3 retention per gender, from the chweight-multiplied columns
calculate_retention_rate(filtered_df, 'gender', 'penta1_sampled', 'penta3_sampled')

gender,Female,Male
retention_rate,73.95607,76.451124


In [55]:
# polio1 -> polio3 retention per gender, from the chweight-multiplied columns
calculate_retention_rate(filtered_df, 'gender', 'polio1_sampled', 'polio3_sampled')

gender,Female,Male
retention_rate,75.796018,74.501835


## National retention rate

In [111]:
# Overall penta1 -> penta3 retention over all UB2 == 1 rows (raw indicators)
penta1_total = filtered_df['penta1'].sum()
penta3_total = filtered_df['penta3'].sum()
penta_retention_rate = (1 - ((penta1_total - penta3_total) / penta1_total)) * 100
penta_retention_rate

74.15254237288136

In [110]:
# Overall polio1 -> polio3 retention over all UB2 == 1 rows (raw indicators)
polio1_total = filtered_df['polio1'].sum()
polio3_total = filtered_df['polio3'].sum()
polio_retention_rate = (1 - ((polio1_total - polio3_total) / polio1_total)) * 100
polio_retention_rate

74.27993936331481

In [None]:
# Fully vaccinated

In [113]:
# Keep only rows that have a value for all six indicators; filtered_df is
# overwritten, so later cells that use it work on this reduced set
filtered_df = filtered_df.dropna(subset=['bcg', 'polio3', 'mcv', 'penta3','polio1','penta1'])

In [114]:
# A child counts as fully vaccinated when bcg, polio3, mcv and penta3 all equal 1
required_doses = ['bcg', 'polio3', 'mcv', 'penta3']
has_all_doses = (filtered_df[required_doses] == 1).all(axis=1)
fully_vaccinated = filtered_df[has_all_doses]
num_fully_vaccinated = len(fully_vaccinated)

# Denominator: every row currently in filtered_df
total_kids = len(filtered_df)

# Share of fully vaccinated children, in percent (row counts, no weighting)
percentage_fully_vaccinated = (num_fully_vaccinated / total_kids) * 100
percentage_fully_vaccinated

40.027045300878974

## Zero-dose children (Penta1)

In [None]:
def calculate_zero_dose_proportion(df, vaccine_column):
    """Return the percentage of children whose ``vaccine_column`` equals 0.

    Parameters:
    df (pandas.DataFrame): Data containing ``vaccine_column``.
    vaccine_column (str): Indicator column (0 = not received).

    Returns:
    float: 100 * (rows equal to 0) / (rows with a non-missing value), or NaN
    when the column has no non-missing value.
    """
    doses = df[vaccine_column]

    # Children recorded as not having received the vaccine
    zero_dose_children = (doses == 0).sum()

    # Denominator comes from the same column as the numerator, so the function
    # also works for vaccines other than penta1
    total_children = doses.count()
    if total_children == 0:
        return float('nan')

    return (zero_dose_children / total_children) * 100

In [None]:
# Overall share of rows in filtered_df with penta1 == 0, printed with two decimals
zero_dose_proportion = calculate_zero_dose_proportion(filtered_df, 'penta1')
print(f"Proportion of zero dose children: {zero_dose_proportion:.2f}%")


In [None]:
def calculate_zero_dose_percentage_by_group(df, vaccine_column, group_column):
    """Return, per group, the percentage of rows whose ``vaccine_column`` equals 0.

    Parameters:
    df (pandas.DataFrame): Data containing both columns.
    vaccine_column (str): Indicator column (0 = not received).
    group_column (str): Column to group by.

    Returns:
    pandas.Series: Percentage per group, rounded to one decimal place and
    indexed by the group values. The denominator is the number of rows in the
    group, including rows where ``vaccine_column`` is missing.
    """
    # Rows per group
    total_children_by_group = df.groupby(group_column).size()

    # Zero-dose rows per group. Groups with no zero-dose row are absent from
    # this count, so they are added back as 0 rather than left to become NaN
    # in the division below.
    zero_dose_children_by_group = (
        df[df[vaccine_column] == 0]
        .groupby(group_column)
        .size()
        .reindex(total_children_by_group.index, fill_value=0)
    )

    zero_dose_percentage_by_group = round((zero_dose_children_by_group / total_children_by_group) * 100,1)

    return zero_dose_percentage_by_group

In [None]:
# Share of penta1 == 0 rows per district.
# NOTE(review): this passes the full df, not the UB2 == 1 subset (filtered_df)
# used by the other indicators - confirm which population is intended.
zero_dose_percentage_by_group = calculate_zero_dose_percentage_by_group(df, 'penta1', 'district')

print(zero_dose_percentage_by_group)

In [None]:
# Share of penta1 == 0 rows per residence type.
# NOTE(review): this passes the full df, not the UB2 == 1 subset (filtered_df)
# used by the other indicators - confirm which population is intended.
zero_dose_percentage_by_group = calculate_zero_dose_percentage_by_group(df, 'penta1', 'residence')

print(zero_dose_percentage_by_group)

In [None]:
# Share of penta1 == 0 rows per gender.
# NOTE(review): this passes the full df, not the UB2 == 1 subset (filtered_df)
# used by the other indicators - confirm which population is intended.
zero_dose_percentage_by_group = calculate_zero_dose_percentage_by_group(df, 'penta1', 'gender')

print(zero_dose_percentage_by_group)

In [None]:
# Zero-dose children: not vaccinated at all

In [116]:
# A child counts as unvaccinated when bcg, polio1, mcv and penta1 all equal 0
first_doses = ['bcg', 'polio1', 'mcv', 'penta1']
has_no_dose = (filtered_df[first_doses] == 0).all(axis=1)
no_vaccinated = filtered_df[has_no_dose]
num_no_vaccinated = len(no_vaccinated)

# Denominator: every row currently in filtered_df
total_kids = len(filtered_df)

# Share of unvaccinated children, in percent (row counts, no weighting)
percentage_no_vaccinated = (num_no_vaccinated / total_kids) * 100
percentage_no_vaccinated

17.68086544962813