# Pakistan Sindh Province MICS 2014

In [1]:
import pyreadstat
import warnings
warnings.filterwarnings("ignore")
from googletrans import Translator
from googletrans import Translator
import pandas as pd
import numpy as np

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
def calculate_weighted_percentage(df, column_name):
    filtered_df = df[df['AG2'] == 1]
    weighted_percentage = round(filtered_df[column_name].mean() * 100,1)
    print("Weighted percentage of '{}' if 'AG2' is 1: {}".format(column_name, weighted_percentage))

In [3]:
# Function to calculate vaccination coverage for each child_residence between specific age group
def calculate_vaccination_coverage_child_residence(df, residence, vaccine_column):
    filtered_df = df[(df['AG2']==1) & (df['residence']== residence)]

    # Calculate the percentage of children vaccinated with the specified vaccine
    # Count non-null values in the vaccine_column
    weighted_percentage = round(filtered_df[vaccine_column].mean() * 100,1)
    #print("Weighted percentage of '{}' if 'AG2' is 1: {}".format(vaccine_column, weighted_percentage))                  
    return weighted_percentage


In [4]:
# Function to calculate vaccination coverage for each region between specific age group
def calculate_vaccination_coverage_region(df, region, vaccine_column):
    # Filter rows where child_age is between min_age and max_age and region matches
    filtered_df = df[(df['AG2']==1) & (df['region'] == region)]

    # Calculate the percentage of children vaccinated with the specified vaccine
    total_children = filtered_df[vaccine_column].count()  # Count non-null values in the vaccine_column
    if total_children == 0:
        print("No children found for the specified age range and region: {}.".format(region))
        return 0
    percentage_vaccinated = round(filtered_df[vaccine_column].mean() * 100,1)
    #print("{} Vaccination Coverage for children in {} between {} to {} months using dataset: {:.2f}%".format(vaccine_column, region, min_age, max_age, percentage_vaccinated))
    return percentage_vaccinated

In [5]:
# Function to calculate vaccination coverage for each child_gender between specific age group
def calculate_vaccination_coverage_child_gender(df, gender, vaccine_column):
    filtered_df = df[(df['AG2']==1) & (df['gender']== gender)]

    # Calculate the percentage of children vaccinated with the specified vaccine
    # Count non-null values in the vaccine_column
    weighted_percentage = round(filtered_df[vaccine_column].mean() * 100,1)
    #print("Weighted percentage of '{}' if 'AG2' is 1: {}".format(vaccine_column, weighted_percentage))                  
    return weighted_percentage


In [6]:
def calculate_vaccination_coverage_district(df, district, vaccine_column):
    # Filter rows where district matches
    filtered_df = df[(df['AG2']==1) & (df['district'] == district)]
    total_children = filtered_df[vaccine_column].count()  # Count non-null values in the vaccine_column
    if total_children == 0:
        print("No children found for the specified district: {}.".format(district))
        return 0
    percentage_vaccinated = round(filtered_df[vaccine_column].mean() * 100, 1)
    return percentage_vaccinated

# Reading the dataframe

In [7]:
df, meta = pyreadstat.read_sav("C:/Users/swalke/Desktop/RISP/PAK Sindh 2014.sav")

In [8]:
# Access variable labels
variable_labels = meta.column_labels

# Access value labels for a specific variable
value_labels = meta.variable_value_labels.get("IM3BD")
value_labels

{0.0: 'Not given',
 44.0: 'Marked on card',
 66.0: 'Mother reported',
 97.0: 'Inconsistent',
 98.0: 'DK',
 99.0: 'Missing'}

In [9]:
df['IM3BY'].value_counts()

IM3BY
2013.0    1313
2012.0     673
2014.0     486
2011.0     238
4444.0      93
0.0         65
6666.0      16
9998.0      12
9997.0       6
Name: count, dtype: int64

In [10]:
# Access variable labels
variable_labels = meta.column_labels

# Access value labels for a specific variable
value_labels = meta.variable_value_labels.get("IM6")
value_labels

{1.0: 'Yes', 2.0: 'No', 8.0: 'DK', 9.0: 'Missing'}

In [11]:
# Define mapping variables
day_mapping = {range(1, 67): True,97.0:True, 98.0:np.nan, 99: np.nan, 0.0:False}
month_mapping ={range(1, 67): True, 97.07: True,97.0:True, 98.0:np.nan, 99: np.nan, 0.0:False}
year_mapping = {2011:True, 2012:True, 2013:True, 2014:True, 2015:True, 2016:True, 2017:True, 2018:True, 2019:True, 4444:True, 6666:True, 9997.0:True,9998.0:np.nan, 9999:np.nan,0.0:False}

In [12]:
df['card'] = df['IM2'].replace({1:True, 2:False, 3:False, 4:False, 9:np.nan})

In [13]:
df['recall'] = df['IM6'].replace({1:True, 2: False, 8: np.nan, 9: np.nan})

In [36]:
df['bcgday'] = df['IM3BD'].replace(day_mapping)
df['bcgmonth'] = df['IM3BM'].replace(month_mapping)
df['bcgyr'] = df['IM3BY'].replace(year_mapping)

df['polio0day'] = df['IM3P0D'].replace(day_mapping)
df['polio0month'] = df['IM3P0M'].replace(month_mapping)
df['polio0yr'] = df['IM3P0Y'].replace(year_mapping)


df['polio1day'] = df['IM3P1D'].replace(day_mapping)
df['polio1month'] = df['IM3P1M'].replace(month_mapping)
df['polio1yr'] = df['IM3P1Y'].replace(year_mapping)

df['polio2day'] = df['IM3P2D'].replace(day_mapping)
df['polio2month'] = df['IM3P2M'].replace(month_mapping)
df['polio2yr'] = df['IM3P2Y'].replace(year_mapping)


df['polio3day'] = df['IM3P3D'].replace(day_mapping)
df['polio3month'] = df['IM3P3M'].replace(month_mapping)
df['polio3yr'] = df['IM3P3Y'].replace(year_mapping)


#df['ipvday'] = df['IM3ID'].replace(day_mapping)
#df['ipvmonth'] = df['IM3IM'].replace(month_mapping)
#df['ipvyr'] = df['IM3IY'].replace(year_mapping)


df['penta1day'] = df['IM3T1D'].replace(day_mapping)
df['penta1month'] = df['IM3T1M'].replace(month_mapping)
df['penta1yr'] = df['IM3T1Y'].replace(year_mapping)


df['penta2day'] = df['IM3T2D'].replace(day_mapping)
df['penta2month'] = df['IM3T2M'].replace(month_mapping)
df['penta2yr'] = df['IM3T2Y'].replace(year_mapping)


df['penta3day'] = df['IM3T3D'].replace(day_mapping)
df['penta3month'] = df['IM3T3M'].replace(month_mapping)
df['penta3yr'] = df['IM3T3Y'].replace(year_mapping)

df['mcvday'] = df['IM3M1D'].replace(day_mapping)
df['mcvmonth'] = df['IM3M1M'].replace(month_mapping)
df['mcvyr'] = df['IM3M1Y'].replace(year_mapping)


#df['yfday'] = df['IM3YD'].replace(day_mapping)
#df['yfmonth'] = df['IM3YM'].replace(month_mapping)
#df['yfyr'] = df['IM3YY'].replace(year_mapping)

df['pcv1day'] = df['IM3C1D'].replace(day_mapping)
df['pcv1month'] = df['IM3C1M'].replace(month_mapping)
df['pcv1yr'] = df['IM3C1Y'].replace(year_mapping)

df['pcv2day'] = df['IM3C2D'].replace(day_mapping)
df['pcv2month'] = df['IM3C2M'].replace(month_mapping)
df['pcv2yr'] = df['IM3C2Y'].replace(year_mapping)

df['pcv3day'] = df['IM3C3D'].replace(day_mapping)
df['pcv3month'] = df['IM3C3M'].replace(month_mapping)
df['pcv3yr'] = df['IM3C3Y'].replace(year_mapping)

# BCG

In [15]:
df['bcg_card'] = df[['bcgday', 'bcgmonth','bcgyr']].max(axis=1)
df.loc[df['card'] == 0, 'bcg_card'] = False

df['bcg_recall'] = df['IM7'].replace({1.0: True, 2.0: False,8: np.nan, 9: np.nan})
df.loc[df['recall'] == 0, 'bcg_recall'] = False

df['bcg'] = df[['bcg_card', 'bcg_recall']].max(axis=1)
df['bcg_sampled']=df['bcg']*df['chweight']

In [16]:
# Access variable labels
variable_labels = meta.column_labels

# Access value labels for a specific variable
value_labels = meta.variable_value_labels.get("IM7")
value_labels

{1.0: 'Yes', 2.0: 'No', 8.0: 'DK', 9.0: 'Missing'}

In [17]:
df['bcg'].value_counts()

bcg
True     6735
False    2886
Name: count, dtype: int64

In [18]:
calculate_weighted_percentage(df, 'bcg')

Weighted percentage of 'bcg' if 'AG2' is 1: 73.4


In [19]:
calculate_weighted_percentage(df, 'bcg_sampled')

Weighted percentage of 'bcg_sampled' if 'AG2' is 1: 74.5


# Polio

In [20]:
# Access variable labels
variable_labels = meta.column_labels

# Access value labels for a specific variable
value_labels = meta.variable_value_labels.get("IM10")
value_labels

{9.0: 'Missing'}

In [21]:
df['IM10'].value_counts()

IM10
3.0    987
2.0    798
4.0    643
1.0    602
7.0    379
5.0    321
9.0    265
6.0    164
Name: count, dtype: int64

In [22]:
#polio0
df['polio0_card'] = df[['polio0day', 'polio0month','polio0yr']].max(axis=1)
df.loc[df['card'] == False, 'polio0_card'] = False

df['polio0_recall1'] = df['IM9'].replace({1: 1, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'polio0_recall1'] = False

df['polio0'] = df[['polio0_card','polio0_recall1']].max(axis=1)
df['polio0_sampled']=df['polio0']*df['chweight']

In [23]:
#Polio1
df['polio1_card'] = df[['polio1day', 'polio1month','polio1yr']].max(axis=1)
df.loc[df['card'] == False, 'polio1_card'] = False

df['polio1_recall1'] = df['IM10'].replace({1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'polio1_recall1'] = False

df['polio1'] = df[['polio1_card','polio1_recall1']].max(axis=1)
df['polio1_sampled']=df['polio1']*df['chweight']

In [24]:
calculate_weighted_percentage(df, 'polio1_sampled')

Weighted percentage of 'polio1_sampled' if 'AG2' is 1: 74.1


In [25]:
#Polio2
df['polio2_card'] = df[['polio2day', 'polio2month','polio2yr']].max(axis=1)
df.loc[df['card'] == False, 'polio2_card'] = False

df['polio2_recall'] = df['IM10'].replace({1: 0, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'polio2_recall'] = False

df['polio2'] = df[['polio2_card','polio2_recall']].max(axis=1)
df['polio2_sampled']=df['polio2']*df['chweight']

In [26]:
#Polio3
df['polio3_card'] = df[['polio3day', 'polio3month','polio3yr']].max(axis=1)
df.loc[df['card'] == False, 'polio3_card'] = False

df['polio3_recall'] = df['IM10'].replace({1: 0, 2: 0, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'polio3_recall'] = False

df['polio3'] = df[['polio3_card','polio3_recall']].max(axis=1)
df['polio3_sampled']=df['polio3']*df['chweight']

# Penta

In [27]:
#penta1

df['penta1_card'] = df[['penta1day', 'penta1month','penta1yr']].max(axis=1)
df.loc[df['card'] == False, 'penta1_card'] = False

df['penta1_recall'] = df['IM12'].replace({1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'polio1_recall'] = False

df['penta1'] = df[['penta1_card', 'penta1_recall']].max(axis=1)
df['penta1_sampled']=df['penta1']*df['chweight']

In [28]:
# penta2
df['penta2_card'] = df[['penta2day', 'penta2month','penta2yr']].max(axis=1)
df.loc[df['card'] == False, 'penta2_card'] = False

df['penta2_recall'] = df['IM12'].replace({1: 0, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'polio1_recall'] = False

df['penta2'] = df[['penta2_card', 'penta2_recall']].max(axis=1)
df['penta2_sampled']=df['penta2']*df['chweight']

In [29]:
# penta3
df['penta3_card'] = df[['penta3day', 'penta3month','penta3yr']].max(axis=1)
df.loc[df['card'] == False, 'penta3_card'] = False

df['penta3_recall'] = df['IM12'].replace({1: 0, 2: 0, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'polio1_recall'] = False

df['penta3'] = df[['penta3_card', 'penta3_recall']].max(axis=1)
df['penta3_sampled']=df['penta3']*df['chweight']

# Yellow Fever

# PCV

In [30]:
#pcv1
df['pcv1_card'] = df[['pcv1day', 'pcv1month','pcv1yr']].max(axis=1)
df.loc[df['card'] == False, 'pcv1_card'] = False

df['pcv1_recall1'] = df['IM12'].replace({1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'pcv1_recall1'] = False

df['pcv1'] = df[['pcv1_card','pcv1_recall1']].max(axis=1)
df['pcv1_sampled']=df['pcv1']*df['chweight']




In [31]:
#pcv2
df['pcv2_card'] = df[['pcv2day', 'pcv2month','pcv2yr']].max(axis=1)
df.loc[df['card'] == False, 'pcv2_card'] = False

df['pcv2_recall'] = df['IM12'].replace({1: 0, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'pcv2_recall'] = False

df['pcv2'] = df[['pcv2_card','pcv2_recall']].max(axis=1)
df['pcv2_sampled']=df['pcv2']*df['chweight']

In [32]:

#pcv3
df['pcv3_card'] = df[['pcv3day', 'pcv3month','pcv3yr']].max(axis=1)
df.loc[df['card'] == False, 'pcv3_card'] = False

df['pcv3_recall'] = df['IM12'].replace({1: 0, 2: 0, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'pcv3_recall'] = False

df['pcv3'] = df[['pcv3_card','pcv3_recall']].max(axis=1)
df['pcv3_sampled']=df['pcv3']*df['chweight']

# IPV

# mcv

In [37]:
df['mcv_card'] = df[['mcvday', 'mcvmonth','mcvyr']].max(axis=1)
df.loc[df['card'] == False, 'mcv_card'] = False

df['mcv_recall'] = df['IM16'].replace({1: 1, 2: 0, 8: np.nan, 9: np.nan})
df.loc[df['recall'] == False, 'mcv_recall'] = False

df['mcv'] = df[['mcv_card','mcv_recall']].max(axis=1)
df['mcv_sampled']=df['mcv']*df['chweight']

# Vaccination Calulations

In [38]:
calculate_weighted_percentage(df, 'bcg_sampled')
calculate_weighted_percentage(df, 'penta1_sampled')
calculate_weighted_percentage(df, 'penta3_sampled')
calculate_weighted_percentage(df, 'mcv_sampled')
calculate_weighted_percentage(df, 'polio0_sampled')
calculate_weighted_percentage(df, 'polio1_sampled')
calculate_weighted_percentage(df, 'polio3_sampled')

#calculate_weighted_percentage(df, 'yf_sampled')
calculate_weighted_percentage(df, 'pcv1_sampled')
calculate_weighted_percentage(df, 'pcv3_sampled')

#calculate_weighted_percentage(df, 'ipv_sampled')

Weighted percentage of 'bcg_sampled' if 'AG2' is 1: 74.5
Weighted percentage of 'penta1_sampled' if 'AG2' is 1: 71.4
Weighted percentage of 'penta3_sampled' if 'AG2' is 1: 49.2
Weighted percentage of 'mcv_sampled' if 'AG2' is 1: 52.9
Weighted percentage of 'polio0_sampled' if 'AG2' is 1: 71.8
Weighted percentage of 'polio1_sampled' if 'AG2' is 1: 74.1
Weighted percentage of 'polio3_sampled' if 'AG2' is 1: 55.8
Weighted percentage of 'pcv1_sampled' if 'AG2' is 1: 46.9
Weighted percentage of 'pcv3_sampled' if 'AG2' is 1: 28.8


In [58]:
calculate_weighted_percentage(df, 'bcg')
calculate_weighted_percentage(df, 'polio1')
calculate_weighted_percentage(df, 'polio3')
calculate_weighted_percentage(df, 'penta1')
calculate_weighted_percentage(df, 'penta3')
#calculate_weighted_percentage(df, 'yf_sampled')
calculate_weighted_percentage(df, 'pcv1')
calculate_weighted_percentage(df, 'pcv3')
calculate_weighted_percentage(df, 'mcv')
#calculate_weighted_percentage(df, 'ipv')

Weighted percentage of 'bcg' if 'AG2' is 1: 73.4
Weighted percentage of 'polio1' if 'AG2' is 1: 72.5
Weighted percentage of 'polio3' if 'AG2' is 1: 52.9
Weighted percentage of 'penta1' if 'AG2' is 1: 68.2
Weighted percentage of 'penta3' if 'AG2' is 1: 43.7
Weighted percentage of 'pcv1' if 'AG2' is 1: 47.3
Weighted percentage of 'pcv3' if 'AG2' is 1: 27.1
Weighted percentage of 'measles' if 'AG2' is 1: 51.0


In [39]:
# rename columns in dataframe
df = df.rename(columns={'HL4': 'gender','HH6':'residence','division':'district','HH7':'region',})

In [41]:
# Access variable labels
variable_labels = meta.column_labels

# Access value labels for a specific variable
value_labels = meta.variable_value_labels.get("division")
value_labels

{1.0: 'Larkana',
 2.0: 'Sukkur',
 3.0: 'Hyderabad',
 4.0: 'Mirpur Khas',
 5.0: 'Karachi'}

In [42]:
district_mapping={1.0: 'Larkana',
 2.0: 'Sukkur',
 3.0: 'Hyderabad',
 4.0: 'Mirpur Khas',
 5.0: 'Karachi'}

In [59]:
df['residence'].value_counts()

residence
2.0    10480
1.0     7832
Name: count, dtype: int64

In [43]:
# List of Region
region_mapping = {1.0: 'Kashmore',
 2.0: 'Jacobabad',
 3.0: 'Shahdad Kot',
 4.0: 'Larkana',
 5.0: 'Shikarpur',
 6.0: 'Ghotki',
 7.0: 'Sukkur',
 8.0: 'Khairpur',
 9.0: 'Naushahro Feroze',
 10.0: 'Shaheed Benazirabad',
 11.0: 'Dadu',
 12.0: 'Jamshoro',
 13.0: 'Hyderabad',
 14.0: 'Matiari',
 15.0: 'Tando Allahyar',
 16.0: 'Tando Muhammad Khan',
 17.0: 'Badin',
 18.0: 'Sujawal',
 19.0: 'Thatta',
 20.0: 'Sanghar',
 21.0: 'Mirpur Khas',
 22.0: 'Umer Kot',
 23.0: 'Tharparkar',
 24.0: 'Karachi Malir',
 25.0: 'Karachi East',
 26.0: 'Karachi Central',
 27.0: 'Karachi West',
 28.0: 'Karachi South'}
residence_mapping = {1.0: 'Urban', 2.0: 'Rural'}
gender_mapping = {1.0: 'Male', 2.0: 'Female',9.0:np.nan}

In [44]:
# Apply mappings to DataFrame columns
df['gender'] = df['gender'].replace(gender_mapping)
df['region'] = df['region'].replace(region_mapping)
df['residence'] = df['residence'].replace(residence_mapping)
df['district']=df['district'].replace(district_mapping)

In [52]:
# List of vaccines
vaccines = ['bcg_sampled','polio0_sampled','polio1_sampled','polio3_sampled','penta1_sampled','penta3_sampled','pcv1_sampled','pcv3_sampled','mcv_sampled']

In [53]:
# List of vaccines
vaccines1 = ['bcg','polio0','polio1','polio3','penta1','penta3','pcv1','pcv3','mcv']

In [47]:
regions=['Kashmore','Jacobabad','Shahdad Kot','Larkana','Shikarpur','Ghotki','Sukkur','Khairpur','Naushahro Feroze',
'Shaheed Benazirabad','Dadu','Jamshoro','Hyderabad','Matiari','Tando Allahyar','Tando Muhammad Khan',
'Badin','Sujawal','Thatta','Sanghar','Mirpur Khas','Umer Kot','Tharparkar','Karachi Malir','Karachi East','Karachi Central',
'Karachi West','Karachi South']

In [58]:
districts=['Hyderabad','Karachi','Larkana','Mirpurkhas','Sukkur']

In [48]:
# List of genders
genders = ['Female','Male']

In [49]:
residences=['Urban','Rural']

In [50]:
df['gender'].value_counts()

gender
Male      9296
Female    8812
Name: count, dtype: int64

In [54]:
# Create an empty list to store the results
results = []

# Loop through each vaccine and child_gender
for vaccine in vaccines:
    for gender in genders:
        coverage = calculate_vaccination_coverage_child_gender(df, gender, vaccine)
        results.append({'Vaccine': vaccine, 'gender': gender, 'Coverage': coverage})

# Convert the list of dictionaries into a DataFrame
results_df = pd.DataFrame(results)
#results_df['Coverage'] = results_df['Coverage'].round(2)
# Display the results as a table
pivot_table1 = results_df.pivot_table(index='Vaccine', columns='gender', values='Coverage')
pivot_table1

gender,Female,Male
Vaccine,Unnamed: 1_level_1,Unnamed: 2_level_1
bcg_sampled,73.4,75.5
mcv_sampled,52.9,53.0
pcv1_sampled,45.2,48.4
pcv3_sampled,27.5,30.0
penta1_sampled,69.5,73.2
penta3_sampled,47.7,50.7
polio0_sampled,70.7,72.8
polio1_sampled,73.5,74.7
polio3_sampled,55.3,56.3


In [55]:
# Create an empty list to store the results
results1 = []

# Loop through each vaccine and child_residence
for vaccine in vaccines1:
    for residence in residences:
        coverage = calculate_vaccination_coverage_child_residence(df, residence, vaccine)
        results1.append({'Vaccine': vaccine, 'residence': residence, 'Coverage': coverage})

# Convert the list of dictionaries into a DataFrame
results1_df = pd.DataFrame(results1)
#results_df['Coverage'] = results_df['Coverage'].round(2)
# Display the results as a table
pivot_table1 = results1_df.pivot_table(index='Vaccine', columns='residence', values='Coverage')
pivot_table1

residence,Rural,Urban
Vaccine,Unnamed: 1_level_1,Unnamed: 2_level_1
bcg,65.7,84.6
mcv,42.8,63.3
pcv1,42.2,55.1
pcv3,20.2,37.5
penta1,59.1,81.1
penta3,32.2,60.2
polio0,61.7,82.4
polio1,65.6,82.8
polio3,44.6,65.5


In [56]:
# Create an empty list to store the results
results2 = []

# Loop through each vaccine and child_region
for vaccine in vaccines1:
    for region in regions:
        coverage = calculate_vaccination_coverage_region(df, region, vaccine)
        results2.append({'Vaccine': vaccine, 'region': region, 'Coverage': coverage})

# Convert the list of dictionaries into a DataFrame
results2_df = pd.DataFrame(results2)
#results_df['Coverage'] = results_df['Coverage'].round(2)
# Display the results as a table
pivot_table2 = results2_df.pivot_table(index='Vaccine', columns='region', values='Coverage')
pivot_table2
#pivot_table2.to_excel('/path/to/directory/pivot_table2_sorted.xlsx')
#pivot_table2.to_excel('Vaccinebyregion.xlsx', index=False)

region,Badin,Dadu,Ghotki,Hyderabad,Jacobabad,Jamshoro,Karachi Central,Karachi East,Karachi Malir,Karachi South,...,Shahdad Kot,Shaheed Benazirabad,Shikarpur,Sujawal,Sukkur,Tando Allahyar,Tando Muhammad Khan,Tharparkar,Thatta,Umer Kot
Vaccine,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
bcg,68.9,62.7,67.4,79.0,64.5,76.8,85.7,88.0,80.2,88.6,...,58.0,79.1,66.9,56.9,66.7,86.3,81.2,72.5,85.1,77.3
mcv,55.7,44.0,44.1,59.0,38.1,60.3,73.9,72.0,59.6,69.6,...,26.9,45.5,37.4,54.4,31.5,66.0,59.5,70.5,52.1,61.4
pcv1,41.9,38.8,48.4,42.0,39.3,51.6,55.1,51.4,32.6,44.1,...,38.8,53.7,46.9,27.1,42.2,65.6,47.4,56.4,54.3,52.2
pcv3,21.9,19.4,21.0,30.0,21.4,38.7,46.4,43.2,22.5,38.2,...,27.2,26.2,33.6,11.2,21.6,38.7,19.7,25.6,21.4,19.1
penta1,62.9,51.1,60.5,75.8,46.2,83.9,95.2,94.2,79.1,92.1,...,62.2,74.5,49.5,52.8,60.5,82.8,71.1,68.9,78.3,63.2
penta3,34.3,29.8,33.1,58.6,24.4,69.6,85.5,84.1,62.8,79.4,...,45.7,41.1,35.5,32.1,36.0,53.8,35.5,36.5,39.1,27.2
polio0,64.2,60.4,65.1,75.2,64.2,72.7,83.1,88.0,78.9,85.7,...,56.0,70.1,61.4,48.7,69.0,83.2,70.0,69.2,82.4,77.3
polio1,72.6,63.1,65.1,78.1,60.0,78.5,85.7,89.2,80.0,87.0,...,56.1,68.9,63.6,57.0,68.8,89.4,78.2,80.0,78.4,82.1
polio3,45.3,43.6,31.0,60.0,39.1,63.1,80.0,78.4,61.1,76.8,...,43.9,52.7,45.5,38.6,42.4,76.6,44.9,61.2,52.7,58.6


In [59]:
# Create an empty list to store the results
results2 = []

# Loop through each vaccine and child_district
for vaccine in vaccines1:
    for district in districts:
        coverage = calculate_vaccination_coverage_district(df, district, vaccine)
        results2.append({'Vaccine': vaccine, 'district': district, 'Coverage': coverage})

# Convert the list of dictionaries into a DataFrame
results2_df = pd.DataFrame(results2)
results2_df
# Display the results as a table
pivot_table2 = results2_df.pivot_table(index='Vaccine', columns='district', values='Coverage')
pivot_table2

No children found for the specified district: Mirpurkhas.
No children found for the specified district: Mirpurkhas.
No children found for the specified district: Mirpurkhas.
No children found for the specified district: Mirpurkhas.
No children found for the specified district: Mirpurkhas.
No children found for the specified district: Mirpurkhas.
No children found for the specified district: Mirpurkhas.
No children found for the specified district: Mirpurkhas.
No children found for the specified district: Mirpurkhas.


district,Hyderabad,Karachi,Larkana,Mirpurkhas,Sukkur
Vaccine,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
bcg,74.9,83.6,63.9,0.0,72.7
mcv,56.1,64.5,33.8,0.0,44.9
pcv1,46.4,44.1,40.4,0.0,51.1
pcv3,25.6,34.1,27.1,0.0,26.8
penta1,69.2,85.5,54.6,0.0,66.4
penta3,43.3,71.4,37.6,0.0,39.0
polio0,69.8,81.7,62.7,0.0,69.7
polio1,74.3,82.5,62.2,0.0,69.5
polio3,54.1,71.8,46.2,0.0,44.4


In [60]:
pivot_table2.to_clipboard(index=True, header=True)

## Retention Rates

In [61]:
def calculate_retention_rate(df, group_col, dose1_col, dose2_col):
    """
    Calculate retention rate from dose2 to dose1 for each group.
    
    Parameters:
    df (pandas.DataFrame): Input DataFrame containing vaccination data.
    group_col (str): Column name for the grouping variable.
    dose1_col (str): Column name representing the first dose.
    dose2_col (str): Column name representing the second dose.
    
    Returns:
    pandas.DataFrame: DataFrame with retention rates calculated and added as a new column.
    """
    # Group by the specified column and aggregate sum of dose1 and dose2
    df1 = df.groupby([group_col]).agg(
        sum_dose1=(dose1_col, 'sum'),
        sum_dose2=(dose2_col, 'sum')
    ).reset_index()
    
    # Calculate retention rate
    df1['retention_rate'] = ((1 - ((df1['sum_dose1'] - df1['sum_dose2']) / df1['sum_dose1'])) * 100)
    pivot_table2 = df1.pivot_table(columns=group_col, values='retention_rate')
    return pivot_table2
  

In [62]:
 filtered_df = df[(df['AG2']==1)]

In [63]:
# Call the calculate_retention_rate function
result_df = calculate_retention_rate(filtered_df, 'district', 'penta1', 'penta3')

# Copy the result to the clipboard
result_df.to_clipboard(index=True, header=True)


In [64]:
# Call the calculate_retention_rate function
result_df = calculate_retention_rate(filtered_df, 'district', 'polio1', 'polio3')

# Copy the result to the clipboard
result_df.to_clipboard(index=True, header=True)


In [66]:
calculate_retention_rate(filtered_df, 'residence', 'penta1_sampled', 'penta3_sampled')

residence,Rural,Urban
retention_rate,51.98868,80.715707


In [112]:
# Call the calculate_retention_rate function
calculate_retention_rate(filtered_df, 'residence', 'polio1_sampled', 'polio3_sampled')

residence,Rural,Urban
retention_rate,65.151758,83.483976


In [113]:
calculate_retention_rate(filtered_df, 'gender', 'penta1_sampled', 'penta3_sampled')

gender,Female,Male
retention_rate,68.315732,69.007448


In [114]:
calculate_retention_rate(filtered_df, 'gender', 'polio1_sampled', 'polio3_sampled')

gender,Female,Male
retention_rate,75.192819,75.194392


In [None]:
calculate_retention_rate(filtered_df, 'polio1', 'polio3')

## National retention rate

In [67]:
penta_retention_rate = ((1 - ((filtered_df['penta1'].sum() - filtered_df['penta3'].sum()) / filtered_df['penta1'].sum())) * 100)
penta_retention_rate

64.23948220064725

In [68]:
polio_retention_rate = ((1 - ((filtered_df['polio1'].sum() - filtered_df['polio3'].sum()) / filtered_df['polio1'].sum())) * 100)
polio_retention_rate

73.14730100640439

## Fully vaccinated

In [69]:
# Filter out rows with null values in relevant columns
filtered_df = filtered_df.dropna(subset=['bcg', 'polio3', 'mcv', 'penta3','polio1','penta1'])

In [70]:
# Calculate the number of fully vaccinated kids
fully_vaccinated = filtered_df[(filtered_df['bcg']==1) & (filtered_df['polio3']==1) & (filtered_df['mcv']==1) & (filtered_df['penta3']==1)]
num_fully_vaccinated = len(fully_vaccinated)

# Calculate the total number of kids
total_kids = len(filtered_df)

# Calculate the percentage of fully vaccinated kids
percentage_fully_vaccinated = (num_fully_vaccinated / total_kids) * 100
percentage_fully_vaccinated

33.57196684250188

##  Zero dose childern Penta1

In [None]:
def calculate_zero_dose_proportion(df, vaccine_column):
    # Count the number of children who never received the Penta vaccination
    zero_dose_children = df[df[vaccine_column] == 0][vaccine_column].count()

    # Count the total number of children in the dataset
    total_children = df['penta1'].count()

    # Calculate the proportion of zero dose children
    proportion_zero_dose = (zero_dose_children / total_children) * 100

    return proportion_zero_dose

In [None]:
zero_dose_proportion = calculate_zero_dose_proportion(filtered_df, 'penta1')
print(f"Proportion of zero dose children: {zero_dose_proportion:.2f}%")


In [None]:
def calculate_zero_dose_percentage_by_group(df, vaccine_column, group_column):
    # Group the DataFrame by group and count the total number of children in each group
    total_children_by_group = df.groupby(group_column).size()

    # Group the DataFrame by group and count the number of zero-dose children in each group
    zero_dose_children_by_group = df[df[vaccine_column] == 0].groupby(group_column).size()

    # Calculate the percentage of zero-dose children for each group
    zero_dose_percentage_by_group = round((zero_dose_children_by_group / total_children_by_group) * 100,1)

    return zero_dose_percentage_by_group

In [None]:
zero_dose_percentage_by_group = calculate_zero_dose_percentage_by_group(df, 'penta1', 'district')

print(zero_dose_percentage_by_group)

In [None]:
zero_dose_percentage_by_group = calculate_zero_dose_percentage_by_group(df, 'penta1', 'residence')

print(zero_dose_percentage_by_group)

In [None]:
zero_dose_percentage_by_group = calculate_zero_dose_percentage_by_group(df, 'penta1', 'gender')

print(zero_dose_percentage_by_group)

## Zero Dose Childern Non-vaccinated at all

In [71]:
# Calculate the number of no vaccinated kids
no_vaccinated = filtered_df[(filtered_df['bcg']==0) & (filtered_df['polio1']==0) & (filtered_df['mcv']==0) & (filtered_df['penta1']==0)]
num_no_vaccinated = len(no_vaccinated)

# Calculate the total number of kids
total_kids =len(filtered_df)


# Calculate the percentage of no vaccinated kids
percentage_no_vaccinated = (num_no_vaccinated / total_kids) * 100
percentage_no_vaccinated

21.4016578749058