# Pakistan Khyber Pakhtunkhwa MICS 2016-17

In [70]:
import pyreadstat
import warnings
warnings.filterwarnings("ignore")
from googletrans import Translator
from googletrans import Translator
import pandas as pd
import numpy as np

In [226]:
def calculate_vaccination_percentage(df, column_name):
    """Print the mean of ``column_name``, as a percentage, over rows with AG2 == 1.

    Parameters:
    df (pandas.DataFrame): Data containing an 'AG2' column and ``column_name``.
    column_name (str): Column to average; ``mean`` skips missing values.

    The printed value is ``mean(column) * 100`` rounded to one decimal. No
    weight column is used here, so the figure is only weighted if
    ``column_name`` itself already carries the weighting.
    """
    filtered_df = df[df['AG2'] == 1]
    weighted_percentage = round(filtered_df[column_name].mean() * 100, 1)
    print("Weighted percentage of '{}' if 'AG2' is 1: {}".format(column_name, weighted_percentage))


# Several cells call this helper as ``calculate_weighted_percentage``
# (presumably its earlier name); keep that name bound so a clean
# top-to-bottom run does not fail with a NameError.
calculate_weighted_percentage = calculate_vaccination_percentage

In [72]:
# Vaccination coverage for one residence category among children with AG2 == 1
def calculate_vaccination_coverage_child_residence(df, residence, vaccine_column):
    """Return the mean of ``vaccine_column`` as a percentage, rounded to 1 decimal.

    Only rows with AG2 == 1 and ``df['residence'] == residence`` are used.
    """
    in_age_group = df['AG2'] == 1
    in_residence = df['residence'] == residence
    doses = df.loc[in_age_group & in_residence, vaccine_column]

    # mean() skips missing values, so only rows with a recorded status count
    coverage = doses.mean() * 100
    return round(coverage, 1)


In [73]:
# Vaccination coverage for one region among children with AG2 == 1
def calculate_vaccination_coverage_region(df, region, vaccine_column):
    """Return the mean of ``vaccine_column`` as a percentage, rounded to 1 decimal.

    Only rows with AG2 == 1 and ``df['region'] == region`` are used. When no
    such row has a non-missing value, a message is printed and 0 is returned.
    """
    selector = (df['AG2'] == 1) & (df['region'] == region)
    doses = df.loc[selector, vaccine_column]

    # count() ignores missing values, so an all-NaN subset is treated as empty
    if doses.count() == 0:
        print("No children found for the specified age range and region: {}.".format(region))
        return 0

    return round(doses.mean() * 100, 1)

In [177]:
def calculate_vaccination_coverage_district(df, district, vaccine_column):
    """Return the mean of ``vaccine_column`` as a percentage, rounded to 1 decimal.

    Only rows with AG2 == 1 and ``df['district'] == district`` are used. When
    no such row has a non-missing value, a message is printed and 0 is returned.
    """
    keep = (df['AG2'] == 1) & (df['district'] == district)
    status = df.loc[keep, vaccine_column]

    has_known_status = status.count() != 0  # count() skips missing values
    if not has_known_status:
        print("No children found for the specified district: {}.".format(district))
        return 0

    share = status.mean() * 100
    return round(share, 1)

In [74]:
# Vaccination coverage for one gender among children with AG2 == 1
def calculate_vaccination_coverage_child_gender(df, gender, vaccine_column):
    """Return the mean of ``vaccine_column`` as a percentage, rounded to 1 decimal.

    Only rows with AG2 == 1 and ``df['gender'] == gender`` are used.
    """
    row_filter = (df['AG2'] == 1) & (df['gender'] == gender)
    subset = df[row_filter]

    # Missing values are skipped by mean()
    mean_status = subset[vaccine_column].mean()
    return round(mean_status * 100, 1)


# Reading the dataframe

In [75]:
df, meta = pyreadstat.read_sav("C:/Users/swalke/Desktop/RISP/PAK Khyber 2016-17.sav")

In [76]:
# Access variable labels
variable_labels = meta.column_labels

# Access value labels for a specific variable
value_labels = meta.variable_value_labels.get("AG2")
value_labels

In [77]:
df['AG2'].value_counts()

AG2
3.0    4523
0.0    4461
2.0    4064
1.0    4015
4.0    3863
Name: count, dtype: int64

In [78]:
# Access variable labels
variable_labels = meta.column_labels

# Access value labels for a specific variable
value_labels = meta.variable_value_labels.get("IM3BD")
value_labels

{0.0: 'Not given',
 44.0: 'Marked on card',
 66.0: 'Mother reported',
 97.0: 'Inconsistent',
 98.0: 'DK',
 99.0: 'Missing'}

In [79]:
df['IM3BY'].value_counts()

IM3BY
2016.0    2385
2015.0    1470
2014.0     688
2017.0     432
0.0         32
9997.0      21
4444.0      15
6666.0       2
Name: count, dtype: int64

In [80]:
# Define mapping variables
# ``replace`` matches dict keys as individual values, so the codes 1..66 are
# listed one by one; a single ``range`` object used as a key is not a
# per-value match. 97 is kept as given (1); 98/99/9997 become missing.
_given_codes = {float(code): True for code in range(1, 67)}
_special_day_month_codes = {97.0: 1, 99.0: np.nan, 0.0: False, 98: np.nan, 9997.0: np.nan}
day_mapping = {**_given_codes, **_special_day_month_codes}
month_mapping = {**_given_codes, **_special_day_month_codes}
year_mapping = {2014:True, 2015:True, 2016:True, 2017:True, 2018:True, 2019:True, 4444:True, 6666:True, 9999:np.nan, 9997.0:1, 9998.0:np.nan}

In [81]:
df['card'] = df['IM2'].replace({1:True, 2:False, 3:False, 4:False, 9:np.nan})

In [82]:
df['recall'] = df['IM6'].replace({1:True, 2: False, 8: np.nan, 9: np.nan})

In [83]:
# Recode the card date fields of every antigen with the shared mappings.
# For each (target prefix, source stem) pair the source columns are
# '<stem>D', '<stem>M', '<stem>Y' and the targets are
# '<prefix>day', '<prefix>month', '<prefix>yr'.
card_date_sources = [
    ('bcg', 'IM3B'),
    ('polio0', 'IM3P0'),
    ('polio1', 'IM3P1'),
    ('polio2', 'IM3P2'),
    ('polio3', 'IM3P3'),
    ('penta1', 'IM3T1'),
    ('penta2', 'IM3T2'),
    ('penta3', 'IM3T3'),
    ('mcv1', 'IM3M1'),
    ('mcv2', 'IM3M2'),
]

# Disabled in this analysis (kept for reference):
#   ipv -> IM3I, yf -> IM3Y, pcv1 -> IM3PCV1, pcv2 -> IM3PCV2, pcv3 -> IM3PCV3

for target_prefix, source_stem in card_date_sources:
    df[target_prefix + 'day'] = df[source_stem + 'D'].replace(day_mapping)
    df[target_prefix + 'month'] = df[source_stem + 'M'].replace(month_mapping)
    df[target_prefix + 'yr'] = df[source_stem + 'Y'].replace(year_mapping)

In [84]:
# Access variable labels
variable_labels = meta.column_labels

# Access value labels for a specific variable
value_labels = meta.variable_value_labels.get("IM7")
value_labels

{1.0: 'Yes', 2.0: 'No', 8.0: 'DK', 9.0: 'Missing'}

# BCG

In [85]:
# Card evidence: row-wise max of the recoded day/month/year flags, so any
# truthy date component marks the dose as recorded on the card.
df['bcg_card'] = df[['bcgday', 'bcgmonth','bcgyr']].max(axis=1)
# Rows whose 'card' flag equals 0/False are forced to 0.
df.loc[df['card'] == 0, 'bcg_card'] = 0

# Recall evidence from IM7 (1 = Yes, 2 = No; DK and Missing become NaN).
df['bcg_recall'] = df['IM7'].replace({1: True, 2: False ,8: np.nan, 9: np.nan})
# Rows whose 'recall' flag equals 0/False are forced to 0.
df.loc[df['recall'] == 0, 'bcg_recall'] = 0

# Combined indicator: max of the card and recall columns.
df['bcg'] = df[['bcg_card', 'bcg_recall']].max(axis=1)
# Indicator multiplied by the 'chweight' column.
df['bcg_sampled']=df['bcg']*df['chweight']

In [86]:
calculate_weighted_percentage(df, 'bcg_sampled')

Weighted percentage of 'bcg_sampled' if 'AG2' is 1: 73.3


In [87]:
calculate_weighted_percentage(df, 'bcg_card')

Weighted percentage of 'bcg_card' if 'AG2' is 1: 61.4


In [88]:
calculate_weighted_percentage(df, 'bcg_recall')

Weighted percentage of 'bcg_recall' if 'AG2' is 1: 44.3


# Polio

In [89]:
# Access variable labels
variable_labels = meta.column_labels

# Access value labels for a specific variable
value_labels = meta.variable_value_labels.get("IM10")
value_labels

{9.0: 'Missing'}

In [90]:
df['IM10'].value_counts()

IM10
1.0    1400
2.0     959
7.0     926
3.0     311
4.0     280
9.0     280
6.0     180
5.0     174
Name: count, dtype: int64

In [91]:
#polio0
# Card evidence: row-wise max of the recoded day/month/year flags.
df['polio0_card'] = df[['polio0day', 'polio0month','polio0yr']].max(axis=1)
# Rows whose 'card' flag is False are forced to False.
df.loc[df['card'] == False, 'polio0_card'] = False

# Recall evidence from IM9: code 1 counts as given, codes 2-7 as not given,
# codes 8 and 9 become missing.
df['polio0_recall1'] = df['IM9'].replace({1: 1, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: np.nan, 9: np.nan})
# Rows whose 'recall' flag is False are forced to False.
df.loc[df['recall'] == False, 'polio0_recall1'] = False

# Combined indicator (max of card and recall) and its product with 'chweight'.
df['polio0'] = df[['polio0_card','polio0_recall1']].max(axis=1)
df['polio0_sampled']=df['polio0']*df['chweight']

In [92]:
#Polio1
# Card evidence: row-wise max of the recoded day/month/year flags.
df['polio1_card'] = df[['polio1day', 'polio1month','polio1yr']].max(axis=1)
# Rows whose 'card' flag is False are forced to False.
df.loc[df['card'] == False, 'polio1_card'] = False

# Recall evidence from IM10: codes 1-7 all count as dose 1 given; 8 and 9
# become missing. NOTE(review): presumably IM10 is the number of doses
# recalled - confirm against the questionnaire.
df['polio1_recall1'] = df['IM10'].replace({1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
# Rows whose 'recall' flag is False are forced to False.
df.loc[df['recall'] == False, 'polio1_recall1'] = False

# Combined indicator (max of card and recall) and its product with 'chweight'.
df['polio1'] = df[['polio1_card','polio1_recall1']].max(axis=1)
df['polio1_sampled']=df['polio1']*df['chweight']

In [93]:
df['polio1'].value_counts()

polio1
True     8757
False    3463
Name: count, dtype: int64

In [94]:
calculate_weighted_percentage(df, 'polio1_sampled')

Weighted percentage of 'polio1_sampled' if 'AG2' is 1: 79.3


In [95]:
#Polio2
# Card evidence: row-wise max of the recoded day/month/year flags.
df['polio2_card'] = df[['polio2day', 'polio2month','polio2yr']].max(axis=1)
# Rows whose 'card' flag is False are forced to False.
df.loc[df['card'] == False, 'polio2_card'] = False

# Recall evidence from IM10: code 1 counts as dose 2 not given, codes 2-7 as
# given; 8 and 9 become missing.
df['polio2_recall'] = df['IM10'].replace({1: 0, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
# Rows whose 'recall' flag is False are forced to False.
df.loc[df['recall'] == False, 'polio2_recall'] = False

# Combined indicator (max of card and recall) and its product with 'chweight'.
df['polio2'] = df[['polio2_card','polio2_recall']].max(axis=1)
df['polio2_sampled']=df['polio2']*df['chweight']

In [96]:
calculate_weighted_percentage(df, 'polio2_sampled')

Weighted percentage of 'polio2_sampled' if 'AG2' is 1: 66.5


In [97]:
#Polio3
# Card evidence: row-wise max of the recoded day/month/year flags.
df['polio3_card'] = df[['polio3day', 'polio3month','polio3yr']].max(axis=1)
# Rows whose 'card' flag is False are forced to False.
df.loc[df['card'] == False, 'polio3_card'] = False

# Recall evidence from IM10: codes 1-2 count as dose 3 not given, codes 3-7
# as given; 8 and 9 become missing.
df['polio3_recall'] = df['IM10'].replace({1: 0, 2: 0, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
# Rows whose 'recall' flag is False are forced to False.
df.loc[df['recall'] == False, 'polio3_recall'] = False

# Combined indicator (max of card and recall) and its product with 'chweight'.
df['polio3'] = df[['polio3_card','polio3_recall']].max(axis=1)
df['polio3_sampled']=df['polio3']*df['chweight']

In [98]:
calculate_weighted_percentage(df, 'polio3_sampled')

Weighted percentage of 'polio3_sampled' if 'AG2' is 1: 57.0


# Penta

In [99]:
#penta1

# Card evidence: row-wise max of the recoded day/month/year flags.
df['penta1_card'] = df[['penta1day', 'penta1month','penta1yr']].max(axis=1)
df.loc[df['card'] == False, 'penta1_card'] = False

# Recall evidence from IM12: codes 1-7 count as dose 1 given; 8 and 9 become missing.
df['penta1_recall'] = df['IM12'].replace({1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
# The override must target this cell's own recall column. It previously wrote
# to 'polio1_recall', leaving 'penta1_recall' untouched and creating a stray
# column.
df.loc[df['recall'] == False, 'penta1_recall'] = False

# Combined indicator (max of card and recall) and its product with 'chweight'.
df['penta1'] = df[['penta1_card', 'penta1_recall']].max(axis=1)
df['penta1_sampled']=df['penta1']*df['chweight']

In [100]:
calculate_weighted_percentage(df, 'penta1_sampled')

Weighted percentage of 'penta1_sampled' if 'AG2' is 1: 76.3


In [101]:
# penta2
# Card evidence: row-wise max of the recoded day/month/year flags.
df['penta2_card'] = df[['penta2day', 'penta2month','penta2yr']].max(axis=1)
df.loc[df['card'] == False, 'penta2_card'] = False

# Recall evidence from IM12: code 1 counts as dose 2 not given, codes 2-7 as
# given; 8 and 9 become missing.
df['penta2_recall'] = df['IM12'].replace({1: 0, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
# The override must target this cell's own recall column. It previously wrote
# to 'polio1_recall', leaving 'penta2_recall' untouched.
df.loc[df['recall'] == False, 'penta2_recall'] = False

# Combined indicator (max of card and recall) and its product with 'chweight'.
df['penta2'] = df[['penta2_card', 'penta2_recall']].max(axis=1)
df['penta2_sampled']=df['penta2']*df['chweight']

In [102]:
calculate_weighted_percentage(df, 'penta2_sampled')

Weighted percentage of 'penta2_sampled' if 'AG2' is 1: 67.5


In [103]:
# penta3
# Card evidence: row-wise max of the recoded day/month/year flags.
df['penta3_card'] = df[['penta3day', 'penta3month','penta3yr']].max(axis=1)
df.loc[df['card'] == False, 'penta3_card'] = False

# Recall evidence from IM12: codes 1-2 count as dose 3 not given, codes 3-7
# as given; 8 and 9 become missing.
df['penta3_recall'] = df['IM12'].replace({1: 0, 2: 0, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: np.nan, 9: np.nan})
# The override must target this cell's own recall column. It previously wrote
# to 'polio1_recall', leaving 'penta3_recall' untouched.
df.loc[df['recall'] == False, 'penta3_recall'] = False

# Combined indicator (max of card and recall) and its product with 'chweight'.
df['penta3'] = df[['penta3_card', 'penta3_recall']].max(axis=1)
df['penta3_sampled']=df['penta3']*df['chweight']

In [104]:
calculate_weighted_percentage(df, 'penta3_sampled')

Weighted percentage of 'penta3_sampled' if 'AG2' is 1: 58.4


# Yellow Fever

# PCV

# IPV

# mcv

In [105]:
# Access variable labels
variable_labels = meta.column_labels

# Access value labels for a specific variable
value_labels = meta.variable_value_labels.get("IM16")
value_labels

{1.0: 'Yes', 2.0: 'No', 8.0: 'DK', 9.0: 'Missing'}

In [106]:
# Card evidence: row-wise max of the recoded day/month/year flags.
# Note the column is named 'mcv_card' (no dose number), unlike the recall column.
df['mcv_card'] = df[['mcv1day', 'mcv1month','mcv1yr']].max(axis=1)
# Rows whose 'card' flag is False are forced to False.
df.loc[df['card'] == False, 'mcv_card'] = False

# Recall evidence from IM16 (1 = Yes, 2 = No; DK and Missing become NaN).
df['mcv1_recall'] = df['IM16'].replace({1: 1, 2: 0, 8: np.nan, 9: np.nan})
# Rows whose 'recall' flag is False are forced to False.
df.loc[df['recall'] == False, 'mcv1_recall'] = False

# Combined indicator (max of card and recall) and its product with 'chweight'.
df['mcv1'] = df[['mcv_card','mcv1_recall']].max(axis=1)
df['mcv1_sampled']=df['mcv1']*df['chweight']

In [107]:
calculate_weighted_percentage(df, 'mcv1_sampled')

Weighted percentage of 'mcv1_sampled' if 'AG2' is 1: 55.2


# Vaccination Calculations

In [108]:
calculate_vaccination_percentage(df, 'bcg_sampled')

calculate_vaccination_percentage(df, 'polio0_sampled')
calculate_vaccination_percentage(df, 'polio1_sampled')
calculate_vaccination_percentage(df, 'polio3_sampled')
calculate_vaccination_percentage(df, 'penta1_sampled')
calculate_vaccination_percentage(df, 'penta3_sampled')
calculate_vaccination_percentage(df, 'mcv1_sampled')
#calculate_vaccination_percentage(df, 'ipv_sampled')

Weighted percentage of 'bcg_sampled' if 'AG2' is 1: 73.3
Weighted percentage of 'polio0_sampled' if 'AG2' is 1: 78.0
Weighted percentage of 'polio1_sampled' if 'AG2' is 1: 79.3
Weighted percentage of 'polio3_sampled' if 'AG2' is 1: 57.0
Weighted percentage of 'penta1_sampled' if 'AG2' is 1: 76.3
Weighted percentage of 'penta3_sampled' if 'AG2' is 1: 58.4
Weighted percentage of 'mcv1_sampled' if 'AG2' is 1: 55.2


In [109]:
calculate_vaccination_percentage(df, 'bcg')
calculate_vaccination_percentage(df, 'polio1')
calculate_vaccination_percentage(df, 'polio3')
calculate_vaccination_percentage(df, 'penta1')
calculate_vaccination_percentage(df, 'penta3')
calculate_vaccination_percentage(df, 'mcv1')

Weighted percentage of 'bcg' if 'AG2' is 1: 67.8
Weighted percentage of 'polio1' if 'AG2' is 1: 77.4
Weighted percentage of 'polio3' if 'AG2' is 1: 54.1
Weighted percentage of 'penta1' if 'AG2' is 1: 69.1
Weighted percentage of 'penta3' if 'AG2' is 1: 51.8
Weighted percentage of 'mcv1' if 'AG2' is 1: 51.1


In [137]:
# rename columns in dataframe
# HL4 -> 'gender', HH6 -> 'residence', HH7 -> 'region', and the survey's
# 'division' column -> 'district'. The coverage helpers filter on exactly
# these new names, so this rename has to run before they are called.
df = df.rename(columns={'HL4': 'gender','HH6':'residence','division':'district','HH7':'region'})

In [138]:
#df['region'].value_counts()

In [166]:
# Access variable labels
variable_labels = meta.column_labels

# Access value labels for a specific variable
value_labels = meta.variable_value_labels.get("division")
value_labels

{1.0: 'Bannu',
 2.0: 'D.I. Khan',
 3.0: 'Hazara',
 4.0: 'Kohat',
 5.0: 'Mardan',
 6.0: 'Peshawar',
 7.0: 'Malakand'}

In [165]:
district_mapping= {1.0: 'Bannu',
 2.0: 'D.I. Khan',
 3.0: 'Hazara',
 4.0: 'Kohat',
 5.0: 'Mardan',
 6.0: 'Peshawar',
 7.0: 'Malakand'}

In [141]:
# List of Region
region_mapping = {1.0: 'Abbottabad',
 2.0: 'Bannu',
 3.0: 'Batagram',
 4.0: 'Buner',
 5.0: 'Charsadda',
 6.0: 'Chitral',
 7.0: 'D. I. Khan',
 8.0: 'Hangu',
 9.0: 'Haripur',
 10.0: 'Karak',
 11.0: 'Kohat',
 12.0: 'Kohistan',
 13.0: 'Lakki Marwat',
 14.0: 'Lower Dir',
 15.0: 'Malakand Protected Area',
 16.0: 'Mansehra',
 17.0: 'Mardan',
 18.0: 'Nowshera',
 19.0: 'Peshawar',
 20.0: 'Shangla',
 21.0: 'Swabi',
 22.0: 'Swat',
 23.0: 'Tank',
 24.0: 'Tor Ghar',
 25.0: 'Upper Dir'}
residence_mapping = {1.0: 'Urban', 2.0: 'Rural'}
gender_mapping = {1.0: 'Male', 2.0: 'Female'}

In [142]:
# Apply mappings to DataFrame columns
df['gender'] = df['gender'].replace(gender_mapping)
df['region'] = df['region'].replace(region_mapping)
df['residence'] = df['residence'].replace(residence_mapping)
df['district']=df['district'].replace(district_mapping)

In [143]:
# List of vaccines
vaccines = ['bcg_sampled','penta1_sampled','penta3_sampled','polio0_sampled','polio1_sampled','polio3_sampled','mcv1_sampled']

In [144]:
# List of vaccines
vaccines1 = ['bcg','penta1','penta3','polio0','polio1','polio3','mcv1']

In [145]:
regions=['Abbottabad','Bannu','Batagram','Buner','Charsadda','Chitral','D. I. Khan','Hangu','Haripur','Karak','Kohat','Kohistan','Lakki Marwat','Lower Dir','Malakand Protected Area','Mansehra','Mardan','Nowshera',
'Peshawar','Shangla','Swabi','Swat','Tank','Tor Ghar','Upper Dir']

In [179]:
districts=['Bannu','D.I. Khan','Hazara','Kohat','Mardan','Peshawar','Malakand']

In [120]:
# List of genders
genders = ['Female','Male']

In [121]:
residences=['Urban','Rural']

In [122]:
df['gender'].value_counts()

gender
Male      10881
Female    10478
Name: count, dtype: int64

In [123]:
# One record per (vaccine, gender) pair, vaccines in the outer position
results = [
    {
        'Vaccine': vaccine,
        'gender': gender,
        'Coverage': calculate_vaccination_coverage_child_gender(df, gender, vaccine),
    }
    for vaccine in vaccines1
    for gender in genders
]

# Tabulate the records: vaccines as rows, genders as columns
results_df = pd.DataFrame(results)
pivot_table1 = results_df.pivot_table(index='Vaccine', columns='gender', values='Coverage')
pivot_table1

gender,Female,Male
Vaccine,Unnamed: 1_level_1,Unnamed: 2_level_1
bcg,67.1,68.5
mcv1,50.5,51.6
penta1,68.5,69.7
penta3,51.6,52.0
polio0,74.6,74.9
polio1,78.1,76.7
polio3,54.5,53.7


In [124]:
# One record per (vaccine, residence) pair, vaccines in the outer position
results1 = [
    {
        'Vaccine': vaccine,
        'residence': residence,
        'Coverage': calculate_vaccination_coverage_child_residence(df, residence, vaccine),
    }
    for vaccine in vaccines1
    for residence in residences
]

# Tabulate the records: vaccines as rows, residence categories as columns
results1_df = pd.DataFrame(results1)
pivot_table1 = results1_df.pivot_table(index='Vaccine', columns='residence', values='Coverage')
pivot_table1

residence,Rural,Urban
Vaccine,Unnamed: 1_level_1,Unnamed: 2_level_1
bcg,62.8,81.4
mcv1,46.4,63.8
penta1,63.9,83.8
penta3,47.5,64.1
polio0,71.5,83.6
polio1,75.1,83.9
polio3,53.6,55.4


In [151]:
# One record per (vaccine, region) pair, vaccines in the outer position
results2 = [
    {
        'Vaccine': vaccine,
        'region': region,
        'Coverage': calculate_vaccination_coverage_region(df, region, vaccine),
    }
    for vaccine in vaccines1
    for region in regions
]

# Tabulate the records: vaccines as rows, regions as columns
results2_df = pd.DataFrame(results2)
pivot_table2 = results2_df.pivot_table(index='Vaccine', columns='region', values='Coverage')
pivot_table2
# Optional export of the table (disabled):
#pivot_table2.to_excel('Vaccinebyregion.xlsx', index=False)

region,Abbottabad,Bannu,Batagram,Buner,Charsadda,Chitral,D. I. Khan,Hangu,Haripur,Karak,...,Mansehra,Mardan,Nowshera,Peshawar,Shangla,Swabi,Swat,Tank,Tor Ghar,Upper Dir
Vaccine,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
bcg,96.0,38.3,55.9,66.4,84.7,89.9,48.0,65.8,92.2,35.0,...,82.2,93.1,87.6,82.6,34.3,91.7,91.3,38.8,11.0,76.3
mcv1,83.4,25.1,41.4,45.7,64.3,88.4,26.5,51.8,81.6,29.2,...,66.4,71.1,65.1,61.2,14.0,75.4,73.2,21.2,6.6,52.3
penta1,91.4,41.1,55.7,68.4,87.6,97.1,47.9,59.4,94.8,39.7,...,83.6,94.0,85.7,89.6,34.0,90.5,92.0,35.6,13.1,74.5
penta3,85.0,13.9,35.0,55.3,57.4,86.7,21.5,43.9,84.5,20.6,...,70.0,77.7,71.4,64.2,9.4,68.0,82.8,15.3,4.8,51.0
polio0,92.7,43.2,64.5,67.2,80.8,90.9,54.8,82.9,91.3,49.2,...,89.0,93.1,88.9,82.8,64.0,90.2,91.2,46.5,42.9,83.2
polio1,96.0,46.7,87.5,63.8,78.7,92.4,52.9,83.4,93.2,52.1,...,91.0,92.1,86.3,81.8,68.1,92.1,91.2,44.3,66.7,84.3
polio3,82.7,26.9,69.1,50.4,44.0,79.0,17.8,42.7,85.4,24.4,...,81.4,72.4,70.5,49.4,26.4,64.6,70.9,17.7,41.1,59.3


In [154]:
df['district'].value_counts()

district
Malakand     5536
Hazara       4639
Peshawar     3280
Kohat        2399
D.I. Khan    1885
Mardan       1836
Bannu        1784
Name: count, dtype: int64

In [172]:
# Filter inline: ``filtered_df`` is only created in a later cell, so relying
# on it here fails on a clean top-to-bottom run.
df[df['AG2'] == 1].groupby('district')['polio0'].sum()

district
Bannu        158.0
D.I. Khan    171.0
Hazara       574.0
Kohat        346.0
Malakand     850.0
Mardan       335.0
Peshawar     532.0
Name: polio0, dtype: object

In [181]:
# Coverage for every (vaccine, district) pair.
# The helper applies the AG2 == 1 filter itself, so the full frame is passed;
# the cell no longer depends on ``filtered_df``, which is created further down.
results2 = []

for vaccine in vaccines1:
    for district in districts:
        coverage = calculate_vaccination_coverage_district(df, district, vaccine)
        results2.append({'Vaccine': vaccine, 'district': district, 'Coverage': coverage})

# Convert the list of dictionaries into a DataFrame
results2_df = pd.DataFrame(results2)
# Display the results as a table: vaccines as rows, districts as columns
pivot_table2 = results2_df.pivot_table(index='Vaccine', columns='district', values='Coverage')
pivot_table2

district,Bannu,D.I. Khan,Hazara,Kohat,Malakand,Mardan,Peshawar
Vaccine,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
bcg,37.4,43.6,58.8,59.4,76.5,92.6,84.3
mcv1,23.2,23.9,47.6,46.7,57.6,72.7,62.9
penta1,35.7,41.8,58.1,60.3,79.9,92.7,88.1
penta3,12.0,18.4,47.9,44.9,63.7,74.1,64.5
polio0,51.0,50.9,71.3,74.4,80.6,92.0,83.8
polio1,54.9,48.8,83.0,76.9,81.3,92.1,82.1
polio3,36.7,17.8,69.0,41.4,60.5,69.6,53.1


In [126]:
pivot_table2.to_clipboard(index=True, header=True)

## Retention rate

In [182]:
def calculate_retention_rate(df, group_col, dose1_col, dose2_col):
    """
    Compute, per group, the later-dose total as a percentage of the first-dose total.

    Parameters:
    df (pandas.DataFrame): Input DataFrame containing vaccination data.
    group_col (str): Column to group by.
    dose1_col (str): Column holding the first-dose indicator.
    dose2_col (str): Column holding the later-dose indicator.

    Returns:
    pandas.DataFrame: Single-row table labelled 'retention_rate' with one column per group.
    """
    # Per-group totals of both dose columns
    dose_totals = (
        df.groupby([group_col])
        .agg(sum_dose1=(dose1_col, 'sum'), sum_dose2=(dose2_col, 'sum'))
        .reset_index()
    )

    # Retention = 100 * (1 - share of first-dose recipients lacking the later dose)
    first_total = dose_totals['sum_dose1']
    dropped_out = first_total - dose_totals['sum_dose2']
    dose_totals['retention_rate'] = ((1 - (dropped_out / first_total)) * 100)

    return dose_totals.pivot_table(columns=group_col, values='retention_rate')
  

In [183]:
 # Rows with AG2 == 1; this subset is reused by the retention-rate,
 # zero-dose and fully/non-vaccinated cells.
 filtered_df = df[(df['AG2']==1)]

In [186]:
# Call the calculate_retention_rate function
result_df = round(calculate_retention_rate(filtered_df, 'district', 'penta1', 'penta3'),1)

# Copy the result to the clipboard
#result_df.to_clipboard(index=True, header=True)
result_df

district,Bannu,D.I. Khan,Hazara,Kohat,Malakand,Mardan,Peshawar
retention_rate,33.663366,44.0,82.33945,74.458874,79.773692,79.746835,73.175966


In [187]:
# Call the calculate_retention_rate function
result_df = calculate_retention_rate(filtered_df, 'district', 'polio1', 'polio3')

# Copy the result to the clipboard
#result_df.to_clipboard(index=True, header=True)
result_df

district,Bannu,D.I. Khan,Hazara,Kohat,Malakand,Mardan,Peshawar
retention_rate,66.863905,36.419753,83.033033,53.781513,74.407583,75.535168,64.636542


In [131]:
calculate_retention_rate(filtered_df, 'residence', 'penta1', 'penta3')

residence,Rural,Urban
retention_rate,74.193548,76.510067


In [132]:
# Call the calculate_retention_rate function
calculate_retention_rate(filtered_df, 'residence', 'polio1', 'polio3')

residence,Rural,Urban
retention_rate,71.461717,66.097838


In [133]:
calculate_retention_rate(filtered_df, 'gender', 'penta1', 'penta3')

gender,Female,Male
retention_rate,75.34965,74.525969


In [134]:
calculate_retention_rate(filtered_df, 'gender', 'polio1', 'polio3')

gender,Female,Male
retention_rate,69.793746,70.019595


## National retention rate

In [189]:
polio_retention_rate = ((1 - ((filtered_df['polio1'].sum() - filtered_df['polio3'].sum()) / filtered_df['polio1'].sum())) * 100)
polio_retention_rate

69.90771259063942

In [190]:
penta_retention_rate = ((1 - ((filtered_df['penta1'].sum() - filtered_df['penta3'].sum()) / filtered_df['penta1'].sum())) * 100)
penta_retention_rate

74.925753075944

## Zero dose children

In [242]:
# Count the number of children who never received the Penta vaccination
zero_dose_children = df[df['penta1'] == 0]['penta1'].count()

# Count the total number of children in the dataset
total_children = len(df)

# Calculate the proportion of zero dose children
proportion_zero_dose = (zero_dose_children / total_children) * 100

print(f"Proportion of zero dose children: {proportion_zero_dose:.2f}")

Proportion of zero dose children: 19.23


In [243]:
def calculate_zero_dose_proportion(df, vaccine_column):
    """Return the percentage of children whose ``vaccine_column`` value is 0.

    Parameters:
    df (pandas.DataFrame): Data containing ``vaccine_column``.
    vaccine_column (str): Indicator column; 0 means the dose was not received.

    Returns:
    float: ``zero-dose rows / rows with a non-missing value * 100``, or NaN
    when no row has a non-missing value.
    """
    status = df[vaccine_column]

    # Denominator: children with a recorded status in the requested column
    # (it used to be hard-coded to 'penta1' whatever column was asked for).
    total_children = status.count()
    if total_children == 0:
        return float('nan')

    zero_dose_children = (status == 0).sum()
    return (zero_dose_children / total_children) * 100


In [244]:
zero_dose_proportion = calculate_zero_dose_proportion(filtered_df, 'penta1')
print(f"Proportion of zero dose children: {zero_dose_proportion:.2f}%")


Proportion of zero dose children: 30.88%


In [221]:
def calculate_zero_dose_percentage_by_group(df, vaccine_column, group_column):
    """Return, per group, the percentage of rows whose ``vaccine_column`` is 0.

    Parameters:
    df (pandas.DataFrame): Data containing both columns.
    vaccine_column (str): Indicator column; 0 means the dose was not received.
    group_column (str): Column to group by.

    Returns:
    pandas.Series: Percentages rounded to 1 decimal, indexed by group. The
    denominator is the number of rows in the group (missing statuses included).
    """
    # All rows per group
    total_children_by_group = df.groupby(group_column).size()

    # Zero-dose rows per group
    zero_dose_children_by_group = df[df[vaccine_column] == 0].groupby(group_column).size()

    # A group without any zero-dose row is absent from the filtered counts;
    # align to the full group index so it reports 0.0 instead of NaN.
    zero_dose_children_by_group = zero_dose_children_by_group.reindex(
        total_children_by_group.index, fill_value=0
    )

    return round((zero_dose_children_by_group / total_children_by_group) * 100, 1)


In [223]:
# Zero-dose share (penta1 == 0) per district.
# The column name was missing its closing quote, which made the cell a syntax error.
zero_dose_percentage_by_group = calculate_zero_dose_percentage_by_group(df, 'penta1', 'district')

print(zero_dose_percentage_by_group)

district
Bannu        38.2
D.I. Khan    26.1
Hazara       24.8
Kohat        23.3
Malakand     14.1
Mardan        7.8
Peshawar      9.1
dtype: float64


In [224]:
zero_dose_percentage_by_group = calculate_zero_dose_percentage_by_group(df, 'penta1', 'residence')

print(zero_dose_percentage_by_group)

residence
Rural    22.1
Urban    11.1
dtype: float64


In [225]:
zero_dose_percentage_by_group = calculate_zero_dose_percentage_by_group(df, 'penta1', 'gender')

print(zero_dose_percentage_by_group)

gender
Female    19.9
Male      18.5
dtype: float64


# Fully vaccinated

In [247]:
# Calculate the number of fully vaccinated kids
fully_vaccinated = filtered_df[(filtered_df['bcg']==1) & (filtered_df['polio3']==1) & (filtered_df['mcv1']==1) & (filtered_df['penta3']==1)]
num_fully_vaccinated = len(fully_vaccinated)

# Calculate the total number of kids
total_kids =len(filtered_df)

# Calculate the percentage of fully vaccinated kids
percentage_fully_vaccinated = (num_fully_vaccinated / total_kids) * 100
percentage_fully_vaccinated

35.118306351183065

In [248]:
# Keep only children with a known status for every antigen in the definition.
# The result gets its own name so the shared ``filtered_df`` is not overwritten;
# overwriting it would change the denominator of any cell that runs afterwards.
complete_status_df = filtered_df.dropna(subset=['bcg', 'polio3', 'mcv1', 'penta3'])

# Children with BCG, polio3, MCV1 and penta3 all recorded as given
fully_vaccinated = complete_status_df[
    (complete_status_df['bcg']==1)
    & (complete_status_df['polio3']==1)
    & (complete_status_df['mcv1']==1)
    & (complete_status_df['penta3']==1)
]
num_fully_vaccinated = len(fully_vaccinated)

# Denominator: children with complete status information
total_kids = len(complete_status_df)

# Percentage of fully vaccinated kids among complete cases
percentage_fully_vaccinated = (num_fully_vaccinated / total_kids) * 100
percentage_fully_vaccinated

41.75303523837726

# Non Vaccinated 

In [241]:
# Calculate the number of no vaccinated kids
no_vaccinated = filtered_df[(filtered_df['bcg']==0) & (filtered_df['polio1']==0) & (filtered_df['mcv1']==0) & (filtered_df['penta1']==0)]
num_no_vaccinated = len(no_vaccinated)

# Calculate the total number of kids
total_kids =len(filtered_df)


# Calculate the percentage of no vaccinated kids
percentage_no_vaccinated = (num_no_vaccinated / total_kids) * 100
percentage_no_vaccinated

13.250311332503115