# Pakistan DHS 2017-18

#### WEIGHTING
###### Alert on Weights used in the Pakistan 2017-18 Survey
Tables in Pakistan were not produced for the full country (8 regions), and some regions (AJK and GB) are
not included in the national totals. The standard weights (HV005/HV028/V005/D005/MV005) are used to
generate tabulations for the 6 regions that make up the national totals.
To generate any tabulations for the other 2 regions (GB or AJK, with codes 5 and 7 respectively), use the
CS weights (SHV005/SHV028/SV005/SD005/SMV005).
The CS weights are 0 for regions other than 5 and 7. The
standard weights are 0 for regions 5 and 7.

###### Define Functions

In [2]:
#import required packages
import pyreadstat
import warnings
warnings.filterwarnings("ignore")
from googletrans import Translator
import matplotlib.pyplot as plt
import plotly.express as px
import numpy as np
import pandas as pd

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [144]:
# Function to calculate national vaccine coverage between specific age group
def calculate_vaccination_coverage(df, min_age, max_age, vaccine_column):
    """
    Print and return the national coverage (%) of one vaccine within an age band.

    Parameters:
    df (pandas.DataFrame): Input DataFrame with a 'child_age' column and the vaccine column.
    min_age: Lower bound on 'child_age' (inclusive).
    max_age: Upper bound on 'child_age' (inclusive).
    vaccine_column (str): Column that is summed to count vaccinated children
        (presumably a 0/1 indicator -- confirm against the caller).

    Returns:
    The percentage vaccinated, or 0 when no row falls inside the age band.
    """
    # Keep only the rows whose child_age lies in [min_age, max_age]
    in_age_band = df[(df['child_age'] >= min_age) & (df['child_age'] <= max_age)]

    # The denominator is every row in the band, including rows whose vaccine
    # value is missing (unlike a non-null count).
    children_in_band = len(in_age_band)
    if children_in_band == 0:
        # Avoid a 0/0 division (which would yield nan); report and return 0 instead
        print("No children found for the specified age range.")
        return 0

    percentage_vaccinated = (in_age_band[vaccine_column].sum() / children_in_band) * 100
    print("{} Vaccination Coverage for children between {} to {} months for Pakistan: {:.2f}%".format(vaccine_column, min_age, max_age, percentage_vaccinated))
    return percentage_vaccinated


# Example usage:
# calculate_vaccination_coverage(dfKR, 12, 23, 'bcg')


In [145]:
# Function to calculate vaccination coverage for each child_gender between specific age group
def calculate_vaccination_coverage_child_gender(df, min_age, max_age, child_gender, vaccine_column):
    """
    Return the coverage (%) of one vaccine for children of one gender in an age band.

    Parameters:
    df (pandas.DataFrame): Input DataFrame with 'child_age', 'child_gender' and the vaccine column.
    min_age: Lower bound on 'child_age' (inclusive).
    max_age: Upper bound on 'child_age' (inclusive).
    child_gender: Value that 'child_gender' must equal.
    vaccine_column (str): Column that is summed to count vaccinated children.

    Returns:
    Sum of the vaccine column divided by its non-null count, times 100;
    0 (after printing a message) when there are no non-null values.
    """
    # Row selector: inside the inclusive age band AND matching gender
    age_ok = df['child_age'].between(min_age, max_age)
    gender_ok = df['child_gender'] == child_gender
    doses = df.loc[age_ok & gender_ok, vaccine_column]

    # Only non-null answers count towards the denominator
    answered = doses.count()
    if not answered:
        print("No children found for the specified age range and child_gender.")
        return 0

    return (doses.sum() / answered) * 100


In [146]:
# Function to calculate vaccination coverage for each region between specific age group
def calculate_vaccination_coverage_region(df, min_age, max_age, region, vaccine_column):
    """
    Return the coverage (%) of one vaccine for children of one region in an age band.

    Parameters:
    df (pandas.DataFrame): Input DataFrame with 'child_age', 'region' and the vaccine column.
    min_age: Lower bound on 'child_age' (inclusive).
    max_age: Upper bound on 'child_age' (inclusive).
    region: Value that 'region' must equal.
    vaccine_column (str): Column that is summed to count vaccinated children.

    Returns:
    Sum of the vaccine column divided by its non-null count, times 100;
    0 (after printing a message) when there are no non-null values.
    """
    # One combined selector: matching region, age within the inclusive band
    selector = df['child_age'].between(min_age, max_age) & (df['region'] == region)
    responses = df.loc[selector, vaccine_column]

    # Missing vaccine values are excluded from the denominator
    n_responses = responses.count()
    if n_responses == 0:
        print("No children found for the specified age range and region: {}.".format(region))
        return 0

    vaccinated = responses.sum()
    return (vaccinated / n_responses) * 100


In [147]:
# Function to calculate vaccination coverage for each residence i.e. urban/rural between specific age group
def calculate_vaccination_coverage_residence(df, min_age, max_age, residence, vaccine_column):
    """
    Return the coverage (%) of one vaccine for children of one residence type in an age band.

    Parameters:
    df (pandas.DataFrame): Input DataFrame with 'child_age', 'residence' and the vaccine column.
    min_age: Lower bound on 'child_age' (inclusive).
    max_age: Upper bound on 'child_age' (inclusive).
    residence: Value that 'residence' must equal.
    vaccine_column (str): Column that is summed to count vaccinated children.

    Returns:
    Sum of the vaccine column divided by its non-null count, times 100;
    0 (after printing a message) when there are no non-null values.
    """
    # Filter rows where child_age is between min_age and max_age and residence matches
    filtered_df = df[(df['child_age'] >= min_age) &
                     (df['child_age'] <= max_age) &
                     (df['residence'] == residence)]

    # Calculate the percentage of children vaccinated with the specified vaccine
    total_children = filtered_df[vaccine_column].count()  # Count non-null values in the vaccine_column
    if total_children == 0:
        # The message names the residence (it previously said "region" by copy-paste)
        print("No children found for the specified age range and residence: {}.".format(residence))
        return 0
    percentage_vaccinated = (filtered_df[vaccine_column].sum() / total_children) * 100
    return percentage_vaccinated

In [148]:
def calculate_retention_rate(df, group_col, dose1_col, dose2_col):
    """
    Compute, per group, how much of the first dose total is retained at the second dose.

    Parameters:
    df (pandas.DataFrame): Input DataFrame containing vaccination data.
    group_col (str): Column name for the grouping variable.
    dose1_col (str): Column name representing the first dose.
    dose2_col (str): Column name representing the second dose.

    Returns:
    pandas.DataFrame: Pivot table with one column per value of group_col,
    holding the 'retention_rate' (in percent) for that group.
    """
    # Per-group totals of both doses, with the group key back as a regular column
    named_sums = {
        'sum_dose1': (dose1_col, 'sum'),
        'sum_dose2': (dose2_col, 'sum'),
    }
    totals = df.groupby([group_col]).agg(**named_sums).reset_index()

    # Share of first-dose recipients lost before the second dose, then its complement in percent
    first_dose_total = totals['sum_dose1']
    dropped_share = (first_dose_total - totals['sum_dose2']) / first_dose_total
    totals['retention_rate'] = (1 - dropped_share) * 100

    # Spread the groups across the columns
    return totals.pivot_table(columns=group_col, values='retention_rate')
  

## Vaccination Coverage with Pakistan dataset

In [74]:
# Reading data into pandas dataframe
# The path uses forward slashes throughout: the original mixed in a single backslash
# before "PKKR71SV", which Python parses as an invalid escape sequence.
KRdata, meta = pyreadstat.read_sav("C:/Users/swalke/PATH BMGF RISP Dropbox/BMGF RISP Project/Quant analysis/RAW DATA/Pakistan/DHS Pakistan 2017-18 All Datasets/PKKR71SV/PKKR71FL.SAV")

In [None]:
# Stata command left in a Python cell (not valid Python, so kept only as a comment):
#   use data.dta

In [125]:
# Filter the data for child age between 12 to 23 months
# .copy() makes KRvac an independent DataFrame, so the columns added to it later
# are not chained assignments onto a slice of KRdata.
KRvac = KRdata[(KRdata['B19'] >= 12) & (KRdata['B19'] <= 23)].copy()

In [148]:
# Access variable labels
# (meta is the metadata object returned with the data by pyreadstat.read_sav)
variable_labels = meta.column_labels

# Access value labels for a specific variable
# H1: code -> label mapping; .get() presumably yields None if "H1" has no labels -- confirm
value_labels = meta.variable_value_labels.get("H1")
value_labels

{0.0: 'No card',
 1.0: 'Yes, seen',
 2.0: 'Yes, not seen',
 3.0: 'No longer has card'}

In [149]:
KRdata['H1'].value_counts()

H1
1.0    3847
0.0    1611
3.0    1329
2.0     366
Name: count, dtype: int64

In [150]:
# Access variable labels
# (meta is the metadata object returned with the data by pyreadstat.read_sav)
variable_labels = meta.column_labels

# Access value labels for a specific variable
# H2: code -> label mapping; H2 is the column the BCG indicators are recoded from
value_labels = meta.variable_value_labels.get("H2")
value_labels

{0.0: 'No',
 1.0: 'Vaccination date on card',
 2.0: 'Reported by mother',
 3.0: 'Vaccination marked on card',
 8.0: "Don't know"}

In [154]:
KRvac['H2'].value_counts()

H2
1.0    1324
2.0     598
0.0     369
3.0      20
8.0       3
Name: count, dtype: int64

In [152]:
# Access variable labels
# (meta is the metadata object returned with the data by pyreadstat.read_sav)
variable_labels = meta.column_labels

# Access value labels for a specific variable
# H3: code -> label mapping; H3 is the column the DPT1 indicator is recoded from
value_labels = meta.variable_value_labels.get("H3")
value_labels

{0.0: 'No',
 1.0: 'Vaccination date on card',
 2.0: 'Reported by mother',
 3.0: 'Vaccination marked on card',
 8.0: "Don't know"}

In [153]:
KRdata['H3'].value_counts()

H3
1.0    3436
0.0    1825
2.0    1799
8.0      59
3.0      34
Name: count, dtype: int64

In [193]:
KRdata['H59'].value_counts()

Series([], Name: count, dtype: int64)

In [126]:
KRvac['source'].value_counts()

source
card      1796
mother     518
Name: count, dtype: int64

In [167]:
# Scale the V005 weight down by 1,000,000 and store it as 'sample_weight'
# NOTE(review): V005 is the standard weight, which the weighting alert at the top says
# is 0 for regions 5 and 7 (GB/AJK) -- confirm those regions are meant to drop out here.
KRvac['sample_weight']=KRvac['V005']/1000000

In [183]:
def calculate_and_print_vaccine_coverage(dataframe, column_name, weight_column=None):
    """
    Print the coverage (%) of one vaccination indicator, optionally survey-weighted.

    Parameters:
    dataframe (pandas.DataFrame): Input DataFrame containing the indicator column.
    column_name (str): Indicator column; missing values are left out of the denominator.
    weight_column (str, optional): Column of sample weights. When omitted the
        coverage is unweighted (sum of the indicator over its non-null count).

    Returns:
    None. The rounded percentage is printed.
    """
    column = dataframe[column_name]

    if weight_column is None:
        numerator = column.sum()
        denominator = column.count()
    else:
        # Weighted share: only rows with a non-missing indicator contribute,
        # to both the weighted numerator and the total weight.
        answered = column.notna()
        weights = dataframe.loc[answered, weight_column]
        numerator = (column[answered] * weights).sum()
        denominator = weights.sum()

    if denominator == 0:
        # Nothing to divide by (no non-missing values, or zero total weight)
        print("No non-missing values found for column", column_name)
        return

    coverage = (numerator / denominator) * 100
    rounded_coverage = round(coverage, 2)
    print("Vaccine coverage for column", column_name, ":", rounded_coverage, "%")

In [197]:
# Create 'ch_bcg_either', 'ch_bcg_moth', and 'ch_bcg_card' columns based on 'H2'
# H2 codes: 0 = No, 1 = Vaccination date on card, 2 = Reported by mother,
#           3 = Vaccination marked on card, 8 = Don't know
bcg_code = KRvac['H2']
bcg_recodes = {
    # vaccinated according to either source; "Don't know" becomes missing
    'ch_bcg_either': {1: 1, 2: 1, 3: 1, 0: 0, 8: np.nan},
    # vaccinated according to the mother's report only
    'ch_bcg_moth': {1: 0, 2: 1, 3: 0, 0: 0, 8: 0},
    # vaccinated according to the card only
    'ch_bcg_card': {1: 1, 2: 0, 3: 1, 0: 0, 8: 0},
}
for new_column, recode in bcg_recodes.items():
    KRvac[new_column] = bcg_code.map(recode)

In [175]:
KRvac['ch_bcg_moth'].value_counts()

ch_bcg_moth
0.0    1716
1.0     598
Name: count, dtype: int64

In [176]:
KRvac['ch_bcg_card'].value_counts()

ch_bcg_card
1.0    1344
0.0     970
Name: count, dtype: int64

In [198]:
KRvac['ch_bcg_either'].value_counts()

ch_bcg_either
1.0    1942
0.0     369
Name: count, dtype: int64

In [180]:
KRvac['ch_bcg_either'].sum()

1942

In [181]:
KRvac['ch_bcg_either'].count()

2464

In [200]:
# Mapping variables
# Every "either source" indicator uses the same recode: codes 1, 2 and 3 count as
# vaccinated, 0 as not vaccinated, and 8 becomes missing.
either_recode = {1: 1, 2: 1, 3: 1, 0: 0, 8: np.nan}

# New indicator column -> source column it is recoded from
# (the duplicated 'ch_polio1_either' assignment has been removed)
either_sources = {
    'ch_dpt1_either': 'H3',
    'ch_dpt2_either': 'H5',
    'ch_dpt3_either': 'H7',
    'ch_polio1_either': 'H4',
    'ch_polio2_either': 'H6',
    'ch_polio3_either': 'H8',
    'ch_measles_either': 'H9',
}
for new_column, source_column in either_sources.items():
    KRvac[new_column] = KRvac[source_column].map(either_recode)

In [None]:
##### Vaccination Coverage

In [201]:
# Print the coverage of every "either source" indicator, in the order listed
coverage_columns = [
    'ch_bcg_either',
    'ch_dpt1_either',
    'ch_dpt2_either',
    'ch_dpt3_either',
    'ch_polio1_either',
    'ch_polio2_either',
    'ch_polio3_either',
    # rota indicators are disabled:
    # 'ch_rota1_either',
    # 'ch_rota2_either',
    # 'ch_rota3_either',
    'ch_measles_either',
]
for coverage_column in coverage_columns:
    calculate_and_print_vaccine_coverage(KRvac, coverage_column)

Vaccine coverage for column ch_bcg_either : 84.03 %
Vaccine coverage for column ch_dpt1_either : 82.16 %
Vaccine coverage for column ch_dpt2_either : 77.88 %
Vaccine coverage for column ch_dpt3_either : 71.68 %
Vaccine coverage for column ch_polio1_either : 93.38 %
Vaccine coverage for column ch_polio2_either : 88.48 %
Vaccine coverage for column ch_polio3_either : 83.98 %
Vaccine coverage for column ch_measles_either : 68.69 %
