# Somalia HDS 2020

#### RESPONSE RATE
###### A total of 16,360 households were selected for the sample, of which 15,870 were occupied. Of the occupied households, 15,826 were successfully interviewed, yielding a response rate of 99.7 percent. The SHDS 2020 interviewed 16,486 women: 11,876 ever-married women and 4,610 never-married women.
#### WEIGHTING
###### Design weights and sampling (survey) weights were computed for every household and for every ever-married and never-married woman selected to participate in the SHDS 2020. A design weight is the inverse of the probability of selecting a housing unit to be interviewed. The sampling weight of a household is the design weight corrected for non-response, including other adjustments where necessary.

In [1]:
#import required packages
import pyreadstat
import warnings
warnings.filterwarnings("ignore")
from googletrans import Translator
import matplotlib.pyplot as plt
import plotly.express as px
import numpy as np
import pandas as pd

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
def replace_values_in_columns(df, columns_range):
    """
    Recode coded values in the given columns of a DataFrame.

    The recoding is fixed: 1, 2 and 3 become True, 0 becomes False and
    8 becomes NaN. The selected columns are overwritten on the DataFrame
    that was passed in, and that same DataFrame is returned.

    Parameters:
        df (pandas.DataFrame): The DataFrame whose columns are recoded.
        columns_range (list): Labels of the columns to recode.

    Returns:
        pandas.DataFrame: ``df`` itself, with the selected columns recoded.
    """
    # Lookup from stored code to recoded value
    code_lookup = {
        1: True,
        2: True,
        3: True,
        0: False,
        8: np.nan,
    }

    # Recode a copy of the selected columns, then write it back onto df
    recoded = df[columns_range].replace(code_lookup)
    df[columns_range] = recoded

    return df

In [46]:
def calculate_vaccination_coverage(df, min_age, max_age, vaccine_column):
    """
    Print and return the coverage of one vaccine column for an age band.

    Coverage is the sum of ``vaccine_column`` divided by the number of
    non-null values in that column, expressed as a percentage and rounded
    to one decimal. Only rows whose 'Child_Age' lies between ``min_age``
    and ``max_age`` (both inclusive) are used.

    Parameters:
        df (pandas.DataFrame): Data with a 'Child_Age' column and ``vaccine_column``.
        min_age: Lower bound on 'Child_Age' (inclusive).
        max_age: Upper bound on 'Child_Age' (inclusive).
        vaccine_column (str): Name of the column to summarise.

    Returns:
        The coverage percentage, or 0 when the age band holds no non-null
        value for ``vaccine_column``.
    """
    # Keep rows where Child_Age is between min_age and max_age
    in_age_range = (df['Child_Age'] >= min_age) & (df['Child_Age'] <= max_age)
    filtered_df = df[in_age_range]

    # Denominator: non-null values only
    total_children = filtered_df[vaccine_column].count()
    if total_children == 0:
        # Avoid a 0/0 division (which would give NaN); the gender/region/
        # residence variants return 0 in this situation as well.
        print("No children found for the specified age range.")
        return 0

    percentage_vaccinated = round((filtered_df[vaccine_column].sum() / total_children) * 100, 1)
    print("{} Vaccination Coverage for children between {} to {} months using KR dataset: {:.2f}%".format(vaccine_column, min_age, max_age, percentage_vaccinated))
    return percentage_vaccinated


# Example usage:
# calculate_vaccination_coverage(dfKR, 12, 23, 'BCG')


In [51]:
def calculate_vaccination_coverage_gender(df, min_age, max_age, gender, vaccine_column):
    """
    Return the coverage of one vaccine column for one gender and age band.

    Rows are kept when 'Child_Age' lies between ``min_age`` and ``max_age``
    (both inclusive) and 'Gender' equals ``gender``. Coverage is the sum of
    ``vaccine_column`` over the number of its non-null values, as a
    percentage rounded to one decimal.

    Returns 0 (after printing a message) when the selection has no
    non-null value for ``vaccine_column``.
    """
    age = df['Child_Age']
    selection = (age >= min_age) & (age <= max_age) & (df['Gender'] == gender)
    doses = df.loc[selection, vaccine_column]

    # Only non-null answers count towards the denominator
    answered = doses.count()
    if answered == 0:
        print("No children found for the specified age range and gender.")
        return 0

    share = doses.sum() / answered
    return round(share * 100, 1)


In [48]:
def calculate_vaccination_coverage_region(df, min_age, max_age, region, vaccine_column):
    """
    Return the coverage of one vaccine column for one region and age band.

    Rows are kept when 'Child_Age' lies between ``min_age`` and ``max_age``
    (both inclusive) and 'Region' equals ``region``. Coverage is the sum of
    ``vaccine_column`` over the number of its non-null values, as a
    percentage rounded to one decimal.

    Returns 0 (after printing a message naming the region) when the
    selection has no non-null value for ``vaccine_column``.
    """
    # Narrow down in two steps: age band first, then region
    in_age_band = df[df['Child_Age'].between(min_age, max_age)]
    in_region = in_age_band[in_age_band['Region'] == region]

    vaccine_values = in_region[vaccine_column]
    non_null_count = vaccine_values.count()
    if non_null_count == 0:
        print("No children found for the specified age range and region: {}.".format(region))
        return 0

    return round((vaccine_values.sum() / non_null_count) * 100, 1)


In [110]:
# Function to calculate vaccination coverage for one residence category within a specific age group
def calculate_vaccination_coverage_residence(df, min_age, max_age, residence, vaccine_column):
    """
    Return the coverage of one vaccine column for one residence category and age band.

    Rows are kept when 'Child_Age' lies between ``min_age`` and ``max_age``
    (both inclusive) and 'Residence' equals ``residence``. Coverage is the
    sum of ``vaccine_column`` over the number of its non-null values, as a
    percentage. The value is returned unrounded.

    Returns 0 (after printing a message naming the residence category) when
    the selection has no non-null value for ``vaccine_column``.
    """
    # Filter rows where Child_Age is between min_age and max_age and residence matches
    filtered_df = df[(df['Child_Age'] >= min_age) &
                     (df['Child_Age'] <= max_age) &
                     (df['Residence'] == residence)]

    # Denominator: non-null values in the vaccine_column
    total_children = filtered_df[vaccine_column].count()
    if total_children == 0:
        # The message names the residence category being filtered on
        print("No children found for the specified age range and residence: {}.".format(residence))
        return 0
    percentage_vaccinated = (filtered_df[vaccine_column].sum() / total_children) * 100
    return percentage_vaccinated

In [6]:
# Reading data into pandas dataframe
df, meta = pyreadstat.read_sav("C:/Users/swalke/Desktop/RISP/HDS 2020 Somalia/SOIR71SV_3.sav")

In [None]:
df.shape

In [None]:
df.head()

## Vaccination Coverage with KR dataset

In [94]:
# Reading data into pandas dataframe
dfKR, meta = pyreadstat.read_sav("C:/Users/swalke/Desktop/RISP/HDS 2020 Somalia/SOKR71SV_4.sav")

In [8]:
dfKR.shape

(21218, 559)

In [9]:
dfKR.head()

Unnamed: 0,id,CASEID,ACTUAL_EA_CODE,VHHID,V000,V001,V002,V003,V005,V006,...,H46B,H47,G109,G109A,G112,G113,G113A,G114,M1G,NORMWT_MS7
0,1.0,1002.0,110100100000.0,10.0,SO7,8.0,51.0,2.0,0.557683,8.0,...,,,,,,,,,1.0,0.557683
1,2.0,2202.0,110100100000.0,22.0,SO7,8.0,68.0,2.0,0.557683,8.0,...,,,1.0,2.0,,,,,1.0,0.557683
2,3.0,2202.0,110100100000.0,22.0,SO7,8.0,68.0,2.0,0.557683,8.0,...,,,,,,,,,2.0,0.557683
3,4.0,2202.0,110100100000.0,22.0,SO7,8.0,68.0,2.0,0.557683,8.0,...,,,,,,,,,3.0,0.557683
4,5.0,2202.0,110100100000.0,22.0,SO7,8.0,68.0,2.0,0.557683,8.0,...,,,,,,,,,4.0,0.557683


In [95]:
# rename columns in dataframe
dfKR = dfKR.rename(columns={'CASEID': 'CaseId', 
                            'V102': 'Residence', 'H1B':'Child_Age',
                            'H2':'BCG','H3':'DPT1','H4':'Polio1','H5':'DPT2',
                            'H6':'Polio2','H7':'DPT3','H8':'Polio3','H9':'mcv',
                           'H0':'Polio0','V005':'sample_weight','B4':'Gender','V024':'Region'})

In [96]:
dfKR['Residence'].value_counts()

Residence
1.0    9215
3.0    6146
2.0    5857
Name: count, dtype: int64

In [97]:
# Define mapping variables
region_mapping = {11:'Awdal'
,12:'Woqooyi Galbeed'
,13:'Togdheer'
,14:'Sool'
,15:'Sanaag'
,16:'Bari'
,17:'Nugaal'
,18:'Mudug'
,19:'Galgaduud'
,20:'Hiraan'
,21:'Middle Shabelle'
,22:'Banadir'
,23:'Lower Shabelle'
,24:'Bay'
,25:'Bakool'
,26:'Gedo'
,27:'Middle Juba'
,28:'Lower Juba'}
residence_mapping = {1:'Urban',
                 2:'Rural',
                 3:'Nomadic'}
gender_mapping = {1:'Male',
                 2:'Female'}

In [98]:
dfKR['Gender'].value_counts()

Gender
1.0    11031
2.0    10187
Name: count, dtype: int64

In [99]:
dfKR['Region'].value_counts()

Region
22.0    2275
15.0    1884
14.0    1883
13.0    1785
12.0    1540
25.0    1333
28.0    1242
16.0    1215
26.0    1197
17.0    1165
18.0    1124
19.0    1075
21.0    1045
11.0    1018
20.0     976
24.0     461
Name: count, dtype: int64

In [100]:
# Apply mappings to DataFrame columns
dfKR['Gender'] = dfKR['Gender'].replace(gender_mapping)
dfKR['Region'] = dfKR['Region'].replace(region_mapping)
dfKR['Residence'] = dfKR['Residence'].replace(residence_mapping)

In [101]:
#Take the 'women's individual sample weight' variable and divide by 1 million.

#dfKR['sample_weight'] = dfKR['sample_weight']/1000000

In [102]:
# Recoding applied to every dose column: 0 -> False, 1/2/3 -> True, 8 -> NaN
mapping = {0: False, 1: True, 2: True, 3: True, 8: np.nan}

# Recode each dose column in turn
for dose_column in ('BCG', 'DPT1', 'DPT2', 'DPT3',
                    'Polio1', 'Polio2', 'Polio3', 'Polio0', 'mcv'):
    dfKR[dose_column] = dfKR[dose_column].replace(mapping)

In [103]:
# For every dose column, add a '<name>_sampled' column holding the dose value
# multiplied by the row's sample_weight
for dose_column in ('BCG', 'DPT1', 'DPT2', 'DPT3',
                    'Polio1', 'Polio2', 'Polio3', 'Polio0', 'mcv'):
    dfKR[dose_column + '_sampled'] = dfKR[dose_column] * dfKR['sample_weight']

In [104]:
# Print the coverage for children aged 12 to 23 months, one vaccine column per call.
# The value of the last call is what the cell displays.
calculate_vaccination_coverage(dfKR, 12, 23, 'BCG_sampled')
calculate_vaccination_coverage(dfKR, 12, 23, 'DPT1_sampled')
calculate_vaccination_coverage(dfKR, 12, 23, 'DPT2_sampled')
calculate_vaccination_coverage(dfKR, 12, 23, 'DPT3_sampled')
# NOTE(review): this call uses the 'Polio0' column while every other call here
# uses the '_sampled' column - confirm this is intended.
calculate_vaccination_coverage(dfKR, 12, 23, 'Polio0')
calculate_vaccination_coverage(dfKR, 12, 23, 'Polio1_sampled')
calculate_vaccination_coverage(dfKR, 12, 23, 'Polio2_sampled')
calculate_vaccination_coverage(dfKR, 12, 23, 'Polio3_sampled')
calculate_vaccination_coverage(dfKR, 12, 23, 'mcv_sampled')

BCG_sampled Vaccination Coverage for children between 12 to 23 months using KR dataset: 87.10%
DPT1_sampled Vaccination Coverage for children between 12 to 23 months using KR dataset: 72.50%
DPT2_sampled Vaccination Coverage for children between 12 to 23 months using KR dataset: 61.60%
DPT3_sampled Vaccination Coverage for children between 12 to 23 months using KR dataset: 54.60%
Polio0 Vaccination Coverage for children between 12 to 23 months using KR dataset: 98.60%
Polio1_sampled Vaccination Coverage for children between 12 to 23 months using KR dataset: 96.50%
Polio2_sampled Vaccination Coverage for children between 12 to 23 months using KR dataset: 89.80%
Polio3_sampled Vaccination Coverage for children between 12 to 23 months using KR dataset: 76.70%
mcv_sampled Vaccination Coverage for children between 12 to 23 months using KR dataset: 75.90%


75.9

In [105]:
# List of vaccines
vaccines = ['BCG_sampled', 'DPT1_sampled', 'DPT2_sampled','DPT3_sampled','Polio0_sampled','Polio1_sampled','Polio2_sampled','Polio3_sampled','mcv_sampled']

In [106]:
# List of vaccines
vaccines1 = ['BCG', 'DPT1','DPT3','Polio0','Polio1','Polio3','mcv']

In [107]:
# Gender categories to tabulate
genders = ['Male', 'Female']

# One record per (vaccine, gender) pair, vaccines in the outer position
results = [
    {
        'Vaccine': vaccine,
        'Gender': gender,
        'Coverage': calculate_vaccination_coverage_gender(dfKR, 12, 23, gender, vaccine),
    }
    for vaccine in vaccines1
    for gender in genders
]

# Long-format records -> table with one row per vaccine and one column per gender
results_df = pd.DataFrame(results)
pivot_table1 = results_df.pivot_table(index='Vaccine', columns='Gender', values='Coverage')
pivot_table1
# Display the results as a table
#print(results_df)


Gender,Female,Male
Vaccine,Unnamed: 1_level_1,Unnamed: 2_level_1
BCG,78.6,79.0
DPT1,62.8,66.1
DPT3,50.6,52.0
Polio0,99.3,98.0
Polio1,82.9,85.8
Polio3,67.9,68.8
mcv,65.7,67.8


In [108]:
dfKR['Residence']

0        Urban
1        Urban
2        Urban
3        Urban
4        Urban
         ...  
21213    Rural
21214    Rural
21215    Rural
21216    Rural
21217    Rural
Name: Residence, Length: 21218, dtype: object

In [111]:
# Residence categories to tabulate (only these two are included here)
residences = ["Urban", "Rural"]

# One record per (vaccine, residence) pair, vaccines in the outer position
results2 = [
    {
        'Vaccine': vaccine,
        'Residence': residence,
        'Coverage': calculate_vaccination_coverage_residence(dfKR, 12, 23, residence, vaccine),
    }
    for vaccine in vaccines1
    for residence in residences
]

# Build the long-format frame and round the coverage to two decimals
results_df2 = pd.DataFrame(results2)
results_df2['Coverage'] = results_df2['Coverage'].round(2)

# Table with one row per vaccine and one column per residence category
pivot_table2 = results_df2.pivot_table(index='Vaccine', columns='Residence', values='Coverage')
pivot_table2

Residence,Rural,Urban
Vaccine,Unnamed: 1_level_1,Unnamed: 2_level_1
BCG,78.42,89.07
DPT1,68.12,70.5
DPT3,56.57,58.52
Polio0,99.04,99.39
Polio1,81.93,89.92
Polio3,64.54,76.54
mcv,71.01,72.52


In [54]:
# Region names to tabulate
regions = [
    'Awdal', 'Woqooyi Galbeed', 'Togdheer', 'Sool', 'Sanaag', 'Bari',
    'Nugaal', 'Mudug', 'Galgaduud', 'Hiraan', 'Middle Shabelle', 'Banadir',
    'Lower Shabelle', 'Bay', 'Bakool', 'Gedo', 'Middle Juba', 'Lower Juba',
]

# One record per (vaccine, region) pair, vaccines in the outer position
results1 = [
    {
        'Vaccine': vaccine,
        'Region': region,
        'Coverage': calculate_vaccination_coverage_region(dfKR, 12, 23, region, vaccine),
    }
    for vaccine in vaccines1
    for region in regions
]

# Long-format frame of the records
results_df1 = pd.DataFrame(results1)

# Show the long-format results
print(results_df1)

No children found for the specified age range and region: Lower Shabelle.
No children found for the specified age range and region: Middle Juba.
No children found for the specified age range and region: Lower Shabelle.
No children found for the specified age range and region: Middle Juba.
No children found for the specified age range and region: Lower Shabelle.
No children found for the specified age range and region: Middle Juba.
No children found for the specified age range and region: Lower Shabelle.
No children found for the specified age range and region: Middle Juba.
No children found for the specified age range and region: Lower Shabelle.
No children found for the specified age range and region: Middle Juba.
No children found for the specified age range and region: Lower Shabelle.
No children found for the specified age range and region: Middle Juba.
No children found for the specified age range and region: Lower Shabelle.
No children found for the specified age range and region

In [57]:
pivot_table = results_df1.pivot_table(index='Vaccine', columns='Region', values='Coverage')
pivot_table

Region,Awdal,Bakool,Banadir,Bari,Bay,Galgaduud,Gedo,Hiraan,Lower Juba,Lower Shabelle,Middle Juba,Middle Shabelle,Mudug,Nugaal,Sanaag,Sool,Togdheer,Woqooyi Galbeed
Vaccine,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
BCG,86.4,90.3,78.9,71.4,90.6,75.6,76.9,84.2,88.7,0.0,0.0,71.4,77.8,79.7,68.4,63.9,84.6,75.0
DPT1,66.7,67.7,63.3,71.0,71.9,60.5,53.8,67.6,64.2,0.0,0.0,55.3,70.5,70.3,61.8,54.2,40.0,44.0
DPT3,53.8,56.0,50.0,58.7,53.6,45.5,36.4,58.1,47.5,0.0,0.0,45.2,54.9,62.3,51.9,35.7,40.0,22.2
Polio0,100.0,100.0,95.2,100.0,100.0,100.0,100.0,100.0,96.2,0.0,0.0,100.0,97.3,100.0,100.0,100.0,100.0,100.0
Polio1,81.8,90.3,81.5,84.1,97.0,87.8,69.2,94.6,90.6,0.0,0.0,85.4,84.6,87.7,83.8,72.2,41.7,71.0
Polio3,50.0,68.2,58.5,72.7,95.2,79.2,54.5,76.2,78.6,0.0,0.0,66.7,69.8,83.7,69.6,36.8,28.6,37.5
mcv,77.3,64.5,66.3,74.6,63.6,61.0,46.2,67.6,62.3,0.0,0.0,55.3,77.8,73.4,55.6,63.3,80.0,61.5


In [58]:
pivot_table.to_clipboard(index=True, header=True)

In [None]:
# Example usage for males
male_coverage = calculate_vaccination_coverage_gender(dfKR, 12, 23, 'Male', 'BCG_sampled')

# Example usage for females
female_coverage = calculate_vaccination_coverage_gender(dfKR, 12, 23, 'Female', 'BCG_sampled')


In [112]:
def calculate_retention_rate(df, group_col, dose1_col, dose2_col):
    """
    Compute, per group, the retention rate between two dose columns.

    Both dose columns are summed within each group. The rate is
    (1 - (sum of dose1 - sum of dose2) / sum of dose1) * 100.

    Parameters:
    df (pandas.DataFrame): Input DataFrame containing vaccination data.
    group_col (str): Column name for the grouping variable.
    dose1_col (str): Column name of the earlier dose.
    dose2_col (str): Column name of the later dose.

    Returns:
    pandas.DataFrame: A table with a single 'retention_rate' row and one
    column per value of ``group_col``.
    """
    # Per-group totals of the two dose columns
    totals = (
        df.groupby([group_col])
        .agg(sum_dose1=(dose1_col, 'sum'), sum_dose2=(dose2_col, 'sum'))
        .reset_index()
    )

    # Share of the first-dose total that is missing from the later dose
    first_dose_total = totals['sum_dose1']
    shortfall = first_dose_total - totals['sum_dose2']
    totals['retention_rate'] = (1 - (shortfall / first_dose_total)) * 100

    # Lay the groups out as columns
    return totals.pivot_table(columns=group_col, values='retention_rate')
  

# Retention Rate

In [116]:
filtered_df = dfKR[(dfKR['Child_Age'] >= 12) & (dfKR['Child_Age'] <= 23)]

In [127]:
result_df=calculate_retention_rate(filtered_df, 'Region', 'DPT1_sampled', 'DPT3_sampled')
result_df.to_clipboard(index=True, header=True)

In [128]:
result_df

Region,Awdal,Bakool,Banadir,Bari,Bay,Galgaduud,Gedo,Hiraan,Lower Juba,Middle Shabelle,Mudug,Nugaal,Sanaag,Sool,Togdheer,Woqooyi Galbeed
retention_rate,53.051187,66.470947,57.894737,53.387716,65.217391,72.626797,71.195125,70.209492,45.110103,57.379774,50.587756,66.118615,68.782464,38.470287,100.0,18.73063


In [122]:
calculate_retention_rate(filtered_df, 'Gender', 'DPT1_sampled',  'DPT3_sampled')

Gender,Female,Male
retention_rate,58.989757,57.387711


In [123]:
calculate_retention_rate(filtered_df, 'Residence', 'DPT1_sampled','DPT3_sampled')

Residence,Nomadic,Rural,Urban
retention_rate,22.899475,62.473221,60.970102


In [125]:
result_df=calculate_retention_rate(filtered_df, 'Region', 'Polio1_sampled', 'Polio3_sampled')
result_df.to_clipboard(index=True, header=True)

In [126]:
result_df

Region,Awdal,Bakool,Banadir,Bari,Bay,Galgaduud,Gedo,Hiraan,Lower Juba,Middle Shabelle,Mudug,Nugaal,Sanaag,Sool,Togdheer,Woqooyi Galbeed
retention_rate,23.932255,56.218162,41.333333,55.555539,62.5,56.223663,80.347389,53.931274,42.445768,44.34232,45.074265,65.600125,51.961394,26.918415,37.177776,34.073527


In [129]:
calculate_retention_rate(filtered_df, 'Gender', 'Polio1_sampled', 'Polio3_sampled')

Gender,Female,Male
retention_rate,47.480445,49.546385


In [130]:
calculate_retention_rate(filtered_df, 'Residence','Polio1_sampled', 'Polio3_sampled')

Residence,Nomadic,Rural,Urban
retention_rate,34.883278,42.820962,55.274077


In [131]:
polio_retention_rate = ((1 - ((filtered_df['Polio1_sampled'].sum() - filtered_df['Polio3_sampled'].sum()) / filtered_df['Polio1_sampled'].sum())) * 100)
polio_retention_rate

48.51347455592866

In [132]:
penta_retention_rate = ((1 - ((filtered_df['DPT1_sampled'].sum() - filtered_df['DPT3_sampled'].sum()) / filtered_df['DPT1_sampled'].sum())) * 100)
penta_retention_rate

58.18903975798231

# Zero Dose Children

In [135]:
# Filter out rows with null values in relevant columns
filtered_df = filtered_df.dropna(subset=['BCG', 'Polio3', 'mcv', 'DPT3','DPT1'])

In [136]:
# Number of rows whose DPT1 value equals 0
zero_dose_children = (filtered_df['DPT1'] == 0).sum()

# Number of rows in the filtered dataset
total_children = filtered_df.shape[0]

# Zero-dose rows as a percentage of all rows
proportion_zero_dose = (zero_dose_children / total_children) * 100

print(f"Proportion of zero dose children: {proportion_zero_dose:.2f}")

Proportion of zero dose children: 42.32


In [137]:
def calculate_zero_dose_percentage_by_group(df, vaccine_column, group_column):
    """
    Return, per group, the percentage of rows whose vaccine value equals 0.

    Parameters:
        df (pandas.DataFrame): Data containing ``vaccine_column`` and ``group_column``.
        vaccine_column (str): Column tested against 0.
        group_column (str): Column whose values define the groups.

    Returns:
        pandas.Series: Indexed by group, the percentage of zero-dose rows
        rounded to one decimal. A group with no zero-dose rows gets 0.0.
    """
    # Total number of rows in each group
    total_children_by_group = df.groupby(group_column).size()

    # Number of zero-dose rows in each group. Groups with none are absent
    # from the filtered count, so realign to the full set of groups with 0;
    # otherwise the division below would produce NaN for them.
    zero_dose_children_by_group = (
        df[df[vaccine_column] == 0]
        .groupby(group_column)
        .size()
        .reindex(total_children_by_group.index, fill_value=0)
    )

    # Percentage of zero-dose rows for each group
    zero_dose_percentage_by_group = round((zero_dose_children_by_group / total_children_by_group) * 100, 1)

    return zero_dose_percentage_by_group

In [139]:
zero_dose_percentage_by_group = calculate_zero_dose_percentage_by_group(filtered_df, 'DPT1', 'Region')

print(zero_dose_percentage_by_group)

Region
Awdal              50.0
Bakool             43.6
Banadir            58.1
Bari               34.3
Bay                28.6
Galgaduud          45.5
Gedo               54.5
Hiraan             44.4
Lower Juba         42.3
Middle Shabelle    48.3
Mudug              33.3
Nugaal             26.2
Sanaag             39.1
Sool               42.9
Togdheer           60.0
Woqooyi Galbeed    66.7
dtype: float64


In [140]:
zero_dose_percentage_by_group = calculate_zero_dose_percentage_by_group(filtered_df, 'DPT1', 'Gender')

print(zero_dose_percentage_by_group)

Gender
Female    45.5
Male      39.5
dtype: float64


In [141]:
zero_dose_percentage_by_group = calculate_zero_dose_percentage_by_group(filtered_df, 'DPT1', 'Residence')

print(zero_dose_percentage_by_group)

Residence
Nomadic    80.4
Rural      43.0
Urban      33.3
dtype: float64


# Fully Vaccinated

In [142]:
# A row counts as fully vaccinated when all of these columns equal 1
required_doses = ['BCG', 'Polio3', 'mcv', 'DPT3']
has_all_doses = (filtered_df[required_doses] == 1).all(axis=1)
fully_vaccinated = filtered_df[has_all_doses]
num_fully_vaccinated = fully_vaccinated.shape[0]

# Number of rows in the filtered dataset
total_kids = filtered_df.shape[0]

# Fully vaccinated rows as a percentage of all rows
percentage_fully_vaccinated = (num_fully_vaccinated / total_kids) * 100
percentage_fully_vaccinated

44.20485175202156

In [None]:
# Non Vaccinated 

In [144]:
# A row counts as non-vaccinated when all of these columns equal 0
first_doses = ['BCG', 'Polio1', 'mcv', 'DPT1']
has_no_doses = (filtered_df[first_doses] == 0).all(axis=1)
no_vaccinated = filtered_df[has_no_doses]
num_no_vaccinated = no_vaccinated.shape[0]

# Number of rows in the filtered dataset
total_kids = filtered_df.shape[0]

# Non-vaccinated rows as a percentage of all rows
percentage_no_vaccinated = (num_no_vaccinated / total_kids) * 100
percentage_no_vaccinated

11.590296495956872

## Data Analysis IR dataset

## No of responders per region

In [None]:
# Select the id, region and residence columns from the original dataframe
# into a new dataframe with readable column names.
# Only the three selected columns are renamed; 'V005' is not part of the
# selection, so it has no entry in the rename map.
df1 = df[['CASEID', 'V101', 'V102']].rename(
    columns={'CASEID': 'CaseId', 'V101': 'Region', 'V102': 'Residence'}
)

In [None]:
# Top five rows of the new dataframe
df1.head()

In [None]:
#Take the 'women's individual sample weight' variable and divide by 1 million.

df['sample_weight'] = df['V005']/1000000

In [None]:
df['sample_weight']

In [None]:
region_mapping = {11:'Awdal'
,12:'Woqooyi Galbeed'
,13:'Togdheer'
,14:'Sool'
,15:'Sanaag'
,16:'Bari'
,17:'Nugaal'
,18:'Mudug'
,19:'Galgaduud'
,20:'Hiraan'
,21:'Middle Shabelle'
,22:'Banadir'
,23:'Lower Shabelle'
,24:'Bay'
,25:'Bakool'
,26:'Gedo'
,27:'Middle Juba'
,28:'Lower Juba'

 }
residence_mapping = {1:'Urban',
                 2:'Rural',
                 3:'Nomadic'}

In [None]:
df1 = df1.assign(Region= df1.Region.map(region_mapping))
df1 = df1.assign(Residence= df1.Residence.map(residence_mapping))

In [None]:
df1.head()

In [None]:

grouped_data = df1.groupby(['Region'])['CaseId'].count().sort_values()

# Plotting the bar chart with the specified size
plt.figure(figsize=(20, 6))  # Change the size of the plot

bars = plt.bar(grouped_data.index, grouped_data.values, color='skyblue')  # Set color as skyblue

# Adding totals above each bar
for bar, total in zip(bars, grouped_data.values):
    plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.1, total, ha='center', va='bottom')

plt.title('Number of Cases per Region')  # Change the title
plt.ylabel('Number of Cases')  # Change the y-axis label
plt.xlabel('Region')  # Optional: Change the x-axis label

plt.show()

In [None]:
df_grouped = df1.groupby(['Region', 'Residence'])['CaseId'].count().reset_index().sort_values(by='CaseId')

fig = px.bar(df_grouped, 
             x='Region', y='CaseId', color='Residence',
             labels={'x': 'Region', 'CaseId': 'Number of Cases'}, 
             title='Cases per Region by Residence',
             category_orders={'Region': df_grouped['Region'].tolist()},
            )
fig.show()

## BCG 

In [None]:
# Define the columns range from 'H2$01' to 'H2$20'
columns_range = [f'H2${i:02}' for i in range(1, 21)]

# Call the function to replace values in the specified columns
df = replace_values_in_columns(df, columns_range)

In [None]:
df['H2$02'].isna().sum()

In [None]:
print(df['H2$02'].unique())
print(df['H2$02'].value_counts())
print(df['H2$01'].unique())
print(df['H2$01'].value_counts())

In [None]:
# Reshape the data from wide to long format
df_reshaped_BCG = pd.concat([
    df[['CASEID', 'H1B$01', 'H2$01','sample_weight']].rename(columns={'H1B$01': 'Child_Age', 'H2$01': 'BCG'}),
    df[['CASEID', 'H1B$02', 'H2$02','sample_weight']].rename(columns={'H1B$02': 'Child_Age', 'H2$02': 'BCG'})
], ignore_index=True)

df_reshaped_BCG = df_reshaped_BCG.sort_values(by=['CASEID', 'Child_Age']).reset_index(drop=True)

print(df_reshaped_BCG)

In [None]:
calculate_vaccination_coverage(df_reshaped_BCG, 12, 23, 'BCG')

## DPT1

In [None]:
# Define the columns range from 'H3$01' to 'H3$20'
columns_range = [f'H3${i:02}' for i in range(1, 21)]

# Call the function to replace values in the specified columns
df = replace_values_in_columns(df, columns_range)

In [None]:
df['H3$02'].isna().sum()

In [None]:
print(df['H3$02'].unique())
print(df['H3$02'].value_counts())
print(df['H3$01'].unique())
print(df['H3$01'].value_counts())

In [None]:
# Reshape the data from wide to long format
df_reshaped_DPT1 = pd.concat([
    df[['CASEID', 'H1B$01', 'H3$01','sample_weight']].rename(columns={'H1B$01': 'Child_Age', 'H3$01': 'DPT1'}),
    df[['CASEID', 'H1B$02', 'H3$02','sample_weight']].rename(columns={'H1B$02': 'Child_Age', 'H3$02': 'DPT1'})
], ignore_index=True)

df_reshaped_DPT1 = df_reshaped_DPT1.sort_values(by=['CASEID', 'Child_Age']).reset_index(drop=True)

print(df_reshaped_DPT1)

In [None]:
# Group by child age
# NOTE(review): `df_reshaped` is not defined in any cell visible here (the
# reshaped frames are named df_reshaped_BCG, df_reshaped_DPT1, ...) - confirm
# which frame is meant.
grouped = df_reshaped.groupby(["Child_Age"])

# Apply the weighted average function to your variables
# NOTE(review): no `weighted_average` function is defined in the visible
# notebook - confirm where it comes from.
weighted_averages = grouped.apply(weighted_average)

#reset index
weighted_averages = weighted_averages.reset_index()

In [None]:
grouped.head(5)

In [None]:
calculate_vaccination_coverage(df_reshaped_DPT1, 12, 23, 'DPT1')

## DPT2

In [None]:
# Define the columns range from 'H5$01' to 'H5$20'
columns_range = [f'H5${i:02}' for i in range(1, 21)]

# Call the function to replace values in the specified columns
df = replace_values_in_columns(df, columns_range)

In [None]:
# Reshape the data from wide to long format: stack the '$01' and '$02'
# age/DPT2 column pairs under common 'Child_Age' and 'DPT2' names
df_reshaped_DPT2 = pd.concat([
    df[['CASEID', 'H1B$01', 'H5$01','sample_weight']].rename(columns={'H1B$01': 'Child_Age', 'H5$01': 'DPT2'}),
    df[['CASEID', 'H1B$02', 'H5$02','sample_weight']].rename(columns={'H1B$02': 'Child_Age', 'H5$02': 'DPT2'})
], ignore_index=True)

df_reshaped_DPT2 = df_reshaped_DPT2.sort_values(by=['CASEID', 'Child_Age']).reset_index(drop=True)

# Print the frame built in this cell
print(df_reshaped_DPT2)

In [None]:
print(df['H5$01'].unique())
print(df['H5$01'].value_counts())
print(df['H5$02'].unique())
print(df['H5$02'].value_counts())

In [None]:
calculate_vaccination_coverage(df_reshaped_DPT2, 12, 23, 'DPT2')

## DPT3

In [None]:
# Define the columns range from 'H7$01' to 'H7$20'
columns_range = [f'H7${i:02}' for i in range(1, 21)]

# Call the function to replace values in the specified columns
df = replace_values_in_columns(df, columns_range)

In [None]:
# Reshape the data from wide to long format: stack the '$01' and '$02'
# age/DPT3 column pairs under common 'Child_Age' and 'DPT3' names
df_reshaped_DPT3 = pd.concat([
    df[['CASEID', 'H1B$01', 'H7$01','sample_weight']].rename(columns={'H1B$01': 'Child_Age', 'H7$01': 'DPT3'}),
    df[['CASEID', 'H1B$02', 'H7$02','sample_weight']].rename(columns={'H1B$02': 'Child_Age', 'H7$02': 'DPT3'})
], ignore_index=True)

df_reshaped_DPT3 = df_reshaped_DPT3.sort_values(by=['CASEID', 'Child_Age']).reset_index(drop=True)

# Print the frame built in this cell
print(df_reshaped_DPT3)

In [None]:
print(df['H7$01'].unique())
print(df['H7$01'].value_counts())

In [None]:
calculate_vaccination_coverage(df_reshaped_DPT3, 12, 23, 'DPT3')

## Polio1

In [None]:
# Define the columns range from 'H4$01' to 'H4$20'
columns_range = [f'H4${i:02}' for i in range(1, 21)]

# Call the function to replace values in the specified columns
df = replace_values_in_columns(df, columns_range)

In [None]:
print(df['H4$01'].unique())
print(df['H4$01'].value_counts())
print(df['H4$02'].unique())
print(df['H4$02'].value_counts())

In [None]:
# Reshape the data from wide to long format
df_reshaped_Polio1= pd.concat([
    df[['CASEID', 'H1B$01', 'H4$01','sample_weight']].rename(columns={'H1B$01': 'Child_Age', 'H4$01': 'Polio1'}),
    df[['CASEID', 'H1B$02', 'H4$02','sample_weight']].rename(columns={'H1B$02': 'Child_Age', 'H4$02': 'Polio1'})
], ignore_index=True)

df_reshaped_Polio1 = df_reshaped_Polio1.sort_values(by=['CASEID', 'Child_Age']).reset_index(drop=True)

print(df_reshaped_Polio1)

In [None]:
calculate_vaccination_coverage(df_reshaped_Polio1, 12, 23, 'Polio1')

## Polio2

In [None]:
# Define the columns range from 'H6$01' to 'H6$20'
columns_range = [f'H6${i:02}' for i in range(1, 21)]

# Call the function to replace values in the specified columns
df = replace_values_in_columns(df, columns_range)

In [None]:
print(df['H6$01'].unique())
print(df['H6$01'].value_counts())
print(df['H6$02'].unique())
print(df['H6$02'].value_counts())

In [None]:
# Reshape the data from wide to long format
df_reshaped_polio2 = pd.concat([
    df[['CASEID', 'H1B$01', 'H6$01','sample_weight']].rename(columns={'H1B$01': 'Child_Age', 'H6$01': 'Polio2'}),
    df[['CASEID', 'H1B$02', 'H6$02','sample_weight']].rename(columns={'H1B$02': 'Child_Age', 'H6$02': 'Polio2'})
], ignore_index=True)

df_reshaped_polio2 = df_reshaped_polio2.sort_values(by=['CASEID', 'Child_Age']).reset_index(drop=True)

print(df_reshaped_polio2)

In [None]:
calculate_vaccination_coverage(df_reshaped_polio2, 12, 23, 'Polio2')

## Polio3

In [None]:
# Define the columns range from 'H8$01' to 'H8$20'
columns_range = [f'H8${i:02}' for i in range(1, 21)]

# Call the function to replace values in the specified columns
df = replace_values_in_columns(df, columns_range)

In [None]:
print(df['H8$01'].unique())
print(df['H8$01'].value_counts())
print(df['H8$02'].unique())
print(df['H8$02'].value_counts())

In [None]:
# Reshape the data from wide to long format
df_reshaped_polio3 = pd.concat([
    df[['CASEID', 'H1B$01', 'H8$01','sample_weight']].rename(columns={'H1B$01': 'Child_Age', 'H8$01': 'Polio3'}),
    df[['CASEID', 'H1B$02', 'H8$02','sample_weight']].rename(columns={'H1B$02': 'Child_Age', 'H8$02': 'Polio3'})
], ignore_index=True)

df_reshaped_polio3 = df_reshaped_polio3.sort_values(by=['CASEID', 'Child_Age']).reset_index(drop=True)

print(df_reshaped_polio3)

In [None]:
calculate_vaccination_coverage(df_reshaped_polio3, 12, 23, 'Polio3')

## MEASLES

## Results

In [None]:
# Print one coverage line per vaccine for children aged 12 to 23 months.
# NOTE(review): none of the percentage_vaccinated_* names below is assigned in
# the visible notebook (the calculate_vaccination_coverage calls do not store
# their return values) - confirm where these are meant to be set.
print("BCG Vaccination Coverage for children between 12 to 23 months: {:.2f}%".format(percentage_vaccinated_BCG))
print("DPT1 Vaccination Coverage for children between 12 to 23 months: {:.2f}%".format(percentage_vaccinated_DPT1))
print("DPT2 Vaccination Coverage for children between 12 to 23 months: {:.2f}%".format(percentage_vaccinated_DPT2))
print("DPT3 Vaccination Coverage for children between 12 to 23 months: {:.2f}%".format(percentage_vaccinated_DPT3))
print("Polio1 Vaccination Coverage for children between 12 to 23 months: {:.2f}%".format(percentage_vaccinated_Polio1))
print("Polio2 Vaccination Coverage for children between 12 to 23 months: {:.2f}%".format(percentage_vaccinated_Polio2))
print("Polio3 Vaccination Coverage for children between 12 to 23 months: {:.2f}%".format(percentage_vaccinated_Polio3))