# Notebook for the analysis of tHFA data

Changes mades:

- Version 0.9
    - Updated oxygen function to correct formulas based on the tHFA
    - Updated guidelines function to correct formula (there was a problem with using "value == target1 or target2")
    - Minor changes to facility score indicator
    - Changed high quality header name to "Quality_Score"
- Version 0.8
    - Included testing capacity indicator
- Version 0.7
    - Included all indicators except testing capacity
    - Sent to TGF for comparison

In [1]:
import pandas as pd
import numpy as np

# Step 1: Load the CSV file into a pandas DataFrame
file_path = "./tHFA_MWI For Analysis Templet.csv"
df = pd.read_csv(file_path, header=None)

# Step 2: Extract the first and second rows and save them in a dictionary
metadata = dict(zip(df.iloc[1], df.iloc[0]))

# Step 3: Set the second row as the header and reload the DataFrame
df.columns = df.iloc[1]
df = df[2:]
df.set_index('Q100', inplace=True)

# Step 4: Extract the first 26 columns into a separate DataFrame for survey details
survey_details_df = df.iloc[:, :11]
facility_details_df = pd.concat([df.iloc[:, 11:23], df.iloc[:, 29:37]], axis=1)
facility_details_df = facility_details_df.drop(columns=['FI_2','FI_3',"Q100a",'Q106_a','Q109-Latitude','Q109-Longitude','Q109-Altitude', 'Q109-Accuracy'])


test = df[:5]
test_fac_df = facility_details_df[:5]
test_fac_df

1,Q102,Q105,Q106,Q105_a,Q113,Q113_A,Q116,Q116_A,Q117,Q118,tHFA_A001,tHFA_A002
Q100,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
MWI_000193,Chikwawa District Hospital,South West Zone,Chikwawa,SECONDARY,DISTRICT HOSPITAL,,LOCAL GOVERNMENT,,PERIURBAN,BOTH OUT AND INPATIENT,SECONDARY,628282
MWI_001282,Pirimiti Rural Hospital,South East Zone,Zomba,SECONDARY,HEALTH CENTRE,,MINISTRY OF HEALTH,,RURAL,BOTH OUT AND INPATIENT,SECONDARY,40137
MWI_000166,Chezi Mission Hospital,Central East Zone,Dowa,SECONDARY,HEALTH CENTRE,,CHRISTIAN HEALTH ASSOCIATION OF MALAWI (CHAM),,RURAL,BOTH OUT AND INPATIENT,PRIMARY,25177
MWI_001312,Salima District Hospital,Central East Zone,Salima,SECONDARY,DISTRICT HOSPITAL,,MINISTRY OF HEALTH,,URBAN,BOTH OUT AND INPATIENT,SECONDARY,85112
MWI_001109,Namadidi Health Centre,South East Zone,Zomba,PRIMARY,HEALTH CENTRE,,MINISTRY OF HEALTH,,RURAL,OUTPATIENT ONLY,PRIMARY,17100


In [None]:
# Placeholder function to calculate a score for each column based on the specific logic
def calculate_column_score(value, column):
    if column in ['tHFA_D004', 'tHFA_D005']:
        return 100 if value == 'YES' else 0
    elif column == 'tHFA_D006':
        return 0 if value == 'YES' else 100
    elif column == 'Q13004_1':
        return 
    elif column in  ["tHFA_D011_TOO_EXPENSIVE", 
                     'tHFA_D011_TOO_FAR',
                     'tHFA_D011_LONG_WAITING_TIME',
                     'tHFA_D011_FACILITY_OFTEN_CLOSED',
                     'tHFA_D011_HEALTH_WORKERS_NOT_AVAILABLE',
                     'tHFA_D011_HEALTH_WORKERS_NOT_RESPECTFUL',
                     'tHFA_D011_LACK_OF_TRANSPORT_TO_REACH_FACILITY',
                     'tHFA_D011_POOR_QUALITY_OF_CARE']:
        return 0 if any(option in value for option in ['1']) else 100
    elif column in ['PREQ132', 'PREQ122', 'PREQ125', 'PREQ119', 'PREQ129']:
        scale = {'NEVER': 0, 'RARELY': 25, 'SOMETIMES': 50, 'OFTEN': 75, 'ALWAYS': 100}
        return scale.get(value, np.nan)
    else:
        return 100 if value == 'YES' else 0
    

# Function to calculate scores for grouped columns
def calculate_grouped_score(row, columns):
    return 100 if all(row[col] == 'YES' for col in columns) else 0


# Define groups of columns to be assessed together
tb_screening_columns = [
        ['Q13408_01', 'Q13408_02', 'Q13408_03', 'tHFAG003'],
        ['TBSF002_A', 'TBSF002_B', 'TBSF002_C','TBSF002_D']
    ]


# Function to calculate indicators for each facility
def calculate_high_quali_service_indicator(row):
    # Calculate HW competence scores
    anc_columns = [
        'Q13007_3', #ipTp
        'tHFA_F003', #HIV test
        'Q13004_1', # gestational age
        'Q13004_8',# blood pressure
        'Q13004_10', #iron
        'Q13004_11', # counselling
        'Q13005_1', #Syphilis
        'Q13005_2', #worms
        'Q13005_5', #hemoglobin
    ]

    art_columns = [
        'tHFAG001'  

    ]

    tb_columns = [
        'Q13510_01', 'Q13507_01'
    ]

    # Count the number of records reviewed for ART, ANC, and TB
    art_count = sum(1 for col in ['RR_ART_Begin_1', 'RR_ART_Begin_2', 'RR_ART_Begin_3', 'RR_ART_Begin_4', 'RR_ART_Begin_5'] if row[col] == 'YES')
    anc_count = sum(1 for col in ['RR_ANC_Begin_1', 'RR_ANC_Begin_2', 'RR_ANC_Begin_3', 'RR_ANC_Begin_4', 'RR_ANC_Begin_5'] if row[col] == 'YES')
    tb_count = sum(1 for col in ['RR_TB_Begin_1', 'RR_TB_Begin_2', 'RR_TB_Begin_3', 'RR_TB_Begin_4', 'RR_TB_Begin_5'] if row[col] == 'YES')

    hw_scores = []
    for i in range(1, art_count + 1):
        hw_scores.extend([calculate_column_score(row[f'{col}_{i}'], col) for col in art_columns])
    
    for i in range(1, anc_count + 1):
        hw_scores.extend([calculate_column_score(row[f'{col}_{i}'], col) for col in anc_columns])
    
    for i in range(1, tb_count + 1):
        hw_scores.extend([calculate_column_score(row[f'{col}_{i}'], col) for col in tb_columns])
    
    # for group in tb_screening_columns:
    #     for i in range(1,6):
    #         hw_scores.append(calculate_grouped_score(row, group))
    

    
    hw_score = np.nanmean(hw_scores)

    # Calculate Patient centeredness scores
    patient_centeredness_columns = [
        'PREQ132', 'PREQ122', 'PREQ125', 'PREQ119', 'PREQ129',
        'tHFA_D003', 'tHFA_D004', 'tHFA_D005', 'tHFA_D006', 'tHFA_D011'
    ]
    patient_count = int(row['PATIENT_EXIT_INTERVIEWS_count'])
    pc_scores = []
    
    for i in range(1, patient_count + 1):
        for col in patient_centeredness_columns:
            interview_col = f'{col}_{i}'
            if interview_col in row:
                pc_scores.append(calculate_column_score(row[interview_col], col))
    
    pc_score = np.nanmean(pc_scores)
    
    # Calculate facility score
    # facility_score = np.nanmean([hw_score, pc_score])
    
    return pd.Series({
        'HW_Competence_Score': hw_score,
        'Patient_Centeredness_Score': pc_score,
        # 'Facility_Score': facility_score
    })

In [None]:
# Example function

import pandas as pd
import numpy as np


def calculate_kpi_s1(row):
    """KPI S1: Provision of integrated people-centered high-quality services"""
    # Placeholder implementation for dimension calculations
    hw_competence_score = np.nanmean([calculate_column_score(row, col) for col in hw_competence_columns])
    patient_centeredness_score = np.nanmean([calculate_column_score(row, col) for col in patient_centeredness_columns])

    # Calculate the facility score as the average of the two dimension scores
    facility_score = np.nanmean([hw_competence_score, patient_centeredness_score])
    
    return facility_score


def calculate_kpi_s2(row):
    """KPI S2: Provision of integrated supportive supervision"""
    attributes = [
        'attribute_1', 'attribute_2', 'attribute_3', 
        'attribute_4', 'attribute_5'
    ]

    # Calculate attribute scores
    attribute_scores = [100 if row[attr] == 'YES' else 0 for attr in attributes]
    
    # Adjust calculation based on CHW linkage
    if row['CHW_linked'] == 'YES':
        score = np.nanmean(attribute_scores[1:])
    else:
        score = np.nanmean(attribute_scores[1:4])
    
    return score


def calculate_kpi_s3(row):
    """KPI S3: HTM integrated services offered to pregnant women"""
    attributes = ['HIV_testing', 'ART_access', 'IPTp', 'TB_screening']
    attribute_scores = [100 if row[attr] == 'YES' else 0 for attr in attributes]
    score = np.nanmean(attribute_scores)
    
    return score


def calculate_kpi_s5(row):
    """KPI S5: Systems readiness for CHWs"""
    attributes = ['supervision', 'contract', 'paid', 'stockouts']
    attribute_scores = [100 if row[attr] == 'YES' else 0 for attr in attributes]
    score = np.nanmean(attribute_scores)
    
    return score


def calculate_rssh_pp_hrh_6(row):
    """RSSH/PP HRH-6: Percentage of facilities providing effective services"""
    attributes = [
        'integrated_HTM_services', 'provider_absence', 'provider_caseload', 
        'ANC_dropout', 'DPT_dropout', 'TB_treatment_completion', 'ART_retention'
    ]
    
    attribute_scores = [calculate_individual_attribute(row, attr) for attr in attributes]
    score = np.nanmean(attribute_scores)
    
    return score


def calculate_rssh_pp_hrh_3(row):
    """RSSH/PP HRH-3: Percentage of community health workers remunerated on time and in-full"""
    return 100 if row['paid'] == 'YES' else 0


def calculate_css_3(row):
    """CSS-3: Percentage of health service delivery sites with a community-led monitoring mechanism in place"""
    questions = ['B021', 'B022', 'B023', 'B024', 'B035', 'B045']
    agree_count = sum(1 for q in questions if row[q] in ['strongly agree', 'agree'])
    score = (agree_count / len(questions)) * 100
    
    return score


def calculate_hsg_1_1(row):
    """HSG-1.1: Percentage of facilities with written and updated clinical guidelines"""
    services = ['HIV', 'TB', 'Malaria', 'ANC', 'Immunizations', 'IMCI']
    guideline_scores = [100 if row[service] == 'YES' else 0 for service in services]
    score = np.nanmean(guideline_scores)
    
    return score


def calculate_rssh_pp_rcs_1(row):
    """RSSH/PP RCS-1: Percentage of health facilities able to provide oxygen therapy related services"""
    oxygen_availability = row['oxygen_availability']
    pulse_oximetry = row['pulse_oximetry']
    score = 100 if oxygen_availability == 'YES' and pulse_oximetry == 'YES' else 50 if oxygen_availability == 'YES' or pulse_oximetry == 'YES' else 0
    
    return score


def calculate_rssh_pp_lab_5(row):
    """RSSH/PP LAB-5: Percentage of health facilities with appropriate set of diagnostics"""
    diagnostics = ['malaria_test', 'covid_test', 'syphilis_test', 'HIV_test', 'blood_sugar_test', 'proteinuria_test', 'pregnancy_test']
    available_diagnostics = sum(1 for test in diagnostics if row[test] == 'YES')
    required_diagnostics = len(diagnostics)
    score = (available_diagnostics / required_diagnostics) * 100
    
    return score


def calculate_rssh_pp_hrh_1(row):
    """RSSH/PP HRH-1: Vacancy rate"""
    funded_posts = row['funded_posts']
    unfilled_posts = row['unfilled_posts']
    score = (unfilled_posts / funded_posts) * 100 if funded_posts > 0 else np.nan
    
    return score


# Placeholder implementation for calculate_column_score and calculate_individual_attribute
def calculate_column_score(row, column):
    """Calculates the score for a specific column based on custom logic."""
    return 100 if row[column] == 'YES' else 0


def calculate_individual_attribute(row, attribute):
    """Calculates the score for an individual attribute."""
    if attribute == 'provider_absence':
        return (1 - row[attribute]) * 100
    elif attribute == 'provider_caseload':
        return min(row[attribute] * 5, 100)
    else:
        return row[attribute] * 100


# Example of applying the functions to a DataFrame
def apply_indicator_calculations(df):
    """Applies indicator calculations to each row in the DataFrame."""
    df['KPI_S1'] = df.apply(calculate_kpi_s1, axis=1)
    df['KPI_S2'] = df.apply(calculate_kpi_s2, axis=1)
    df['KPI_S3'] = df.apply(calculate_kpi_s3, axis=1)
    df['KPI_S5'] = df.apply(calculate_kpi_s5, axis=1)
    df['RSSH_PP_HRH_6'] = df.apply(calculate_rssh_pp_hrh_6, axis=1)
    df['RSSH_PP_HRH_3'] = df.apply(calculate_rssh_pp_hrh_3, axis=1)
    df['CSS_3'] = df.apply(calculate_css_3, axis=1)
    df['HSG_1_1'] = df.apply(calculate_hsg_1_1, axis=1)
    df['RSSH_PP_RCS_1'] = df.apply(calculate_rssh_pp_rcs_1, axis=1)
    df['RSSH_PP_LAB_5'] = df.apply(calculate_rssh_pp_lab_5, axis=1)
    df['RSSH_PP_HRH_1'] = df.apply(calculate_rssh_pp_hrh_1, axis=1)
    
    return df


# Sample DataFrame setup and function application
# df = pd.read_csv('/mnt/data/tHFA_MWI For Analysis Templet.csv')
# df = apply_indicator_calculations(df)
# df.to_csv('/mnt/data/indicators_calculated.csv', index=False)


In [69]:
# Function for the calculation of vacancy rate # Changed from the indicator definition in Annex 1

def calculate_vacancy(row):
    """RSSH/PP HRH-1: Vacancy rate"""
    funded_posts = float(row['tHFA_B046'])
    unfilled_posts = float(row['tHFA_B047'])
    if funded_posts != 999 and unfilled_posts != 999:
        score = ((unfilled_posts / funded_posts)) * 100 
    else: score = np.nan 
    
    return   pd.Series({
        'vacancy_score': score,
        'all_funded_posts': funded_posts,
        'all_unfilled_posts': unfilled_posts,
    })

In [3]:
# Function for calculation of oxygen therapy (deprecated)

# def calculate_oxygen_old(row):
    # """
    # RSSH/PP RCS-1: Percentage of health facilities able to provide oxygen therapy related services
    
    # Using only oxygen currently available in this unit and oxygen be brought to this unit from a different unit

    # """
    # provides_oxygen = 'YES' if row["R1323"] == 'YES' else "NO"
    # oxygen_availability = 'YES' if row['R1324'] == 'YES' or row['R1326'] == 'YES' else 'NO'
    # pulse_oximetry = 'YES' if (row['R1324'] == 'YES' or row['R1326'] == 'YES') else "NO"
    
    # if provides_oxygen == 'NO':
    #     score = np.nan
    # elif provides_oxygen == 'YES' and oxygen_availability == 'YES' and pulse_oximetry == 'YES':
    #     score = 100
    # elif provides_oxygen == 'YES' and (oxygen_availability == 'YES' or pulse_oximetry == 'YES'):
    #     score = 50
    # else:
    #     score = 0
    
    # return pd.Series({
    #     'oxygen_score': score
    # })

In [41]:
# Function for calculation of oxygen therapy

def calculate_oxygen(row):
    """
    RSSH/PP RCS-1: Percentage of health facilities able to provide oxygen therapy related services
    
    Using only oxygen currently available in this unit and oxygen be brought to this unit from a different unit

    """
    provides_oxygen = 'YES' if row["R1323"] == 'YES' else "NO"
    obs_options = ['OBSERVED','REPORTED, NOT SEEN']
    
    if provides_oxygen == 'YES':
       avl = 100 if any ([row ['R1324'] == 'YES', row['R1326'] == 'YES']) and all([row[f'R1325_0{i}A'] in obs_options for i in ['4','5','6']]) and all([row[f'R1325_0{i}B'] == 'YES' for i in ['4','5','6']]) else 0
       avo = 100 if all([row[f'R1322_09A'] in obs_options , row[f'R1322_09B'] == 'YES' ]) else 0
    else:
        avl = np.nan
        avo = np.nan
    
    score = np.nanmean([avl,avo])

    return pd.Series({
        'oxygen_score': score,
        # 'check': [avl,avo]
    })

# see = test.apply(calculate_oxygen, axis=1)
# see


In [4]:
# Function for calculation of CHW paid on time and in full

def calculate_CHW_paid(row):
    
    """

    RSSH/PP HRH-3: Percentage of community health workers remunerated on time and in-full
    
    Paid is calculated with 'tHFA_E020','tHFA_E021', 'tHFA_E022', 'tHFA_E023' and all need to be present to have the value of 100

    """
    chw_count = sum(1 for col in ['CHW_begin_1', 'CHW_begin_2', 'CHW_begin_3', 'CHW_begin_4', 'CHW_begin_5'] if row[col] == 'YES')

    chw_paid_scores = []

    paid_columns = ['tHFA_E020','tHFA_E021', 'tHFA_E022', 'tHFA_E023']

    for i in range(1, chw_count + 1):
        chw_i_score = 0
        for col in paid_columns:
            interview_col = f'{col}_{i}'
            if row[interview_col] == 'YES': 
                chw_i_score = chw_i_score + 1
        chw_paid = 100 if chw_i_score == 4 else 0  
        chw_paid_scores.append(chw_paid)

    chw_paid_score = np.nanmean(chw_paid_scores)
    return pd.Series({
        # 'CHW_count': chw_count,
        'CHW_paid': chw_paid_score,
        'CHW_scores': chw_paid_scores  
        })

In [65]:
# Function for calculation of facility composite indicator 

def calculate_facility_composite(row):
    
    """

    RSSH/PP HRH-6: Percentage of facilities providing effective services

    Composite facility level indicator with seven components: 
    1)	% of facilities observed to provide integrated services at ANC (TB, malaria, HIV) at the time of visit; 
    2)	Provider availability (absence rate on day of visit);
    3)	Provider caseload (number of outpatient visits per clinician per day);
    4)	ANC dropout rate; 
    5)	DPT dropout rate; 
    6)	Treatment completion rate for new TB cases;
    7)	Twelve-month retention on ART


    """

    provides_ART = 1 if row['R2312'] == 'YES' else 0
    provides_TB = 1 if row['R2400'] == 'YES' else 0
    provides_immu = 1 if row['R2100'] == 'YES' else 0
    provides_imci = 1 if row['R2000'] == 'YES' else 0
    provides_malaria = 1 if row['R1400'] == 'YES' else 0
    provides_hiv_test = 1 if row['R2300'] == 'YES' else 0
    provides_anc = 1 if row['R1810'] == 'YES' else 0
    provides_testing = 1 if row['R2900'] == 'YES' else 0
    provides_oxygen = 1 if row["R1323"] == 'YES' else 0

    outpatients_ystrdy = float(row['tHFA_B034'])
    clincians_present = float(row['tHFA_B026'])
    clinicians_today = float(row['tHFA_B025'])

    # Attribute 1 (Integrated HTM with ANC)
    if provides_anc > 0:
        htm_anc_fxn = calculate_HTM_ANC(row)
        htm_anc_score = htm_anc_fxn['HTM-ANC']
    else:
        htm_anc_score = np.nan


    # Attribute 2 (Clincian presence)
    if clinicians_today > 0 and clinicians_today != 999 and clincians_present != 999:
        presence = (clincians_present/clinicians_today)*100
    else: presence = np.nan 


    # Attribute 3 (Caseload)
    if clincians_present > 0 and outpatients_ystrdy != 999 and clincians_present != 999:
        caseload = (outpatients_ystrdy/clincians_present)*5
        if caseload >100:
            caseload = 100 
    else: caseload = np.nan

    

    # Attribute 4 (ANC dropout rate)
    anc_scores = []
    if provides_anc > 0:
        anc_count = sum(1 for col in ['RR_ANC_Begin_1', 'RR_ANC_Begin_2', 'RR_ANC_Begin_3', 'RR_ANC_Begin_4', 'RR_ANC_Begin_5'] if row[col] == 'YES')
        for i in range(1, anc_count + 1):            
            if int(row[f'Q13004_7_{i}']) == 99:
                anc_scores.append(np.nan)
            elif int(row[f'Q13004_7_{i}']) > 3 :
                anc_scores.append(100) 
            else: anc_scores.append(0)

        anc_score = np.nanmean(anc_scores)

    else:
        anc_score = np.nan


    # Attribute 5 (DTP dropout rate)
    DPT_dropout = np.nan
    if provides_immu > 0:
        if float(row['tHFA_B032']) != 999 and float(row['tHFA_B032']) != 0 and float(row['tHFA_B033']) != 999:
            DPT_dropout = (float(row['tHFA_B032']) - float(row['tHFA_B033']))/float(row['tHFA_B032'])
            DPT_score = (1-DPT_dropout) * 100
        else:
            DPT_score = np.nan
    
    else: 
        DPT_score = np.nan


    # Attribute 6 (treatment completion rate for new TB clients)
    if provides_TB > 0:
        tb_count = sum(1 for col in ['RR_TB_Begin_1', 'RR_TB_Begin_2', 'RR_TB_Begin_3', 'RR_TB_Begin_4', 'RR_TB_Begin_5'] if row[col] == 'YES')
        tb_trtmnt_scores = []
        
        for i in range(1, tb_count + 1):
            tb_trtmnt = np.nan
            if row[f'Q13506_04_{i}'] == 'YES, CLIENT WAS CURED OR COMPLETED TREATMENT':
                tb_trtmnt = 1
            elif row[f'Q13506_04_{i}'] == np.nan :
                tb_trtmnt = np.nan
            else: 
                tb_trtmnt = 0

            tb_trtmnt_scores.append(tb_trtmnt)
        
        tb_trtmnt_score = (np.nansum(tb_trtmnt_scores)/tb_count) * 100
    
    else: 
        tb_trtmnt_score = np.nan
        tb_trtmnt_scores = []

    # Attribute for ART 12 month retention
    if provides_ART > 0:   
        art_count = sum(1 for col in ['RR_ART_Begin_1', 'RR_ART_Begin_2', 'RR_ART_Begin_3', 'RR_ART_Begin_4', 'RR_ART_Begin_5'] if row[col] == 'YES')
        art_retnt_scores = []
        for i in range(1, art_count + 1):
            art_retnt = np.nan
            if row[f'tHFAG002_{i}'] == 'NO': 
                art_retnt = np.nan
            elif row[f'tHFAG002_{i}'] == 'YES' and row[f'tHFAG005_{i}'] == 'YES':
                art_retnt = 100
            else:
                art_retnt = 0
            
            art_retnt_scores.append(art_retnt)
        art_retnt_score = np.nanmean(art_retnt_scores)
    else:
        art_retnt_score = np.nan
        art_retnt_scores = []
    

    # Aggregation of scores
    attribute_scores = [htm_anc_score,presence,caseload,anc_score,DPT_score,tb_trtmnt_score,art_retnt_score]
    comp_score = np.nanmean(attribute_scores)
    
    
    return pd.Series({
        'facility_composite': comp_score,
        'HTM-ANC': htm_anc_score,
        'presence_score': presence,
        'caseload': caseload,
        'ANC_score': anc_score,
        'DPT_score': DPT_score,
        'TB_treatment_completion': tb_trtmnt_score,
        'ART_retention': art_retnt_score,
        
        })


# see = test.apply(calculate_facility_composite, axis=1)
# see

  tb_trtmnt_score = (np.nansum(tb_trtmnt_scores)/tb_count) * 100
  art_retnt_score = np.nanmean(art_retnt_scores)


Unnamed: 0_level_0,facility_composite,HTM-ANC,presence_score,caseload,ANC_score,DPT_score,TB_treatment_completion,ART_retention
Q100,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
MWI_000193,68.333333,50.0,40.0,20.0,100.0,,100.0,100.0
MWI_001282,52.082703,63.333333,88.461538,12.173913,0.0,96.444732,,
MWI_000166,73.75,,100.0,100.0,,95.0,0.0,
MWI_001312,90.0,60.0,,,100.0,,100.0,100.0
MWI_001109,62.571608,66.666667,28.571429,100.0,0.0,92.763158,100.0,50.0


In [6]:
# Function for calculation of integrated HTM for ANC clients 

def calculate_HTM_ANC(row):
    
    """

    Pregnant women receive the following services in an integrated fashion: 
    HIV counseling and testing, access to ARVs for HIV positive women, IPTp, 
    and TB screening.


    """

    provides_ART = 1 if row['R2312'] == 'YES' else 0
    provides_TB = 1 if row['R2400'] == 'YES' else 0
    provides_malaria = 1 if row['R1400'] == 'YES' else 0
    provides_hiv_test = 1 if row['R2300'] == 'YES' else 0
    provides_anc = 1 if row['R1810'] == 'YES' else 0
    provides_testing = 1 if row['R2900'] == 'YES' else 0
    provides_oxygen = 1 if row["R1323"] == 'YES' else 0

       
    htm_anc_scores = []
    if provides_anc > 0:
        anc_count = sum(1 for col in ['RR_ANC_Begin_1', 'RR_ANC_Begin_2', 'RR_ANC_Begin_3', 'RR_ANC_Begin_4', 'RR_ANC_Begin_5'] if row[col] == 'YES')
        for i in range(1, anc_count + 1):            
            client_anc_scores = []
            # Attribute 1: HIV testing and counselling
            if row[f'Q13009_1_{i}'] == 'YES':
                client_anc_scores.append(np.nan)
            elif row[f'tHFA_F003_{i}'] == 'YES':
                client_anc_scores.append(100)
            elif row[f'tHFA_F003_{i}'] == 'NO':
                client_anc_scores.append(0)
            else:
                client_anc_scores.append(np.nan)

            # Attribute 2: Access to ART
            if row[f'tHFA_F001_{i}'] == 'NO':
                client_anc_scores.append(np.nan)
            elif row[f'tHFA_F001_{i}'] == 'YES' and row[f'tHFA_F004_{i}'] == 'YES':
                client_anc_scores.append(100)
            elif row[f'tHFA_F001_{i}'] == 'YES' and row[f'tHFA_F004_{i}'] == 'NO':
                client_anc_scores.append(0)
            else:
                client_anc_scores.append(np.nan)

            # Attribute 3: Access to iptp
            if row[f'Q13007_3_{i}'] == 'YES':
                client_anc_scores.append(100)
            elif row[f'Q13007_3_{i}'] == 'NO':
                client_anc_scores.append(0)
            else: 
                client_anc_scores.append(np.nan)

            # Attribute 4: Access to TB screening
            tb_values = [row[f'TBSF002_A_{i}'],
                         row[f'TBSF002_B_{i}'], 
                         row[f'TBSF002_C_{i}'],
                         row[f'TBSF002_D_{i}']]
            if all(val == 'YES' for val in tb_values):
                client_anc_scores.append(100)
            else:
                client_anc_scores.append(0)

            client_anc_score = np.nanmean(client_anc_scores)
            htm_anc_scores.append(client_anc_score)
        

        htm_anc_score = np.nanmean(htm_anc_scores)

    else:
        htm_anc_score = np.nan

    
    return pd.Series({
        'HTM-ANC': htm_anc_score,
        'htm-anc-check': htm_anc_scores
        })


In [7]:
# Function for evaluating testing capacity
def calculate_testing_capacity(row):
    
    """

   Description
   RSSH/PP LAB-5: “Percentage of health facilities that have an appropriate set of diagnostics 
   for their healthcare facility level, based on adapted WHO model list of essential in vitro diagnostics (EDL 3)”


    This needs data for the EDL tests from the 
   
    """

    # Responses for facilities with labs
    test_facility_questions = [f'EDL_RESPONSE_0{i}' for i in range(1,10)]
    test_facility_questions.extend([f'EDL_RESPONSE_{i}' for i in range(10,28)])
    
    # Responses for facilities without labs 
    all_facility_questions = test_facility_questions + ['EDL_RESPONSE_28']
    remove = [f'EDL_RESPONSE_{i}' for i in range(10,19)] + [f'EDL_RESPONSE_0{i}' for i in range(8,10)] + [f'EDL_RESPONSE_{i}' for i in range(22,28)]
    remove_set = set(remove)
    non_test_facility_questions = [thing for thing in all_facility_questions if thing not in remove_set]
    

    test_capac_scores = []
    

    if row['R2900'] == 'YES, WITH LABORATORY IN THE FACILITY':
        for question in test_facility_questions:
            if row[question] == 'YES': test_capac_scores.append(100)
            elif row[question] == 'NO': test_capac_scores.append(0)
            else: test_capac_scores.append(np.nan)

    elif row['R2900'] == 'YES, WITHOUT LABORATORY IN THE FACILITY':
        for question in non_test_facility_questions:
            if row[question] == 'YES': test_capac_scores.append(100)
            elif row[question] == 'NO': test_capac_scores.append(0)
            else: test_capac_scores.append(np.nan)
        
    elif row['R2900'] == 'NO LABORATORY TESTS PERFORMED':
        test_capac_scores.append(np.nan)
    
    
    test_capac_score = np.nanmean(test_capac_scores)





    return pd.Series({
        'test-capac': test_capac_score,
        'test-capac_check': test_capac_scores
        })


In [58]:
# Function for guideline availability

def calculate_guideline_availability(row):
    
    """

   “Percentage of facilities with written and updated clinical guidelines for HIV, TB, malaria and/or 
   PHC (based on the services provided) developed by the national or sub-national government 
   (as appropriate by the country context)
   

    Returns a guideline availability score that is a np.nanmean of the scores for the relevant diseases 
    """

    provides_ART = 1 if row['R2312'] == 'YES' else 0
    provides_TB = 1 if row['R2400'] == 'YES' else 0
    provides_immu = 1 if row['R2100'] == 'YES' else 0
    provides_imci = 1 if row['R2000'] == 'YES' else 0
    provides_malaria = 1 if row['R1400'] == 'YES' else 0
    provides_hiv_test = 1 if row['R2300'] == 'YES' else 0
    provides_anc = 1 if row['R1810'] == 'YES' else 0
    provides_testing = 1 if row['R2900'] == 'YES' else 0
    provides_oxygen = 1 if row["R1323"] == 'YES' else 0

    guidelines = [provides_hiv_test, 
                  provides_ART,
                  provides_TB,
                  provides_malaria,
                  provides_anc,
                  provides_immu,
                  provides_imci]
    
    guideline_cols = ['R2308_01',
                      'R2315B_01',
                      'R2419_01',
                      'R1406',
                      'R1817_01',
                      'R2119_01',
                      'R2005_01']
    guideline_scores = []

    for i in range(len(guidelines)): 
        if guidelines[i] > 0 :
            if row[guideline_cols[i]] in ['YES, OBSERVED','YES, REPORTED, NOT SEEN']:
                guideline_scores.append(100)
            else: 
                guideline_scores.append(0)
        else: 
            guideline_scores.append(np.nan)
    
    
    guideline_score = np.nanmean(guideline_scores)

    return pd.Series({
        'guideline_score': guideline_score,
        'guideline_check': guideline_scores,
         })

# see = test.apply(calculate_guideline_availability, axis=1)
# see


Unnamed: 0_level_0,guideline_score,check
Q100,Unnamed: 1_level_1,Unnamed: 2_level_1
MWI_000193,85.714286,"[100, 100, 100, 100, 100, 0, 100]"
MWI_001282,100.0,"[100, 100, 100, 100, 100, 100, 100]"
MWI_000166,100.0,"[100, nan, 100, 100, nan, 100, 100]"
MWI_001312,100.0,"[100, 100, 100, 100, 100, 100, 100]"
MWI_001109,42.857143,"[0, 0, 0, 0, 100, 100, 100]"


In [52]:
# Function for  checking community-led mechanism 
# # Unclear how don't knows should be treated here
def calculate_CLM(row):
    
    """

   Indicator for CSS-3: “Percentage of health service delivery 
   sites with a community-led monitoring mechanism in place”

   Returns:
    0 - 100 based on number of strongly agree or agree responses

    """

    questions = ['tHFA_B021', 'tHFA_B022', 'tHFA_B023', 'tHFA_B024', 'tHFA_B035', 'tHFA_B045']
    agree_count = sum(1 for q in questions if row[q] in ['STRONGLY AGREE', 'AGREE'])
    score = (agree_count / len(questions)) * 100


    return pd.Series({
        'CLM_score': score,      
        })

In [10]:
# Function to calculate system readiness

def calculate_system_CHW(row):
    
    """

   KPI S5: “Systems readiness for CHWs”


    """

    chw_count = sum(1 for col in ['CHW_begin_1', 'CHW_begin_2', 'CHW_begin_3', 'CHW_begin_4', 'CHW_begin_5'] if row[col] == 'YES')
    
    paid_columns = ['tHFA_E020','tHFA_E021', 'tHFA_E022', 'tHFA_E023']

    chw_scores = []

    for i in range(1, chw_count+1):
        chw_i_score = 0
        chw_i_scores = []

        # Attribute 1 (Integrated Supportive Supervision)
        iss_score = 0
        a = 3
        chw_services =  [f'tHFA_E00{a}_A_{i}',f'tHFA_E00{a}_B_{i}', f'tHFA_E00{a}_C_{i}', f'tHFA_E00{a}_D_{i}',f'tHFA_E00{a}_E_{i}']
        for a in [3]:
            expected = sum(1 for disease in chw_services if row[disease] == 'YES')
        for a in [5,7,9]:
            sprvsn = sum(1 for service in chw_services if row[service] == 'YES' )
            if sprvsn == expected: iss_score = iss_score+1
        chw_iss = 100 if iss_score > 0 else 0
        chw_i_scores.append(chw_iss)

        # Attribute 2 (Presence of contract)
        contract = [f'tHFA_E0{b}_{i}' for b in range(10,20)]
        contract_scores = []
        for question in contract:
            if row[question]=='YES': contract_scores.append(100)
            elif row[question]=='NO': contract_scores.append(0)
            else: contract_scores.append(np.nan)
        chw_contract = 100 if np.nanmean(contract_scores) == 100 else 0
        chw_i_scores.append(chw_contract)
        
        # Attribute 3 (Payment accoording to contract)
        paid_score = 0 
        for col in paid_columns:
            interview_col = f'{col}_{i}'
            if row[interview_col] == 'YES': 
                paid_score = paid_score + 1
        chw_paid = 100 if paid_score == 4 else 0  
        chw_i_scores.append(chw_paid)

        # Attribute 4 (Job tools)
        tools = [f'tHFA_E0{b}_{i}' for b in [25,27,29]]
        tools_scores = []
        for question in tools:
            if row[question]=='YES': tools_scores.append(100)
            elif row[question]=='NO': tools_scores.append(0)
            else: tools_scores.append(np.nan)
        chw_tools = 100 if np.nanmean(tools_scores) == 100 else 0
        chw_i_scores.append(chw_tools)

        chw_i_score = np.nanmean(chw_i_scores)
        chw_scores.append(chw_i_score)

    chw_score = np.nanmean(chw_scores)

    return pd.Series({
        'system_CHW_score': chw_score,
        'chw_check': chw_scores
        })

In [66]:
# Function for high quality service
def calculate_service_quality(row):
    
    """

   Description


    """

    provides_ART = 1 if row['R2312'] == 'YES' else 0
    provides_TB = 1 if row['R2400'] == 'YES' else 0
    provides_ANC = 1 if row['R1810'] == 'YES' else 0



    # Calculate HW competence scores
    # ANC scores
    hw_scores = []
    anc_scores = []
    tb_scores = []
    art_scores = []

    if provides_ANC > 0:
        anc_count = sum (1 for col in [f'RR_ANC_Begin_{a}' for a in range(1,6)] if row[col] == 'YES')
        
        for i in range(1, anc_count+1):
            client_anc_scores = []
             # Attribute 1: HIV testing and counselling
            if row[f'Q13009_1_{i}'] == 'YES':
                client_anc_scores.append(np.nan)
            elif row[f'tHFA_F003_{i}'] == 'YES':
                client_anc_scores.append(100)
            elif row[f'tHFA_F003_{i}'] == 'NO':
                client_anc_scores.append(0)
            else:
                client_anc_scores.append(np.nan)

            # Attribute 2: Access to ART
            if row[f'tHFA_F001_{i}'] == 'NO':
                client_anc_scores.append(np.nan)
            elif row[f'tHFA_F001_{i}'] == 'YES' and row[f'tHFA_F004_{i}'] == 'YES':
                client_anc_scores.append(100)
            elif row[f'tHFA_F001_{i}'] == 'YES' and row[f'tHFA_F004_{i}'] == 'NO':
                client_anc_scores.append(0)
            else:
                client_anc_scores.append(np.nan)

            # Attribute 3: Access to iptp
            if row[f'Q13007_3_{i}'] == 'YES':
                client_anc_scores.append(100)
            elif row[f'Q13007_3_{i}'] == 'NO':
                client_anc_scores.append(0)
            else: 
                client_anc_scores.append(np.nan)

            # Attribute 4: Access to TB screening
            tb_values = [row[f'TBSF002_A_{i}'],
                         row[f'TBSF002_B_{i}'], 
                         row[f'TBSF002_C_{i}'],
                         row[f'TBSF002_D_{i}']]
            if all(val == 'YES' for val in tb_values):
                client_anc_scores.append(100)
            else:
                client_anc_scores.append(0)
            
            # Attritube 5, 7, 8, 9, 10, 11
            # Integrated ANC and SRH, Blood pressure, Folic acid, Danger signs
            # Intestinal worms, and hemoglobin
            q = ['13005_1',
                    '13004_8',
                    '13004_10',
                    '13004_11',
                    '13005_2',
                    '13005_5']
            
            for attr in q: 
                if row[f'Q{attr}_{i}'] == 'YES':
                    client_anc_scores.append(100)
                elif row[f'Q{attr}_{i}'] == 'NO':
                    client_anc_scores.append(0)
                else: 
                    client_anc_scores.append(np.nan)

            # Attribute 6: ANC age
            if int(row[f'Q13004_1_{i}']) >= 32:
                client_anc_scores.append(100)
            elif int(row[f'Q13004_1_{i}']) < 32:
                client_anc_scores.append(0)
            else: 
                client_anc_scores.append(np.nan)
            
            client_anc_score = np.nanmean(client_anc_scores)
            anc_scores.append(client_anc_score)

    if provides_ART > 0:
        art_count = sum (1 for col in [f'RR_ART_Begin_{a}' for a in range(1,6)] if row[col] == 'YES')
        
        for i in range(1, art_count+1):
            client_art_scores = []
            # Attribute 1: TB screening
            tb_values = [row[f'Q13408_01_{i}'],
                        row[f'Q13408_02_{i}'], 
                        row[f'Q13408_03_{i}'],
                        row[f'tHFAG003_{i}']]
            if all(val == 'YES' for val in tb_values):
                client_art_scores.append(100)
            else:
                client_art_scores.append(0)
            
            #Attribute 2 : TB treatment for ART patients
            if row[f'Q13408_08_{i}'] == 'ACTIVE TB':
                client_art_scores.append(100) if row[f'Q13408_09_{i}'] == 'YES' else client_art_scores.append(0) 
            else: client_art_scores.append(np.nan)

            # Attribute 3: BP measured
            bp = 'tHFAG001'
            if row[f'{bp}_{i}'] == 'YES':
                client_art_scores.append(100)
            elif row[f'{bp}_{i}'] == 'NO':
                client_art_scores.append(0)
            else: 
                client_art_scores.append(np.nan)
            
            client_art_score = np.nanmean(client_art_scores)
            art_scores.append(client_art_score)
        
    if provides_TB >0 :
        tb_count = sum (1 for col in [f'RR_TB_Begin_{a}' for a in range(1,6)] if row[col] == 'YES')

        for i in range(1,tb_count+1):
            client_tb_scores=[]

            # Attribute 1 and 2:
            # HIV test and symptpms
            tb = ['Q13507_01',
                'Q13510_01']
        
            for attr in tb: 
                if row[f'{attr}_{i}'] == 'YES':
                    client_tb_scores.append(100)
                elif row[f'{attr}_{i}'] == 'NO':
                    client_tb_scores.append(0)
                else: 
                    client_tb_scores.append(np.nan)
            
            client_tb_score = np.nanmean(client_tb_scores)
            tb_scores.append(client_tb_score)
    
    

    hw_scores = anc_scores + art_scores + tb_scores

    hw_score = np.nanmean(hw_scores) if len(hw_scores) > 0 else np.nan
   

    def calculate_column_score(value, column):
        if column in ['tHFA_D004', 'tHFA_D005']:
            return 100 if value == 'YES' else 0
        elif column == 'tHFA_D006':
            return 0 if value == 'YES' else 100
        elif column == 'Q13004_1':
            return 
        elif column in  ["tHFA_D011_TOO_EXPENSIVE", 
                        'tHFA_D011_TOO_FAR',
                        'tHFA_D011_LONG_WAITING_TIME',
                        'tHFA_D011_FACILITY_OFTEN_CLOSED',
                        'tHFA_D011_HEALTH_WORKERS_NOT_AVAILABLE',
                        'tHFA_D011_HEALTH_WORKERS_NOT_RESPECTFUL',
                        'tHFA_D011_LACK_OF_TRANSPORT_TO_REACH_FACILITY',
                        'tHFA_D011_POOR_QUALITY_OF_CARE']:
            return 0 if any(option in value for option in ['1']) else 100
        elif column in ['PREQ132', 'PREQ122', 'PREQ125', 'PREQ119', 'PREQ129']:
            scale = {'NEVER': 0, 'RARELY': 25, 'SOMETIMES': 50, 'OFTEN': 75, 'ALWAYS': 100}
            return scale.get(value, np.nan)
        else:
            return 100 if value == 'YES' else 0

    # Calculate Patient centeredness scores
    patient_centeredness_columns = [
        'PREQ132', 
        'PREQ122', 
        'PREQ125', 
        'PREQ119', 
        'PREQ129',
        'tHFA_D003', 
        'tHFA_D004', 
        'tHFA_D006', 
        'tHFA_D011'
    ]
    patient_count = sum(1 for col in [f'PE_Begin_{i}' for i in range(1,6)] if row[col] == 'YES')
    pc_scores = []
    
    for i in range(1, patient_count + 1):
        for col in patient_centeredness_columns:
            interview_col = f'{col}_{i}'
            if interview_col in row:
                pc_scores.append(calculate_column_score(row[interview_col], col))
    
    pc_score = np.nanmean(pc_scores) if len(pc_scores) > 0 else np.nan
    
    # Calculate facility score
    facility_score = np.nanmean([hw_score, pc_score])
    
    return pd.Series({
        'Quality_Score': facility_score,
        'HW_Competence_Score': hw_score,
        # 'Check': hw_scores,
        'Patient_Centeredness_Score': pc_score,
        # 'Check': pc_scores
        
    })

In [71]:
# Function for integrated supportive supervision

def calculate_ISS(row):
    
    """

   Description


    """

    provides_ART = 1 if row['R2312'] == 'YES' else 0
    provides_TB = 1 if row['R2400'] == 'YES' else 0
    provides_immu = 1 if row['R2100'] == 'YES' else 0
    provides_imci = 1 if row['R2000'] == 'YES' else 0
    provides_malaria = 1 if row['R1400'] == 'YES' else 0
    provides_hiv_test = 1 if row['R2300'] == 'YES' else 0
    provides_anc = 1 if row['R1810'] == 'YES' else 0
    provides_testing = 1 if row['R2900'] == 'YES' else 0
    provides_oxygen = 1 if row["R1323"] == 'YES' else 0

    htm = [provides_ART,provides_hiv_test,provides_TB,provides_malaria]
    all = [provides_ART,provides_hiv_test,provides_TB,provides_malaria,provides_anc,provides_imci,provides_immu]

    # Attribute 1 (Supervision in the past 3 months)
    if row['M610'] == 'NO':
        iss_score = 0
    elif row['M610'] == 'YES':
        iss_scores = []
        # Attribute 2 (Integrated supervision)
        if sum(htm) >= 3:
            iss_done  = sum(1 if any(row[f'tHFA_C00{i}_{j}'] == 'YES' for i in range(1, 7)) else 0 for j in ['A', 'B', 'C'])
            iss_scores.append(100) if iss_done == 3 else iss_scores.append(0)
        elif sum(all) >= 3:
            iss_done = sum(1 if any(row[f'tHFA_C00{i}_{j}'] == 'YES' for i in range(1, 7)) else 0 for j in ['A', 'B', 'C','D','E','F'])
            iss_scores.append(100) if iss_done >= 3 else iss_scores.append(0)
        else:
            iss_scores.append(np.nan)
        

        # Attribute 3 (Summary stats discussed)
        iss_scores.append(100) if row['tHFA_C007'] == 'YES' else iss_scores.append(0) 
    
        # Attribute 4 (Group problem solving)
        if row['tHFA_C008'] == 'YES' and row['tHFA_C009'] == 'YES':
            iss_scores.append(100)
        elif any([row['tHFA_C008'], row['tHFA_C009']]) == "DON’T KNOW":
            iss_scores.append(np.nan)
        else: 
            iss_scores.append(0)

        # Attribute 5 (Data on community activities)
        if row['tHFA_C010'] == 'NO':
            iss_scores.append(np.nan)
        elif row['tHFA_C010'] == 'YES' and row['tHFA_C011'] == 'DON’T KNOW':
            iss_scores.append(np.nan)
        elif row['tHFA_C010'] == 'YES' and row['tHFA_C011'] == 'NO':
            iss_scores.append(0)
        elif row['tHFA_C010'] == 'YES' and row['tHFA_C011'] == 'YES':
            iss_scores.append(100)
        else: 
            iss_scores.append(np.nan)

        iss_score = np.nanmean(iss_scores)
    else: iss_score
        



    return pd.Series({
        'iss_score': iss_score,
        'iss_check': iss_scores
        
        })


Unnamed: 0_level_0,iss_score,check
Q100,Unnamed: 1_level_1,Unnamed: 2_level_1
MWI_000193,100.000000,"[100, 100, 100, 100]"
MWI_001282,66.666667,"[100, 100, 0, nan]"
MWI_000166,100.000000,"[100, 100, 100, 100]"
MWI_001312,100.000000,"[100, 100, 100, 100]"
MWI_001109,100.000000,"[100, 100, 100, 100]"
...,...,...
MWI_001428,100.000000,"[100, 100, 100, 100]"
MWI_000122,75.000000,"[100, 100, 100, 0]"
MWI_000529,100.000000,"[100, 100, 100, 100]"
MWI_000496,100.000000,"[100, 100, 100, 100]"


In [None]:
df['M610'].unique()

In [None]:
def calculate_New_Fxn(row):
    
    """

   Description


    """

    provides_ART = 1 if row['R2312'] == 'YES' else 0
    provides_TB = 1 if row['R2400'] == 'YES' else 0
    provides_malaria = 1 if row['R1400'] == 'YES' else 0
    provides_hiv_test = 1 if row['R2300'] == 'YES' else 0
    provides_anc = 1 if row['R1810'] == 'YES' else 0
    provides_testing = 1 if row['R2900'] == 'YES' else 0
    provides_oxygen = 1 if row["R1323"] == 'YES' else 0

    # Attribute 1 (Integrated HTM with ANC)


    return pd.Series({
        'HTM-ANC': htm_anc_score,
        # 'check': htm_anc_scores
        
        })


see = test.apply(calculate_HTM_ANC, axis=1)
see

In [37]:
# See things
# System_CHW see
r = []
# q = ['13005_1',
# '13004_1',
# '13004_8',
# '13004_10',
# '13004_11',
# '13005_2',
# '13005_5']
r.extend(f'R1325_0{i}{j}' for i in ['4','5','6'] for j in ['A','B'])
# r

# # r.extend([f'tHFA_E0{i}_5' for i in [25,27,29]])
# # r.extend([f'tHFA_E0{a}_5' for a in range(20,24)])
# # r.extend([f'tHFA_E0{b}_5' for b in range(10,20)])
# i = 5
# for a in [3,5,7,9]:
#     r.extend([f'tHFA_E00{a}_A_{i}',f'tHFA_E00{a}_B_{i}', f'tHFA_E00{a}_C_{i}', f'tHFA_E00{a}_D_{i}',f'tHFA_E00{a}_E_{i}'])
# test[r]

# service availability
#  anc_count = sum(1 for col in ['RR_ANC_Begin_1', 'RR_ANC_Begin_2', 'RR_ANC_Begin_3', 'RR_ANC_Begin_4', 'RR_ANC_Begin_5'] if row[col] == 'YES')
# r.extend([[f'tHFA_D004_{i}', f'tHFA_D005_{i}']  for i in range(1,6)])
# r = [item for thing in r for item in thing]

# r.extend([f'Q{j}_5' for j in q])
# r.extend(['Q13408_08_1'])
# r.extend([f'EDL_TEST_0{i}' for i in range(1,10)])
# r.extend([f'EDL_TEST_{i}' for i in range(10,31)])
# r = [f'tHFA_C00{i}_{j}' for j in ['A','B','C','D', 'E', 'F'] for i in range(1,7)]
# r = [f'tHFA_C00{i}_{j}' for j in ['A','B','C'] for i in range(1,7) ]

# test['R2900']
# 'tHFA_C007',
# 'tHFA_C008',
# 'tHFA_C009',
# 'tHFA_C010',
# 'tHFA_C011'] 
df['R1322_09B'].unique()

# iss_done = sum(1 if any(df.iloc[45][f'tHFA_C00{i}_{j}'] == 'YES' for i in range(1, 7)) else 0 for j in ['A', 'B', 'C'])

# iss_done



array(['YES', nan, 'NO'], dtype=object)

In [59]:
functions = [
    calculate_service_quality,
    calculate_ISS,
    calculate_HTM_ANC,
    calculate_system_CHW,
    calculate_facility_composite,
    calculate_CHW_paid,
    calculate_CLM,
    calculate_guideline_availability,
    calculate_oxygen,
    calculate_testing_capacity,
    calculate_vacancy,    
    ]

In [60]:
# test apply the calculation to each row

test_indicators_df = test_fac_df
for func in functions: 
    see = test.apply(func, axis=1)
    test_indicators_df = pd.concat([test_indicators_df, see], axis=1)
test_indicators_df

  tb_trtmnt_score = (np.nansum(tb_trtmnt_scores)/tb_count) * 100
  art_retnt_score = np.nanmean(art_retnt_scores)
  score = np.nanmean([avl,avo])
  test_capac_score = np.nanmean(test_capac_scores)


Unnamed: 0_level_0,Q102,Q105,Q106,Q105_a,Q113,Q113_A,Q116,Q116_A,Q117,Q118,...,CHW_count,CHW_paid,CLM_score,guideline_score,check,oxygen_score,test-capac,vacancy_score,all_funded_posts,all_unfilled_posts
Q100,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
MWI_000193,Chikwawa District Hospital,South West Zone,Chikwawa,SECONDARY,DISTRICT HOSPITAL,,LOCAL GOVERNMENT,,PERIURBAN,BOTH OUT AND INPATIENT,...,4.0,25.0,100.0,85.714286,"[100, 100, 100, 100, 100, 0, 100]",50.0,55.555556,52.662722,169.0,80.0
MWI_001282,Pirimiti Rural Hospital,South East Zone,Zomba,SECONDARY,HEALTH CENTRE,,MINISTRY OF HEALTH,,RURAL,BOTH OUT AND INPATIENT,...,5.0,80.0,66.666667,100.0,"[100, 100, 100, 100, 100, 100, 100]",100.0,42.307692,58.646617,133.0,55.0
MWI_000166,Chezi Mission Hospital,Central East Zone,Dowa,SECONDARY,HEALTH CENTRE,,CHRISTIAN HEALTH ASSOCIATION OF MALAWI (CHAM),,RURAL,BOTH OUT AND INPATIENT,...,5.0,100.0,100.0,100.0,"[100, nan, 100, 100, nan, 100, 100]",50.0,37.037037,82.142857,56.0,10.0
MWI_001312,Salima District Hospital,Central East Zone,Salima,SECONDARY,DISTRICT HOSPITAL,,MINISTRY OF HEALTH,,URBAN,BOTH OUT AND INPATIENT,...,5.0,100.0,83.333333,100.0,"[100, 100, 100, 100, 100, 100, 100]",100.0,62.962963,,999.0,999.0
MWI_001109,Namadidi Health Centre,South East Zone,Zomba,PRIMARY,HEALTH CENTRE,,MINISTRY OF HEALTH,,RURAL,OUTPATIENT ONLY,...,5.0,80.0,100.0,42.857143,"[0, 0, 0, 0, 100, 100, 100]",,,100.0,35.0,0.0


In [67]:
# FULL Apply the calculation to each row
indicators_df = facility_details_df

for func in functions: 
    see = df.apply(func, axis=1)
    indicators_df = pd.concat([indicators_df, see], axis=1)

indicators_df

  chw_contract = 100 if np.nanmean(contract_scores) == 100 else 0
  chw_score = np.nanmean(chw_scores)
  tb_trtmnt_score = (np.nansum(tb_trtmnt_scores)/tb_count) * 100
  art_retnt_score = np.nanmean(art_retnt_scores)
  chw_paid_score = np.nanmean(chw_paid_scores)
  score = np.nanmean([avl,avo])
  test_capac_score = np.nanmean(test_capac_scores)


Unnamed: 0_level_0,Q102,Q105,Q106,Q105_a,Q113,Q113_A,Q116,Q116_A,Q117,Q118,...,CHW_count,CHW_paid,CLM_score,guideline_score,check,oxygen_score,test-capac,vacancy_score,all_funded_posts,all_unfilled_posts
Q100,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
MWI_000193,Chikwawa District Hospital,South West Zone,Chikwawa,SECONDARY,DISTRICT HOSPITAL,,LOCAL GOVERNMENT,,PERIURBAN,BOTH OUT AND INPATIENT,...,4.0,25.0,100.000000,85.714286,"[100, 100, 100, 100, 100, 0, 100]",50.0,55.555556,52.662722,169.0,80.0
MWI_001282,Pirimiti Rural Hospital,South East Zone,Zomba,SECONDARY,HEALTH CENTRE,,MINISTRY OF HEALTH,,RURAL,BOTH OUT AND INPATIENT,...,5.0,80.0,66.666667,100.000000,"[100, 100, 100, 100, 100, 100, 100]",100.0,42.307692,58.646617,133.0,55.0
MWI_000166,Chezi Mission Hospital,Central East Zone,Dowa,SECONDARY,HEALTH CENTRE,,CHRISTIAN HEALTH ASSOCIATION OF MALAWI (CHAM),,RURAL,BOTH OUT AND INPATIENT,...,5.0,100.0,100.000000,100.000000,"[100, nan, 100, 100, nan, 100, 100]",50.0,37.037037,82.142857,56.0,10.0
MWI_001312,Salima District Hospital,Central East Zone,Salima,SECONDARY,DISTRICT HOSPITAL,,MINISTRY OF HEALTH,,URBAN,BOTH OUT AND INPATIENT,...,5.0,100.0,83.333333,100.000000,"[100, 100, 100, 100, 100, 100, 100]",100.0,62.962963,,999.0,999.0
MWI_001109,Namadidi Health Centre,South East Zone,Zomba,PRIMARY,HEALTH CENTRE,,MINISTRY OF HEALTH,,RURAL,OUTPATIENT ONLY,...,5.0,80.0,100.000000,42.857143,"[0, 0, 0, 0, 100, 100, 100]",,,100.000000,35.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
MWI_001428,Thyolo District Hospital,South West Zone,Thyolo,SECONDARY,DISTRICT HOSPITAL,,MINISTRY OF HEALTH,,URBAN,BOTH OUT AND INPATIENT,...,5.0,80.0,100.000000,100.000000,"[100, 100, 100, 100, 100, 100, 100]",,55.555556,,999.0,999.0
MWI_000122,Bwaila Hospital,Central West Zone,Lilongwe,SECONDARY,DISTRICT HOSPITAL,,MINISTRY OF HEALTH,,URBAN,BOTH OUT AND INPATIENT,...,5.0,100.0,100.000000,85.714286,"[100, 100, 100, 0, 100, 100, 100]",100.0,51.851852,,999.0,999.0
MWI_000529,Kamuzu Central Hospital,Central West Zone,Lilongwe,TERTIARY,CENTRAL HOSPITAL,,MINISTRY OF HEALTH,,URBAN,BOTH OUT AND INPATIENT,...,5.0,60.0,16.666667,100.000000,"[100, 100, 100, 100, 100, 100, 100]",,85.185185,,999.0,999.0
MWI_000496,Kalemba Community Hospital,South West Zone,Nsanje,SECONDARY,HEALTH CENTRE,,CHRISTIAN HEALTH ASSOCIATION OF MALAWI (CHAM),,RURAL,BOTH OUT AND INPATIENT,...,5.0,60.0,100.000000,100.000000,"[100, 100, 100, 100, 100, 100, 100]",50.0,48.148148,58.914729,129.0,53.0


In [None]:
df['R2900'].unique()

In [68]:
indicators_df.to_csv('./tHFA-malawi-analysis_v0.9.csv')

In [None]:
# Step 6: Apply the calculation to each row
indicators_df = df.apply(calculate_indicators, axis=1, result_type='expand')

In [None]:

# Step 7: Function to aggregate indicators by region and facility type
def aggregate_indicators(df, group_by_columns):
    return df.groupby(group_by_columns).sum()

# Example usage:
aggregated_by_region = aggregate_indicators(indicators_df, ['Region'])
aggregated_by_facility_type = aggregate_indicators(indicators_df, ['Facility Type'])


In [None]:
# Step 8: Save outputs to CSV files
metadata_df = pd.DataFrame(metadata)
metadata_df.to_csv('/mnt/data/metadata.csv', index=False)
survey_details_df.to_csv('/mnt/data/survey_details.csv', index=False)
indicators_df.to_csv('/mnt/data/indicators.csv', index=False)
aggregated_by_region.to_csv('/mnt/data/aggregated_by_region.csv', index=False)
aggregated_by_facility_type.to_csv('/mnt/data/aggregated_by_facility_type.csv', index=False)

print("Data processing completed and files saved.")
