# Check status of EL data collection

In [1]:
# Set up
import pandas as pd
import numpy as np
import sys
from pathlib import Path
# CODE_ROOT is the directory two levels above the current working directory, so
# this resolves correctly only when the kernel's cwd is the notebook's own folder.
CODE_ROOT = Path.cwd().parents[1]
# Put CODE_ROOT on the import path before importing `config`
# (presumably config.py lives directly under CODE_ROOT -- verify).
sys.path.append(str(CODE_ROOT))
import config
from openpyxl import load_workbook
from openpyxl.formatting.rule import FormulaRule
from openpyxl.styles import Font, PatternFill
import os
import shutil
import warnings
# Silence UserWarning messages raised from within openpyxl modules
warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl")

In [2]:
# Load dataset of all sample labs
labs = pd.read_csv(config.EL_RAW_SAMPLE / "final_sample_with_EL_file_status.csv")

# The enumerator tracking workbook has one sheet per lab status. Ask for all four
# sheets in a single read_excel call so the workbook is opened and parsed once
# instead of once per sheet; passing a list of sheet names returns a dict of
# DataFrames keyed by sheet name.
el_workbook_path = config.ENUMERATORS / "EL_visits_completed.xlsx"
el_sheets = pd.read_excel(
    el_workbook_path,
    sheet_name=["Attrition", "Visit completed", "To pursue", "No visit email conf"],
)

# Attrition labs
el_attrition = el_sheets["Attrition"]

# Labs whose visit is stated as completed
stated_visited = el_sheets["Visit completed"]

# Labs still in progress
in_progress = el_sheets["To pursue"]

# Labs with email confirmation only (no visit)
email_confirmation = el_sheets["No visit email conf"]

In [3]:
# Consistency check between the two EL fill flags: count labs where exactly one
# of "el_awareness_filled" / "el_date_filled" is set.
awareness_is_filled = labs["el_awareness_filled"].eq(True)
awareness_is_missing = labs["el_awareness_filled"].eq(False)
date_is_filled = labs["el_date_filled"].eq(True)
date_is_missing = labs["el_date_filled"].eq(False)

# EL awareness filled, EL date not filled
labs_with_awareness_no_date = labs.loc[awareness_is_filled & date_is_missing]
print(f"Number of labs with EL awareness filled but EL date not filled: {len(labs_with_awareness_no_date)}")

# EL date filled, EL awareness not filled
labs_with_date_no_awareness = labs.loc[awareness_is_missing & date_is_filled]
print(f"Number of labs with EL date filled but EL awareness not filled: {len(labs_with_date_no_awareness)}")

Number of labs with EL awareness filled but EL date not filled: 0
Number of labs with EL date filled but EL awareness not filled: 0


In [4]:
# Flag each lab by whether its labgroupid appears in the corresponding excel sheet.
# Pairs are (new indicator column, sheet DataFrame), listed in the order the
# columns are added to `labs`.
status_sheets = (
    ("attrition", el_attrition),
    ("stated_completed_visit", stated_visited),
    ("in_progress", in_progress),
    ("email_confirmation_only", email_confirmation),
)
for indicator_col, sheet_df in status_sheets:
    labs[indicator_col] = labs["labgroupid"].isin(sheet_df["labgroupid"]).astype(bool)

In [5]:
# Labs listed as visited in the "Visit completed" sheet whose EL awareness flag
# is still not filled
is_stated_visited = labs["stated_completed_visit"].eq(True)
lacks_awareness = labs["el_awareness_filled"].eq(False)
stated_visited_no_awareness = labs.loc[is_stated_visited & lacks_awareness]
print(f"Number of labs stated visited but no awareness filled: {len(stated_visited_no_awareness)}")

# Follow-up list for those labs: lab id plus the enumerator's last name
follow_up_columns = ["labgroupid", "enum_lastname"]
follow_up_labs = stated_visited_no_awareness[follow_up_columns]

Number of labs stated visited but no awareness filled: 0


In [6]:
# A lab is "accounted for" when it is in progress, in attrition, confirmed by
# email only, or already has EL awareness filled; every other lab is unexplained.
accounted_for = (
    labs["in_progress"]
    | labs["attrition"]
    | labs["email_confirmation_only"]
    | labs["el_awareness_filled"]
)
labs["unexplained"] = ~accounted_for

In [7]:
# Per-enumerator summary: first/last name, count of non-null labgroupid values,
# number of labs with EL awareness filled, and number of unexplained labs.
# Keys are the output column names; values are (source column, aggregation).
summary_spec = {
    "enum_firstname": ("enum_firstname", "first"),
    "enum_lastname": ("enum_lastname", "first"),
    "total_labs": ("labgroupid", "count"),
    "completed_awareness": ("el_awareness_filled", "sum"),
    "unexplained_uncompleted": ("unexplained", "sum"),
}
enumerator_summary = labs.groupby("enum_id").agg(**summary_spec).reset_index()

# Express each count as a percentage of the enumerator's total labs, rounded to
# one decimal place.
rate_sources = (
    ("completion_rate", "completed_awareness"),
    ("unexplained_rate", "unexplained_uncompleted"),
)
for rate_col, count_col in rate_sources:
    share_of_total = enumerator_summary[count_col].div(enumerator_summary["total_labs"])
    enumerator_summary[rate_col] = share_of_total.mul(100).round(1)