In [1]:
# Set up
import pandas as pd
import numpy as np
import sys
from pathlib import Path
CODE_ROOT = Path.cwd().parents[1]
sys.path.append(str(CODE_ROOT))
import config
from openpyxl import load_workbook
from openpyxl.formatting.rule import FormulaRule
from openpyxl.styles import Font, PatternFill
from openpyxl.utils import range_boundaries
import os
import shutil
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl")

In [2]:
# Load data
labs = pd.read_csv(config.BL_RAW_SAMPLE / "final_sample_with_BL_file_status.csv")

In [3]:
# BL surveys folder
bl_surveys_folder = config.BL_RAW_SURVEY / "1_LabExcels"

# BL checklists folder
bl_checklists_folder = config.BL_RAW_CHECKLIST / "1_LabExcels"

# EL survey templates
el_survey_template_folder = config.EL_EMPTY_ROOT / "1_Templates"
el_template_treatment = el_survey_template_folder / "1_endline_template_treatment.xlsx"
el_template_control = el_survey_template_folder / "1_endline_template_control.xlsx"

In [4]:
# List of labs to create EL for
labs_create_el = labs[(labs["file_filled"] == True)]

# For each labgroupid, create the EL survey and save in individualized folder
for labgroupid, group in labs_create_el.groupby("labgroupid"):

    # Get enumerator info
    id = group["enum_id"].iloc[0]
    name = group["foldername"].iloc[0]
    folder_name = f"{name}_data"
    t_or_c = group["Treatment Status"].iloc[0]

    el_template = el_template_treatment if t_or_c == "treatment" else el_template_control

    # Define enumerator's data collection folder
    enum_folder = os.path.join(config.SWITCHDRIVE_ROOT, folder_name)

    # Create a subfolder for this labgroup inside the el folder
    lab_el_folder = os.path.join(enum_folder, "2_EL", f"EL_{labgroupid}")
    os.makedirs(lab_el_folder, exist_ok=True)

    # BL survey file path
    bl_survey = os.path.join(
        bl_surveys_folder,
        f"BL_{labgroupid}.xlsx"
    )

    if not os.path.exists(bl_survey):
        print(f"Missing BL survey for {labgroupid}")
        continue

    # Checklist file path
    bl_checklist = os.path.join(
        bl_checklists_folder,
        f"checklist_{labgroupid}.xlsx"
    )

    # Output calculator file path
    el_file = os.path.join(
        lab_el_folder,
        f"EL_{labgroupid}.xlsx"
    )

    # Copy the relevant template to create the EL file
    shutil.copy(el_template, el_file)

    # Load BL survey workbook and calculator workbook
    bl_wb = load_workbook(bl_survey, data_only=True)
    el_wb = load_workbook(el_file)

    # Define sheets and ranges to transfer
    sheet_mappings = {
        "2. Fume Cupboards": {"range": "C6:L17"},
        "3. Fridges (4°C)": { "range": "C6:L9"},
        "4. Freezers (-20°C)": {"range": "C6:V13"},
        "5. ULT Freezers": {"range": "C6:L14"},
        "6. Glassware Drying Cabinets": {"range": "C6:L15"},
        "7. Microbio Safety Cabinets": {"range": "C6:L14"},
        "8. Cryostats": {"range": "C6:L13"},
        "9. Water Baths": {"range": "C6:L16"},
        "10. CO2 Incubators": {"range": "C6:L11"},
        "11. Block Heaters": {"range": "C6:L14"},
        "12. IT Equipment": {"range": "C6:L14"},
    }

    # Transfer values
    for src_sheet, info in sheet_mappings.items():
        bl_ws = bl_wb[src_sheet]
        el_ws = el_wb[src_sheet]
        min_col, min_row, max_col, max_row = range_boundaries(info["range"])

        for row in range(min_row, max_row + 1):
            for col in range(min_col, max_col + 1):
                el_ws.cell(row=row, column=col).value = bl_ws.cell(row=row, column=col).value
        
        # Also copy cell "A3"
        el_ws["A3"].value = bl_ws["A3"].value

    # Save the updated EL file
    el_wb.save(el_file)

In [5]:
# Additionally for treatment labs, copy over the checklist
labs_create_el_treatment = labs_create_el[labs["Treatment Status"] == "treatment"]

# Record missing checklists
missing_checklists = []

# For each labgroupid, copy over the checklist
for labgroupid, group in labs_create_el_treatment.groupby("labgroupid"):

    # Get enumerator info
    id = group["enum_id"].iloc[0]
    name = group["foldername"].iloc[0]
    folder_name = f"{name}_data"

    # EL file path
    el_file = os.path.join(
        config.SWITCHDRIVE_ROOT, 
        folder_name,
        "2_EL",
        f"EL_{labgroupid}",
        f"EL_{labgroupid}.xlsx"
    )

    # Checklist file path
    bl_checklist = os.path.join(
        bl_checklists_folder,
        f"checklist_{labgroupid}.xlsx"
    )
    if not os.path.exists(bl_checklist):
        missing_checklists.append({"labgroupid": labgroupid, "enumerator": name})
        continue

    # Load checklist workbook and EL workbook
    checklist_wb = load_workbook(bl_checklist, data_only=True)
    el_wb = load_workbook(el_file)

    # Define sheets and ranges to transfer
    sheet_mappings = {
        "SPARK Checklist": {
            "dest_sheet": "15. SPARK Checklist",
            "ranges": ["C4:C70", "F4:F70"],
        }
    }

    # Transfer values
    for src_sheet, info in sheet_mappings.items():
        checklist_ws = checklist_wb[src_sheet]
        el_ws = el_wb[info["dest_sheet"]]

        for rng in info["ranges"]:
            min_col, min_row, max_col, max_row = range_boundaries(rng)

            for row in range(min_row, max_row + 1):
                for col in range(min_col, max_col + 1):

                    src_cell = checklist_ws.cell(row=row, column=col)

                    # Skip locked/protected cells
                    if src_cell.protection.locked:
                        continue

                    # Copy value if not locked
                    el_ws.cell(row=row, column=col).value = src_cell.value

    # Save the updated EL file
    el_wb.save(el_file)

# Save missing checklist report
if missing_checklists:
    missing_df = pd.DataFrame(missing_checklists)
    missing_df.to_csv(config.EL_EMPTY_ROOT / "missing_bl_checklists.csv", index = False)
    print("Saved missing checklists report.")
else:
    print("No missing checklists for EL treatment surveys.")

  labs_create_el_treatment = labs_create_el[labs["Treatment Status"] == "treatment"]


Saved missing checklists report.
