# Move labs from assigned to unassigned
Use this notebook when an enumerator is unable to carry out an assignment.

In [None]:
# Run parameters

# Date on which this code is run, in yyyy_mm_dd format
REPLACEMENT_DATE = "2025_10_08"

# True: the existing assignments file was written over, so the labs to replace
# are looked up in the original assignments file instead.
# False: there was no rewrite error.
rewrite_error = True

In [2]:
# Set up
import pandas as pd
import numpy as np
import sys
from pathlib import Path
# CODE_ROOT is the directory two levels above the current working directory.
# It is appended to the import path before `import config`, so the order of
# these three statements matters.
# NOTE(review): presumably config.py lives in CODE_ROOT and the notebook is
# launched from its own folder — confirm.
CODE_ROOT = Path.cwd().parents[1]
sys.path.append(str(CODE_ROOT))
import config

In [11]:
# Load the input tables
# Lab lists are read from config.LABS_LIST, assignment files from config.ENUMERATORS.
labs_dir = config.LABS_LIST
enumerators_dir = config.ENUMERATORS

# Lab list used later to pull the rows of the labs being replaced
labs_df = pd.read_csv(labs_dir / "LabsList_Randomized_Locations.csv")

# Current enumerator assignments
existing_assignments = pd.read_csv(enumerators_dir / "assignedlabs.csv")

# Labs that are currently unassigned
unassigned_labs = pd.read_csv(labs_dir / "LabsList_Unassigned.csv")

# Replacement requests (columns read later: enum_id, replace_all, labgroupid)
replacements = pd.read_excel(labs_dir / "labs_to_replace_unassigned.xlsx")

# Assignments file used as the fallback when rewrite_error is set
original_assignments = pd.read_csv(enumerators_dir / "assignedlabs_original.csv")

In [None]:
# Work out which labs go back to the unassigned pool.
# Each row of `replacements` names an enumerator and asks either for all of
# their labs (replace_all == 1) or for one specific labgroupid.
labs_to_replace = []

for _, request in replacements.iterrows():
    enum_id = request["enum_id"]
    replace_all = request["replace_all"]
    lab = request["labgroupid"]

    # A single-lab request cannot be honoured without a labgroupid
    lab_missing = lab is None or pd.isna(lab)
    if replace_all == 0 and lab_missing:
        print(f"Warning: enum_id {enum_id} has replace_all=0 but missing labgroupid. Skipping.")
        continue

    # Every current assignment of this enumerator ...
    belongs_to_enum = existing_assignments["enum_id"] == enum_id
    if replace_all == 1:
        selector = belongs_to_enum
    else:
        # ... narrowed to the one requested labgroupid
        selector = belongs_to_enum & (existing_assignments["labgroupid"] == lab)

    labs = existing_assignments.loc[selector, "labgroupid"]

    # Nothing matched: unknown enumerator, or the labs are no longer assigned
    if labs.empty:
        print(f"Warning: enum_id {enum_id} not found in existing_assignments or labs already replaced. Skipping.")
        continue

    labs_to_replace.extend(labs.tolist())

# Collapse labs that were requested more than once
labs_to_replace = list(set(labs_to_replace))

print("Labs to replace:", labs_to_replace)

Labs to replace: []


In [None]:
# Deal with error: existing assignment written over
# When rewrite_error is set, rebuild the list of labs to replace from the
# original assignments file instead of the current one. When it is not set,
# labs_to_replace keeps whatever value it already had.
if rewrite_error:
    labs_to_replace = []

    for _, row in replacements.iterrows():
        enum_id = row["enum_id"]
        replace_all = row["replace_all"]
        lab = row["labgroupid"]

        # A single-lab request cannot be honoured without a labgroupid
        if replace_all == 0 and (lab is None or pd.isna(lab)):
            print(f"Warning: enum_id {enum_id} has replace_all=0 but missing labgroupid. Skipping.")
            continue

        if replace_all == 1:
            # Replace all labs for that enum_id
            labs = original_assignments.loc[
                original_assignments["enum_id"] == enum_id, "labgroupid"
            ]
        else:
            # Replace only the specific labgroupid
            labs = original_assignments.loc[
                (original_assignments["enum_id"] == enum_id) &
                (original_assignments["labgroupid"] == lab),
                "labgroupid"
            ]

        # Nothing matched in the original assignments file
        if labs.empty:
            print(f"Warning: enum_id {enum_id} not found in original_assignments or labs already replaced. Skipping.")
            continue

        labs_to_replace.extend(labs.tolist())

    # Collapse labs that were requested more than once
    labs_to_replace = list(set(labs_to_replace))

    print("Labs to replace:", labs_to_replace)

Labs to replace: [480, 135, 971, 157, 830, 255]


In [16]:
# Drop the replaced labs from the existing assignments file

# Rows whose labgroupid is in the replacement list
assignment_is_replaced = existing_assignments["labgroupid"].isin(labs_to_replace)

# Keep every other row, ordered by enumerator id then labgroupid, with a fresh index
new_existing_assignments = (
    existing_assignments.loc[~assignment_is_replaced]
    .sort_values(by=["enum_id", "labgroupid"])
    .reset_index(drop=True)
)

# Write the reduced table back over assignedlabs.csv
new_existing_assignments.to_csv(config.ENUMERATORS / "assignedlabs.csv", index=False)

In [17]:
# Move the replaced labs into the unassigned labs file

# Rows of the lab list that are being replaced, leaving out institute "ICS"
# (noted in this notebook as an institute with no labs)
lab_is_replaced = labs_df["labgroupid"].isin(labs_to_replace)
lab_not_in_ics = labs_df["Institute"] != "ICS"
labs_being_replaced = labs_df.loc[lab_is_replaced & lab_not_in_ics]

# Keep a dated record of the labs replaced in this run
replaced_labs_path = config.LABS_LIST / f"replacedlabs_{REPLACEMENT_DATE}.csv"
labs_being_replaced.to_csv(replaced_labs_path, index=False)

# Append them to the unassigned labs and overwrite the unassigned list on disk
new_unassigned_labs = pd.concat([unassigned_labs, labs_being_replaced], ignore_index=True)
new_unassigned_labs.to_csv(config.LABS_LIST / "LabsList_Unassigned.csv", index=False)