# Drop non-responsive or non-existent labs from sample without reassignment.

In [1]:
# Set up
import pandas as pd
import numpy as np
import sys
from pathlib import Path
CODE_ROOT = Path.cwd().parents[1]
sys.path.append(str(CODE_ROOT))
import config
from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill
import os
from lab_assignment import assign_enumerators

In [2]:
# Load datasets
existing_assignments = pd.read_csv(config.ENUMERATORS / "assignedlabs.csv")
labs_to_remove_from_sample = pd.read_excel(config.LABS_LIST / "labs_to_remove_from_sample.xlsx")

In [3]:
# Flag labs as out of sample

# Add a new column if it doesn’t already exist
if "out_of_sample" not in existing_assignments.columns:
    existing_assignments["out_of_sample"] = 0

# Replace out of sample with 0 if missing
existing_assignments["out_of_sample"].fillna(0, inplace=True)

# Flag non-responsive or non-existent labs
existing_assignments.loc[
    existing_assignments["labgroupid"].isin(labs_to_remove_from_sample["labgroupid"]),
    "new_out_of_sample"] = 1
    
# Replace old out of sample with new values
existing_assignments.loc[existing_assignments["new_out_of_sample"] == 1, "out_of_sample"] = 1

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  existing_assignments["out_of_sample"].fillna(0, inplace=True)


In [4]:
# Reorder columns for saving assignments file
assignments_order = [
    "labgroupid", "Lab Group", "Faculty", "Institute", 
    "Professor", "Email", "Source", "Treatment Status", 
    "enum_id", "enum_firstname", "enum_lastname", 
    "enum_email", "out_of_sample"
]

# Save the assignments file
cols_to_save = [col for col in assignments_order if col in existing_assignments.columns]
existing_assignments.to_csv(config.ENUMERATORS / "assignedlabs.csv", index = False, columns = cols_to_save)