In [1]:
# Set up
import pandas as pd
import numpy as np
import sys
from pathlib import Path
# Put the directory two levels above the current working directory on the
# import path (presumably where the project-level `config` module lives).
# NOTE: this depends on the working directory the kernel was started in.
CODE_ROOT = Path.cwd().parents[1]
sys.path.append(str(CODE_ROOT))
# Must stay below the sys.path change above, which is what makes it importable.
import config
import os

In [2]:
# Load data: the lab sample CSV stored in the raw-sample directory from `config`
sample_path = config.BL_RAW_SAMPLE / "final_sample_with_BL_file_status.csv"
labs = pd.read_csv(sample_path)

In [3]:
# Labs to install meters in: only those whose `file_filled` flag is True.
# The explicit comparison with True is kept so that missing flags count as
# "not filled" instead of breaking the boolean mask.
is_filled = labs["file_filled"].eq(True)
labs_for_meters = labs.loc[is_filled]

In [4]:
# Build the meter-installation priority list: shuffle each treatment arm,
# then alternate treatment / control rows and number them from 1.
SHUFFLE_SEED = 42  # fixed seed so the ordering is reproducible across runs

status = labs_for_meters["Treatment Status"]

# Rows whose status is neither "treatment" nor "control" (including missing
# values) match neither filter below; report them instead of dropping them
# without a trace.
n_unassigned = int((~status.isin(["treatment", "control"])).sum())
if n_unassigned:
    print(
        f"Note: {n_unassigned} lab(s) have a 'Treatment Status' other than "
        "'treatment'/'control' and are left out of the priority list."
    )

# Shuffle each group independently
treat = (labs_for_meters[status == "treatment"]
         .sample(frac=1, random_state=SHUFFLE_SEED)
         .reset_index(drop=True))

control = (labs_for_meters[status == "control"]
           .sample(frac=1, random_state=SHUFFLE_SEED)
           .reset_index(drop=True))

# Interleave treatment and control without a row-by-row loop.
# The i-th treatment lab gets slot 2*i and the i-th control lab slot 2*i + 1,
# so ordering by slot yields t0, c0, t1, c1, ... and the surplus rows of the
# larger group follow at the end in their shuffled order.
slot = np.concatenate([
    2 * np.arange(len(treat)),
    2 * np.arange(len(control)) + 1,
])
priority_df = (
    pd.concat([treat, control], ignore_index=True)
    .iloc[np.argsort(slot, kind="stable")]
    .reset_index(drop=True)
)

# Add priority rank (1 = first lab to receive a meter)
priority_df["priority"] = range(1, len(priority_df) + 1)

In [5]:
# Save priority list to CSV

# Derive the contact columns: each one takes the "(if different)" override
# when it is present and otherwise falls back to the default column.
contact_sources = [
    # (new column, override column, fallback column)
    ("Contact person", "Contact person (if different)", "Professor"),
    ("Contact email", "Contact email (if different)", "Email"),
]

for target_col, override_col, fallback_col in contact_sources:
    has_override = priority_df[override_col].notna()
    priority_df[target_col] = np.where(
        has_override,
        priority_df[override_col],
        priority_df[fallback_col],
    )

# Columns written to the priority list, in output order
cols_to_keep = [
    "labgroupid",
    "Lab Group",
    "Faculty",
    "Institute",
    "Contact person",
    "Contact email",
    "Treatment Status",
    "Comments?",
    "priority",
]

# Write the selected columns (without the index) to the labs-list folder
# under the OneDrive root taken from `config`
output_path = (
    config.ONEDRIVE_ROOT
    / "15_Metering"
    / "1_Labs_List"
    / "labs_priority_list.csv"
)
priority_df.loc[:, cols_to_keep].to_csv(output_path, index=False)