In [35]:
import random
from pathlib import Path

import numpy as np
import pandas as pd

In [36]:
# Size of the synthetic patient cohort
N = 500

# Pin both the stdlib and the NumPy global generators to the same seed so
# that re-running the notebook yields the same records
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

In [40]:
# NOTE: the draws below consume the global NumPy RNG in sequence; keep their
# order unchanged or the generated dataset will differ.

# --- Demographics ---
# randint excludes the upper bound, so ages fall in 18..64
ages = np.random.randint(low=18, high=65, size=N)
genders = np.random.choice([0, 1], size=N)  # 0 = female, 1 = male

# --- Presenting symptom ---
symptom_names = [
    "fever",
    "cardiac",
    "sore throat",
    "headache",
    "nausea",
    "vomiting",
    "diarrhea",
    "sprain",
    "cut/laceration",
    "burn",
    "suspected fracture",
    "rash",
    "anxiety",
    "depression",
    "stress",
    "chest pain",
    "difficulty breathing",
    "allergic reaction",
]
# The first symptom is weighted 0.065 and each of the other 17 gets 0.055,
# which adds up to 1
symptom_probs = [0.065] + [0.055] * 17
symptoms = np.random.choice(symptom_names, size=N, p=symptom_probs)

# --- Urgency score, integers 1 through 5 ---
urgency = np.random.randint(low=1, high=6, size=N)

# --- Arrival time: 0 = morning, 1 = afternoon, 2 = night ---
time_of_day = np.random.choice([0, 1, 2], size=N, p=[0.4, 0.4, 0.2])

# --- Current patient count at each center (5..24), drawn for A, then B, then C ---
wait_load_A, wait_load_B, wait_load_C = (
    np.random.randint(low=5, high=25, size=N) for _ in range(3)
)


In [41]:
# Symptom -> centers whose specialty covers it. Built once instead of on every
# call to match_specialty.
# NOTE(review): the symptom generator in this notebook emits "cardiac", but this
# table previously only listed "cough", so cardiac records never matched any
# center. "cardiac" is mapped to B here (the center "cough" uses); "cough" is
# kept for backward compatibility. Confirm the intended center for "cardiac".
_SPECIALTY_CENTERS = {
    "fever": ["A"], "cough": ["B"], "cardiac": ["B"], "sore throat": ["C"],
    "headache": ["A"], "nausea": ["B"], "vomiting": ["C"],
    "diarrhea": ["A"], "sprain": ["B"], "cut/laceration": ["C"],
    "burn": ["A"], "suspected fracture": ["B"], "rash": ["C"],
    "anxiety": ["A"], "depression": ["B"], "stress": ["C"],
    "chest pain": ["A"], "difficulty breathing": ["B"],
    "allergic reaction": ["C"],
}


def match_specialty(symptom: str, center: str) -> int:
    """Return 1 if `center` is listed as a specialty match for `symptom`, else 0.

    Parameters
    ----------
    symptom : str
        Symptom label to look up in the specialty table.
    center : str
        Center identifier ("A", "B" or "C" in the table).

    Returns
    -------
    int
        1 when the center is among those listed for the symptom; 0 otherwise,
        including when the symptom is not in the table at all.
    """
    return int(center in _SPECIALTY_CENTERS.get(symptom, []))

# One 0/1 flag list per center (A, B, C), aligned with `symptoms`
specialty_match_A, specialty_match_B, specialty_match_C = (
    [match_specialty(symptom, center) for symptom in symptoms]
    for center in ("A", "B", "C")
)

In [42]:
# Assemble one row per synthetic patient from the feature arrays built above.
# "symptom_code" holds the symptom label strings as drawn, not numeric codes.
df = pd.DataFrame({
    "age": ages,
    "gender": genders,
    "symptom_code": symptoms,
    "urgency": urgency,
    "time_of_day": time_of_day,
    "wait_load_A": wait_load_A,
    "wait_load_B": wait_load_B,
    "wait_load_C": wait_load_C,
    "specialty_match_A": specialty_match_A,
    "specialty_match_B": specialty_match_B,
    "specialty_match_C": specialty_match_C,
})

# Create the output folder first: to_csv fails when the target directory is
# missing, which would break a run on a fresh checkout.
output_path = Path("../data/priank/unlabeled_patient_data.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)