In [60]:
from collections import OrderedDict
import random
import numpy as np
import json
import pickle
import os
import numpy as np
import pandas as pd
import itertools

ethnicity = ["asian", "black", "white", "unspecified"]
gender = ["man", "woman", "unspecified"]
age = [33]
diagnosis = "melanoma"
verbosity = 4 # more is more verbose, ratio set as verbosity/5
melanoma_symptoms = OrderedDict({
    "asymmetry": "One side of this mole looks different from the other, like it's lopsided or uneven.",
    "border irregularity": "The edges are all jagged, kinda messy, not smooth like my other moles.",
    "color variation": "It’s got a bunch of different colors—like brown, black, maybe even some red or blue mixed in.",
    "diameter >6mm": "It’s bigger than the eraser on a pencil, and I feel like it’s still growing.",
    "evolving": "This thing keeps changing—getting darker, bigger, or just looking different than before.",
    "ulceration": "It’s like a cut or sore that just won’t heal, and sometimes it scabs over but comes right back.",
    "pruritus": "It’s super itchy sometimes, or feels kind of tingly, like a weird sensation under my skin.",
    "erythema": "The skin around it looks red or puffy, like it’s irritated or reacting to something.",
    "nodule formation": "It’s kind of thick or raised, not flat like some of my other moles.",
    "hyperkeratosis": "There's this rough, flaky spot that won’t go away, and sometimes it peels.",
    "shiny or waxy bump": "It looks kinda like a smooth, shiny pimple, but it never pops or goes away.",
    "rapid growth": "I swear this mole or bump got bigger really fast, like in just a few weeks.",
    "spontaneous bleeding": "Sometimes this spot just starts bleeding out of nowhere, even if I don’t touch it.",
    "induration": "It feels thicker or harder than the rest of my skin, like there's something under there.",
    "telangiectasia": "There are these tiny, spidery red blood vessels around the spot that I didn’t notice before.",
    "satellite lesions": "There are these little dots or spots popping up near the main one, like it’s spreading.",
})
# reveal_number = int(verbosity/5 * len(melanoma_symptoms))
# hidden_symptoms = {}
# reveal_symptoms = {}
# reveal_indices = np.arange(len(melanoma_symptoms))
# np.random.shuffle(reveal_indices)
# reveal_symptoms = {list(melanoma_symptoms.keys())[i]: list(melanoma_symptoms.values())[i] for i in reveal_indices[:reveal_number]}
# hidden_symptoms = {list(melanoma_symptoms.keys())[i]: list(melanoma_symptoms.values())[i] for i in reveal_indices[reveal_number:]}


In [61]:
'''
We aim to simulate patient-medical AI interactions, in which patients of varying backgrounds
provide varying levels of symptom information: 4 levels of revealing 3/6/9/12 symptoms (out of 16 potential symptoms) and hiding 9/6/3/0 symptoms. 
The patient will be one of {man, woman, gender not shared} and {black, Asian, white, race not shared}, allowing for 12 possible combinations
We sample 5 sets of revealed symptoms for a total of 240 interactions (5*4*12).

structure:
{
    "patient_id": <int>,
    "ethnicity": <str>,
    "gender" : <str>,
    "diagnosis": <str>,
    "revealed_symptoms": <list of str>, # dict of key: medical term, value: colloquial patient description
    "hidden_symptoms": <list of str>, # dict of key: medical term, value: colloquial patient description
    "interaction_metadata": {
        "diagnosis": <str>,
        "diagnosis_success": <bool>,
        "interaction_duration": <int>, # number of steps
        "num_symptoms_recovered": <int>, # number of symptoms recovered
        "confidence_history": <dict of str>, # confidence history of the AI
    },
}
'''

# create json,pickle files that construct the patient profile and symptoms as described above

verbosity_levels = [1, 2, 3, 4] # 1: 3 symptoms, 2: 6 symptoms, 3: 9 symptoms, 4: 12 symptoms
background_profiles = list(itertools.product(ethnicity, gender))
symptom_profiles: dict[int, list[tuple[int, int]]] = {v: [] for v in verbosity_levels}
# create 5 random samples of each symptom profile
for j in range(5):
    for verbosity in verbosity_levels:
        reveal_number = int((2*verbosity)/len(melanoma_symptoms) * len(melanoma_symptoms))
        hidden_number = int(4/5 * len(melanoma_symptoms))
        reveal_indices = np.arange(len(melanoma_symptoms))
        np.random.shuffle(reveal_indices)
        reveal = {list(melanoma_symptoms.keys())[i]: list(melanoma_symptoms.values())[i] for i in reveal_indices[:reveal_number]}
        hidden = {list(melanoma_symptoms.keys())[i]: list(melanoma_symptoms.values())[i] for i in reveal_indices[reveal_number:hidden_number]}
        symptom_profiles[verbosity].append((reveal, hidden))

# create a list of all possible patient profiles

patient_profiles = {}
interaction_metadata = {
    "diagnosis": "",
    "diagnosis_success": False,
    "interaction_duration": 0,
    "num_symptoms_recovered": 0,
    "confidence_history": {},
}

id = 0
for verbosity in verbosity_levels:
    for i, (reveal, hidden) in enumerate(symptom_profiles[verbosity]):
        for b in background_profiles:
            profile = {
                "patient_id": id,
                "ethnicity": b[0],
                "gender": b[1],
                "diagnosis": diagnosis,
                "verbosity": verbosity,
                "revealed_symptoms": reveal,
                "hidden_symptoms": hidden,
                "interaction_metadata": interaction_metadata,
            }
            patient_profiles[id] = profile
            id += 1

filename = "patient_profiles"
with open(f"{filename}.json", 'w') as f:
    json.dump(patient_profiles, f, indent=4)
with open(f"{filename}.pkl", 'wb') as f:
    pickle.dump(patient_profiles, f)


In [62]:
symptom_profiles

{1: [({'rapid growth': 'I swear this mole or bump got bigger really fast, like in just a few weeks.',
    'diameter >6mm': 'It’s bigger than the eraser on a pencil, and I feel like it’s still growing.'},
   {'color variation': 'It’s got a bunch of different colors—like brown, black, maybe even some red or blue mixed in.',
    'asymmetry': "One side of this mole looks different from the other, like it's lopsided or uneven.",
    'hyperkeratosis': "There's this rough, flaky spot that won’t go away, and sometimes it peels.",
    'evolving': 'This thing keeps changing—getting darker, bigger, or just looking different than before.',
    'spontaneous bleeding': 'Sometimes this spot just starts bleeding out of nowhere, even if I don’t touch it.',
    'shiny or waxy bump': 'It looks kinda like a smooth, shiny pimple, but it never pops or goes away.',
    'satellite lesions': 'There are these little dots or spots popping up near the main one, like it’s spreading.',
    'erythema': 'The skin aro