In [None]:
# Name of one of the stains defined in `stain_configurations` below.
# NOTE(review): nothing in the visible code reads this global (get_prompt takes its
# own `stain_name` argument) -- confirm whether it is still needed.
stain_name = "Jones Silver"

import json
# Stain model definitions: stain name -> tissue class -> {"color", "description"}.
#
# * "color" is an R/G/B triple (values here all fall in 0-255; presumably 8-bit RGB).
# * "description" is free text that get_prompt() serializes verbatim into the LLM prompt.
# * Class order matters: the prompt tells the model to assign markers in the order in
#   which the classes appear, so do not reorder entries casually.
#
# Multi-part descriptions rely on implicit string concatenation, so every fragment
# except the last must end with a space.
stain_configurations = {
    "Masson Trichrome": {
        "nuclei": {
            "color": {"R": 30, "G": 114, "B": 201},
            "description": (
                "Uniform nuclear staining using markers that label all nuclei."
            ),
        },
        "muscle": {
            "color": {"R": 220, "G": 67, "B": 51},
            "description": (
                "Muscle fibers along with their associated cytoplasmic components showing the structure of smooth muscle and myocytes."
            ),
        },
        "collagen": {
            "color": {"R": 93, "G": 209, "B": 225},
            "description": (
                "Specifically highlights collagen and connective tissue."
            ),
        },
        "erythrocytes": {
            "color": {"R": 229, "G": 128, "B": 56},
            "description": (
                "Red blood cells."
            ),
        },
    },
    "Periodic acid-Schiff": {
        "nuclei": {
            "color": {"R": 50, "G": 84, "B": 210},
            "description": (
                "Uniform nuclear staining using markers that label all nuclei."
            ),
        },
        "polysaccharides": {
            "color": {"R": 180, "G": 80, "B": 208},
            "description": (
                # Trailing space keeps the two sentences apart once the literals are joined.
                "This class includes mucins, glycogen. "
                "If no specific polysaccharide markers are available, assign markers for basement membrane proteins "
                "such as Collagen IV or Laminin."
            ),
        },
        "stroma": {
            "color": {"R": 227, "G": 186, "B": 225},
            "description": (
                "Stroma or cytoplasm that does not exhibit strong carbohydrate positivity."
            ),
        },
    },
    "Jones Silver": {
        "membranes": {
            "color": {"R": 0, "G": 0, "B": 0},
            "description": (
                "Basement membranes or related connective tissue structures and reticulin fibers."
            ),
        },
        "stroma": {
            "color": {"R": 133, "G": 227, "B": 200},
            "description": (
                "The fibrous extracellular matrix and interstitial cytoplasm, excluding markers that show cell nuclei or cell membranes."
            ),
        },
    },
    "Toluidine Blue": {
        "nuclei": {
            "color": {"R": 14, "G": 21, "B": 198},
            "description": (
                "Uniform nuclear staining using markers that label all nuclei."
            ),
        },
        "stroma": {
            "color": {"R": 16, "G": 193, "B": 251},
            "description": (
                "Less dense cytoplasmic or stromal regions, reflecting lower dye binding compared to nuclei."
            ),
        },
        "metachromasia": {
            "color": {"R": 167, "G": 154, "B": 254},
            "description": (
                "Metachromatic shift seen in structures with high concentrations of acidic mucopolysaccharides, "
                "such as mast cell granules or cartilage matrix."
            ),
        },
    },
    "H&E": {
        "nuclei": {
            "color": {"R": 72, "G": 61, "B": 139},
            "description": (
                "Uniform nuclear staining using markers that label all nuclei."
            ),
        },
        "eosinophilic": {
            "color": {"R": 255, "G": 182, "B": 193},
            "description": (
                "The extracellular matrix and mesenchymal cytoplasm, capturing connective tissue and muscle components."
            ),
        },
        "epithelial": {
            "color": {"R": 199, "G": 143, "B": 187},
            "description": (
                "Densely stained, protein-rich epithelial tissue."
            ),
        },
        "erythrocytes": {
            "color": {"R": 186, "G": 56, "B": 69},
            "description": (
                "Red blood cells."
            ),
        },
    },
}


# Channel labels of the multiplexed image, in acquisition-list order; get_prompt()
# joins them verbatim into the prompt. Most labels are "<mass><isotope>_<target>";
# "190BCKG" has no target part.
channel_names = [
    "141Pr_aSMA",
    "142Nd_CAV1",
    "143Nd_VIM",
    "144Nd_pro-SPC",
    "145Nd_pS6",
    "146Nd_Col1A1",
    "147Sm_SOX9",
    "148Nd_panCK",
    "149Sm_CD11b",
    "150Nd_FN",
    "151Eu_ICAM1",
    "152Sm_GFP",
    "153Eu_CD44",
    "154Sm_CD11c",
    "156Gd_CD206",
    "158Gd_NFIB",
    "160Gd_Ter119",
    "161Dy_AQP1",
    "162Dy_CD31",
    "163Dy_Ly6G",
    "164Dy_PDPN",
    "165Ho_B-Catenin",
    "166Er_GLUT1",
    "167Er_NPHS2",
    "168Er_ki67",
    "169Tm_LYVE1",
    "170Er_SerpinE2",
    "171Yb_BCL2",
    "172Yb_Casp3",
    "173Yb_ASNS",
    "174Yb_Desmin",
    "175Lu_eCadherin",
    "176Yb_EPCAM",
    "190BCKG",
    "191Ir_DNA",
    "193Ir_DNA",
]


def get_prompt(stain_name):
    """Compose the marker-assignment prompt for one stain.

    The text lists every entry of the module-level ``channel_names``, embeds
    ``stain_configurations[stain_name]`` as indented JSON, and ends with the
    assignment rules and the requested JSON answer format.

    Args:
        stain_name: Key into ``stain_configurations``.

    Returns:
        The complete prompt as one string.

    Raises:
        KeyError: If ``stain_name`` is not a key of ``stain_configurations``.
    """
    marker_list = ", ".join(channel_names)
    config_json = json.dumps(stain_configurations[stain_name], indent=4)

    # Framing: the available channels, the expert role, and the stain model.
    preamble = (
        f"Consider the following channel names in a multiplexed image which represent the markers: {marker_list}.\n\n"
        "You are an expert in digital pathology, proteomics, multiplex imaging, and spatial omics. "
        f"I want to convert this multiplexed image into a pseudo {stain_name} image using a physical stain model. "
        "This model simulates light absorption using an exponential attenuation function where the optical density of each pixel "
        "is computed by combining the intensity of selected markers with specific colors, "
        "mimicking traditional brightfield staining.\n\n"
    )

    # The stain's class definitions, serialized exactly as configured.
    configuration = (
        f"Below is the stain configuration for {stain_name}, where each tissue class is defined by a color, name and a description:\n\n"
        f"{config_json}\n\n"
    )

    # Assignment rules; each one is rendered as its own "- ..." line.
    rules = [
        "For each tissue class, if a marker is expressed in the tissue described by the class description, include that marker/channel name in that class, "
        "even if it is not perfectly specific for that tissue type.",
        "Try to select at least one marker for each tissue class.",
        "For the 'nuclei' class, only include markers that uniformly stain all nuclei, and exclude markers that stain only subsets of nuclei.",
        "For any structural tissue class, include only markers that label structural components, not those that are only expressed in individual cells.",
        "Do not assign the same marker to multiple classes.",
        "Markers should be added in the order that the classes appears. Once a marker is assigned to an earlier class, it cannot be reassigned to a later class.",
        "Do not use channels that are controls or do not appear to be linked to a protein or other specific marker.",
    ]
    rule_lines = "".join(f"- {rule}\n" for rule in rules)

    closing = (
        "Double-check your response for accuracy and consistency. Return your answer as a JSON object with keys corresponding to the class names "
        "and values as lists of channel names."
    )

    return (
        preamble
        + configuration
        + "Assign the provided channels to represent the tissue classes in this configuration.\n"
        + rule_lines
        + "\n"
        + closing
    )

# Build the prompt for one stain at a time. To switch stains, comment out the active
# assignment and uncomment one of the alternatives below.
# prompt = get_prompt("Masson Trichrome")
# prompt = get_prompt("Periodic acid-Schiff")
# prompt = get_prompt("Jones Silver")
# prompt = get_prompt("Toluidine Blue")
prompt = get_prompt("H&E")

# Show the full prompt text (presumably to be pasted into each LLM -- TODO confirm).
print(prompt)

In [None]:
import json

# Stain name -> {model name -> path of that model's JSON answer file}.
# Each row lists the files in the fixed order ChatGPT, Gemini, Claude.
STAINS = {
    stain: dict(zip(("ChatGPT", "Gemini", "Claude"), answer_files))
    for stain, answer_files in (
        (
            "H&E",
            (
                "LLM_data/H&EChatGPT.json",
                "LLM_data/H&EGemini.json",
                "LLM_data/H&EClaude.json",
            ),
        ),
        (
            "Toluidine Blue",
            (
                "LLM_data/ToluidineBlueChatGPT.json",
                "LLM_data/ToluidineBlueGemini.json",
                "LLM_data/ToluidineBlueClaude.json",
            ),
        ),
        (
            "Jones Silver",
            (
                "LLM_data/JonesSilverChatGPT.json",
                "LLM_data/JonesSilverGemini.json",
                "LLM_data/JonesSilverClaude.json",
            ),
        ),
        (
            "Periodic acid-Schiff",
            (
                "LLM_data/PeriodicAcid-SchiffChatGPT.json",
                "LLM_data/PeriodicAcid-SchiffGemini.json",
                "LLM_data/PeriodicAcid-SchiffClaude.json",
            ),
        ),
        (
            "Masson Trichrome",
            (
                "LLM_data/MassonTrichromeChatGPT.json",
                "LLM_data/MassonTrichromeGemini.json",
                "LLM_data/MassonTrichromeClaude.json",
            ),
        ),
    )
}

def load_json(p):
    """Parse the JSON file at path ``p`` and return the decoded object.

    The file is decoded explicitly as UTF-8 instead of the platform's locale
    encoding, so non-ASCII content reads the same on every operating system.

    Args:
        p: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(p, "r", encoding="utf-8") as f:
        return json.load(f)

def build_sets_by_class(d, classes):
    """Return ``{class name: set of entries d lists for that class}``.

    Every name in ``classes`` gets a key; a class missing from ``d`` maps to an
    empty set. ``classes`` is traversed once, so a one-shot iterable (e.g. a
    generator) works as well as a list.

    Args:
        d: Mapping from class name to an iterable of channel names.
        classes: Iterable of class names to include in the result.

    Returns:
        A new dict of sets, keyed by class name.
    """
    return {c: set(d.get(c, [])) for c in classes}

def consensus_jaccard(paths, min_votes=2):
    """Mean Jaccard similarity between each model's answer and the vote consensus.

    For every class (the union of the keys of all answer files, excluding
    "Unassigned") the consensus is the set of channels that at least
    ``min_votes`` models placed in that class. Each model's set for the class is
    then compared with the consensus using |A & C| / |A | C|. A (model, class)
    pair where both sets are empty contributes nothing to the average.

    Args:
        paths: Mapping from model name to the path of that model's JSON answer
            file (an object mapping class name to a list of channel names).
        min_votes: Number of models that must agree for a channel to enter the
            consensus. The default of 2 is a majority when three models are
            compared; pass another value for a different number of models.

    Returns:
        The average over all contributing (model, class) pairs, or ``None`` when
        there are no classes or no pair contributes.
    """
    models = {name: load_json(path) for name, path in paths.items()}
    classes = sorted(set().union(*(m.keys() for m in models.values())) - {"Unassigned"})
    if not classes:
        return None

    sets_by_model = {name: build_sets_by_class(m, classes) for name, m in models.items()}

    vals = []
    for cls in classes:
        # One set per model, in the same order as `paths`.
        proposals = [per_class[cls] for per_class in sets_by_model.values()]
        all_ch = set().union(*proposals)
        consensus = {
            ch for ch in all_ch
            if sum(ch in proposed for proposed in proposals) >= min_votes
        }
        for proposed in proposals:
            union = proposed | consensus
            if not union:
                # Nothing proposed and nothing agreed on: Jaccard is undefined, skip.
                continue
            vals.append(len(proposed & consensus) / len(union))

    if not vals:
        return None
    return sum(vals) / len(vals)


# Print one "<stain>: <score>" line per entry of STAINS, with the score to three
# decimals, or "NA" when consensus_jaccard had nothing to compare.
for stain, paths in STAINS.items():
    j = consensus_jaccard(paths)
    print(f"{stain}: {j:.3f}" if j is not None else f"{stain}: NA")

In [None]:
import json

def consensus_jaccard_three(chatgpt_path, gemini_path, claude_path):
    """Return the consensus Jaccard score of three model answer files.

    Each file is a JSON object mapping a class name to a list of channel names.
    For every class except "Unassigned", the consensus is the set of channels
    that at least two of the three models placed in that class. Each model's set
    is compared with that consensus using |A & C| / |A | C|, and the result is
    the mean over all (model, class) pairs. A pair where both the model's set and
    the consensus are empty is left out of the mean.

    Args:
        chatgpt_path: Path of the ChatGPT answer file.
        gemini_path: Path of the Gemini answer file.
        claude_path: Path of the Claude answer file.

    Returns:
        The mean Jaccard value as a float, or ``None`` if there is nothing to
        compare (no classes, or every pair was left out).

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If a file does not contain valid JSON.
    """
    # Load in the fixed order ChatGPT, Gemini, Claude; decode as UTF-8 so the
    # result does not depend on the platform's locale encoding.
    models = []
    for path in (chatgpt_path, gemini_path, claude_path):
        with open(path, "r", encoding="utf-8") as f:
            models.append(json.load(f))

    # Sorted so the values are accumulated in the same order on every run
    # (plain set iteration order of strings varies between interpreter runs).
    classes = sorted(set().union(*(m.keys() for m in models)) - {"Unassigned"})
    if not classes:
        return None

    vals = []
    for c in classes:
        proposals = [set(m.get(c, [])) for m in models]

        # Majority vote: keep channels proposed by at least 2 of the 3 models.
        consensus = {
            ch for ch in set().union(*proposals)
            if sum(ch in proposed for proposed in proposals) >= 2
        }

        for proposed in proposals:
            union = proposed | consensus
            if union:
                vals.append(len(proposed & consensus) / len(union))

    if not vals:
        return None
    return sum(vals) / len(vals)


# Print "<stain> <score>" for each stain, with the score to three decimals.
# consensus_jaccard_three returns None when there is nothing to compare; that case
# is printed as "NA" rather than being passed to the float format.
for _stain_label, _answer_files in (
    (
        "Masson Trichrome",
        ("LLM_data/MassonTrichromeChatGPT.json", "LLM_data/MassonTrichromeGemini.json", "LLM_data/MassonTrichromeClaude.json"),
    ),
    (
        "Periodic acid-Schiff",
        ("LLM_data/PeriodicAcid-SchiffChatGPT.json", "LLM_data/PeriodicAcid-SchiffGemini.json", "LLM_data/PeriodicAcid-SchiffClaude.json"),
    ),
    (
        "Jones Silver",
        ("LLM_data/JonesSilverChatGPT.json", "LLM_data/JonesSilverGemini.json", "LLM_data/JonesSilverClaude.json"),
    ),
    (
        "Toluidine Blue",
        ("LLM_data/ToluidineBlueChatGPT.json", "LLM_data/ToluidineBlueGemini.json", "LLM_data/ToluidineBlueClaude.json"),
    ),
    (
        "H&E",
        ("LLM_data/H&EChatGPT.json", "LLM_data/H&EGemini.json", "LLM_data/H&EClaude.json"),
    ),
):
    j = consensus_jaccard_three(*_answer_files)
    print(f"{_stain_label} {j:.3f}" if j is not None else f"{_stain_label} NA")
