In [13]:
# Display the first page of the client's model listing.
# NOTE(review): `client` is not defined in the visible cells; this relies on an earlier cell — confirm which SDK client it is.
client.models.list().page 

[Model(name='models/embedding-gecko-001', display_name='Embedding Gecko', description='Obtain a distributed representation of a text.', version='001', endpoints=None, labels=None, tuned_model_info=TunedModelInfo(base_model=None, create_time=None, update_time=None), input_token_limit=1024, output_token_limit=1, supported_actions=['embedText', 'countTextTokens'], default_checkpoint_id=None, checkpoints=None),
 Model(name='models/gemini-1.0-pro-vision-latest', display_name='Gemini 1.0 Pro Vision', description='The original Gemini 1.0 Pro Vision model version which was optimized for image understanding. Gemini 1.0 Pro Vision was deprecated on July 12, 2024. Move to a newer Gemini version.', version='001', endpoints=None, labels=None, tuned_model_info=TunedModelInfo(base_model=None, create_time=None, update_time=None), input_token_limit=12288, output_token_limit=4096, supported_actions=['generateContent', 'countTokens'], default_checkpoint_id=None, checkpoints=None),
 Model(name='models/gem

In [26]:
import re
import ast
from typing import List, Any

def extract_and_parse_multi_class_prompt_list(code: str) -> List[str]:
    """
    Extract and parse the first list literal assigned with ``=`` in ``code``.

    The function looks for the first occurrence of
        = [ ... ]
    and parses that bracketed literal into a ``List[str]``. The surrounding
    text does not have to be valid Python (e.g. it may include Markdown
    fences); only the bracketed literal itself is evaluated.

    Each string in the list is expected to contain the placeholder ``<>``
    (to be substituted by the caller); this is not enforced here.

    Closing brackets that appear inside string elements (for example
    ``"grade [1] <> area"``) and nested brackets are handled: the literal
    ends at the first ``]`` that yields a parseable Python literal.

    Args:
        code: Text containing an assignment of a list literal.

    Returns:
        The parsed list of strings.

    Raises:
        ValueError: if no list literal is found, if it is malformed,
            or if the parsed object is not a list containing only strings.
    """
    # 1) Locate the first '=' followed (after optional whitespace) by '['.
    opening = re.search(r'=\s*\[', code)
    if not opening:
        raise ValueError("No list literal found after an '=' in the code")
    start = opening.end() - 1  # index of the opening '['

    # 2) Safely evaluate it (only literals). A plain "up to the first ']'"
    #    regex would cut the literal short whenever a string element itself
    #    contains ']', so every ']' is tried in order as the closing bracket
    #    and the first candidate that parses as a literal wins.
    data: Any = None
    parsed = False
    first_error = None
    for closing in re.finditer(r'\]', code[start:]):
        candidate = code[start:start + closing.end()]
        try:
            data = ast.literal_eval(candidate)
        except (SyntaxError, ValueError, TypeError) as e:
            # Keep the error for the shortest candidate for the final message.
            if first_error is None:
                first_error = e
            continue
        parsed = True
        break

    if not parsed:
        if first_error is None:
            # There was no ']' at all after the opening bracket.
            raise ValueError("No list literal found after an '=' in the code")
        raise ValueError(f"Malformed list literal: {first_error}") from first_error

    # 3) Validate shape: ensure it's a list of strings
    if not isinstance(data, list) or not all(isinstance(item, str) for item in data):
        raise ValueError(
            "Parsed object is not a list of strings, or contains non-string elements."
        )

    # 4) Return the list of strings
    return data

# Sample input for the parser: a string holding an annotated assignment
# (`prompts: list[str] = [...]`) whose elements each contain the `<>` placeholder.
# The call on the last line is the cell's final expression, so the parsed list
# is what gets displayed as the cell output.
code = """prompts: list[str] = [
    "An histopathological image of a <> region.",
    "The central area shows features consistent with <> tissue.",
    "This lymph node patch displays characteristics of a <> infiltrate.",
    "Observe the cellular morphology within this <> area.",
    "The predominant feature of this patch is <> architecture.",
    "The nuclear pleomorphism is indicative of a <> process.",
    "The staining intensity suggests a <> composition.",
    "The density of cells in this region is typical of <> tissue.",
    "The presence of mitotic figures suggests a <> state.",
    "The stromal reaction is characteristic of <> involvement.",
    "The overall pattern is consistent with a <> response.",
    "This patch exhibits features of <> lymphocytes.",
    "The arrangement of cells suggests a <> origin.",
    "The size and shape of nuclei are consistent with <> cells.",
    "The cytoplasm staining is indicative of a <> phenotype.",
    "The presence of necrosis suggests a <> condition.",
    "The inflammatory response appears to be <>.",
    "The vascularity within this region is typical of <> tissue.",
    "The fibrosis observed is consistent with <> changes.",
    "The cellularity is markedly increased in this <> area.",
    "The nuclei are hyperchromatic in this <> region.",
    "The architectural pattern is disrupted in the <> tissue.",
    "The stromal component is prominent in this <> patch.",
    "The overall impression is that of a <> process.",
    "The cells exhibit atypical features consistent with <> cells.",
    "The presence of giant cells suggests a <> reaction.",
    "The distribution of cells is uniform in this <> area.",
    "The nuclei are round and regular in this <> tissue.",
    "The cytoplasm is abundant in these <> cells.",
    "The staining is pale in this <> region.",
    "The cellular borders are distinct in this <> area.",
    "The nuclei are vesicular in this <> tissue.",
    "The architectural pattern is well-preserved in the <> tissue.",
    "The stromal component is sparse in this <> patch.",
    "The overall impression is that of a <> response.",
    "The cells exhibit features of <> differentiation.",
    "The presence of lymphocytes suggests a <> response.",
    "The distribution of cells is irregular in this <> area.",
    "The nuclei are irregular in shape in this <> tissue.",
    "The cytoplasm is scant in these <> cells.",
    "The staining is intense in this <> region.",
    "The cellular borders are indistinct in this <> area.",
    "The nuclei are densely packed in this <> tissue.",
    "The architectural pattern is disorganized in the <> tissue.",
    "The stromal component is dense in this <> patch.",
    "The overall impression is that of a <> process.",
    "The cells show evidence of <> activity.",
    "The presence of plasma cells suggests a <> response.",
    "The arrangement of cells is follicular in this <> area.",
    "The nuclei are enlarged in this <> tissue.",
    "The cytoplasm is eosinophilic in these <> cells.",
    "The staining is granular in this <> region.",
    "The cellular morphology is consistent with <> cells.",
    "The nuclei are binucleated in this <> tissue.",
    "The architectural pattern is nodular in the <> tissue."
]"""
extract_and_parse_multi_class_prompt_list(code=code)

['An histopathological image of a <> region.',
 'The central area shows features consistent with <> tissue.',
 'This lymph node patch displays characteristics of a <> infiltrate.',
 'Observe the cellular morphology within this <> area.',
 'The predominant feature of this patch is <> architecture.',
 'The nuclear pleomorphism is indicative of a <> process.',
 'The staining intensity suggests a <> composition.',
 'The density of cells in this region is typical of <> tissue.',
 'The presence of mitotic figures suggests a <> state.',
 'The stromal reaction is characteristic of <> involvement.',
 'The overall pattern is consistent with a <> response.',
 'This patch exhibits features of <> lymphocytes.',
 'The arrangement of cells suggests a <> origin.',
 'The size and shape of nuclei are consistent with <> cells.',
 'The cytoplasm staining is indicative of a <> phenotype.',
 'The presence of necrosis suggests a <> condition.',
 'The inflammatory response appears to be <>.',
 'The vascularit

ChatGPT

In [33]:
from openai import AzureOpenAI
from API_KEY import CHATGPT_API_KEY, CHATGPT_ENDPOINT
import json

# Initialize Azure OpenAI client with key-based authentication.
# The endpoint and key are the CHATGPT_ENDPOINT / CHATGPT_API_KEY names imported
# from the local API_KEY module, so no secret is written in this cell.
# The API version is pinned to a preview release.
client = AzureOpenAI(
    azure_endpoint=CHATGPT_ENDPOINT,
    api_key=CHATGPT_API_KEY,
    api_version="2025-01-01-preview",
)

# Prepare the chat prompt.
# Written as adjacent string literals (one per logical line of the prompt) so the
# literal is never broken across physical lines and each scored pair can be read
# on its own. Python concatenates the pieces at compile time into one string.
prompt = (
    "The task is to generate distinct textual descriptions pairs of visual discriminative features to identify whether the central region of a histopathological image patch contains tumor tissue or not. The patch is extracted from an H&E‑stained whole‑slide image of a lymph node section.\n"
    "Here are the best performing pairs in ascending order. High scores indicate higher quality visual discriminative features.\n"
    "Current Top 10 prompt pairs:\n"
    "1. ('Macrophages with tingible bodies are easily identified centrally.', 'Tingible body macrophages are scarce or absent amidst dense tumor population centrally.'), Score: 60\n"
    "2. ('No evidence of mitotic figures is visible in the central patch.', 'Frequent mitotic figures are observed centrally, indicative of high proliferative activity.'), Score: 68\n"
    "3. ('Central cytoplasm is basophilic and scant, consistent with reactive lymphocytes.', 'Central cells show abundant eosinophilic cytoplasm associated with tumor.'), Score: 70\n"
    "4. ('There is absence of central tissue edema or mucin pools.', 'Mucinous material or stromal edema is prominent in the central tumor region.'), Score: 71\n"
    "5. ('The cytoplasmic borders in the central field are sharp and well-defined.', 'Central tumor cells display indistinct, merged cytoplasmic borders due to syncytial growth.'), Score: 77\n"
    "6. ('Vascular structures in the central area are intact and regularly distributed.', 'Blood vessels in the center are distorted, congested, or invaded by tumor cells.'), Score: 80\n"
    "7. ('No mitotic figures are observed in the central lymphoid population.', 'Numerous atypical mitotic figures are present centrally within tumor cells.'), Score: 81\n"
    "8. ('No glandular or duct-like structures are formed in the central region.', 'Central tumor cells form abnormal glandular or duct-like structures indicative of malignancy.'), Score: 85\n"
    "9. ('No prominent nucleoli are visible in central cell nuclei.', 'Prominent, multiple nucleoli are present within most central tumor cell nuclei.'), Score: 87\n"
    "10. ('There is no mitotic activity noted in the central region.', 'Multiple mitotic figures are easily identified among central tumor cells.'), Score: 90\n"
    "Write 10 new prompt pairs that are different to from the old ones and has a score as high as possible. First let's formulate a strategy to improve the score.\n"
    "Only provide the output as Python code in the following format: prompts = list[tuple[negative: str, positive: str]]. Think step-by-step."
)
# Single user turn carrying the prompt as one text content part.
messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": prompt
            }
        ]
    }
]

# Generate the completion (non-streaming, so the full reply arrives in one response object).
# NOTE(review): with Azure OpenAI, `model` is presumably the deployment name — confirm it matches the resource.
completion = client.chat.completions.create(
    model='gpt-4.1',
    messages=messages,
    max_tokens=5000,
    temperature=1.0,
    top_p=0.95,
    frequency_penalty=0,
    presence_penalty=0,
    stop=None,
    stream=False
)

# Read the assistant's reply text straight from the response object instead of
# serialising the whole response to JSON and parsing it back. `response` is the
# reply string (or None when the message has no text content), not a dict.
response = completion.choices[0].message.content

print(response)


```python
# Strategy to Improve the Score:
# 1. Focus on features that are highly specific and mutually exclusive between benign/reactive and tumor tissue.
# 2. Use terminology that is precise, quantifiable, and based on well-established histopathological criteria.
# 3. Target features not already included above: nuclear size and contour, chromatin pattern, necrosis, apoptosis, cell cohesion, architectural disruption, cytoplasmic granularity, and specific patterns (e.g. starry sky, rosette, etc.).
# 4. Make sure each negative prompt describes absence or benign features and each positive prompt highlights abnormal or tumor-related features.
# 5. Avoid repetition from the top 10 pairs.

prompts = [
    # 1. Nuclear size and pleomorphism
    ("Central nuclei are small, round, and uniform in size and contour.", 
     "Central nuclei are markedly enlarged, pleomorphic, and display irregular nuclear contours."),
    
    # 2. Chromatin pattern
    ("Central cell nuclei exhibit fine, evenly d

In [31]:
import util

# Prepare the chat prompt
# The base template has two `str.format` placeholders: {content} (the scored
# prompt pairs) and {iteration_specific_instruction} (what to generate this round).
base_meta_prompt_template = """The task is to generate distinct textual descriptions pairs of visual discriminative features to identify whether the central region of a histopathological image patch contains tumor tissue or not. The patch is extracted from an H&E‑stained whole‑slide image of a lymph node section.
Here are the best performing pairs in ascending order. High scores indicate higher quality visual discriminative features. Each prompt should contain about 10 words.
{content}
{iteration_specific_instruction}
Only provide the output as Python code in the following format: prompts = list[tuple[negative: str, positive: str]]. Think step-by-step.
"""
# Scored (negative, positive) prompt pairs, listed from lowest to highest score.
content = """Current Top 10 prompt pairs:
1. ('Macrophages with tingible bodies are easily identified centrally.', 'Tingible body macrophages are scarce or absent amidst dense tumor population centrally.'), Score: 60
2. ('No evidence of mitotic figures is visible in the central patch.', 'Frequent mitotic figures are observed centrally, indicative of high proliferative activity.'), Score: 68
3. ('Central cytoplasm is basophilic and scant, consistent with reactive lymphocytes.', 'Central cells show abundant eosinophilic cytoplasm associated with tumor.'), Score: 70
4. ('There is absence of central tissue edema or mucin pools.', 'Mucinous material or stromal edema is prominent in the central tumor region.'), Score: 71
5. ('The cytoplasmic borders in the central field are sharp and well-defined.', 'Central tumor cells display indistinct, merged cytoplasmic borders due to syncytial growth.'), Score: 77
6. ('Vascular structures in the central area are intact and regularly distributed.', 'Blood vessels in the center are distorted, congested, or invaded by tumor cells.'), Score: 80
7. ('No mitotic figures are observed in the central lymphoid population.', 'Numerous atypical mitotic figures are present centrally within tumor cells.'), Score: 81
8. ('No glandular or duct-like structures are formed in the central region.', 'Central tumor cells form abnormal glandular or duct-like structures indicative of malignancy.'), Score: 85
9. ('No prominent nucleoli are visible in central cell nuclei.', 'Prominent, multiple nucleoli are present within most central tumor cell nuclei.'), Score: 87
10. ('There is no mitotic activity noted in the central region.', 'Multiple mitotic figures are easily identified among central tumor cells.'), Score: 90"""
# Instruction template with its own {generate_n} placeholder (number of new pairs to request).
iteration_specific_instruction = """Write {generate_n} new prompt pairs that are different to from the old ones and has a score as high as possible. Formulate a strategy to improve the score."""

# Fill the inner instruction first (generate_n=10), then substitute both pieces
# into the base template to obtain the final prompt text.
prompt = base_meta_prompt_template.format(
    content=content,
    iteration_specific_instruction=iteration_specific_instruction.format(generate_n=10)
)

# Send the prompt through the project-local `util.LLMClient` wrapper and print the result.
# NOTE(review): `get_llm_response` presumably returns the reply text — confirm against util.
client = util.LLMClient(provider='azure_openai')
response = client.get_llm_response(prompt)
print(response)


```python
prompts = [
    ("Central region contains uniform, small lymphocytes with dark, round nuclei.",
     "Central area shows pleomorphic, irregular nuclei with increased size variability."),
    
    ("No necrotic debris is observed in the central patch region.",
     "Extensive central necrosis with karyorrhectic debris is present among tumor cells."),
    
    ("Central tissue architecture is preserved with normal lymphoid follicle formation.",
     "Central structure is effaced by sheets of densely packed tumor cells."),
    
    ("Central stroma is delicate and inconspicuous between lymphocytes.",
     "Desmoplastic or fibrous stroma is prominent around central tumor nests."),
    
    ("Nuclear chromatin in the center is coarse, but uniformly distributed.",
     "Central tumor nuclei exhibit hyperchromasia and irregular chromatin clumping."),
    
    ("Cellular outlines in the center are regular with minimal overlapping.",
     "Tumor cells centrally are overlapping, crowdi