In [27]:
import ast
import re
from typing import Any, List, Tuple

from google import genai

import util
from API_KEY import GEMINI_API_KEY

# Build the Gemini client from the API key kept in the local API_KEY module.
client = genai.Client(api_key=GEMINI_API_KEY)

# One-off request: ask the model to cross over two (negative, positive)
# prompt pairs and then mutate the result, and print the raw text reply.
response = client.models.generate_content(
    contents="""Please follow the instruction step-by-step to generate a better prompt pair.
1. Cross over the following prompts and generate a new prompt:

Prompt Pair 1: ('Small mature lymphocytes centrally show no indication of active replication.', 'Large immature cells centrally demonstrate ongoing active replication.')
Prompt Pair 2: ('Small, mature lymphocytes centrally show no active processes of chromosome separation.', 'Large, immature cells centrally demonstrate active processes of chromosome separation.')

2. Mutate the prompt generated in Step 1 and generate a final prompt pair in a python tuple (str, str)""",
    model="gemini-2.0-flash",
)
print(response.text)

Okay, let's follow the steps.

**Step 1: Crossover**

Let's identify key phrases and components to crossover. We want to combine elements of both descriptions (mature vs. immature cells, presence or absence of replication/chromosome separation, and the descriptive wording).

A reasonable crossover could be:

* **Positive Prompt (Immature):** "Large, immature lymphocytes centrally demonstrate ongoing active replication and active processes of chromosome separation."
* **Negative Prompt (Mature):** "Small, mature lymphocytes centrally show no indication of active replication and no active processes of chromosome separation."

**Step 2: Mutation**

Now we'll mutate the prompts from Step 1.  Mutation should introduce small, yet impactful changes to the phrasing and description. I'll aim for changes that alter the emphasis or subtly clarify the intent. Here's how I'll mutate them:

*   **Change 1:** Replace "ongoing active replication" with "evidence of rapid replication."
*   **Change 2:**

In [22]:
def extract_and_parse_prompt_list(code: str) -> List[Tuple[str, str]]:
    """
    Extract the first ``= [ ... ]`` list literal from `code` and parse it.

    The literal starts at the first ``[`` that follows an ``=`` (optionally
    separated by whitespace). Its end is found by trying each subsequent
    ``]`` in turn until the text in between evaluates as a Python literal,
    so nested lists and strings that themselves contain ``]`` are handled.

    Args:
        code: Text (typically Python source) containing the assignment.

    Returns:
        The parsed pairs, each converted to a ``(str, str)`` tuple.

    Raises:
        ValueError: if no list literal is found, if it cannot be evaluated
            as a literal, or if it is not a list of 2-element lists/tuples.
    """
    # 1) locate the opening bracket of the first "= [" and every candidate
    #    closing bracket after it
    opener = re.search(r'=\s*\[', code)
    tail = code[opener.end() - 1:] if opener else ""
    closers = [m.end() for m in re.finditer(r'\]', tail)]
    if not closers:
        raise ValueError("No list literal found after an '=' in the code")

    # 2) safely evaluate (literals only) the shortest prefix that parses;
    #    a bare "first ]" cut would truncate nested lists or strings with "]"
    first_error = None
    for end in closers:
        try:
            data: Any = ast.literal_eval(tail[:end])
        except (SyntaxError, ValueError) as e:
            if first_error is None:
                first_error = e
            continue
        break
    else:
        raise ValueError(f"Malformed list literal: {first_error}") from first_error

    # 3) validate shape
    if not isinstance(data, list) or not all(
        isinstance(item, (list, tuple)) and len(item) == 2 for item in data
    ):
        raise ValueError("Parsed object is not a list of 2-element lists/tuples")

    # 4) convert to List[Tuple[str,str]]
    return [(str(a), str(b)) for a, b in data]

def extract_and_parse_prompt_tuple(code: str) -> Tuple[str, str]:
    """
    Return the first literal 2-tuple of strings that appears in `code`.

    "First" means earliest in source order (by line, then column), e.g.
    ("neg prompt", "pos prompt"). `ast.walk` visits nodes breadth-first, so
    its iteration order alone would prefer a shallow tuple over an earlier
    but more deeply nested one; candidates are therefore ordered explicitly.

    Args:
        code: Python source text to search.

    Returns:
        The two string values of the matching tuple.

    Raises:
        ValueError: if no suitable 2-element string tuple is found.
        SyntaxError: if `code` is not valid Python (raised by `ast.parse`).
    """
    tree = ast.parse(code)

    def _is_str_constant(node: ast.AST) -> bool:
        return isinstance(node, ast.Constant) and isinstance(node.value, str)

    # Collect every tuple node made of exactly two string constants
    candidates = [
        node
        for node in ast.walk(tree)
        if isinstance(node, ast.Tuple)
        and len(node.elts) == 2
        and all(_is_str_constant(elt) for elt in node.elts)
    ]
    if not candidates:
        raise ValueError("No 2-element string tuple found in code")

    # Pick the one that starts earliest in the source text
    first = min(candidates, key=lambda node: (node.lineno, node.col_offset))
    neg, pos = first.elts
    return (neg.value, pos.value)

import io
import tokenize
import json
from typing import Tuple, List

def _force_double_quotes(code: str) -> str:
    """
    Rewrites every Python string-literal in `code` to use double-quotes,
    properly handling apostrophes and other special characters.
    """
    tokens = tokenize.generate_tokens(io.StringIO(code).readline)
    new_tokens = []
    for toknum, tokval, start, end, line in tokens:
        if toknum == tokenize.STRING:
            # Get the actual string value
            value = ast.literal_eval(tokval)

            # Create a new string literal with double quotes
            # Properly escape any double quotes or backslashes in the string
            # This automatically handles escaping correctly
            tokval = json.dumps(value)

        new_tokens.append((toknum, tokval))
    return tokenize.untokenize(new_tokens)


def get_prompt_pairs(prompt, client, parse_func=extract_and_parse_prompt_list,  max_retries=10, model="gemini-2.0-flash") -> List[Tuple[str, str]]:
    """
    Query the model with `prompt` and parse prompt pairs out of its reply.

    The reply must contain a ```python ... ``` fenced block; its contents
    are normalized with `_force_double_quotes` and handed to `parse_func`.
    Any failure (request error, missing block, parse error, empty result)
    is reported and the whole request is retried immediately.

    Args:
        prompt: Text sent to the model.
        client: Either an object exposing `models.generate_content(model=...,
            contents=...)` (as used with `genai.Client` elsewhere in this
            file) or an object exposing `generate_content(prompt)` directly.
        parse_func: Callable turning the extracted code into the result.
        max_retries: Maximum number of attempts.
        model: Model name passed along when `client` exposes `models`.

    Returns:
        Whatever `parse_func` returns: a list of (str, str) pairs with the
        default parser, or a single (str, str) tuple with
        `extract_and_parse_prompt_tuple`.

    Raises:
        RuntimeError: if every attempt fails, or if `max_retries` < 1.
    """
    for attempt in range(1, max_retries + 1):
        try:
            # Support both client shapes; calling generate_content directly
            # on a client that only offers `models.generate_content` would
            # fail with AttributeError on every attempt.
            models_api = getattr(client, "models", None)
            if models_api is not None and hasattr(models_api, "generate_content"):
                response = models_api.generate_content(model=model, contents=prompt)
            else:
                response = client.generate_content(prompt)

            raw = response.text
            if not raw:
                raise ValueError("Model returned an empty response")
            print(f"Raw response on attempt {attempt}: {raw}...")

            # 1) extract the python block
            m = re.search(r'```python\s*([\s\S]*?)\s*```', raw)
            if not m:
                raise ValueError("No ```python ... ``` block found")
            code = m.group(1)

            # 2) normalize all literals to double-quoted form
            code = _force_double_quotes(code)

            # 3) convert the string to prompt pairs
            prompts = parse_func(code)
            if not prompts:
                # An empty result is treated as a failed attempt and retried.
                raise ValueError("Parsed zero prompt pairs")
            print(f"Loaded {len(prompts)} prompt-pairs.")
            print("First pair:", prompts[0])
            return prompts

        except Exception as e:
            print(
                f"[Warning] get_prompt_pairs parse error on attempt {attempt}/{max_retries}: {e}")
            if attempt == max_retries:
                raise RuntimeError(
                    "Failed to parse prompts after multiple attempts") from e
            # otherwise, retry immediately

    # Only reached when the loop body never ran, i.e. max_retries < 1
    raise RuntimeError("No attempts were made: max_retries must be at least 1")

# Meta-prompt asking the model to cross over two (negative, positive) prompt
# pairs and then mutate the result into one final pair.
meta_init_prompt = """Please follow the instruction step-by-step to generate a better prompt pair.
1. Cross over the following prompts and generate a new prompt:

Prompt Pair 1: ('Small mature lymphocytes centrally show no indication of active replication.', 'Large immature cells centrally demonstrate ongoing active replication.')
Prompt Pair 2: ('Small, mature lymphocytes centrally show no active processes of chromosome separation.', 'Large, immature cells centrally demonstrate active processes of chromosome separation.')

2. Mutate the prompt generated in Step 1 and generate a final prompt pair in a python tuple (str, str)"""

# The reply is expected to hold a single tuple, so use the tuple parser.
# Left as a bare expression so the notebook displays the returned pair.
get_prompt_pairs(
    prompt=meta_init_prompt,
    client=client,
    parse_func=extract_and_parse_prompt_tuple,
)

Raw response on attempt 1: 1. **Cross over the prompts:**

   * **Prompt 1 (P1):** "Small mature lymphocytes centrally show no indication of active replication."
   * **Prompt 2 (P2):** "Large immature cells centrally demonstrate ongoing active replication."
   * **Prompt 3 (P3):** "Small, mature lymphocytes centrally show no active processes of chromosome separation."
   * **Prompt 4 (P4):** "Large, immature cells centrally demonstrate active processes of chromosome separation."

   Let's combine elements from P1 and P3 for the first part of the new prompt, and P2 and P4 for the second part. We can focus on the core ideas of maturity/immaturity and the presence/absence of replication/division.

   **New Prompt (Crossover):** "Small, mature lymphocytes centrally show no signs of active replication or chromosome separation, while large, immature cells centrally demonstrate active replication and chromosome separation."

2. **Mutate the prompt generated in Step 1 and generate a final pro

('Small, mature lymphocytes centrally exhibit no proliferative activity.',
 'Large, immature cells centrally demonstrate robust mitotic figures.')