In [136]:
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Callable
import re
import textwrap

@dataclass
# PromptTemplate = a reusable prompt “object” with placeholders like {role}, {task}, etc.
class PromptTemplate:
    name: str
    template: str
    notes: str = ""
    tags: List[str] = field(default_factory=list)

    def render(self, **kwargs) -> str:
        return self.template.format(**kwargs)

# dedent = cleans indentation so your templates look nice
def dedent(s: str) -> str:
    return textwrap.dedent(s).strip()

# extract_with_regex = optional helper for “answer engineering” (pull FinalAnswer: etc.)
def extract_with_regex(text: str, pattern: str, group: int = 1) -> Optional[str]:
    """Tiny 'answer engineering' helper: extract a stable field from a messy response."""
    m = re.search(pattern, text, flags=re.DOTALL)
    return m.group(group).strip() if m else None

## modern prompt engineering is really about:

Controlling the probability distribution over outputs.

Each component reduces entropy:

Directive → narrows task space

Context → narrows solution space

Format → narrows output space

Examples → shifts distribution toward demonstrated pattern

Checks → biases toward higher-quality completions

In [137]:
# Reusable “skeleton prompts”

# BASE_SKELETON = minimal universal base, no ICL, no do-nots, minimal checks.
# Examples are missing explicitly. 
# That was intentional because: Not all prompts use ICL, minimal universal base.
# Checks: Forces internal evaluation, Improves reliability, Acts like lightweight self-critique
BASE_SKELETON = PromptTemplate(
    name="Base Skeleton",
    tags=["general"],
    template=dedent("""
    ROLE:
    {role}

    DIRECTIVE:
    {directive}

    CONTEXT:
    {context}

    OUTPUT FORMAT:
    {format}

    QUALITY BAR / CHECKS:
    {checks} 
    """),
)

BASE_SKELETON_ICL = PromptTemplate(
    name="Base Skeleton (Full Components)",
    tags=["general"],
    template=dedent("""
    ROLE:
    {role}

    DIRECTIVE:
    {directive}

    CONTEXT:
    {context}

    EXAMPLES (optional, for ICL):
    {examples}

    OUTPUT FORMAT:
    {format}

    QUALITY BAR / CHECKS:
    {checks}
    """),
)

BASE_SKELETON_BETTER = PromptTemplate(
    name="Base Skeleton (Engineer)",
    tags=["general"],
    template=dedent("""
    ROLE:
    {role}

    DIRECTIVE:
    {directive}

    CONTEXT / GIVEN:
    {context}

    EXAMPLES (optional):
    {examples}
                    
    OUTPUT FORMAT (strict):
    {format}

    NON-GOALS / DO-NOT:
    {donts}

    QUALITY BAR / CHECKS:
    {checks}
    """),
)

In [138]:
CODEGEN_SKELETON = PromptTemplate(
    name="Codegen Skeleton",
    tags=["codegen"],
    template=dedent("""
    You are a careful software engineer.

    Task:
    {task}

    Constraints:
    - Language: {language}
    - Must satisfy: {requirements}
    - Edge cases: {edge_cases}

    Output format (strict):
    {output_format}

    If uncertain, state assumptions explicitly (do not invent APIs).
    """),
)

CODEGEN_SKELETON_BETTER = PromptTemplate(
    name="Codegen Skeleton (Engineer)",
    tags=["codegen"],
    template=dedent("""
    You are a careful software engineer.

    Task:
    {task}

    GIVEN (inputs/interfaces):
    {given}

    Constraints:
    {constraints}

    Edge cases:
    {edge_cases}

    Output format (STRICT):
    {output_format}

    Non-goals / do-not:
    {donts}

    Acceptance tests (must pass):
    {tests}

    If critical info is missing, ask up to 3 questions; otherwise state assumptions explicitly.
    """),
)


In [139]:
PROMPTS = []

def add_prompt(name: str, text: str, tags: List[str]):
    PROMPTS.append(PromptTemplate(name=name, template=dedent(text), tags=tags))

add_prompt("ValueIteration / ICL", """
Here is an example of the coding style and structure expected:
{example_code}

Now generate Python code that implements VALUE ITERATION for a finite MDP with the required function signatures.
""", tags=["codegen", "icl"])

add_prompt("ValueIteration / Thought-first", """
First, outline the Value Iteration algorithm step by step,
including how the Bellman optimality update is computed and
how convergence is detected.

Then, using that outline, generate Python code that implements
value_iteration and extract_policy with the specified signatures.

Return only the final Python code.
""", tags=["codegen", "thought-first"])

add_prompt("ValueIteration / Decomposition", """
Break the Value Iteration task into steps:
1) Write function signatures and docstrings for value_iteration and extract_policy.
2) Write pseudocode for the Bellman optimality update.
3) Write the final Python implementation that follows the pseudocode.

Output the final Python code only.
""", tags=["codegen", "decomposition"])

add_prompt("ValueIteration / Self-critique", """
Generate Python code that implements Value Iteration for a finite MDP with the required function signatures.

Now review the generated code for:
- Incorrect handling of terminal states
- Missing convergence checks
- Signature mismatches

Regenerate a corrected version of the code.
Return only the corrected Python code.
""", tags=["codegen", "self-critique"])

add_prompt("ValueIteration / Ensembling", """
Generate three independent implementations of Value Iteration that satisfy the required interface.

Then compare them and select the version that:
- Handles terminal states correctly
- Uses clear variable naming
- Is most likely to pass an automated test harness

Return only the selected implementation.
""", tags=["codegen", "ensemble"])

#[(p.name, p.tags) for p in PROMPTS]

In [140]:
def list_prompts(tag: str | None = None):
    for p in PROMPTS:
        if tag is None or tag in p.tags:
            print("-", p.name, p.tags)

def get_prompt(name: str) -> PromptTemplate:
    return next(p for p in PROMPTS if p.name == name)

# demo
list_prompts("codegen")


- ValueIteration / ICL ['codegen', 'icl']
- ValueIteration / Thought-first ['codegen', 'thought-first']
- ValueIteration / Decomposition ['codegen', 'decomposition']
- ValueIteration / Self-critique ['codegen', 'self-critique']
- ValueIteration / Ensembling ['codegen', 'ensemble']


In [141]:
def render_prompt(name: str, **kwargs) -> str:
    tpl = get_prompt(name)
    return tpl.render(**kwargs)

# demo
print(render_prompt("ValueIteration / Thought-first"))


First, outline the Value Iteration algorithm step by step,
including how the Bellman optimality update is computed and
how convergence is detected.

Then, using that outline, generate Python code that implements
value_iteration and extract_policy with the specified signatures.

Return only the final Python code.


# The intended workflow (what you actually do with it)
## Option A (most common): generate a prompt → copy/paste into ChatGPT/LLM

You fill a template with .render(...), then you literally copy that string and paste it into the LLM.

Example with the codegen skeleton:

In [142]:
prompt = BASE_SKELETON.render(
    role="You are an AI tutor for a graduate CS student.",
    directive="Explain X clearly with an example.",
    context="Audience: someone who codes in Python/C++ and wants practical intuition.",
    format="Return Markdown with headings + one worked example.",
    checks="- No missing definitions\n- Example is minimal but correct"
)
print(prompt)

ROLE:
You are an AI tutor for a graduate CS student.

DIRECTIVE:
Explain X clearly with an example.

CONTEXT:
Audience: someone who codes in Python/C++ and wants practical intuition.

OUTPUT FORMAT:
Return Markdown with headings + one worked example.

QUALITY BAR / CHECKS:
- No missing definitions
- Example is minimal but correct


In [143]:
prompt = CODEGEN_SKELETON.render(
    task="Implement a function to reverse a linked list.",
    language="Python",
    requirements="Must run in O(n) time and O(1) space.",
    edge_cases="Empty list, single-node list.",
    output_format="A single Python function definition with no extra text."
)
print(prompt)

You are a careful software engineer.

Task:
Implement a function to reverse a linked list.

Constraints:
- Language: Python
- Must satisfy: Must run in O(n) time and O(1) space.
- Edge cases: Empty list, single-node list.

Output format (strict):
A single Python function definition with no extra text.

If uncertain, state assumptions explicitly (do not invent APIs).


In [144]:
prompt = CODEGEN_SKELETON.render(
    task="Implement value iteration for an MDP with states nS and actions nA.",
    language="Python",
    requirements="Match the provided function signatures; must converge using theta; return V as np.array",
    edge_cases="Terminal states; multiple actions with same value; empty transitions",
    output_format="Return only Python code. No explanation."
)
print(prompt)


You are a careful software engineer.

Task:
Implement value iteration for an MDP with states nS and actions nA.

Constraints:
- Language: Python
- Must satisfy: Match the provided function signatures; must converge using theta; return V as np.array
- Edge cases: Terminal states; multiple actions with same value; empty transitions

Output format (strict):
Return only Python code. No explanation.

If uncertain, state assumptions explicitly (do not invent APIs).


## In-Context Learning (ICL) Example

In [145]:
prompt = BASE_SKELETON_BETTER.render(
    role="You are a careful software engineer.",
    directive="Implement value_iteration and extract_policy for a finite MDP.",
    context="""
States: nS
Actions: nA
Transition model: P[s][a] -> list of (prob, next_state, reward)
Discount factor gamma
Convergence threshold theta
""",
    examples="""
Example of coding style:

def example_algorithm(data):
    values = [0.0 for _ in range(len(data))]
    while True:
        delta = 0
        for i in range(len(data)):
            new_val = values[i] + 1
            delta = max(delta, abs(new_val - values[i]))
            values[i] = new_val
        if delta < 1e-6:
            break
    return values
""",
    format="Return only valid Python code. No explanation.",
    donts="Do not change function signatures.",
    checks="- Must converge\n- Must handle terminal states"
)

print(prompt)


ROLE:
You are a careful software engineer.

DIRECTIVE:
Implement value_iteration and extract_policy for a finite MDP.

CONTEXT / GIVEN:

States: nS
Actions: nA
Transition model: P[s][a] -> list of (prob, next_state, reward)
Discount factor gamma
Convergence threshold theta


EXAMPLES (optional):

Example of coding style:

def example_algorithm(data):
    values = [0.0 for _ in range(len(data))]
    while True:
        delta = 0
        for i in range(len(data)):
            new_val = values[i] + 1
            delta = max(delta, abs(new_val - values[i]))
            values[i] = new_val
        if delta < 1e-6:
            break
    return values


OUTPUT FORMAT (strict):
Return only valid Python code. No explanation.

NON-GOALS / DO-NOT:
Do not change function signatures.

QUALITY BAR / CHECKS:
- Must converge
- Must handle terminal states


## Thought Generation (Plan → Code) example

In [146]:
prompt = BASE_SKELETON_BETTER.render(
    role="You are a careful software engineer.",
    directive="""
First, outline the Value Iteration algorithm step by step.
Then implement value_iteration and extract_policy.
""",
    context="Finite MDP with P[s][a] transitions.",
    examples="",
    format="""
Section 1: Outline (numbered steps)
Section 2: Final Python code only
""",
    donts="Do not skip the outline.",
    checks="- Bellman update must be correct\n- Convergence check included"
)

print(prompt)


ROLE:
You are a careful software engineer.

DIRECTIVE:

First, outline the Value Iteration algorithm step by step.
Then implement value_iteration and extract_policy.


CONTEXT / GIVEN:
Finite MDP with P[s][a] transitions.

EXAMPLES (optional):


OUTPUT FORMAT (strict):

Section 1: Outline (numbered steps)
Section 2: Final Python code only


NON-GOALS / DO-NOT:
Do not skip the outline.

QUALITY BAR / CHECKS:
- Bellman update must be correct
- Convergence check included


## Decomposition Example

In [147]:
prompt = BASE_SKELETON_BETTER.render(
    role="You are a software engineer designing modular code.",
    directive="""
Break the task into steps:
1) Write function signatures.
2) Write pseudocode for Bellman update.
3) Write final implementation.
""",
    context="Finite MDP, gamma, theta.",
    examples="",
    format="""
Section 1: Signatures
Section 2: Pseudocode
Section 3: Final Python code only
""",
    donts="Do not skip steps.",
    checks="- Pseudocode matches implementation"
)

print(prompt)

ROLE:
You are a software engineer designing modular code.

DIRECTIVE:

Break the task into steps:
1) Write function signatures.
2) Write pseudocode for Bellman update.
3) Write final implementation.


CONTEXT / GIVEN:
Finite MDP, gamma, theta.

EXAMPLES (optional):


OUTPUT FORMAT (strict):

Section 1: Signatures
Section 2: Pseudocode
Section 3: Final Python code only


NON-GOALS / DO-NOT:
Do not skip steps.

QUALITY BAR / CHECKS:
- Pseudocode matches implementation


## Sel-Critique Example, next 3 cells!

Technique idea:
Generate → Review → Regenerate.

Here we use a 3-step-workflow.

There are actually three LLM calls in self-critique:

Generate

Critique

Revise

Why This Works

The model is very good at:

Producing an answer

Evaluating an answer

Improving an answer

But it rarely does all 3 perfectly in one shot.

In [148]:
# STEP 1 GENERATE
generate_prompt = CODEGEN_SKELETON_BETTER.render(
    task="Implement Value Iteration for finite MDP.",
    given="Function signatures must remain unchanged.",
    constraints="Use gamma, theta.",
    edge_cases="Terminal states.",
    output_format="Return only Python code.",
    donts="Do not invent APIs.",
    tests="Should converge on small MDP example."
)


In [149]:
# STEP 2 SELF-CRITIQUE
critique_prompt = BASE_SKELETON.render(
    role="You are a senior engineer reviewing code.",
    directive="Review the following implementation and list errors or weaknesses.",
    context="{PASTE_GENERATED_CODE_HERE}",
    examples="",
    format="Bullet list of issues.",
    donts="Do not rewrite code yet.",
    checks="- Check terminal states\n- Check convergence logic"
)

In [150]:
# STEP 3 REGENERATE
revise_prompt = BASE_SKELETON.render(
    role="You are a careful engineer fixing issues.",
    directive="Regenerate corrected implementation based on critique.",
    context="{PASTE_CRITIQUE_AND_CODE}",
    examples="",
    format="Return only corrected Python code.",
    donts="Do not include explanation.",
    checks="- All critique points addressed"
)


## Ensembling Example

In [151]:
ensemble_prompt = BASE_SKELETON_BETTER.render(
    role="You are a software engineer.",
    directive="Generate three independent implementations of Value Iteration.",
    context="Finite MDP.",
    examples="",
    format="""
Implementation 1:
Implementation 2:
Implementation 3:
""",
    donts="Do not merge implementations.",
    checks="- Each must be self-contained"
)


In [152]:
select_prompt = BASE_SKELETON.render(
    role="You are a senior engineer selecting best implementation.",
    directive="Compare the three implementations and select the best one.",
    context="{PASTE_3_IMPLEMENTATIONS}",
    examples="",
    format="Return only the selected implementation.",
    donts="Do not explain selection.",
    checks="- Must handle terminal states\n- Must converge"
)


# Option B: use your saved library prompts (PROMPTS) and fill placeholders

Your “ValueIteration / ICL” prompt has {example_code}. You fill it like this:

In [153]:
# find the template
tpl = next(p for p in PROMPTS if p.name == "ValueIteration / ICL")

prompt = tpl.render(example_code="""
def example_algorithm(data):
    values = [0.0 for _ in range(len(data))]
    while True:
        delta = 0
        for i in range(len(data)):
            new_val = values[i] + 1
            delta = max(delta, abs(new_val - values[i]))
            values[i] = new_val
        if delta < 1e-6:
            break
    return values
""")

print(prompt)


Here is an example of the coding style and structure expected:

def example_algorithm(data):
    values = [0.0 for _ in range(len(data))]
    while True:
        delta = 0
        for i in range(len(data)):
            new_val = values[i] + 1
            delta = max(delta, abs(new_val - values[i]))
            values[i] = new_val
        if delta < 1e-6:
            break
    return values


Now generate Python code that implements VALUE ITERATION for a finite MDP with the required function signatures.


In [154]:
resp = """
Some reasoning...
FinalAnswer: 0.95
Extra text...
"""

final = extract_with_regex(resp, r"FinalAnswer:\s*(.*)")
print(final)  # -> 0.95


0.95
Extra text...
