# Automatic Prompt Engineer (APE)

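The model writes a set of candidate prompts (8 by default) → tests each one on your labeled examples → ranks them by exact-match accuracy → reports the best prompts automatically. (This notebook runs a single generate-and-rank round; the iterative "evolve" step of full APE is not implemented here.)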

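The approach can be applied to classification, generation, and reasoning tasks; the exact-match scoring used in this notebook works best for tasks with short, unambiguous answers, such as classification.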

In [None]:
!pip install openai python-dotenv --quiet

In [None]:
import getpass
import os
import random
import re

import openai
from dotenv import load_dotenv
# Load variables from a local .env file (if one exists) into the environment.
load_dotenv()
# Use the key from the environment when set; otherwise ask for it with a hidden
# prompt so the secret is not echoed into the notebook's saved cell output.
openai.api_key = os.getenv("OPENAI_API_KEY") or getpass.getpass("OpenAI key: ")

def _parse_numbered_prompts(text):
    """Return the items of a numbered list found in *text*.

    Only lines that start with a list marker such as ``1.``, ``2)`` or ``3:``
    count as items. Every other line (preamble, blank lines, closing remarks)
    is ignored, even when it happens to contain a digit somewhere.
    """
    # The lookahead keeps a line starting with a decimal such as "1.5 ..." from
    # being mistaken for item 1; "(.*\S)" requires non-empty item text and
    # drops trailing whitespace.
    item_pattern = re.compile(r"^\s*\d+[.):](?!\d)\s*(.*\S)")
    items = []
    for line in text.splitlines():
        match = item_pattern.match(line)
        if match:
            items.append(match.group(1))
    return items


def ape_generate_prompts(task, num=8):
    """Ask the model for candidate system prompts for *task*.

    Args:
        task: Plain-language description of the task the prompts should solve.
        num: Number of candidate prompts to request. Values <= 0 return an
            empty list without calling the API.

    Returns:
        A list of prompt strings parsed from the model's numbered list. The
        list may be shorter or longer than *num* if the model does not follow
        the requested format exactly.
    """
    if num <= 0:
        return []

    response = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": f"""
Generate {num} diverse, creative, high-quality system prompts that would make an LLM excel at this task:

Task: {task}

Return only a numbered list, no explanations.
"""}],
        temperature=1.0
    )
    # The SDK types message content as optional; treat a missing reply as empty.
    raw_list = response.choices[0].message.content or ""
    return _parse_numbered_prompts(raw_list)

def ape_evaluate(prompts, examples):
    """Score each candidate prompt by exact-match accuracy on *examples*.

    Each prompt is used as the system message and each example input as the
    user message. A reply counts as correct when it equals the expected output
    after stripping surrounding whitespace and ignoring case.

    Args:
        prompts: Iterable of candidate system-prompt strings.
        examples: Iterable of ``(input_text, expected_output)`` pairs.

    Returns:
        A list of ``(prompt, accuracy)`` tuples sorted by accuracy, best first.
        Accuracy is the fraction of examples answered correctly (0.0 to 1.0).

    Raises:
        ValueError: If *examples* is empty, since accuracy would be undefined.
    """
    examples = list(examples)
    if not examples:
        raise ValueError(
            "ape_evaluate needs at least one (input, expected_output) example"
        )

    # Normalize the expected answers once instead of once per prompt.
    cases = [(ex[0], ex[1].strip().casefold()) for ex in examples]

    scores = []
    for p in prompts:
        correct = 0
        for user_input, expected in cases:
            reply = openai.chat.completions.create(
                model="gpt-4o-mini",
                messages=[{"role": "system", "content": p}, {"role": "user", "content": user_input}],
                temperature=0
            ).choices[0].message.content or ""  # content can be None
            if reply.strip().casefold() == expected:
                correct += 1
        scores.append((p, correct / len(cases)))
    return sorted(scores, key=lambda x: x[1], reverse=True)

In [None]:
# Interactive driver: describe a task, generate candidate prompts, collect a
# few labeled examples, then rank the candidates by accuracy on those examples.
task = input("Task description (e.g., 'classify movie reviews as positive or negative'): ")

print("\nGenerating candidate prompts...")
candidates = ape_generate_prompts(task)
print(f"Generated {len(candidates)} prompts\n")

# Collect up to six examples; a blank input ends entry early.
examples = []
print("Now enter 4–6 input/output examples:")
for i in range(6):
    inp = input(f"Input {i+1}: ")
    if not inp.strip():
        break
    out = input(f"Correct output {i+1}: ")
    # Strip stray whitespace so a trailing space typed by the user cannot turn
    # a correct model answer into a mismatch during exact-match scoring.
    examples.append((inp, out.strip()))

if not candidates:
    # Nothing to rank: the model's reply contained no parseable numbered list.
    print("\nNo candidate prompts could be parsed from the model's reply; re-run this cell to try again.")
elif not examples:
    # Scoring divides by the number of examples, so at least one is required.
    print("\nNo examples were entered, so the prompts cannot be scored; re-run this cell and provide at least one.")
else:
    print("\nEvaluating prompts...")
    ranked = ape_evaluate(candidates, examples)

    print("\nTOP 3 AUTOMATICALLY DISCOVERED PROMPTS:")
    for rank, (prompt, score) in enumerate(ranked[:3], start=1):
        print(f"\n#{rank} – Score: {score:.2%}")
        print(prompt)