diff --git a/examples/llm_prompt_optimization/evaluate_prompts.py b/examples/llm_prompt_optimization/evaluate_prompts.py index 2505a9024..86a68eeb8 100755 --- a/examples/llm_prompt_optimization/evaluate_prompts.py +++ b/examples/llm_prompt_optimization/evaluate_prompts.py @@ -82,9 +82,9 @@ def evaluate_ifeval(client, prompt_template, num_samples, model): try: formatted_prompt = prompt_template.format(instruction=instruction) - except KeyError: - # Handle prompts with different placeholder names - formatted_prompt = prompt_template.replace("{instruction}", instruction) + except KeyError as e: + print(f"Error: Prompt template missing placeholder: {e}") + return 0.0, 0, total, total # Call LLM with retries output_text = None @@ -163,8 +163,9 @@ def evaluate_hover(client, prompt_template, num_samples, model): try: formatted_prompt = prompt_template.format(claim=claim) - except KeyError: - formatted_prompt = prompt_template.replace("{claim}", claim) + except KeyError as e: + print(f"Error: Prompt template missing placeholder: {e}") + return 0.0, 0, total, total # Call LLM with retries output_text = None @@ -258,10 +259,9 @@ def evaluate_hotpotqa(client, prompt_template, num_samples, model): formatted_prompt = prompt_template.format( context=context_str.strip(), question=question ) - except KeyError: - # Try alternative formatting - formatted_prompt = prompt_template.replace("{context}", context_str.strip()) - formatted_prompt = formatted_prompt.replace("{question}", question) + except KeyError as e: + print(f"Error: Prompt template missing placeholders: {e}") + return 0.0, 0, total, total # Call LLM with retries output_text = None diff --git a/examples/llm_prompt_optimization/templates/evaluator_system_message.txt b/examples/llm_prompt_optimization/templates/evaluator_system_message.txt index 9fba56fb4..7801d7284 100644 --- a/examples/llm_prompt_optimization/templates/evaluator_system_message.txt +++ b/examples/llm_prompt_optimization/templates/evaluator_system_message.txt @@ -1,13 +1,2 @@ -You are an expert prompt engineer specializing in creating effective prompts for language models. - -Your task is to evolve and improve prompts to maximize their performance on specific tasks. When rewriting prompts: - -1. **Maintain the exact placeholder format**: Always use the same placeholder name as in the original prompt (e.g., {instruction}, {claim}, {context}, {question}) -2. **Keep it simple**: Avoid overly complex or verbose instructions unless necessary -3. **Be specific**: Provide clear, actionable guidance to the model -4. **Test-oriented**: Focus on what will improve accuracy on the given evaluation metrics -5. **Format-aware**: Ensure the prompt works well with the expected input/output format - -**CRITICAL**: Your rewritten prompt must use EXACTLY the same placeholder names as the original. Do not change {instruction} to {input_text} or any other variation. - -Generate only the improved prompt text, nothing else. \ No newline at end of file +You are an expert prompt evaluator. +Your job is to analyze the provided prompts and evaluate them systematically. \ No newline at end of file