From ef85a1877567282a2ceb32c846890547d6ff9939 Mon Sep 17 00:00:00 2001
From: Asankhaya Sharma
Date: Fri, 29 Aug 2025 07:15:53 +0800
Subject: [PATCH 1/2] Update evaluate_prompts.py

---
 .../evaluate_prompts.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/examples/llm_prompt_optimization/evaluate_prompts.py b/examples/llm_prompt_optimization/evaluate_prompts.py
index 2505a9024..86a68eeb8 100755
--- a/examples/llm_prompt_optimization/evaluate_prompts.py
+++ b/examples/llm_prompt_optimization/evaluate_prompts.py
@@ -82,9 +82,9 @@ def evaluate_ifeval(client, prompt_template, num_samples, model):
 
         try:
             formatted_prompt = prompt_template.format(instruction=instruction)
-        except KeyError:
-            # Handle prompts with different placeholder names
-            formatted_prompt = prompt_template.replace("{instruction}", instruction)
+        except KeyError as e:
+            print(f"Error: Prompt template missing placeholder: {e}")
+            return 0.0, 0, total, total
 
         # Call LLM with retries
         output_text = None
@@ -163,8 +163,9 @@ def evaluate_hover(client, prompt_template, num_samples, model):
 
         try:
             formatted_prompt = prompt_template.format(claim=claim)
-        except KeyError:
-            formatted_prompt = prompt_template.replace("{claim}", claim)
+        except KeyError as e:
+            print(f"Error: Prompt template missing placeholder: {e}")
+            return 0.0, 0, total, total
 
         # Call LLM with retries
         output_text = None
@@ -258,10 +259,9 @@ def evaluate_hotpotqa(client, prompt_template, num_samples, model):
             formatted_prompt = prompt_template.format(
                 context=context_str.strip(), question=question
             )
-        except KeyError:
-            # Try alternative formatting
-            formatted_prompt = prompt_template.replace("{context}", context_str.strip())
-            formatted_prompt = formatted_prompt.replace("{question}", question)
+        except KeyError as e:
+            print(f"Error: Prompt template missing placeholders: {e}")
+            return 0.0, 0, total, total
 
         # Call LLM with retries
         output_text = None

From 2613c8ddb326a860566a19e330b6a0ee62867e3c Mon Sep 17 00:00:00 2001
From: Asankhaya Sharma
Date: Fri, 29 Aug 2025 07:28:02 +0800
Subject: [PATCH 2/2] Update evaluator_system_message.txt

---
 .../templates/evaluator_system_message.txt | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/examples/llm_prompt_optimization/templates/evaluator_system_message.txt b/examples/llm_prompt_optimization/templates/evaluator_system_message.txt
index 9fba56fb4..7801d7284 100644
--- a/examples/llm_prompt_optimization/templates/evaluator_system_message.txt
+++ b/examples/llm_prompt_optimization/templates/evaluator_system_message.txt
@@ -1,13 +1,2 @@
-You are an expert prompt engineer specializing in creating effective prompts for language models.
-
-Your task is to evolve and improve prompts to maximize their performance on specific tasks. When rewriting prompts:
-
-1. **Maintain the exact placeholder format**: Always use the same placeholder name as in the original prompt (e.g., {instruction}, {claim}, {context}, {question})
-2. **Keep it simple**: Avoid overly complex or verbose instructions unless necessary
-3. **Be specific**: Provide clear, actionable guidance to the model
-4. **Test-oriented**: Focus on what will improve accuracy on the given evaluation metrics
-5. **Format-aware**: Ensure the prompt works well with the expected input/output format
-
-**CRITICAL**: Your rewritten prompt must use EXACTLY the same placeholder names as the original. Do not change {instruction} to {input_text} or any other variation.
-
-Generate only the improved prompt text, nothing else.
\ No newline at end of file
+You are an expert prompt evaluator.
+Your job is to analyze the provided prompts and evaluate them systematically.
\ No newline at end of file
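
For context, a minimal standalone sketch of the behavior change in PATCH 1/2: a prompt template missing its expected placeholder is now treated as a hard failure with a zero score, rather than silently patched via str.replace(). The helper name check_template and the exact meaning of the returned tuple are assumptions for illustration, not code from the repository.

# Illustrative sketch only; mimics the stricter formatting path introduced by PATCH 1/2.
def check_template(prompt_template, total, **fields):
    """Strictly format a prompt template; fail the run on a missing placeholder."""
    try:
        return prompt_template.format(**fields), None
    except KeyError as e:
        print(f"Error: Prompt template missing placeholder: {e}")
        # Mirrors the patched evaluators' early return: zero score, nothing correct.
        return None, (0.0, 0, total, total)

# A template that renames the placeholder now fails fast instead of being silently rewritten:
prompt, failure = check_template("Answer this: {input_text}", total=100, instruction="Say hi")
assert prompt is None and failure == (0.0, 0, 100, 100)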