From ef85a1877567282a2ceb32c846890547d6ff9939 Mon Sep 17 00:00:00 2001
From: Asankhaya Sharma
Date: Fri, 29 Aug 2025 07:15:53 +0800
Subject: [PATCH 1/2] Update evaluate_prompts.py

---
 .../evaluate_prompts.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/examples/llm_prompt_optimization/evaluate_prompts.py b/examples/llm_prompt_optimization/evaluate_prompts.py
index 2505a9024..86a68eeb8 100755
--- a/examples/llm_prompt_optimization/evaluate_prompts.py
+++ b/examples/llm_prompt_optimization/evaluate_prompts.py
@@ -82,9 +82,9 @@ def evaluate_ifeval(client, prompt_template, num_samples, model):
 
         try:
             formatted_prompt = prompt_template.format(instruction=instruction)
-        except KeyError:
-            # Handle prompts with different placeholder names
-            formatted_prompt = prompt_template.replace("{instruction}", instruction)
+        except KeyError as e:
+            print(f"Error: Prompt template missing placeholder: {e}")
+            return 0.0, 0, total, total
 
         # Call LLM with retries
         output_text = None
@@ -163,8 +163,9 @@ def evaluate_hover(client, prompt_template, num_samples, model):
 
         try:
             formatted_prompt = prompt_template.format(claim=claim)
-        except KeyError:
-            formatted_prompt = prompt_template.replace("{claim}", claim)
+        except KeyError as e:
+            print(f"Error: Prompt template missing placeholder: {e}")
+            return 0.0, 0, total, total
 
         # Call LLM with retries
         output_text = None
@@ -258,10 +259,9 @@ def evaluate_hotpotqa(client, prompt_template, num_samples, model):
             formatted_prompt = prompt_template.format(
                 context=context_str.strip(), question=question
             )
-        except KeyError:
-            # Try alternative formatting
-            formatted_prompt = prompt_template.replace("{context}", context_str.strip())
-            formatted_prompt = formatted_prompt.replace("{question}", question)
+        except KeyError as e:
+            print(f"Error: Prompt template missing placeholders: {e}")
+            return 0.0, 0, total, total
 
         # Call LLM with retries
         output_text = None

From 2613c8ddb326a860566a19e330b6a0ee62867e3c Mon Sep 17 00:00:00 2001
From: Asankhaya Sharma
Date: Fri, 29 Aug 2025 07:28:02 +0800
Subject: [PATCH 2/2] Update evaluator_system_message.txt

---
 .../templates/evaluator_system_message.txt | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/examples/llm_prompt_optimization/templates/evaluator_system_message.txt b/examples/llm_prompt_optimization/templates/evaluator_system_message.txt
index 9fba56fb4..7801d7284 100644
--- a/examples/llm_prompt_optimization/templates/evaluator_system_message.txt
+++ b/examples/llm_prompt_optimization/templates/evaluator_system_message.txt
@@ -1,13 +1,2 @@
-You are an expert prompt engineer specializing in creating effective prompts for language models.
-
-Your task is to evolve and improve prompts to maximize their performance on specific tasks. When rewriting prompts:
-
-1. **Maintain the exact placeholder format**: Always use the same placeholder name as in the original prompt (e.g., {instruction}, {claim}, {context}, {question})
-2. **Keep it simple**: Avoid overly complex or verbose instructions unless necessary
-3. **Be specific**: Provide clear, actionable guidance to the model
-4. **Test-oriented**: Focus on what will improve accuracy on the given evaluation metrics
-5. **Format-aware**: Ensure the prompt works well with the expected input/output format
-
-**CRITICAL**: Your rewritten prompt must use EXACTLY the same placeholder names as the original. Do not change {instruction} to {input_text} or any other variation.
-
-Generate only the improved prompt text, nothing else.
\ No newline at end of file
+You are an expert prompt evaluator.
+Your job is to analyze the provided prompts and evaluate them systematically.
\ No newline at end of file
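
For context, a minimal standalone sketch of the behavior change in PATCH 1/2: a prompt template missing its expected placeholder is now treated as a hard failure with a zero score, rather than silently patched via str.replace(). The helper name check_template and the exact meaning of the returned tuple are assumptions for illustration, not code from the repository.

# Illustrative sketch only; mimics the stricter formatting path introduced by PATCH 1/2.
def check_template(prompt_template, total, **fields):
    """Strictly format a prompt template; fail the run on a missing placeholder."""
    try:
        return prompt_template.format(**fields), None
    except KeyError as e:
        print(f"Error: Prompt template missing placeholder: {e}")
        # Mirrors the patched evaluators' early return: zero score, nothing correct.
        return None, (0.0, 0, total, total)

# A template that renames the placeholder now fails fast instead of being silently rewritten:
prompt, failure = check_template("Answer this: {input_text}", total=100, instruction="Say hi")
assert prompt is None and failure == (0.0, 0, 100, 100)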