Merge branch 'main' into failsafe-for-non-valid-JSON

deepset-ai · May 23, 2024 · a7d7879
2 parents 8ce0c9d + a4fc2b6
commit a7d7879
Show file tree

Hide file tree

Showing 4 changed files with 18 additions and 1 deletion.
diff --git a/haystack/components/evaluators/context_relevance.py b/haystack/components/evaluators/context_relevance.py
@@ -67,6 +67,7 @@ class ContextRelevanceEvaluator(LLMEvaluator):
     def __init__(
         self,
         examples: Optional[List[Dict[str, Any]]] = None,
+        progress_bar: bool = True,
         api: str = "openai",
         api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"),
         raise_on_failure: bool = True,
@@ -90,6 +91,8 @@ def __init__(
                     "statement_scores": [1],
                 },
             }]
+        :param progress_bar:
+            Whether to show a progress bar during the evaluation.
         :param api:
             The API to use for calling an LLM through a Generator.
             Supported APIs: "openai".
@@ -119,6 +122,7 @@ def __init__(
             api=self.api,
             api_key=self.api_key,
             raise_on_failure=raise_on_failure,
+            progress_bar=progress_bar,
         )
 
     @component.output_types(individual_scores=List[int], score=float, results=List[Dict[str, Any]])

diff --git a/haystack/components/evaluators/faithfulness.py b/haystack/components/evaluators/faithfulness.py
@@ -81,6 +81,7 @@ class FaithfulnessEvaluator(LLMEvaluator):
     def __init__(
         self,
         examples: Optional[List[Dict[str, Any]]] = None,
+        progress_bar: bool = True,
         api: str = "openai",
         api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"),
         raise_on_failure: bool = True,
@@ -105,6 +106,8 @@ def __init__(
                     "statement_scores": [1, 0],
                 },
             }]
+        :param progress_bar:
+            Whether to show a progress bar during the evaluation.
         :param api:
             The API to use for calling an LLM through a Generator.
             Supported APIs: "openai".
@@ -135,6 +138,7 @@ def __init__(
             api=self.api,
             api_key=self.api_key,
             raise_on_failure=raise_on_failure,
+            progress_bar=progress_bar,
         )
 
     @component.output_types(individual_scores=List[int], score=float, results=List[Dict[str, Any]])

diff --git a/haystack/components/evaluators/llm_evaluator.py b/haystack/components/evaluators/llm_evaluator.py
@@ -6,6 +6,8 @@
 from typing import Any, Dict, List, Optional, Tuple, Type
 from warnings import warn
 
+from tqdm import tqdm
+
 from haystack import component, default_from_dict, default_to_dict
 from haystack.components.builders import PromptBuilder
 from haystack.components.generators import OpenAIGenerator
@@ -51,6 +53,7 @@ def __init__(
         inputs: List[Tuple[str, Type[List]]],
         outputs: List[str],
         examples: List[Dict[str, Any]],
+        progress_bar: bool = True,
         *,
         raise_on_failure: bool = True,
         api: str = "openai",
@@ -74,6 +77,8 @@ def __init__(
             They contain the input and output as dictionaries respectively.
         :param raise_on_failure:
             If True, the component will raise an exception on an unsuccessful API call.
+        :param progress_bar:
+            Whether to show a progress bar during the evaluation.
         :param api:
             The API to use for calling an LLM through a Generator.
             Supported APIs: "openai".
@@ -89,6 +94,7 @@ def __init__(
         self.examples = examples
         self.api = api
         self.api_key = api_key
+        self.progress_bar = progress_bar
 
         if api == "openai":
             self.generator = OpenAIGenerator(
@@ -179,7 +185,7 @@ def run(self, **inputs) -> Dict[str, Any]:
 
         results: List[Optional[Dict[str, Any]]] = []
         errors = 0
-        for input_names_to_values in list_of_input_names_to_values:
+        for input_names_to_values in tqdm(list_of_input_names_to_values, disable=not self.progress_bar):
             prompt = self.builder.run(**input_names_to_values)
             try:
                 result = self.generator.run(prompt=prompt["prompt"])
@@ -265,6 +271,7 @@ def to_dict(self) -> Dict[str, Any]:
             examples=self.examples,
             api=self.api,
             api_key=self.api_key.to_dict(),
+            progress_bar=self.progress_bar,
         )
 
     @classmethod

diff --git a/test/components/evaluators/test_llm_evaluator.py b/test/components/evaluators/test_llm_evaluator.py
@@ -207,6 +207,7 @@ def test_to_dict_default(self, monkeypatch):
                 "instructions": "test-instruction",
                 "inputs": [("predicted_answers", List[str])],
                 "outputs": ["score"],
+                "progress_bar": True,
                 "examples": [
                     {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}}
                 ],
@@ -267,6 +268,7 @@ def test_to_dict_with_parameters(self, monkeypatch):
                 "instructions": "test-instruction",
                 "inputs": [("predicted_answers", List[str])],
                 "outputs": ["custom_score"],
+                "progress_bar": True,
                 "examples": [
                     {
                         "inputs": {"predicted_answers": "Damn, this is straight outta hell!!!"},