diff --git a/haystack/components/evaluators/context_relevance.py b/haystack/components/evaluators/context_relevance.py index 9988bdeb02c..9bd299bbc56 100644 --- a/haystack/components/evaluators/context_relevance.py +++ b/haystack/components/evaluators/context_relevance.py @@ -67,6 +67,7 @@ class ContextRelevanceEvaluator(LLMEvaluator): def __init__( self, examples: Optional[List[Dict[str, Any]]] = None, + progress_bar: bool = True, api: str = "openai", api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"), ): @@ -89,12 +90,13 @@ def __init__( "statement_scores": [1], }, }] + :param progress_bar: + Whether to show a progress bar during the evaluation. :param api: The API to use for calling an LLM through a Generator. Supported APIs: "openai". :param api_key: The API key. - """ self.instructions = ( "Your task is to judge how relevant the provided context is for answering a question. " @@ -115,6 +117,7 @@ def __init__( examples=self.examples, api=self.api, api_key=self.api_key, + progress_bar=progress_bar, ) @component.output_types(individual_scores=List[int], score=float, results=List[Dict[str, Any]]) diff --git a/haystack/components/evaluators/faithfulness.py b/haystack/components/evaluators/faithfulness.py index 2bcbb9b0865..1e561f66937 100644 --- a/haystack/components/evaluators/faithfulness.py +++ b/haystack/components/evaluators/faithfulness.py @@ -81,6 +81,7 @@ class FaithfulnessEvaluator(LLMEvaluator): def __init__( self, examples: Optional[List[Dict[str, Any]]] = None, + progress_bar: bool = True, api: str = "openai", api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"), ): @@ -104,6 +105,8 @@ def __init__( "statement_scores": [1, 0], }, }] + :param progress_bar: + Whether to show a progress bar during the evaluation. :param api: The API to use for calling an LLM through a Generator. Supported APIs: "openai". @@ -131,6 +134,7 @@ def __init__( examples=self.examples, api=self.api, api_key=self.api_key, + progress_bar=progress_bar, ) @component.output_types(individual_scores=List[int], score=float, results=List[Dict[str, Any]]) diff --git a/haystack/components/evaluators/llm_evaluator.py b/haystack/components/evaluators/llm_evaluator.py index e4eebbd9ab5..9766f236ad7 100644 --- a/haystack/components/evaluators/llm_evaluator.py +++ b/haystack/components/evaluators/llm_evaluator.py @@ -5,6 +5,8 @@ import json from typing import Any, Dict, List, Tuple, Type +from tqdm import tqdm + from haystack import component, default_from_dict, default_to_dict from haystack.components.builders import PromptBuilder from haystack.components.generators import OpenAIGenerator @@ -50,6 +52,7 @@ def __init__( inputs: List[Tuple[str, Type[List]]], outputs: List[str], examples: List[Dict[str, Any]], + progress_bar: bool = True, *, api: str = "openai", api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"), @@ -70,6 +73,8 @@ def __init__( `outputs` parameters. Each example is a dictionary with keys "inputs" and "outputs" They contain the input and output as dictionaries respectively. + :param progress_bar: + Whether to show a progress bar during the evaluation. :param api: The API to use for calling an LLM through a Generator. Supported APIs: "openai". @@ -78,13 +83,13 @@ def __init__( """ self.validate_init_parameters(inputs, outputs, examples) - self.instructions = instructions self.inputs = inputs self.outputs = outputs self.examples = examples self.api = api self.api_key = api_key + self.progress_bar = progress_bar if api == "openai": self.generator = OpenAIGenerator( @@ -173,7 +178,7 @@ def run(self, **inputs) -> Dict[str, Any]: list_of_input_names_to_values = [dict(zip(input_names, v)) for v in values] results = [] - for input_names_to_values in list_of_input_names_to_values: + for input_names_to_values in tqdm(list_of_input_names_to_values, disable=not self.progress_bar): prompt = self.builder.run(**input_names_to_values) result = self.generator.run(prompt=prompt["prompt"]) @@ -243,6 +248,7 @@ def to_dict(self) -> Dict[str, Any]: examples=self.examples, api=self.api, api_key=self.api_key.to_dict(), + progress_bar=self.progress_bar, ) @classmethod diff --git a/test/components/evaluators/test_llm_evaluator.py b/test/components/evaluators/test_llm_evaluator.py index b1d41e000c3..1b28dab84e1 100644 --- a/test/components/evaluators/test_llm_evaluator.py +++ b/test/components/evaluators/test_llm_evaluator.py @@ -206,6 +206,7 @@ def test_to_dict_default(self, monkeypatch): "instructions": "test-instruction", "inputs": [("predicted_answers", List[str])], "outputs": ["score"], + "progress_bar": True, "examples": [ {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}} ], @@ -266,6 +267,7 @@ def test_to_dict_with_parameters(self, monkeypatch): "instructions": "test-instruction", "inputs": [("predicted_answers", List[str])], "outputs": ["custom_score"], + "progress_bar": True, "examples": [ { "inputs": {"predicted_answers": "Damn, this is straight outta hell!!!"},