Skip to content

Commit

Permalink
Merge branch 'main' into feat/split_with_threshold
Browse files Browse the repository at this point in the history
  • Loading branch information
Halpph committed May 23, 2024
2 parents 9072d47 + 482f60e commit 63551dd
Show file tree
Hide file tree
Showing 10 changed files with 56 additions and 8 deletions.
5 changes: 4 additions & 1 deletion haystack/components/evaluators/context_relevance.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ class ContextRelevanceEvaluator(LLMEvaluator):
def __init__(
self,
examples: Optional[List[Dict[str, Any]]] = None,
progress_bar: bool = True,
api: str = "openai",
api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"),
):
Expand All @@ -89,12 +90,13 @@ def __init__(
"statement_scores": [1],
},
}]
:param progress_bar:
Whether to show a progress bar during the evaluation.
:param api:
The API to use for calling an LLM through a Generator.
Supported APIs: "openai".
:param api_key:
The API key.
"""
self.instructions = (
"Your task is to judge how relevant the provided context is for answering a question. "
Expand All @@ -115,6 +117,7 @@ def __init__(
examples=self.examples,
api=self.api,
api_key=self.api_key,
progress_bar=progress_bar,
)

@component.output_types(individual_scores=List[int], score=float, results=List[Dict[str, Any]])
Expand Down
4 changes: 4 additions & 0 deletions haystack/components/evaluators/faithfulness.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ class FaithfulnessEvaluator(LLMEvaluator):
def __init__(
self,
examples: Optional[List[Dict[str, Any]]] = None,
progress_bar: bool = True,
api: str = "openai",
api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"),
):
Expand All @@ -104,6 +105,8 @@ def __init__(
"statement_scores": [1, 0],
},
}]
:param progress_bar:
Whether to show a progress bar during the evaluation.
:param api:
The API to use for calling an LLM through a Generator.
Supported APIs: "openai".
Expand Down Expand Up @@ -131,6 +134,7 @@ def __init__(
examples=self.examples,
api=self.api,
api_key=self.api_key,
progress_bar=progress_bar,
)

@component.output_types(individual_scores=List[int], score=float, results=List[Dict[str, Any]])
Expand Down
10 changes: 8 additions & 2 deletions haystack/components/evaluators/llm_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import json
from typing import Any, Dict, List, Tuple, Type

from tqdm import tqdm

from haystack import component, default_from_dict, default_to_dict
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator
Expand Down Expand Up @@ -50,6 +52,7 @@ def __init__(
inputs: List[Tuple[str, Type[List]]],
outputs: List[str],
examples: List[Dict[str, Any]],
progress_bar: bool = True,
*,
api: str = "openai",
api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"),
Expand All @@ -70,6 +73,8 @@ def __init__(
`outputs` parameters.
Each example is a dictionary with keys "inputs" and "outputs"
They contain the input and output as dictionaries respectively.
:param progress_bar:
Whether to show a progress bar during the evaluation.
:param api:
The API to use for calling an LLM through a Generator.
Supported APIs: "openai".
Expand All @@ -78,13 +83,13 @@ def __init__(
"""
self.validate_init_parameters(inputs, outputs, examples)

self.instructions = instructions
self.inputs = inputs
self.outputs = outputs
self.examples = examples
self.api = api
self.api_key = api_key
self.progress_bar = progress_bar

if api == "openai":
self.generator = OpenAIGenerator(
Expand Down Expand Up @@ -173,7 +178,7 @@ def run(self, **inputs) -> Dict[str, Any]:
list_of_input_names_to_values = [dict(zip(input_names, v)) for v in values]

results = []
for input_names_to_values in list_of_input_names_to_values:
for input_names_to_values in tqdm(list_of_input_names_to_values, disable=not self.progress_bar):
prompt = self.builder.run(**input_names_to_values)
result = self.generator.run(prompt=prompt["prompt"])

Expand Down Expand Up @@ -243,6 +248,7 @@ def to_dict(self) -> Dict[str, Any]:
examples=self.examples,
api=self.api,
api_key=self.api_key.to_dict(),
progress_bar=self.progress_bar,
)

@classmethod
Expand Down
8 changes: 7 additions & 1 deletion haystack/components/readers/extractive.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ def _preprocess(
"""
texts = []
document_ids = []
document_contents = []
for i, doc in enumerate(documents):
if doc.content is None:
warnings.warn(
Expand All @@ -219,9 +220,11 @@ def _preprocess(
continue
texts.append(doc.content)
document_ids.append(i)
document_contents.append(doc.content)

encodings_pt = self.tokenizer( # type: ignore
queries,
[document.content for document in documents],
document_contents,
padding=True,
truncation=True,
max_length=max_seq_length,
Expand Down Expand Up @@ -571,6 +574,9 @@ def run(
:raises ComponentError:
If the component was not warmed up by calling 'warm_up()' before.
"""
if not documents:
return {"answers": []}

queries = [query] # Temporary solution until we have decided what batching should look like in v2
nested_documents = [documents]
if self.model is None:
Expand Down
2 changes: 1 addition & 1 deletion haystack/components/websearch/serper_dev.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def run(self, query: str) -> Dict[str, Union[List[Document], List[str]]]:

# we get the snippet from the json result and put it in the content field of the document
organic = [
Document(meta={k: v for k, v in d.items() if k != "snippet"}, content=d["snippet"])
Document(meta={k: v for k, v in d.items() if k != "snippet"}, content=d.get("snippet"))
for d in json_result["organic"]
]

Expand Down
4 changes: 4 additions & 0 deletions releasenotes/notes/reader-crash-no-docs-53085ce48baaae81.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
fixes:
- |
Return an empty list of answers when `ExtractiveReader` receives an empty list of documents instead of raising an exception.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
enhancements:
- |
Make `SerperDevWebSearch` more robust when `snippet` is missing from a search result in the API response.
2 changes: 2 additions & 0 deletions test/components/evaluators/test_llm_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ def test_to_dict_default(self, monkeypatch):
"instructions": "test-instruction",
"inputs": [("predicted_answers", List[str])],
"outputs": ["score"],
"progress_bar": True,
"examples": [
{"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}}
],
Expand Down Expand Up @@ -266,6 +267,7 @@ def test_to_dict_with_parameters(self, monkeypatch):
"instructions": "test-instruction",
"inputs": [("predicted_answers", List[str])],
"outputs": ["custom_score"],
"progress_bar": True,
"examples": [
{
"inputs": {"predicted_answers": "Damn, this is straight outta hell!!!"},
Expand Down
10 changes: 7 additions & 3 deletions test/components/readers/test_extractive.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,13 +266,17 @@ def test_from_dict_no_token():
assert component.token is None


def test_run_no_docs(mock_reader: ExtractiveReader):
    """Running the reader on an empty document list returns an empty list of answers."""
    mock_reader.warm_up()
    result = mock_reader.run(query="hello", documents=[])
    assert result == {"answers": []}


def test_output(mock_reader: ExtractiveReader):
answers = mock_reader.run(example_queries[0], example_documents[0], top_k=3)[
"answers"
] # [0] Uncomment and remove first two indices when batching support is reintroduced
answers = mock_reader.run(example_queries[0], example_documents[0], top_k=3)["answers"]
doc_ids = set()
no_answer_prob = 1
for doc, answer in zip(example_documents[0], answers[:3]):
assert answer.document_offset is not None
assert answer.document_offset.start == 11
assert answer.document_offset.end == 16
assert doc.content is not None
Expand Down
15 changes: 15 additions & 0 deletions test/components/websearch/test_serperdev.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

import json
import os
from unittest.mock import Mock, patch
from haystack.utils.auth import Secret
Expand Down Expand Up @@ -111,6 +113,15 @@ def mock_serper_dev_search_result():
yield mock_run


@pytest.fixture
def mock_serper_dev_search_result_no_snippet():
    """
    Patch `requests` in the SerperDev module with a response whose first organic result has no snippet.

    The response is built from `EXAMPLE_SERPERDEV_RESPONSE` with the "snippet" key removed from the
    first entry of "organic". The patched `requests` mock is yielded to the test.
    """
    import copy  # function-scope import so this fixture does not depend on the module's import block

    # A shallow copy (`{**EXAMPLE_SERPERDEV_RESPONSE}`) still shares the nested "organic" dicts with the
    # module-level example, so deleting the key there would silently change the shared response for
    # every test that runs afterwards. Work on a deep copy instead.
    resp = copy.deepcopy(EXAMPLE_SERPERDEV_RESPONSE)
    del resp["organic"][0]["snippet"]
    with patch("haystack.components.websearch.serper_dev.requests") as mock_run:
        mock_run.post.return_value = Mock(status_code=200, json=lambda: resp)
        yield mock_run


class TestSerperDevSearchAPI:
def test_init_fail_wo_api_key(self, monkeypatch):
monkeypatch.delenv("SERPERDEV_API_KEY", raising=False)
Expand Down Expand Up @@ -142,6 +153,10 @@ def test_web_search_top_k(self, mock_serper_dev_search_result, top_k: int):
assert all(isinstance(link, str) for link in links)
assert all(link.startswith("http") for link in links)

def test_no_snippet(self, mock_serper_dev_search_result_no_snippet):
    # Smoke test: running a search must not raise when an organic result lacks the "snippet" key.
    searcher = SerperDevWebSearch(api_key=Secret.from_token("test-api-key"), top_k=1)
    searcher.run(query="Who is the boyfriend of Olivia Wilde?")

@patch("requests.post")
def test_timeout_error(self, mock_post):
mock_post.side_effect = Timeout
Expand Down

0 comments on commit 63551dd

Please sign in to comment.