From 0a7dfc1b32cd98a7fd7a2d6345cc0b120b8bb18a Mon Sep 17 00:00:00 2001
From: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com>
Date: Fri, 23 Feb 2024 14:05:57 +0100
Subject: [PATCH 01/10] Revert "Add `AnswerExactMatchEvaluator` (#7050)"
 (#7075)

This reverts commit b4011af8e9bc4ae2f72e51db254bfda69e20b651.
---
 haystack/components/evaluators/__init__.py    |  3 -
 .../evaluators/answer_exact_match.py          | 49 -------------
 ...xact-match-evaluator-197bb87b65e19d0c.yaml |  6 --
 .../evaluators/test_answer_exact_match.py     | 61 ------------------
 4 files changed, 119 deletions(-)
 delete mode 100644 haystack/components/evaluators/__init__.py
 delete mode 100644 haystack/components/evaluators/answer_exact_match.py
 delete mode 100644 releasenotes/notes/exact-match-evaluator-197bb87b65e19d0c.yaml
 delete mode 100644 test/components/evaluators/test_answer_exact_match.py

diff --git a/haystack/components/evaluators/__init__.py b/haystack/components/evaluators/__init__.py
deleted file mode 100644
index 9550a5f42..000000000
--- a/haystack/components/evaluators/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from .answer_exact_match import AnswerExactMatchEvaluator
-
-__all__ = ["AnswerExactMatchEvaluator"]
diff --git a/haystack/components/evaluators/answer_exact_match.py b/haystack/components/evaluators/answer_exact_match.py
deleted file mode 100644
index eb509e8be..000000000
--- a/haystack/components/evaluators/answer_exact_match.py
+++ /dev/null
@@ -1,49 +0,0 @@
-from typing import Any, Dict, List
-
-from haystack import default_from_dict, default_to_dict
-from haystack.core.component import component
-
-
-@component
-class AnswerExactMatchEvaluator:
-    """
-    Evaluator that checks if the predicted answers matches any of the ground truth answers exactly.
-    The result is a number from 0.0 to 1.0, it represents the proportion of questions where any predicted answer
-    matched one of the ground truth answers.
-    Each question can have multiple ground truth answers and multiple predicted answers.
-    """
-
-    def to_dict(self) -> Dict[str, Any]:
-        return default_to_dict(self)
-
-    @classmethod
-    def from_dict(cls, data: Dict[str, Any]) -> "AnswerExactMatchEvaluator":
-        return default_from_dict(cls, data)
-
-    @component.output_types(result=float)
-    def run(
-        self, questions: List[str], ground_truth_answers: List[List[str]], predicted_answers: List[List[str]]
-    ) -> Dict[str, float]:
-        """
-        Run the AnswerExactMatchEvaluator on the given inputs.
-        All lists must have the same length.
-
-        :param questions: A list of questions.
-        :param ground_truth_answers: A list of expected answers for each question.
-        :param predicted_answers: A list of predicted answers for each question.
-        :returns: A dictionary with the following outputs:
-            * `result` - A number from 0.0 to 1.0 that represents the proportion of questions where any predicted
-                         answer matched one of the ground truth answers.
-        """
-        if not len(questions) == len(ground_truth_answers) == len(predicted_answers):
-            raise ValueError("The length of questions, ground_truth_answers, and predicted_answers must be the same.")
-
-        matches = 0
-        for truths, extracted in zip(ground_truth_answers, predicted_answers):
-            if set(truths) & set(extracted):
-                matches += 1
-
-        # The proportion of questions where any predicted answer matched one of the ground truth answers
-        result = matches / len(questions)
-
-        return {"result": result}
diff --git a/releasenotes/notes/exact-match-evaluator-197bb87b65e19d0c.yaml b/releasenotes/notes/exact-match-evaluator-197bb87b65e19d0c.yaml
deleted file mode 100644
index ad380617d..000000000
--- a/releasenotes/notes/exact-match-evaluator-197bb87b65e19d0c.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
----
-features:
-  - |
-    Add `AnswerExactMatchEvaluator`, a Component that can be used to calculate the Exact Match metric
-    given a list of questions, a list of expected answers for each question and the list of predicted
-    answers for each question.
diff --git a/test/components/evaluators/test_answer_exact_match.py b/test/components/evaluators/test_answer_exact_match.py
deleted file mode 100644
index c179c74a2..000000000
--- a/test/components/evaluators/test_answer_exact_match.py
+++ /dev/null
@@ -1,61 +0,0 @@
-import pytest
-
-from haystack.components.evaluators import AnswerExactMatchEvaluator
-
-
-def test_run_with_all_matching():
-    evaluator = AnswerExactMatchEvaluator()
-    result = evaluator.run(
-        questions=["What is the capital of Germany?", "What is the capital of France?"],
-        ground_truth_answers=[["Berlin"], ["Paris"]],
-        predicted_answers=[["Berlin"], ["Paris"]],
-    )
-
-    assert result["result"] == 1.0
-
-
-def test_run_with_no_matching():
-    evaluator = AnswerExactMatchEvaluator()
-    result = evaluator.run(
-        questions=["What is the capital of Germany?", "What is the capital of France?"],
-        ground_truth_answers=[["Berlin"], ["Paris"]],
-        predicted_answers=[["Paris"], ["London"]],
-    )
-
-    assert result["result"] == 0.0
-
-
-def test_run_with_partial_matching():
-    evaluator = AnswerExactMatchEvaluator()
-    result = evaluator.run(
-        questions=["What is the capital of Germany?", "What is the capital of France?"],
-        ground_truth_answers=[["Berlin"], ["Paris"]],
-        predicted_answers=[["Berlin"], ["London"]],
-    )
-
-    assert result["result"] == 0.5
-
-
-def test_run_with_different_lengths():
-    evaluator = AnswerExactMatchEvaluator()
-
-    with pytest.raises(ValueError):
-        evaluator.run(
-            questions=["What is the capital of Germany?"],
-            ground_truth_answers=[["Berlin"], ["Paris"]],
-            predicted_answers=[["Berlin"], ["London"]],
-        )
-
-    with pytest.raises(ValueError):
-        evaluator.run(
-            questions=["What is the capital of Germany?", "What is the capital of France?"],
-            ground_truth_answers=[["Berlin"]],
-            predicted_answers=[["Berlin"], ["London"]],
-        )
-
-    with pytest.raises(ValueError):
-        evaluator.run(
-            questions=["What is the capital of Germany?", "What is the capital of France?"],
-            ground_truth_answers=[["Berlin"], ["Paris"]],
-            predicted_answers=[["Berlin"]],
-        )
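The revert takes `AnswerExactMatchEvaluator` out of the package, but the metric it computed is small enough to keep in project code while the component is gone. Below is a minimal, dependency-free sketch of the same exact-match proportion, reconstructed from the deleted `run()` method above; the helper name `exact_match_rate` is ours, not part of Haystack.

    from typing import List

    def exact_match_rate(ground_truth_answers: List[List[str]], predicted_answers: List[List[str]]) -> float:
        # A question counts as matched when any predicted answer is literally
        # identical to one of its ground truth answers.
        if len(ground_truth_answers) != len(predicted_answers):
            raise ValueError("Both lists must have the same length.")
        matches = sum(
            1 for truths, extracted in zip(ground_truth_answers, predicted_answers) if set(truths) & set(extracted)
        )
        return matches / len(ground_truth_answers)

    # Mirrors test_run_with_partial_matching above: one of two questions matches.
    assert exact_match_rate([["Berlin"], ["Paris"]], [["Berlin"], ["London"]]) == 0.5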
- """ - if not len(questions) == len(ground_truth_answers) == len(predicted_answers): - raise ValueError("The length of questions, ground_truth_answers, and predicted_answers must be the same.") - - matches = 0 - for truths, extracted in zip(ground_truth_answers, predicted_answers): - if set(truths) & set(extracted): - matches += 1 - - # The proportion of questions where any predicted answer matched one of the ground truth answers - result = matches / len(questions) - - return {"result": result} diff --git a/releasenotes/notes/exact-match-evaluator-197bb87b65e19d0c.yaml b/releasenotes/notes/exact-match-evaluator-197bb87b65e19d0c.yaml deleted file mode 100644 index ad380617d..000000000 --- a/releasenotes/notes/exact-match-evaluator-197bb87b65e19d0c.yaml +++ /dev/null @@ -1,6 +0,0 @@ ---- -features: - - | - Add `AnswerExactMatchEvaluator`, a Component that can be used to calculate the Exact Match metric - given a list of questions, a list of expected answers for each question and the list of predicted - answers for each question. diff --git a/test/components/evaluators/test_answer_exact_match.py b/test/components/evaluators/test_answer_exact_match.py deleted file mode 100644 index c179c74a2..000000000 --- a/test/components/evaluators/test_answer_exact_match.py +++ /dev/null @@ -1,61 +0,0 @@ -import pytest - -from haystack.components.evaluators import AnswerExactMatchEvaluator - - -def test_run_with_all_matching(): - evaluator = AnswerExactMatchEvaluator() - result = evaluator.run( - questions=["What is the capital of Germany?", "What is the capital of France?"], - ground_truth_answers=[["Berlin"], ["Paris"]], - predicted_answers=[["Berlin"], ["Paris"]], - ) - - assert result["result"] == 1.0 - - -def test_run_with_no_matching(): - evaluator = AnswerExactMatchEvaluator() - result = evaluator.run( - questions=["What is the capital of Germany?", "What is the capital of France?"], - ground_truth_answers=[["Berlin"], ["Paris"]], - predicted_answers=[["Paris"], ["London"]], - ) - - assert result["result"] == 0.0 - - -def test_run_with_partial_matching(): - evaluator = AnswerExactMatchEvaluator() - result = evaluator.run( - questions=["What is the capital of Germany?", "What is the capital of France?"], - ground_truth_answers=[["Berlin"], ["Paris"]], - predicted_answers=[["Berlin"], ["London"]], - ) - - assert result["result"] == 0.5 - - -def test_run_with_different_lengths(): - evaluator = AnswerExactMatchEvaluator() - - with pytest.raises(ValueError): - evaluator.run( - questions=["What is the capital of Germany?"], - ground_truth_answers=[["Berlin"], ["Paris"]], - predicted_answers=[["Berlin"], ["London"]], - ) - - with pytest.raises(ValueError): - evaluator.run( - questions=["What is the capital of Germany?", "What is the capital of France?"], - ground_truth_answers=[["Berlin"]], - predicted_answers=[["Berlin"], ["London"]], - ) - - with pytest.raises(ValueError): - evaluator.run( - questions=["What is the capital of Germany?", "What is the capital of France?"], - ground_truth_answers=[["Berlin"], ["Paris"]], - predicted_answers=[["Berlin"]], - ) From f5645a4454062902e1eda35e2ca5b4d4832aa64e Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Fri, 23 Feb 2024 14:34:16 +0100 Subject: [PATCH 02/10] Update API docs to include OutputAdapter, OpenAPIServiceConnector and OpenAPIServiceToFunctions (#7076) --- docs/pydoc/config/connectors.yml | 26 ++++++++++++++++++++++++++ docs/pydoc/config/converters_api.yml | 4 ++-- 2 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 
docs/pydoc/config/connectors.yml diff --git a/docs/pydoc/config/connectors.yml b/docs/pydoc/config/connectors.yml new file mode 100644 index 000000000..3f4133e7a --- /dev/null +++ b/docs/pydoc/config/connectors.yml @@ -0,0 +1,26 @@ +loaders: + - type: haystack_pydoc_tools.loaders.CustomPythonLoader + search_path: [../../../haystack/components/connectors] + modules: ["openapi_service"] + ignore_when_discovered: ["__init__"] +processors: + - type: filter + expression: + documented_only: true + do_not_filter_modules: false + skip_empty_modules: true + - type: smart + - type: crossref +renderer: + type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer + excerpt: Various connectors to integrate with external services. + category_slug: haystack-api + title: Connectors + slug: connectors-api + order: 15 + markdown: + descriptive_class_title: false + descriptive_module_title: true + add_method_class_prefix: true + add_member_class_prefix: false + filename: connectors_api.md diff --git a/docs/pydoc/config/converters_api.yml b/docs/pydoc/config/converters_api.yml index 4fbb21584..6ffb34947 100644 --- a/docs/pydoc/config/converters_api.yml +++ b/docs/pydoc/config/converters_api.yml @@ -1,7 +1,7 @@ loaders: - type: haystack_pydoc_tools.loaders.CustomPythonLoader search_path: [../../../haystack/components/converters] - modules: ["azure", "html", "markdown", "pypdf", "tika", "txt"] + modules: ["azure", "html", "markdown", "pypdf", "tika", "txt", "output_adapter", "openapi_functions"] ignore_when_discovered: ["__init__"] processors: - type: filter @@ -13,7 +13,7 @@ processors: - type: crossref renderer: type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer - excerpt: Extracts text from files in different formats and converts it into a unified Document format. + excerpt: Various converters to transform data from one format to another. category_slug: haystack-api title: Converters slug: converters-api From 79b32cd755bf06c48f4a6858925a1072c9dfcd0a Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Fri, 23 Feb 2024 16:58:39 +0100 Subject: [PATCH 03/10] build: fix CVE-2022-40897 (#7080) --- docker/Dockerfile.base | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/Dockerfile.base b/docker/Dockerfile.base index ccca98029..c5c4c76f1 100644 --- a/docker/Dockerfile.base +++ b/docker/Dockerfile.base @@ -19,7 +19,9 @@ WORKDIR /opt/haystack RUN python3 -m venv --system-site-packages /opt/venv ENV PATH="/opt/venv/bin:$PATH" +# Upgrade setuptools due to https://nvd.nist.gov/vuln/detail/CVE-2022-40897 RUN pip install --upgrade pip && \ + pip install --no-cache-dir -U setuptools && \ pip install --no-cache-dir . 
FROM $base_image AS final From 3b8af365e31a2444eaee41244971dfe49f3ca9df Mon Sep 17 00:00:00 2001 From: Stefano Fiorucci Date: Mon, 26 Feb 2024 09:25:07 +0100 Subject: [PATCH 04/10] restore transparent background for Pipeline images (#7086) --- haystack/core/pipeline/draw.py | 2 +- .../notes/pipe-draw-transparent-bg-2e0c8ff586f8e70c.yaml | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/pipe-draw-transparent-bg-2e0c8ff586f8e70c.yaml diff --git a/haystack/core/pipeline/draw.py b/haystack/core/pipeline/draw.py index d96e6ecc2..e7d22e441 100644 --- a/haystack/core/pipeline/draw.py +++ b/haystack/core/pipeline/draw.py @@ -70,7 +70,7 @@ def _to_mermaid_image(graph: networkx.MultiDiGraph): graphbytes = graph_styled.encode("ascii") base64_bytes = base64.b64encode(graphbytes) base64_string = base64_bytes.decode("ascii") - url = "https://mermaid.ink/img/" + base64_string + url = f"https://mermaid.ink/img/{base64_string}?type=png" logging.debug("Rendeding graph at %s", url) try: diff --git a/releasenotes/notes/pipe-draw-transparent-bg-2e0c8ff586f8e70c.yaml b/releasenotes/notes/pipe-draw-transparent-bg-2e0c8ff586f8e70c.yaml new file mode 100644 index 000000000..4a831edb3 --- /dev/null +++ b/releasenotes/notes/pipe-draw-transparent-bg-2e0c8ff586f8e70c.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + Restore transparent background for images generated with Pipeline.draw and Pipeline.show From d66b5358a1e005342c23b63c03afa8bc7a57b608 Mon Sep 17 00:00:00 2001 From: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com> Date: Mon, 26 Feb 2024 12:27:15 +0100 Subject: [PATCH 05/10] Remove eval end to end tests (#7093) --- e2e/pipelines/test_eval_dense_doc_search.py | 84 ----- .../test_eval_extractive_qa_pipeline.py | 166 --------- .../test_eval_hybrid_doc_search_pipeline.py | 99 ------ e2e/pipelines/test_eval_rag_pipelines.py | 336 ------------------ 4 files changed, 685 deletions(-) delete mode 100644 e2e/pipelines/test_eval_dense_doc_search.py delete mode 100644 e2e/pipelines/test_eval_extractive_qa_pipeline.py delete mode 100644 e2e/pipelines/test_eval_hybrid_doc_search_pipeline.py delete mode 100644 e2e/pipelines/test_eval_rag_pipelines.py diff --git a/e2e/pipelines/test_eval_dense_doc_search.py b/e2e/pipelines/test_eval_dense_doc_search.py deleted file mode 100644 index d70a4c517..000000000 --- a/e2e/pipelines/test_eval_dense_doc_search.py +++ /dev/null @@ -1,84 +0,0 @@ -from haystack import Pipeline -from haystack.components.converters import PyPDFToDocument, TextFileToDocument -from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder -from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter -from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever -from haystack.components.routers import FileTypeRouter -from haystack.components.joiners import DocumentJoiner -from haystack.components.writers import DocumentWriter -from haystack.dataclasses import Document -from haystack.document_stores.in_memory import InMemoryDocumentStore -from haystack.evaluation.eval import eval - - -def test_dense_doc_search_pipeline(samples_path): - # Create the indexing pipeline - indexing_pipeline = Pipeline() - indexing_pipeline.add_component( - instance=FileTypeRouter(mime_types=["text/plain", "application/pdf"]), name="file_type_router" - ) - indexing_pipeline.add_component(instance=TextFileToDocument(), name="text_file_converter") - 
indexing_pipeline.add_component(instance=PyPDFToDocument(), name="pdf_file_converter") - indexing_pipeline.add_component(instance=DocumentJoiner(), name="joiner") - indexing_pipeline.add_component(instance=DocumentCleaner(), name="cleaner") - indexing_pipeline.add_component( - instance=DocumentSplitter(split_by="sentence", split_length=250, split_overlap=30), name="splitter" - ) - indexing_pipeline.add_component( - instance=SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"), name="embedder" - ) - indexing_pipeline.add_component(instance=DocumentWriter(document_store=InMemoryDocumentStore()), name="writer") - - indexing_pipeline.connect("file_type_router.text/plain", "text_file_converter.sources") - indexing_pipeline.connect("file_type_router.application/pdf", "pdf_file_converter.sources") - indexing_pipeline.connect("text_file_converter.documents", "joiner.documents") - indexing_pipeline.connect("pdf_file_converter.documents", "joiner.documents") - indexing_pipeline.connect("joiner.documents", "cleaner.documents") - indexing_pipeline.connect("cleaner.documents", "splitter.documents") - indexing_pipeline.connect("splitter.documents", "embedder.documents") - indexing_pipeline.connect("embedder.documents", "writer.documents") - - indexing_pipeline.run({"file_type_router": {"sources": list(samples_path.iterdir())}}) - filled_document_store = indexing_pipeline.get_component("writer").document_store - - # Create the querying pipeline - query_pipeline = Pipeline() - query_pipeline.add_component( - instance=SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"), name="text_embedder" - ) - query_pipeline.add_component( - instance=InMemoryEmbeddingRetriever(document_store=filled_document_store, top_k=20), name="embedding_retriever" - ) - query_pipeline.connect("text_embedder", "embedding_retriever") - - inputs = [{"text_embedder": {"text": "Who lives in Rome?"}}] - expected_outputs = [ - { - "embedding_retriever": { - "documents": [ - Document( - id="d219162e5d0b8e5eab901e32ce0d9c12d24e5ea26a92780442fcfa560eb0b7d6", - content="My name is Giorgio and I live in Rome.", - meta={ - "file_path": "/home/ashwin/data_science/0ashwin/opensource/haystack/e2e/samples/doc_1.txt", - "source_id": "0366ae1654f4573564e29184cd4a2232286a93f4f25d6790ce703ae7d4d7d63c", - }, - score=0.627746287158654, - ), - Document( - id="2dcf2bc0307ba21fbb7e97a307d987a05297e577a44f170081acdbab9fc4b95f", - content="A sample PDF file History and standardizationFormat (PDF) Adobe Systems made the PDF specification ava...", - meta={"source_id": "ec1ac6c430ecd0cc74ae56f3e2d84f93fef3f5393de6901fe8aa01e494ebcdbe"}, - score=-0.060180130727963355, - ), - ] - } - } - ] - - eval_result = eval(query_pipeline, inputs=inputs, expected_outputs=expected_outputs) - - assert eval_result.inputs == inputs - assert eval_result.expected_outputs == expected_outputs - assert len(eval_result.outputs) == len(expected_outputs) == len(inputs) - assert eval_result.runnable.to_dict() == query_pipeline.to_dict() diff --git a/e2e/pipelines/test_eval_extractive_qa_pipeline.py b/e2e/pipelines/test_eval_extractive_qa_pipeline.py deleted file mode 100644 index 989b2713b..000000000 --- a/e2e/pipelines/test_eval_extractive_qa_pipeline.py +++ /dev/null @@ -1,166 +0,0 @@ -import json -import pytest - -from haystack import Pipeline -from haystack.components.readers import ExtractiveReader -from haystack.components.retrievers.in_memory import InMemoryBM25Retriever -from haystack.dataclasses import Document, 
ExtractedAnswer -from haystack.document_stores.in_memory import InMemoryDocumentStore -from haystack.evaluation.eval import eval -from haystack.evaluation.metrics import Metric - - -def test_extractive_qa_pipeline(tmp_path): - # Create the pipeline - qa_pipeline = Pipeline() - qa_pipeline.add_component(instance=InMemoryBM25Retriever(document_store=InMemoryDocumentStore()), name="retriever") - qa_pipeline.add_component(instance=ExtractiveReader(model="deepset/tinyroberta-squad2"), name="reader") - qa_pipeline.connect("retriever", "reader") - - # Populate the document store - documents = [ - Document(content="My name is Jean and I live in Paris."), - Document(content="My name is Mark and I live in Berlin."), - Document(content="My name is Giorgio and I live in Rome."), - ] - qa_pipeline.get_component("retriever").document_store.write_documents(documents) - - # Query and assert - questions = ["Who lives in Paris?", "Who lives in Berlin?", "Who lives in Rome?"] - inputs = [{"retriever": {"query": question}, "reader": {"query": question, "top_k": 1}} for question in questions] - expected_outputs = [ - { - "reader": { - "answers": [ - ExtractedAnswer( - query="Who lives in Paris?", - score=0.7713339924812317, - data="Jean and I", - document=Document(content="My name is Jean and I live in Paris.", score=0.33144005810482535), - context=None, - document_offset=ExtractedAnswer.Span(start=11, end=21), - context_offset=None, - meta={}, - ), - ExtractedAnswer( - query="Who lives in Paris?", - score=0.2286660075187683, - data=None, - document=None, - context=None, - document_offset=None, - context_offset=None, - meta={}, - ), - ] - } - }, - { - "reader": { - "answers": [ - ExtractedAnswer( - query="Who lives in Berlin?", - score=0.7047999501228333, - data="Mark and I", - document=Document(content="My name is Mark and I live in Berlin.", score=0.33144005810482535), - context=None, - document_offset=ExtractedAnswer.Span(start=11, end=21), - context_offset=None, - meta={}, - ), - ExtractedAnswer( - query="Who lives in Berlin?", - score=0.29520004987716675, - data=None, - document=None, - context=None, - document_offset=None, - context_offset=None, - meta={}, - ), - ] - } - }, - { - "reader": { - "answers": [ - ExtractedAnswer( - query="Who lives in Rome?", - score=0.7661304473876953, - data="Giorgio and I", - document=Document(content="My name is Giorgio and I live in Rome.", score=0.33144005810482535), - context=None, - document_offset=ExtractedAnswer.Span(start=11, end=24), - context_offset=None, - meta={}, - ), - ExtractedAnswer( - query="Who lives in Rome?", - score=0.2338695526123047, - data=None, - document=None, - context=None, - document_offset=None, - context_offset=None, - meta={}, - ), - ] - } - }, - ] - - eval_result = eval(qa_pipeline, inputs=inputs, expected_outputs=expected_outputs) - - assert eval_result.inputs == inputs - assert eval_result.expected_outputs == expected_outputs - assert len(eval_result.outputs) == len(expected_outputs) == len(inputs) - assert eval_result.runnable.to_dict() == qa_pipeline.to_dict() - - # Test Exact Match - em_default = eval_result.calculate_metrics(Metric.EM, output_key="answers") - em_custom_parameters = eval_result.calculate_metrics( - Metric.EM, output_key="answers", ignore_case=True, ignore_punctuation=True, ignore_numbers=True - ) - # Save EM metric results to json - em_default.save(tmp_path / "exact_match_score.json") - - assert em_default["exact_match"] == 1.0 - assert em_custom_parameters["exact_match"] == 1.0 - with open(tmp_path / 
"exact_match_score.json", "r") as f: - assert em_default == json.load(f) - - # Test F1 - f1_default = eval_result.calculate_metrics(Metric.F1, output_key="answers") - f1_custom_parameters = eval_result.calculate_metrics( - Metric.F1, output_key="answers", ignore_case=True, ignore_punctuation=True, ignore_numbers=True - ) - # Save F1 metric results to json - f1_default.save(tmp_path / "f1_score.json") - - assert f1_default["f1"] == 1.0 - assert f1_custom_parameters["f1"] == 1.0 - with open(tmp_path / "f1_score.json", "r") as f: - assert f1_default == json.load(f) - - # Test SAS - sas_default = eval_result.calculate_metrics( - Metric.SAS, output_key="answers", model="sentence-transformers/paraphrase-multilingual-mpnet-base-v2" - ) - sas_custom_parameters = eval_result.calculate_metrics( - Metric.SAS, - output_key="answers", - ignore_case=True, - ignore_punctuation=True, - ignore_numbers=True, - model="cross-encoder/ms-marco-MiniLM-L-6-v2", - ) - # Save SAS metric results to json - sas_default.save(tmp_path / "sas_score.json") - - assert sas_default["sas"] == pytest.approx(1.0) - assert sas_default["scores"] == pytest.approx([1.0, 1.0, 1.0]) - assert sas_custom_parameters["sas"] == pytest.approx(0.9996823, abs=1e-5) - assert sas_custom_parameters["scores"] == pytest.approx([0.999672, 0.999608, 0.999767]) - - with open(tmp_path / "sas_score.json", "r") as f: - assert sas_default == json.load(f) diff --git a/e2e/pipelines/test_eval_hybrid_doc_search_pipeline.py b/e2e/pipelines/test_eval_hybrid_doc_search_pipeline.py deleted file mode 100644 index 0cd838bf6..000000000 --- a/e2e/pipelines/test_eval_hybrid_doc_search_pipeline.py +++ /dev/null @@ -1,99 +0,0 @@ -from haystack import Document, Pipeline -from haystack.components.embedders import SentenceTransformersTextEmbedder -from haystack.components.rankers import TransformersSimilarityRanker -from haystack.components.retrievers.in_memory import InMemoryBM25Retriever, InMemoryEmbeddingRetriever -from haystack.components.joiners.document_joiner import DocumentJoiner -from haystack.document_stores.in_memory import InMemoryDocumentStore -from haystack.evaluation.eval import eval - - -def test_hybrid_doc_search_pipeline(): - # Create the pipeline - document_store = InMemoryDocumentStore() - hybrid_pipeline = Pipeline() - hybrid_pipeline.add_component(instance=InMemoryBM25Retriever(document_store=document_store), name="bm25_retriever") - hybrid_pipeline.add_component( - instance=SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"), name="text_embedder" - ) - hybrid_pipeline.add_component( - instance=InMemoryEmbeddingRetriever(document_store=document_store), name="embedding_retriever" - ) - hybrid_pipeline.add_component(instance=DocumentJoiner(), name="joiner") - hybrid_pipeline.add_component(instance=TransformersSimilarityRanker(top_k=2), name="ranker") - - hybrid_pipeline.connect("bm25_retriever", "joiner") - hybrid_pipeline.connect("text_embedder", "embedding_retriever") - hybrid_pipeline.connect("embedding_retriever", "joiner") - hybrid_pipeline.connect("joiner", "ranker") - - # Populate the document store - documents = [ - Document(content="My name is Jean and I live in Paris."), - Document(content="My name is Mark and I live in Berlin."), - Document(content="My name is Mario and I live in the capital of Italy."), - Document(content="My name is Giorgio and I live in Rome."), - ] - hybrid_pipeline.get_component("bm25_retriever").document_store.write_documents(documents) - - questions = ["Who lives in Paris?", "Who lives 
in Berlin?", "Who lives in Rome?"] - inputs = [ - {"bm25_retriever": {"query": question}, "text_embedder": {"text": question}, "ranker": {"query": question}} - for question in questions - ] - expected_outputs = [ - { - "ranker": { - "documents": [ - Document( - id="6c90b78ad94e4e634e2a067b5fe2d26d4ce95405ec222cbaefaeb09ab4dce81e", - content="My name is Jean and I live in Paris.", - score=2.2277960777282715, - ), - Document( - id="10a183e965c2e107e20507c717f16559c58a8ba4bc7c577ea8dc32a8d6ca7a20", - content="My name is Mark and I live in Berlin.", - score=-7.304897308349609, - ), - ] - } - }, - { - "ranker": { - "documents": [ - Document( - id="10a183e965c2e107e20507c717f16559c58a8ba4bc7c577ea8dc32a8d6ca7a20", - content="My name is Mark and I live in Berlin.", - score=3.694173812866211, - ), - Document( - id="f7533b5c6c968680d0ef8e38f366d4e68b7ac0d7238f1b1b366d15cb9c33efd8", - content="My name is Mario and I live in the capital of Italy.", - score=-9.008655548095703, - ), - ] - } - }, - { - "ranker": { - "documents": [ - Document( - id="fb0f1efe94b3c78aa1c4e5a17a5ef8270f70e89d36a3665c8362675e8a769a27", - content="My name is Giorgio and I live in Rome.", - score=3.487802028656006, - ), - Document( - id="f7533b5c6c968680d0ef8e38f366d4e68b7ac0d7238f1b1b366d15cb9c33efd8", - content="My name is Mario and I live in the capital of Italy.", - score=-2.873128890991211, - ), - ] - } - }, - ] - - eval_result = eval(hybrid_pipeline, inputs=inputs, expected_outputs=expected_outputs) - - assert eval_result.inputs == inputs - assert eval_result.expected_outputs == expected_outputs - assert len(eval_result.outputs) == len(expected_outputs) == len(inputs) - assert eval_result.runnable.to_dict() == hybrid_pipeline.to_dict() diff --git a/e2e/pipelines/test_eval_rag_pipelines.py b/e2e/pipelines/test_eval_rag_pipelines.py deleted file mode 100644 index 7dc512977..000000000 --- a/e2e/pipelines/test_eval_rag_pipelines.py +++ /dev/null @@ -1,336 +0,0 @@ -import json -import pytest - -from haystack import Pipeline -from haystack.components.builders.answer_builder import AnswerBuilder -from haystack.components.builders.prompt_builder import PromptBuilder -from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder -from haystack.components.generators import HuggingFaceLocalGenerator -from haystack.components.retrievers.in_memory import InMemoryBM25Retriever, InMemoryEmbeddingRetriever -from haystack.components.writers import DocumentWriter -from haystack.dataclasses import Document, GeneratedAnswer -from haystack.document_stores.in_memory import InMemoryDocumentStore -from haystack.evaluation.eval import eval -from haystack.evaluation.metrics import Metric - - -def test_bm25_rag_pipeline(tmp_path): - prompt_template = """ - Given these documents, answer the question.\nDocuments: - {% for doc in documents %} - {{ doc.content }} - {% endfor %} - - \nQuestion: {{question}} - \nAnswer: - """ - rag_pipeline = Pipeline() - rag_pipeline.add_component(instance=InMemoryBM25Retriever(document_store=InMemoryDocumentStore()), name="retriever") - rag_pipeline.add_component(instance=PromptBuilder(template=prompt_template), name="prompt_builder") - rag_pipeline.add_component( - instance=HuggingFaceLocalGenerator( - model="google/flan-t5-small", - task="text2text-generation", - generation_kwargs={"max_new_tokens": 100, "temperature": 0.5, "do_sample": True}, - ), - name="llm", - ) - rag_pipeline.add_component(instance=AnswerBuilder(), name="answer_builder") - 
rag_pipeline.connect("retriever", "prompt_builder.documents") - rag_pipeline.connect("prompt_builder", "llm") - rag_pipeline.connect("llm.replies", "answer_builder.replies") - rag_pipeline.connect("retriever", "answer_builder.documents") - - # Populate the document store - documents = [ - Document(content="My name is Jean and I live in Paris."), - Document(content="My name is Mark and I live in Berlin."), - Document(content="My name is Giorgio and I live in Rome."), - ] - rag_pipeline.get_component("retriever").document_store.write_documents(documents) - - questions = ["Who lives in Paris?", "Who lives in Berlin?", "Who lives in Rome?"] - inputs = [ - { - "retriever": {"query": question}, - "prompt_builder": {"question": question}, - "answer_builder": {"query": question}, - } - for question in questions - ] - - expected_outputs = [ - { - "answer_builder": { - "answers": [ - GeneratedAnswer( - data="Jean", - query="Who lives in Paris?", - documents=[ - Document(content="My name is Jean and I live in Paris.", score=0.33144005810482535), - Document(content="My name is Giorgio and I live in Rome.", score=-0.17938556566116537), - Document(content="My name is Mark and I live in Berlin.", score=-0.17938556566116537), - ], - meta={}, - ) - ] - } - }, - { - "answer_builder": { - "answers": [ - GeneratedAnswer( - data="Mark", - query="Who lives in Berlin?", - documents=[ - Document(content="My name is Mark and I live in Berlin.", score=0.33144005810482535), - Document(content="My name is Giorgio and I live in Rome.", score=-0.17938556566116537), - Document(content="My name is Jean and I live in Paris.", score=-0.17938556566116537), - ], - meta={}, - ) - ] - } - }, - { - "answer_builder": { - "answers": [ - GeneratedAnswer( - data="Giorgio", - query="Who lives in Rome?", - documents=[ - Document(content="My name is Giorgio and I live in Rome.", score=0.33144005810482535), - Document(content="My name is Mark and I live in Berlin.", score=-0.17938556566116537), - Document(content="My name is Jean and I live in Paris.", score=-0.17938556566116537), - ], - meta={}, - ) - ] - } - }, - ] - - eval_result = eval(rag_pipeline, inputs=inputs, expected_outputs=expected_outputs) - - assert eval_result.inputs == inputs - assert eval_result.expected_outputs == expected_outputs - assert len(eval_result.outputs) == len(expected_outputs) == len(inputs) - assert eval_result.runnable.to_dict() == rag_pipeline.to_dict() - - # Test Exact Match - em_default = eval_result.calculate_metrics(Metric.EM, output_key="answers") - em_custom_parameters = eval_result.calculate_metrics( - Metric.EM, output_key="answers", ignore_case=True, ignore_punctuation=True, ignore_numbers=True - ) - # Save EM metric results to json - em_default.save(tmp_path / "exact_match_score.json") - - assert em_default["exact_match"] == 1.0 - assert em_custom_parameters["exact_match"] == 1.0 - with open(tmp_path / "exact_match_score.json", "r") as f: - assert em_default == json.load(f) - - # Test F1 - f1_default = eval_result.calculate_metrics(Metric.F1, output_key="answers") - f1_custom_parameters = eval_result.calculate_metrics( - Metric.F1, output_key="answers", ignore_case=True, ignore_punctuation=True, ignore_numbers=True - ) - # Save F1 metric results to json - f1_default.save(tmp_path / "f1_score.json") - - assert f1_default["f1"] == 1.0 - assert f1_custom_parameters["f1"] == 1.0 - with open(tmp_path / "f1_score.json", "r") as f: - assert f1_default == json.load(f) - - # Test SAS - sas_default = eval_result.calculate_metrics( - Metric.SAS, 
output_key="answers", model="sentence-transformers/paraphrase-multilingual-mpnet-base-v2" - ) - sas_custom_parameters = eval_result.calculate_metrics( - Metric.SAS, - output_key="answers", - ignore_case=True, - ignore_punctuation=True, - ignore_numbers=True, - model="cross-encoder/ms-marco-MiniLM-L-6-v2", - ) - # Save SAS metric results to json - sas_default.save(tmp_path / "sas_score.json") - - assert sas_default["sas"] == pytest.approx(1.0) - assert sas_default["scores"] == pytest.approx([1.0, 1.0, 1.0]) - assert sas_custom_parameters["sas"] == pytest.approx(0.9769593, abs=1e-5) - assert sas_custom_parameters["scores"] == pytest.approx([0.975823, 0.957218, 0.997837], abs=1e-5) - - with open(tmp_path / "sas_score.json", "r") as f: - assert sas_default == json.load(f) - - -def test_embedding_retrieval_rag_pipeline(tmp_path): - # Create the RAG pipeline - prompt_template = """ - Given these documents, answer the question.\nDocuments: - {% for doc in documents %} - {{ doc.content }} - {% endfor %} - - \nQuestion: {{question}} - \nAnswer: - """ - rag_pipeline = Pipeline() - rag_pipeline.add_component( - instance=SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"), name="text_embedder" - ) - rag_pipeline.add_component( - instance=InMemoryEmbeddingRetriever(document_store=InMemoryDocumentStore()), name="retriever" - ) - rag_pipeline.add_component(instance=PromptBuilder(template=prompt_template), name="prompt_builder") - rag_pipeline.add_component( - instance=HuggingFaceLocalGenerator( - model="google/flan-t5-small", - task="text2text-generation", - generation_kwargs={"max_new_tokens": 100, "temperature": 0.5, "do_sample": True}, - ), - name="llm", - ) - rag_pipeline.add_component(instance=AnswerBuilder(), name="answer_builder") - rag_pipeline.connect("text_embedder", "retriever") - rag_pipeline.connect("retriever", "prompt_builder.documents") - rag_pipeline.connect("prompt_builder", "llm") - rag_pipeline.connect("llm.replies", "answer_builder.replies") - rag_pipeline.connect("retriever", "answer_builder.documents") - - # Populate the document store - documents = [ - Document(content="My name is Jean and I live in Paris."), - Document(content="My name is Mark and I live in Berlin."), - Document(content="My name is Giorgio and I live in Rome."), - ] - document_store = rag_pipeline.get_component("retriever").document_store - indexing_pipeline = Pipeline() - indexing_pipeline.add_component( - instance=SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"), - name="document_embedder", - ) - indexing_pipeline.add_component(instance=DocumentWriter(document_store=document_store), name="document_writer") - indexing_pipeline.connect("document_embedder", "document_writer") - indexing_pipeline.run({"document_embedder": {"documents": documents}}) - - # Query and assert - questions = ["Who lives in Paris?", "Who lives in Berlin?", "Who lives in Rome?"] - inputs = [ - { - "prompt_builder": {"question": question}, - "text_embedder": {"text": question}, - "answer_builder": {"query": question}, - } - for question in questions - ] - - expected_outputs = [ - { - "answer_builder": { - "answers": [ - GeneratedAnswer( - data="Jean", - query="Who lives in Paris?", - documents=[ - Document(content="My name is Jean and I live in Paris.", score=0.33144005810482535), - Document(content="My name is Giorgio and I live in Rome.", score=-0.17938556566116537), - Document(content="My name is Mark and I live in Berlin.", score=-0.17938556566116537), - ], - meta={}, - ) - 
] - } - }, - { - "answer_builder": { - "answers": [ - GeneratedAnswer( - data="Mark", - query="Who lives in Berlin?", - documents=[ - Document(content="My name is Mark and I live in Berlin.", score=0.33144005810482535), - Document(content="My name is Giorgio and I live in Rome.", score=-0.17938556566116537), - Document(content="My name is Jean and I live in Paris.", score=-0.17938556566116537), - ], - meta={}, - ) - ] - } - }, - { - "answer_builder": { - "answers": [ - GeneratedAnswer( - data="Giorgio", - query="Who lives in Rome?", - documents=[ - Document(content="My name is Giorgio and I live in Rome.", score=0.33144005810482535), - Document(content="My name is Mark and I live in Berlin.", score=-0.17938556566116537), - Document(content="My name is Jean and I live in Paris.", score=-0.17938556566116537), - ], - meta={}, - ) - ] - } - }, - ] - - eval_result = eval(rag_pipeline, inputs=inputs, expected_outputs=expected_outputs) - - assert eval_result.inputs == inputs - assert eval_result.expected_outputs == expected_outputs - assert len(eval_result.outputs) == len(expected_outputs) == len(inputs) - assert eval_result.runnable.to_dict() == rag_pipeline.to_dict() - - # Test Exact Match - em_default = eval_result.calculate_metrics(Metric.EM, output_key="answers") - em_custom_parameters = eval_result.calculate_metrics( - Metric.EM, output_key="answers", ignore_case=True, ignore_punctuation=True, ignore_numbers=True - ) - # Save EM metric results to json - em_default.save(tmp_path / "exact_match_score.json") - - assert em_default["exact_match"] == 1.0 - assert em_custom_parameters["exact_match"] == 1.0 - with open(tmp_path / "exact_match_score.json", "r") as f: - assert em_default == json.load(f) - - # Test F1 - f1_default = eval_result.calculate_metrics(Metric.F1, output_key="answers") - f1_custom_parameters = eval_result.calculate_metrics( - Metric.F1, output_key="answers", ignore_case=True, ignore_punctuation=True, ignore_numbers=True - ) - # Save F1 metric results to json - f1_default.save(tmp_path / "f1_score.json") - - assert f1_default["f1"] == 1.0 - assert f1_custom_parameters["f1"] == 1.0 - with open(tmp_path / "f1_score.json", "r") as f: - assert f1_default == json.load(f) - - # Test SAS - sas_default = eval_result.calculate_metrics( - Metric.SAS, output_key="answers", model="sentence-transformers/paraphrase-multilingual-mpnet-base-v2" - ) - sas_custom_parameters = eval_result.calculate_metrics( - Metric.SAS, - output_key="answers", - ignore_case=True, - ignore_punctuation=True, - ignore_numbers=True, - model="cross-encoder/ms-marco-MiniLM-L-6-v2", - ) - # Save SAS metric results to json - sas_default.save(tmp_path / "sas_score.json") - - assert sas_default["sas"] == pytest.approx(1.0) - assert sas_default["scores"] == pytest.approx([1.0, 1.0, 1.0]) - assert sas_custom_parameters["sas"] == pytest.approx(0.9769593, abs=1e-5) - assert sas_custom_parameters["scores"] == pytest.approx([0.975823, 0.957218, 0.997837], abs=1e-5) - - with open(tmp_path / "sas_score.json", "r") as f: - assert sas_default == json.load(f) From ba49905eff2bee7617bb338dcc5060088e0116ff Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Mon, 26 Feb 2024 15:40:10 +0100 Subject: [PATCH 06/10] ci: unify dependency management + hatch scripts (#7079) * ci: unify dependency management + hatch scripts * ci: migrate readme sync * build: migrate snippets * ci: pin hatch * ci: make Python version more explicit + quote * ci: add scripts with parameters to hatch --------- Co-authored-by: Silvano Cerza 
<3314350+silvanocerza@users.noreply.github.com> --- .github/workflows/docstring_labeler.yml | 5 +- .github/workflows/e2e.yml | 14 ++--- .github/workflows/license_compliance.yml | 3 +- .github/workflows/linting.yml | 20 +++---- .github/workflows/minor_version_release.yml | 2 +- .github/workflows/pypi_release.yml | 5 +- .github/workflows/readme_sync.yml | 14 +++-- .github/workflows/snippets_tests.yml | 14 ++--- .github/workflows/tests.yml | 41 +++++++------- pyproject.toml | 61 ++++++++++++++++++++- test/test_requirements.txt | 30 ---------- 11 files changed, 118 insertions(+), 91 deletions(-) delete mode 100644 test/test_requirements.txt diff --git a/.github/workflows/docstring_labeler.yml b/.github/workflows/docstring_labeler.yml index 96384817d..1dc5ddd9d 100644 --- a/.github/workflows/docstring_labeler.yml +++ b/.github/workflows/docstring_labeler.yml @@ -5,6 +5,9 @@ on: paths: - "haystack/**/*.py" +env: + PYTHON_VERSION: "3.11" + jobs: label: runs-on: ubuntu-latest @@ -24,7 +27,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v5 with: - python-version: "3.11" + python-version: "${{ env.PYTHON_VERSION }}" - name: Get docstrings id: base-docstrings diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 694b39e16..22440932c 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -18,6 +18,7 @@ on: env: PYTHON_VERSION: "3.8" OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + HATCH_VERSION: "1.9.3" jobs: run: @@ -28,15 +29,10 @@ jobs: - uses: actions/setup-python@v5 with: - python-version: ${{ env.PYTHON_VERSION }} + python-version: "${{ env.PYTHON_VERSION }}" - - name: Install dependencies - run: | - sudo apt-get update - sudo apt install ffmpeg # for local Whisper tests - - - name: Install Haystack and the dependencies needed for tests - run: pip install -r test/test_requirements.txt + - name: Install Hatch + run: pip install hatch==${{ env.HATCH_VERSION }} - name: Run tests - run: pytest e2e + run: hatch run test:e2e diff --git a/.github/workflows/license_compliance.yml b/.github/workflows/license_compliance.yml index aeb5abcdb..9fa159d83 100644 --- a/.github/workflows/license_compliance.yml +++ b/.github/workflows/license_compliance.yml @@ -11,6 +11,7 @@ on: env: CORE_DATADOG_API_KEY: ${{ secrets.CORE_DATADOG_API_KEY }} + PYTHON_VERSION: "3.10" jobs: license_check_direct: @@ -25,7 +26,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v5 with: - python-version: "3.10" + python-version: "${{ env.PYTHON_VERSION }}" - name: Get direct dependencies run: | diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 4557f9206..e3ece2104 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -12,6 +12,7 @@ on: env: PYTHON_VERSION: "3.8" + HATCH_VERSION: "1.9.3" jobs: mypy: @@ -35,19 +36,16 @@ jobs: - uses: actions/setup-python@v5 with: - python-version: ${{ env.PYTHON_VERSION }} + python-version: "${{ env.PYTHON_VERSION }}" - - name: Install Haystack - run: pip install .[dev] - - - name: Install the dependencies needed for tests - run: pip install -r test/test_requirements.txt + - name: Install Hatch + run: pip install hatch==${{ env.HATCH_VERSION }} - name: Mypy if: steps.files.outputs.any_changed == 'true' run: | mkdir .mypy_cache - mypy --install-types --non-interactive --cache-dir=.mypy_cache/ ${{ steps.files.outputs.all_changed_files }} + hatch run test:types ${{ steps.files.outputs.all_changed_files }} pylint: runs-on: ubuntu-latest @@ -68,12 +66,12 @@ jobs: - uses: actions/setup-python@v5 
with: - python-version: ${{ env.PYTHON_VERSION }} + python-version: "${{ env.PYTHON_VERSION }}" - - name: Install Haystack and the dependencies needed for tests - run: pip install -r test/test_requirements.txt + - name: Install Hatch + run: pip install hatch==${{ env.HATCH_VERSION }} - name: Pylint if: steps.files.outputs.any_changed == 'true' run: | - pylint -ry -j 0 ${{ steps.files.outputs.all_changed_files }} + hatch run test:lint ${{ steps.files.outputs.all_changed_files }} diff --git a/.github/workflows/minor_version_release.yml b/.github/workflows/minor_version_release.yml index 5b174fa0d..6233ccea7 100644 --- a/.github/workflows/minor_version_release.yml +++ b/.github/workflows/minor_version_release.yml @@ -49,7 +49,7 @@ jobs: - uses: actions/setup-python@v5 with: - python-version: ${{ env.PYTHON_VERSION }} + python-version: "${{ env.PYTHON_VERSION }}" - name: Install release_docs.py dependencies run: pip install requests diff --git a/.github/workflows/pypi_release.yml b/.github/workflows/pypi_release.yml index 86f983cc3..c784c08f7 100644 --- a/.github/workflows/pypi_release.yml +++ b/.github/workflows/pypi_release.yml @@ -5,6 +5,9 @@ on: tags: - "v[0-9].[0-9]+.[0-9]+*" +env: + HATCH_VERSION: "1.9.3" + jobs: release-on-pypi: runs-on: ubuntu-latest @@ -14,7 +17,7 @@ jobs: uses: actions/checkout@v4 - name: Install Hatch - run: pip install hatch + run: pip install hatch==${{ env.HATCH_VERSION }} - name: Build Haystack run: hatch build diff --git a/.github/workflows/readme_sync.yml b/.github/workflows/readme_sync.yml index a6126f7b3..679eb75ce 100644 --- a/.github/workflows/readme_sync.yml +++ b/.github/workflows/readme_sync.yml @@ -8,6 +8,10 @@ on: branches: - main +env: + HATCH_VERSION: "1.9.3" + PYTHON_VERSION: "3.10" + jobs: sync: runs-on: ubuntu-latest @@ -18,12 +22,10 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: "3.10" + python-version: "${{ env.PYTHON_VERSION }}" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -U haystack-pydoc-tools + - name: Install Hatch + run: pip install hatch==${{ env.HATCH_VERSION }} - name: Generate API docs env: @@ -31,7 +33,7 @@ jobs: # from Readme.io as we need them to associate the slug # in config files with their id. README_API_KEY: ${{ secrets.README_API_KEY }} - run: ./.github/utils/pydoc-markdown.sh + run: hatch run readme:sync - name: Sync docs with 2.0 # Sync the docs to the `2.0` version on Readme diff --git a/.github/workflows/snippets_tests.yml b/.github/workflows/snippets_tests.yml index d64c2fd70..148a1acb4 100644 --- a/.github/workflows/snippets_tests.yml +++ b/.github/workflows/snippets_tests.yml @@ -17,6 +17,7 @@ on: env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} PYTHON_VERSION: "3.8" + HATCH_VERSION: "1.9.3" jobs: black: @@ -26,7 +27,7 @@ jobs: - uses: actions/setup-python@v5 with: - python-version: ${{ env.PYTHON_VERSION }} + python-version: "${{ env.PYTHON_VERSION }}" - name: Install Black run: | @@ -100,13 +101,10 @@ jobs: - uses: actions/setup-python@v5 with: - python-version: ${{ env.PYTHON_VERSION }} + python-version: "${{ env.PYTHON_VERSION }}" - - name: Install snippets dependencies - run: | - pip install --upgrade pip - pip install "." 
torch - pip install pydantic + - name: Install Hatch + run: pip install hatch==${{ env.HATCH_VERSION }} - name: Get changed files id: files @@ -119,7 +117,7 @@ jobs: run: | CHANGED_FILES=${{ steps.files.outputs.all_changed_files }} for file in $CHANGED_FILES; do - python "$file" + hatch run snippets:python "$file" done - name: Calculate alert data diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 39d4c9fe6..71866c4eb 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -29,6 +29,7 @@ env: AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} PYTHON_VERSION: "3.8" + HATCH_VERSION: "1.9.3" jobs: black: @@ -38,19 +39,13 @@ jobs: - uses: actions/setup-python@v5 with: - python-version: ${{ env.PYTHON_VERSION }} + python-version: "${{ env.PYTHON_VERSION }}" - - name: Install Black - run: | - pip install --upgrade pip - pip install .[dev] + - name: Install Hatch + run: pip install hatch==${{ env.HATCH_VERSION }} - name: Check status - run: | - if ! black . --check; then - git status - exit 1 - fi + run: hatch run default:format-check - name: Calculate alert data id: calculator @@ -96,10 +91,14 @@ jobs: - uses: actions/setup-python@v5 with: - python-version: ${{ env.PYTHON_VERSION }} + python-version: "${{ env.PYTHON_VERSION }}" - - name: Install Haystack and the dependencies needed for tests - run: pip install -r test/test_requirements.txt + - name: Install Hatch + run: pip install hatch==${{ env.HATCH_VERSION }} + + - name: Install dependencies + # To actually install and sync the dependencies + run: hatch run test:pip list - uses: actions/cache@v4 with: @@ -122,7 +121,7 @@ jobs: - uses: actions/setup-python@v5 with: - python-version: ${{ env.PYTHON_VERSION }} + python-version: "${{ env.PYTHON_VERSION }}" - name: Restore Python dependencies uses: actions/cache/restore@v4 @@ -131,7 +130,7 @@ jobs: key: pip-${{ runner.os }}-${{ github.run_id }}-${{ hashFiles('pyproject.toml', 'test/test_requirements.txt') }} - name: Run - run: pytest --cov-report xml:coverage.xml --cov="haystack" -m "not integration" test + run: hatch run test:unit - name: Coveralls # We upload only coverage for ubuntu as handling both os @@ -186,7 +185,7 @@ jobs: - uses: actions/setup-python@v5 with: - python-version: ${{ env.PYTHON_VERSION }} + python-version: "${{ env.PYTHON_VERSION }}" - name: Install dependencies run: | @@ -200,7 +199,7 @@ jobs: key: pip-${{ runner.os }}-${{ github.run_id }}-${{ hashFiles('pyproject.toml', 'test/test_requirements.txt') }} - name: Run - run: pytest --maxfail=5 -m "integration" test + run: hatch run test:integration - name: Calculate alert data id: calculator @@ -245,7 +244,7 @@ jobs: - uses: actions/setup-python@v5 with: - python-version: ${{ env.PYTHON_VERSION }} + python-version: "${{ env.PYTHON_VERSION }}" - name: Install dependencies run: | @@ -258,7 +257,7 @@ jobs: key: pip-${{ runner.os }}-${{ github.run_id }}-${{ hashFiles('pyproject.toml', 'test/test_requirements.txt') }} - name: Run - run: pytest --maxfail=5 -m "integration" test -k 'not tika' + run: hatch run test:integration-mac - name: Calculate alert data id: calculator @@ -300,7 +299,7 @@ jobs: - uses: actions/setup-python@v5 with: - python-version: ${{ env.PYTHON_VERSION }} + python-version: "${{ env.PYTHON_VERSION }}" - name: Restore Python dependencies uses: actions/cache/restore@v4 @@ -309,7 +308,7 @@ jobs: key: pip-${{ runner.os }}-${{ github.run_id }}-${{ hashFiles('pyproject.toml', 'test/test_requirements.txt') }} - name: 
Run - run: pytest --maxfail=5 -m "integration" test -k 'not tika' + run: hatch run test:integration-windows - name: Calculate alert data id: calculator diff --git a/pyproject.toml b/pyproject.toml index ba5b8dec7..2372310ef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,8 +61,8 @@ dependencies = [ "boilerpy3", # Fulltext extraction from HTML pages ] -[project.optional-dependencies] -dev = [ +[tool.hatch.envs.default] +dependencies = [ "pre-commit", # Type check "mypy", @@ -91,6 +91,63 @@ dev = [ "black[jupyter]~=23.0", ] +[tool.hatch.envs.default.scripts] +format = "black ." +format-check = "black --check ." + +[tool.hatch.envs.test] +extra-dependencies = [ + "transformers[torch,sentencepiece]==4.37.2", # ExtractiveReader, TransformersSimilarityRanker, LocalWhisperTranscriber, HFGenerators... + "spacy>=3.7,<3.8", # NamedEntityExtractor + "spacy-curated-transformers>=0.2,<=0.3", # NamedEntityExtractor + "en-core-web-trf @ https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.7.3/en_core_web_trf-3.7.3-py3-none-any.whl", # NamedEntityExtractor + + # Converters + "pypdf", # PyPDFConverter + "markdown-it-py", # MarkdownToDocument + "mdit_plain", # MarkdownToDocument + "tika", # TikaDocumentConverter + "azure-ai-formrecognizer>=3.2.0b2", # AzureOCRDocumentConverter + "langdetect", # TextLanguageRouter and DocumentLanguageClassifier + "sentence-transformers>=2.2.0", # SentenceTransformersTextEmbedder and SentenceTransformersDocumentEmbedder + "openai-whisper>=20231106", # LocalWhisperTranscriber + + # OpenAPI + "jsonref", # OpenAPIServiceConnector, OpenAPIServiceToFunctions + "openapi3", + + # Validation + "jsonschema", + + # Tracing + "opentelemetry-sdk", + "ddtrace", +] + +[tool.hatch.envs.test.scripts] +e2e = "pytest e2e" +unit = 'pytest --cov-report xml:coverage.xml --cov="haystack" -m "not integration" test' +integration = 'pytest --maxfail=5 -m "integration" test' +integration-mac = 'pytest --maxfail=5 -m "integration" test -k "not tika"' +integration-windows = 'pytest --maxfail=5 -m "integration" test -k "not tika"' +types = "mypy --install-types --non-interactive --cache-dir=.mypy_cache/ {args:haystack}" +lint = "pylint -ry -j 0 {args:haystack}" + +[tool.hatch.envs.readme] +detached = true # To avoid installing the dependencies from the default environment +dependencies = [ + "haystack-pydoc-tools", +] + +[tool.hatch.envs.readme.scripts] +sync = "./.github/utils/pydoc-markdown.sh" + +[tool.hatch.envs.snippets] +extra-dependencies = [ + "torch", + "pydantic", +] + [project.urls] "CI: GitHub" = "https://github.com/deepset-ai/haystack/actions" "Docs: RTD" = "https://haystack.deepset.ai/overview/intro" diff --git a/test/test_requirements.txt b/test/test_requirements.txt deleted file mode 100644 index 2e1626729..000000000 --- a/test/test_requirements.txt +++ /dev/null @@ -1,30 +0,0 @@ -.[dev] - -# Package Components - -transformers[torch,sentencepiece]==4.37.2 # ExtractiveReader, TransformersSimilarityRanker, LocalWhisperTranscriber, HFGenerators... 
-spacy>=3.7,<3.8 # NamedEntityExtractor
-spacy-curated-transformers>=0.2,<=0.3 # NamedEntityExtractor
-https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.7.3/en_core_web_trf-3.7.3-py3-none-any.whl # NamedEntityExtractor
-
-# Converters
-pypdf # PyPDFConverter
-markdown-it-py # MarkdownToDocument
-mdit_plain # MarkdownToDocument
-tika # TikaDocumentConverter
-azure-ai-formrecognizer>=3.2.0b2 # AzureOCRDocumentConverter
-
-langdetect # TextLanguageRouter and DocumentLanguageClassifier
-sentence-transformers>=2.2.0 # SentenceTransformersTextEmbedder and SentenceTransformersDocumentEmbedder
-openai-whisper>=20231106 # LocalWhisperTranscriber
-
-# OpenAPI
-jsonref # OpenAPIServiceConnector, OpenAPIServiceToFunctions
-openapi3
-
-# Validation
-jsonschema
-
-# Tracing
-opentelemetry-sdk
-ddtrace
From 22e9def2cd29ad70b70530fa9737390eafe80073 Mon Sep 17 00:00:00 2001
From: Julian Risch
Date: Mon, 26 Feb 2024 16:06:26 +0100
Subject: [PATCH 07/10] chore: Remove deprecated GPTGenerator and
 GPTChatGenerator (#7125)

* remove deprecated GPTGenerator and GPTChatGenerator

* remove unused import
---
 haystack/components/generators/__init__.py    | 10 ++-----
 .../components/generators/chat/__init__.py    |  3 +-
 haystack/components/generators/chat/openai.py | 27 -----------------
 haystack/components/generators/openai.py      | 29 -------------------
 .../remove-gptgenerator-8eced280d3b720d3.yaml |  4 +++
 5 files changed, 7 insertions(+), 66 deletions(-)
 create mode 100644 releasenotes/notes/remove-gptgenerator-8eced280d3b720d3.yaml

diff --git a/haystack/components/generators/__init__.py b/haystack/components/generators/__init__.py
index 73818abfb..30407bba8 100644
--- a/haystack/components/generators/__init__.py
+++ b/haystack/components/generators/__init__.py
@@ -1,12 +1,6 @@
 from haystack.components.generators.hugging_face_local import HuggingFaceLocalGenerator
 from haystack.components.generators.hugging_face_tgi import HuggingFaceTGIGenerator
-from haystack.components.generators.openai import OpenAIGenerator, GPTGenerator
+from haystack.components.generators.openai import OpenAIGenerator
 from haystack.components.generators.azure import AzureOpenAIGenerator
 
-__all__ = [
-    "HuggingFaceLocalGenerator",
-    "HuggingFaceTGIGenerator",
-    "OpenAIGenerator",
-    "GPTGenerator",
-    "AzureOpenAIGenerator",
-]
+__all__ = ["HuggingFaceLocalGenerator", "HuggingFaceTGIGenerator", "OpenAIGenerator", "AzureOpenAIGenerator"]
diff --git a/haystack/components/generators/chat/__init__.py b/haystack/components/generators/chat/__init__.py
index 5400e158a..c8f908e15 100644
--- a/haystack/components/generators/chat/__init__.py
+++ b/haystack/components/generators/chat/__init__.py
@@ -1,6 +1,6 @@
 from haystack.components.generators.chat.hugging_face_local import HuggingFaceLocalChatGenerator
 from haystack.components.generators.chat.hugging_face_tgi import HuggingFaceTGIChatGenerator
-from haystack.components.generators.chat.openai import OpenAIChatGenerator, GPTChatGenerator
+from haystack.components.generators.chat.openai import OpenAIChatGenerator
 from haystack.components.generators.chat.azure import AzureOpenAIChatGenerator
 
 
@@ -8,6 +8,5 @@
     "HuggingFaceLocalChatGenerator",
     "HuggingFaceTGIChatGenerator",
     "OpenAIChatGenerator",
-    "GPTChatGenerator",
     "AzureOpenAIChatGenerator",
 ]
diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py
index 9268caa3e..9b3ad6a0a 100644
--- a/haystack/components/generators/chat/openai.py
+++ b/haystack/components/generators/chat/openai.py
@@ -2,7 +2,6 @@
 import dataclasses
 import json
 import logging
-import warnings
 from typing import Optional, List, Callable, Dict, Any, Union
 
 from openai import OpenAI, Stream  # type: ignore
@@ -331,29 +330,3 @@ def _check_finish_reason(self, message: ChatMessage) -> None:
             logger.warning(
                 "The completion for index %s has been truncated due to the content filter.", message.meta["index"]
             )
-
-
-class GPTChatGenerator(OpenAIChatGenerator):
-    def __init__(
-        self,
-        api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"),
-        model: str = "gpt-3.5-turbo",
-        streaming_callback: Optional[Callable[[StreamingChunk], None]] = None,
-        api_base_url: Optional[str] = None,
-        organization: Optional[str] = None,
-        generation_kwargs: Optional[Dict[str, Any]] = None,
-    ):
-        warnings.warn(
-            "GPTChatGenerator is deprecated and will be removed in the next beta release. "
-            "Please use OpenAIChatGenerator instead.",
-            UserWarning,
-            stacklevel=2,
-        )
-        super().__init__(
-            api_key=api_key,
-            model=model,
-            streaming_callback=streaming_callback,
-            api_base_url=api_base_url,
-            organization=organization,
-            generation_kwargs=generation_kwargs,
-        )
diff --git a/haystack/components/generators/openai.py b/haystack/components/generators/openai.py
index 472ede780..caf7a8434 100644
--- a/haystack/components/generators/openai.py
+++ b/haystack/components/generators/openai.py
@@ -1,6 +1,5 @@
 import dataclasses
 import logging
-import warnings
 from typing import Optional, List, Callable, Dict, Any, Union
 
 from openai import OpenAI, Stream
@@ -277,31 +276,3 @@ def _check_finish_reason(self, message: ChatMessage) -> None:
             logger.warning(
                 "The completion for index %s has been truncated due to the content filter.", message.meta["index"]
             )
-
-
-class GPTGenerator(OpenAIGenerator):
-    def __init__(
-        self,
-        api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"),
-        model: str = "gpt-3.5-turbo",
-        streaming_callback: Optional[Callable[[StreamingChunk], None]] = None,
-        api_base_url: Optional[str] = None,
-        organization: Optional[str] = None,
-        system_prompt: Optional[str] = None,
-        generation_kwargs: Optional[Dict[str, Any]] = None,
-    ):
-        warnings.warn(
-            "GPTGenerator is deprecated and will be removed in the next beta release. "
-            "Please use OpenAIGenerator instead.",
-            UserWarning,
-            stacklevel=2,
-        )
-        super().__init__(
-            api_key=api_key,
-            model=model,
-            streaming_callback=streaming_callback,
-            api_base_url=api_base_url,
-            organization=organization,
-            system_prompt=system_prompt,
-            generation_kwargs=generation_kwargs,
-        )
diff --git a/releasenotes/notes/remove-gptgenerator-8eced280d3b720d3.yaml b/releasenotes/notes/remove-gptgenerator-8eced280d3b720d3.yaml
new file mode 100644
index 000000000..694db015e
--- /dev/null
+++ b/releasenotes/notes/remove-gptgenerator-8eced280d3b720d3.yaml
@@ -0,0 +1,4 @@
+---
+upgrade:
+  - |
+    Removed the deprecated GPTGenerator and GPTChatGenerator components. Use OpenAIGenerator and OpenAIChatGenerator instead.
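For code that still imported the removed aliases, migration is a rename: the deleted classes forwarded their arguments to the OpenAI variants unchanged and only emitted a deprecation warning. A minimal sketch, assuming `OPENAI_API_KEY` is set in the environment (the default shown in the deleted `Secret.from_env_var` call):

    from haystack.components.generators import OpenAIGenerator

    # Before this change:
    #   from haystack.components.generators import GPTGenerator
    #   generator = GPTGenerator(model="gpt-3.5-turbo")
    generator = OpenAIGenerator(model="gpt-3.5-turbo")

    result = generator.run(prompt="In one sentence, what is Haystack?")
    print(result["replies"][0])

The same rename applies to the chat variant: `GPTChatGenerator` becomes `OpenAIChatGenerator` with the same arguments.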
From 8838c02872f7cea42e1b97524383a56c43860fb0 Mon Sep 17 00:00:00 2001 From: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com> Date: Mon, 26 Feb 2024 18:04:43 +0100 Subject: [PATCH 08/10] Fix snippets_tests.yml (#7208) --- .github/workflows/snippets_tests.yml | 31 +++------------------------- 1 file changed, 3 insertions(+), 28 deletions(-) diff --git a/.github/workflows/snippets_tests.yml b/.github/workflows/snippets_tests.yml index 148a1acb4..a5b1eec9d 100644 --- a/.github/workflows/snippets_tests.yml +++ b/.github/workflows/snippets_tests.yml @@ -29,36 +29,11 @@ jobs: with: python-version: "${{ env.PYTHON_VERSION }}" - - name: Install Black - run: | - pip install --upgrade pip - pip install .[dev] + - name: Install Hatch + run: pip install hatch==${{ env.HATCH_VERSION }} - name: Check status - run: | - if ! black . --check; then - git status - echo "###################################################################################################" - echo "# " - echo "# CHECK FAILED! Black found issues with your code formatting." - echo "# " - echo "# Either:" - echo "# 1. Run Black locally before committing:" - echo "# " - echo "# pip install .[formatting]" - echo "# black ." - echo "# " - echo "# 2. Install the pre-commit hook:" - echo "# " - echo "# pre-commit install" - echo "# " - echo "# 3. See https://github.com/deepset-ai/haystack/blob/main/CONTRIBUTING.md for help." - echo "# " - echo "# If you have further problems, please open an issue: https://github.com/deepset-ai/haystack/issues" - echo "# " - echo "##################################################################################################" - exit 1 - fi + run: hatch run default:format-check - name: Calculate alert data id: calculator From e0b692467bb94781f079c4a9719e223e09aa9bab Mon Sep 17 00:00:00 2001 From: Stefano Fiorucci Date: Mon, 26 Feb 2024 18:06:29 +0100 Subject: [PATCH 09/10] `TopPSampler` docstrings (#7205) * TopPSampler docstrings * inline quote --- haystack/components/samplers/top_p.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/haystack/components/samplers/top_p.py b/haystack/components/samplers/top_p.py index 4f85fc8fc..bd4846683 100644 --- a/haystack/components/samplers/top_p.py +++ b/haystack/components/samplers/top_p.py @@ -16,8 +16,8 @@ class TopPSampler: """ Implements top-p (nucleus) sampling for document filtering based on cumulative probability scores. - This class provides functionality to filter a list of documents by selecting those whose scores fall - within the top 'p' percent of the cumulative distribution. The method is useful for focusing on high-probability + This component provides functionality to filter a list of documents by selecting those whose scores fall + within the top 'p' percent of the cumulative distribution. It is useful for focusing on high-probability documents while filtering out less relevant ones based on their assigned scores. Usage example: @@ -44,9 +44,9 @@ def __init__(self, top_p: float = 1.0, score_field: Optional[str] = None): Creates an instance of TopPSampler. :param top_p: Float between 0 and 1 representing the cumulative probability threshold for document selection. - Defaults to 1.0, indicating no filtering (all documents are retained). + A value of 1.0 indicates no filtering (all documents are retained). :param score_field: Name of the field in each document's metadata that contains the score. If None, the default - document score field is used. + document score field is used. 
""" torch_import.check() @@ -57,17 +57,14 @@ def __init__(self, top_p: float = 1.0, score_field: Optional[str] = None): def run(self, documents: List[Document], top_p: Optional[float] = None): """ Filters documents using top-p sampling based on their scores. + If the specified top_p results in no documents being selected (especially in cases of a low top_p value), the + method returns the document with the highest similarity score. :param documents: List of Document objects to be filtered. :param top_p: Optional. A float to override the cumulative probability threshold set during initialization. - If None, the class's top_p value is used. - :return: A dictionary with a key 'documents' containing the list of filtered Document objects. - - This method applies top-p sampling to filter out documents. It selects those documents whose similarity scores - are within the top 'p' percent of the cumulative distribution, based on the specified or default top_p value. - If the specified top_p results in no documents being selected (especially in cases of a low top_p value), the - method defaults to returning the document with the highest similarity score. + :returns: A dictionary with the following key: + - `documents`: List of Document objects that have been selected based on the top-p sampling. :raises ValueError: If the top_p value is not within the range [0, 1]. """ From 2a591280ab43aba52bfd5cf61c2b0056c5655b98 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Tue, 27 Feb 2024 09:15:01 +0100 Subject: [PATCH 10/10] feat: implement support for structured logging (#7126) * feat: implement support for structured logging * docs: add release notes * style: add explanatory comment * chore: test + import fixes * tests: fix windows tests --- haystack/__init__.py | 5 + haystack/logging.py | 97 +++++++++ pyproject.toml | 3 + .../structured-logging-2d6cef3fee2b4f0e.yaml | 18 ++ test/test_logging.py | 206 ++++++++++++++++++ test/tracing/test_datadog.py | 2 - 6 files changed, 329 insertions(+), 2 deletions(-) create mode 100644 haystack/logging.py create mode 100644 releasenotes/notes/structured-logging-2d6cef3fee2b4f0e.yaml create mode 100644 test/test_logging.py diff --git a/haystack/__init__.py b/haystack/__init__.py index b20a367e3..dd4e910b8 100644 --- a/haystack/__init__.py +++ b/haystack/__init__.py @@ -3,6 +3,11 @@ from haystack.core.pipeline import Pipeline from haystack.core.serialization import default_from_dict, default_to_dict from haystack.dataclasses import Answer, Document, ExtractedAnswer, GeneratedAnswer +import haystack.logging + +# Initialize the logging configuration +# This is a no-op unless `structlog` is installed +haystack.logging.configure_logging() __all__ = [ "component", diff --git a/haystack/logging.py b/haystack/logging.py new file mode 100644 index 000000000..6f3ea1d6f --- /dev/null +++ b/haystack/logging.py @@ -0,0 +1,97 @@ +import logging +import os +import typing +from typing import List + +if typing.TYPE_CHECKING: + from structlog.typing import Processor + +HAYSTACK_LOGGING_USE_JSON_ENV_VAR = "HAYSTACK_LOGGING_USE_JSON" +HAYSTACK_LOGGING_IGNORE_STRUCTLOG_ENV_VAR = "HAYSTACK_LOGGING_IGNORE_STRUCTLOG" + + +def configure_logging(use_json: bool = False) -> None: + """Configure logging for Haystack. + + - If `structlog` is not installed, we keep everything as it is. The user is responsible for configuring logging + themselves. + - If `structlog` is installed, we configure it to format log entries including its key-value data. 
To disable this + behavior set the environment variable `HAYSTACK_LOGGING_IGNORE_STRUCTLOG` to `true`. + - If `structlog` is installed, you can JSON format all logs. Enable this by + - setting the `use_json` parameter to `True` when calling this function + - setting the environment variable `HAYSTACK_LOGGING_USE_JSON` to `true` + """ + try: + import structlog + from structlog.processors import ExceptionRenderer + from structlog.tracebacks import ExceptionDictTransformer + + except ImportError: + # structlog is not installed - fall back to standard logging + return + + if os.getenv(HAYSTACK_LOGGING_IGNORE_STRUCTLOG_ENV_VAR, "false").lower() == "true": + # If the user wants to ignore structlog, we don't configure it and fall back to standard logging + return + + # We roughly follow the structlog documentation here: + # https://www.structlog.org/en/stable/standard-library.html#rendering-using-structlog-based-formatters-within-logging + # This means that we use structlog to format the log entries for entries emitted via `logging` and `structlog`. + + shared_processors: List[Processor] = [ + # Add the log level to the event_dict for structlog to use + structlog.stdlib.add_log_level, + # Adds the current timestamp in ISO format to logs + structlog.processors.TimeStamper(fmt="iso"), + ] + + structlog.configure( + processors=shared_processors + [structlog.stdlib.ProcessorFormatter.wrap_for_formatter], + logger_factory=structlog.stdlib.LoggerFactory(), + cache_logger_on_first_use=True, + # This is a filter that will filter out log entries that are below the log level of the root logger. + wrapper_class=structlog.make_filtering_bound_logger(min_level=logging.root.getEffectiveLevel()), + ) + + renderers: List[Processor] + if os.getenv(HAYSTACK_LOGGING_USE_JSON_ENV_VAR, "false").lower() == "true" or use_json: + renderers = [ + ExceptionRenderer( + # don't show locals in production logs - this can be quite sensitive information + ExceptionDictTransformer(show_locals=False) + ), + structlog.processors.JSONRenderer(), + ] + else: + renderers = [structlog.dev.ConsoleRenderer()] + + formatter = structlog.stdlib.ProcessorFormatter( + # These run ONLY on `logging` entries that do NOT originate within + # structlog. + foreign_pre_chain=shared_processors + + [ + # Add the information from the `logging` `extras` to the event dictionary + structlog.stdlib.ExtraAdder() + ], + # These run on ALL entries after the pre_chain is done. + processors=[ + # Remove _record & _from_structlog. to avoid that this metadata is added to the final log record + structlog.stdlib.ProcessorFormatter.remove_processors_meta, + *renderers, + ], + ) + + handler = logging.StreamHandler() + handler.name = "HaystackLoggingHandler" + # Use OUR `ProcessorFormatter` to format all `logging` entries. 
+    handler.setFormatter(formatter)
+
+    root_logger = logging.getLogger()
+    # avoid adding our handler twice
+    old_handlers = [
+        h
+        for h in root_logger.handlers
+        if not (isinstance(h, logging.StreamHandler) and h.name == "HaystackLoggingHandler")
+    ]
+    new_handlers = [handler, *old_handlers]
+    root_logger.handlers = new_handlers
diff --git a/pyproject.toml b/pyproject.toml
index 2372310ef..18e2fc49d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -122,6 +122,9 @@ extra-dependencies = [
   # Tracing
   "opentelemetry-sdk",
   "ddtrace",
+
+  # Structured logging
+  "structlog",
 ]
 
 [tool.hatch.envs.test.scripts]
diff --git a/releasenotes/notes/structured-logging-2d6cef3fee2b4f0e.yaml b/releasenotes/notes/structured-logging-2d6cef3fee2b4f0e.yaml
new file mode 100644
index 000000000..583dea247
--- /dev/null
+++ b/releasenotes/notes/structured-logging-2d6cef3fee2b4f0e.yaml
@@ -0,0 +1,18 @@
+---
+features:
+  - |
+    Haystack now supports structured logging out of the box.
+    Logging can be separated into 3 categories:
+    - If [`structlog`](https://www.structlog.org/en/stable/) is not installed, Haystack will
+      use the standard Python logging library with whatever configuration is present.
+    - If `structlog` is installed, Haystack will log through [`structlog`](https://www.structlog.org/en/stable/) using
+      structlog's console renderer.
+      To disable structlog, set the environment variable `HAYSTACK_LOGGING_IGNORE_STRUCTLOG` to `true`.
+    - To log in JSON, install [`structlog`](https://www.structlog.org/en/stable/) and
+      - set the environment variable `HAYSTACK_LOGGING_USE_JSON` to `true` or
+      - enable JSON logging from Python
+      ```python
+      import haystack.logging
+
+      haystack.logging.configure_logging(use_json=True)
+      ```
diff --git a/test/test_logging.py b/test/test_logging.py
new file mode 100644
index 000000000..9c212417e
--- /dev/null
+++ b/test/test_logging.py
@@ -0,0 +1,206 @@
+import builtins
+import json
+import logging
+import os
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+from unittest.mock import ANY, Mock
+
+import pytest
+from _pytest.capture import CaptureFixture
+from _pytest.logging import LogCaptureFixture
+from _pytest.monkeypatch import MonkeyPatch
+
+from haystack import logging as haystack_logging
+
+
+@pytest.fixture(autouse=True)
+def reset_logging_config() -> None:
+    old_handlers = logging.root.handlers.copy()
+    yield
+    # Reset the logging configuration after each test to avoid impacting other tests
+    logging.root.handlers = old_handlers
+
+
+class TestSkipLoggingConfiguration:
+    def test_skip_logging_configuration(
+        self, monkeypatch: MonkeyPatch, capfd: CaptureFixture, caplog: LogCaptureFixture
+    ) -> None:
+        monkeypatch.setenv("HAYSTACK_LOGGING_IGNORE_STRUCTLOG", "true")
+        haystack_logging.configure_logging()
+
+        logger = logging.getLogger(__name__)
+        logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
+
+        # the pytest fixture caplog only captures logs being rendered from the stdlib logging module
+        assert caplog.messages == ["Hello, structured logging!"]
+
+        # Nothing should be captured by capfd since structlog is not configured
+        assert capfd.readouterr().err == ""
+
+    def test_skip_logging_if_structlog_not_installed(
+        self, monkeypatch: MonkeyPatch, capfd: CaptureFixture, caplog: LogCaptureFixture
+    ) -> None:
+        monkeypatch.delitem(sys.modules, "structlog", raising=False)
+        monkeypatch.setattr(builtins, "__import__", Mock(side_effect=ImportError))
+
+        haystack_logging.configure_logging()
+
+        logger =
logging.getLogger(__name__) + logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) + + # the pytest fixture caplog only captures logs being rendered from the stdlib logging module + assert caplog.messages == ["Hello, structured logging!"] + + # Nothing should be captured by capfd since structlog is not configured + assert capfd.readouterr().err == "" + + +class TestStructuredLoggingConsoleRendering: + def test_log_filtering_when_using_debug(self, capfd: CaptureFixture) -> None: + haystack_logging.configure_logging() + + logger = logging.getLogger(__name__) + logger.debug("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) + + # Use `capfd` to capture the output of the final structlog rendering result + output = capfd.readouterr().err + assert output == "" + + def test_log_filtering_when_using_debug_and_log_level_is_debug(self, capfd: CaptureFixture) -> None: + haystack_logging.configure_logging() + + logger = logging.getLogger(__name__) + logger.setLevel(logging.DEBUG) + + logger.debug("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) + + # Use `capfd` to capture the output of the final structlog rendering result + output = capfd.readouterr().err + assert output != "" + + def test_console_rendered_structured_log(self, capfd: CaptureFixture) -> None: + haystack_logging.configure_logging() + + logger = logging.getLogger(__name__) + logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) + + # Use `capfd` to capture the output of the final structlog rendering result + output = capfd.readouterr().err + + # Only check for the minute to be a bit more robust + today = datetime.now(tz=timezone.utc).isoformat(timespec="minutes").replace("+00:00", "") + assert today in output + + log_level = "warning" + assert log_level in output + + assert "Hello, structured logging!" 
in output + + assert "key1" in output + assert "value1" in output + + def test_logging_exceptions(self, capfd: CaptureFixture) -> None: + haystack_logging.configure_logging() + + logger = logging.getLogger(__name__) + + def function_that_raises_and_adds_to_stack_trace(): + raise ValueError("This is an error") + + try: + function_that_raises_and_adds_to_stack_trace() + except ValueError: + logger.exception("An error happened") + + # Use `capfd` to capture the output of the final structlog rendering result + output = capfd.readouterr().err + + assert "An error happened" in output + + +class TestStructuredLoggingJSONRendering: + def test_logging_as_json(self, capfd: CaptureFixture) -> None: + haystack_logging.configure_logging(use_json=True) + + logger = logging.getLogger(__name__) + logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) + + # Use `capfd` to capture the output of the final structlog rendering result + output = capfd.readouterr().err + parsed_output = json.loads(output) # should not raise an error + + assert parsed_output == { + "event": "Hello, structured logging!", + "key1": "value1", + "key2": "value2", + "level": "warning", + "timestamp": ANY, + } + + def test_logging_as_json_enabling_via_env(self, capfd: CaptureFixture, monkeypatch: MonkeyPatch) -> None: + monkeypatch.setenv("HAYSTACK_LOGGING_USE_JSON", "true") + haystack_logging.configure_logging() + + logger = logging.getLogger(__name__) + logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) + + # Use `capfd` to capture the output of the final structlog rendering result + output = capfd.readouterr().err + parsed_output = json.loads(output) # should not raise an error + + assert parsed_output == { + "event": "Hello, structured logging!", + "key1": "value1", + "key2": "value2", + "level": "warning", + "timestamp": ANY, + } + + def test_logging_exceptions_json(self, capfd: CaptureFixture) -> None: + haystack_logging.configure_logging(use_json=True) + + logger = logging.getLogger(__name__) + + def function_that_raises_and_adds_to_stack_trace(): + my_local_variable = "my_local_variable" # noqa: F841 + raise ValueError("This is an error") + + try: + function_that_raises_and_adds_to_stack_trace() + except ValueError: + logger.exception("An error happened ") + + # Use `capfd` to capture the output of the final structlog rendering result + output = capfd.readouterr().err + parsed_output = json.loads(output) + assert parsed_output == { + "event": "An error happened ", + "level": "error", + "timestamp": ANY, + "exception": [ + { + "exc_type": "ValueError", + "exc_value": "This is an error", + "syntax_error": None, + "is_cause": False, + "frames": [ + { + "filename": str(Path.cwd() / "test" / "test_logging.py"), + "lineno": ANY, # otherwise the test breaks if you add a line :-) + "name": "test_logging_exceptions_json", + "line": "", + "locals": None, + }, + { + "filename": str(Path.cwd() / "test" / "test_logging.py"), + "lineno": ANY, # otherwise the test breaks if you add a line :-) + "name": "function_that_raises_and_adds_to_stack_trace", + "line": "", + "locals": None, + }, + ], + } + ], + } diff --git a/test/tracing/test_datadog.py b/test/tracing/test_datadog.py index d7e647ae0..9a259a372 100644 --- a/test/tracing/test_datadog.py +++ b/test/tracing/test_datadog.py @@ -17,8 +17,6 @@ def datadog_tracer(monkeypatch: MonkeyPatch) -> ddtrace.Tracer: tracer = ddtrace.Tracer() - # monkeypatch.setattr(ddtrace, "tracer", tracer) - return tracer
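To round off patch 10: as the tests above demonstrate, key-value data attached to standard `logging` calls survives into the rendered output. A minimal usage sketch, not part of the patch series; it assumes `structlog` is installed, and the `documents` field name is purely illustrative.

```python
# Hypothetical usage example (not part of the patches); requires `structlog`.
import logging

import haystack.logging

# Same effect as exporting HAYSTACK_LOGGING_USE_JSON=true before startup.
haystack.logging.configure_logging(use_json=True)

logger = logging.getLogger(__name__)

# Per test_logging.py above, `extra` key-value pairs become top-level JSON fields:
# {"event": "Indexing finished", "documents": 42, "level": "warning", "timestamp": "..."}
logger.warning("Indexing finished", extra={"documents": 42})
```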