Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Passing the meta-data in the summarizer response #2179

Merged
merged 7 commits into from Jul 11, 2022
5 changes: 3 additions & 2 deletions haystack/nodes/summarizer/transformers.py
Expand Up @@ -168,8 +168,9 @@ def predict(

result: List[Document] = []

for context, summarized_answer in zip(contexts, summaries):
cur_doc = Document(content=summarized_answer["summary_text"], meta={"context": context})
for context, summarized_answer, document in zip(contexts, summaries, documents):
ZanSara marked this conversation as resolved.
Show resolved Hide resolved
cur_doc = Document(content=summarized_answer["summary_text"], meta=document.meta)
cur_doc.meta.update({"context": context})
result.append(cur_doc)

return result
48 changes: 47 additions & 1 deletion test/test_summarizer.py
Expand Up @@ -2,7 +2,7 @@

from haystack.schema import Document
from haystack.pipelines import SearchSummarizationPipeline
from haystack.nodes import DensePassageRetriever, EmbeddingRetriever
from haystack.nodes import DensePassageRetriever, EmbeddingRetriever, TransformersSummarizer

DOCS = [
Document(
Expand Down Expand Up @@ -94,3 +94,49 @@ def test_summarization_pipeline_one_summary(document_store, retriever, summarize
answers = output["answers"]
assert len(answers) == 1
assert answers[0]["answer"] in EXPECTED_ONE_SUMMARIES

@pytest.mark.slow
@pytest.mark.summarizer
def test_add_metadata_summarizer():
    """Check that ``TransformersSummarizer.predict`` carries input metadata into its output.

    Two documents with custom ``meta`` are summarized. Each returned document
    must keep every metadata entry of its input document and additionally
    expose a ``"context"`` entry. For the first document, which already had a
    dummy ``"context"`` value, that entry must be replaced by the document's
    full original content.
    """
    pge_text = """PG&E stated it scheduled the blackouts in response to forecasts for high winds amid dry conditions. The aim is to reduce the risk of wildfires. Nearly 800 thousand customers were scheduled to be affected by the shutoffs which were expected to last through at least midday tomorrow."""
    eiffel_text = """The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."""

    docs = [
        Document(
            content=pge_text,
            meta={
                "sub_content": "Pegasus Example",
                "topic": "California's Electricity",
                "context": "Dummy - PG&E stated it scheduled the blackouts in response to forecasts for high winds amid dry conditions. The aim is to reduce the risk of wildfires.",
            },
        ),
        Document(
            content=eiffel_text,
            meta={
                "sub_content": "Paris best tour best tour",
                "topic": "Eiffel tower",
            },
        ),
    ]

    # The input documents' meta may be overwritten by predict() (it writes a
    # "context" entry), so take a snapshot of the original metadata first
    # instead of maintaining a second, hand-copied list of documents.
    original_meta = [dict(doc.meta) for doc in docs]

    summarizer = TransformersSummarizer(model_name_or_path="google/pegasus-xsum")
    summary = summarizer.predict(documents=docs)

    assert len(summary) == len(docs)

    for summarized_doc, input_meta in zip(summary, original_meta):
        # "context" is set by the summarizer itself; every other entry must
        # be carried over unchanged from the input document.
        carried_over = {key: value for key, value in summarized_doc.meta.items() if key != "context"}
        expected = {key: value for key, value in input_meta.items() if key != "context"}
        assert carried_over == expected
        assert "context" in summarized_doc.meta

    # Same size checks as before: the first input already had a "context"
    # key (overwritten), the second one gains it.
    assert len(summary[0].meta) == len(original_meta[0])
    assert len(summary[1].meta) - 1 == len(original_meta[1])
    assert summary[0].meta["context"] == pge_text


# Backward-compatible alias for the original name, which pytest did not
# collect because it lacked the ``test_`` prefix.
add_metadata_summerizer = test_add_metadata_summarizer