# Haystack demo

This notebook follows the Haystack extractive QA tutorial: https://haystack.deepset.ai/tutorials/34_extractive_qa_pipeline

In [1]:
pip install haystack-ai accelerate "sentence-transformers>=3.0.0" "datasets>=2.6.1"


Collecting haystack-ai
  Downloading haystack_ai-2.6.1-py3-none-any.whl.metadata (13 kB)
Collecting sentence-transformers>=3.0.0
  Downloading sentence_transformers-3.2.1-py3-none-any.whl.metadata (10 kB)
Collecting haystack-experimental (from haystack-ai)
  Downloading haystack_experimental-0.2.0-py3-none-any.whl.metadata (11 kB)
Collecting lazy-imports (from haystack-ai)
  Downloading lazy_imports-0.3.1-py3-none-any.whl.metadata (10 kB)
Collecting openai>=1.1.0 (from haystack-ai)
  Downloading openai-1.52.2-py3-none-any.whl.metadata (24 kB)
Collecting posthog (from haystack-ai)
  Downloading posthog-3.7.0-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting jiter<1,>=0.4.0 (from openai>=1.1.0->haystack-ai)
  Downloading jiter-0.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)
Collecting monotonic>=1.5 (from posthog->haystack-ai)
  Downloading monotonic-1.6-py2.py3-none-any.whl.metadata (1.5 kB)
Collecting backoff>=1.10.0 (from posthog->haystack-ai)
  Down

In [2]:
from datasets import load_dataset
from haystack import Document
from haystack import Pipeline
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.readers import ExtractiveReader
from haystack.components.embedders import SentenceTransformersDocumentEmbedder
from haystack.components.writers import DocumentWriter


# Embedding model name; the query-side embedder in a later cell reuses `model`.
model = "sentence-transformers/multi-qa-mpnet-base-dot-v1"

# In-memory store that receives the embedded documents.
document_store = InMemoryDocumentStore()

# Load the train split of the seven-wonders dataset and wrap each row in a
# Haystack Document, carrying the row's "content" and "meta" fields over.
dataset = load_dataset("bilgeyucel/seven-wonders", split="train")
documents = [Document(content=row["content"], meta=row["meta"]) for row in dataset]

# Indexing pipeline: embed the documents, then write them to the store.
indexing_pipeline = Pipeline()
indexing_pipeline.add_component("embedder", SentenceTransformersDocumentEmbedder(model=model))
indexing_pipeline.add_component("writer", DocumentWriter(document_store=document_store))
indexing_pipeline.connect(sender="embedder.documents", receiver="writer.documents")

# Last expression of the cell: the run result is displayed as the cell output.
indexing_pipeline.run(data={"documents": documents})



README.md:   0%|          | 0.00/46.0 [00:00<?, ?B/s]

(…)-00000-of-00001-4077bd623d55100a.parquet:   0%|          | 0.00/119k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/151 [00:00<?, ? examples/s]

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/212 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/8.71k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

{'writer': {'documents_written': 151}}

In [3]:
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.readers import ExtractiveReader
from haystack.components.embedders import SentenceTransformersTextEmbedder


# Query pipeline: question embedding -> retrieval -> answer extraction.
extractive_qa_pipeline = Pipeline()

# The retriever searches the same `document_store` the indexing cell wrote to.
retriever = InMemoryEmbeddingRetriever(document_store=document_store)

# Reader with default settings, warmed up explicitly before it is added to the
# pipeline (presumably this is what triggers the model download seen in this
# cell's output -- confirm against the Haystack docs).
reader = ExtractiveReader()
reader.warm_up()

# The question embedder uses the same `model` as the document embedder.
extractive_qa_pipeline.add_component("embedder", SentenceTransformersTextEmbedder(model=model))
extractive_qa_pipeline.add_component("retriever", retriever)
extractive_qa_pipeline.add_component("reader", reader)

extractive_qa_pipeline.connect(sender="embedder.embedding", receiver="retriever.query_embedding")
# Final expression: connect() returns the pipeline, so the cell displays its summary.
extractive_qa_pipeline.connect(sender="retriever.documents", receiver="reader.documents")


config.json:   0%|          | 0.00/729 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/496M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/295 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

<haystack.core.pipeline.pipeline.Pipeline object at 0x7c43c00fd9f0>
🚅 Components
  - embedder: SentenceTransformersTextEmbedder
  - retriever: InMemoryEmbeddingRetriever
  - reader: ExtractiveReader
🛤️ Connections
  - embedder.embedding -> retriever.query_embedding (List[float])
  - retriever.documents -> reader.documents (List[Document])

In [4]:
# The same question text feeds both the embedder and the reader; the retriever
# is asked for 3 documents and the reader for 2 answers.
query = "Who was Pliny the Elder?"
extractive_qa_pipeline.run(
    data={
        "embedder": {"text": query},
        "retriever": {"top_k": 3},
        "reader": {"query": query, "top_k": 2},
    }
)


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'reader': {'answers': [ExtractedAnswer(query='Who was Pliny the Elder?', score=0.8306006193161011, data='Roman writer', document=Document(id=bb2c5f3d2e2e2bf28d599c7b686ab47ba10fbc13c07279e612d8632af81e5d71, content: 'The Roman writer Pliny the Elder, writing in the first century AD, argued that the Great Pyramid had...', meta: {'url': 'https://en.wikipedia.org/wiki/Great_Pyramid_of_Giza', '_split_id': 16}, score: 21.667728268420095), context=None, document_offset=ExtractedAnswer.Span(start=4, end=16), context_offset=None, meta={}),
   ExtractedAnswer(query='Who was Pliny the Elder?', score=0.7280884385108948, data='a Roman author', document=Document(id=8910f21f7c0e97792473bcc60a8dcc7f6a90586dbb46b7bf96d28dbfcdc313f4, content: '[21]
   Pliny the Elder (AD 23/24 – 79) was a Roman author, a naturalist and natural philosopher, a nav...', meta: {'url': 'https://en.wikipedia.org/wiki/Colossus_of_Rhodes', '_split_id': 8}, score: 26.857540364652507), context=None, document_offset=ExtractedAns

In [5]:
# The same question text feeds both the embedder and the reader; the retriever
# is asked for 3 documents and the reader for 2 answers.
query = "Who thought that the walls of Babylon should be a wonder?"
extractive_qa_pipeline.run(
    data={
        "embedder": {"text": query},
        "retriever": {"top_k": 3},
        "reader": {"query": query, "top_k": 2},
    }
)


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'reader': {'answers': [ExtractedAnswer(query='Who thought that the walls of Babylon should be a wonder?', score=0.5408721566200256, data='Stephanie Dalley', document=Document(id=6b10fe81a7fbed8b3cccea3009351bf20bfb6c53f7b3993223f1f3b97b836992, content: '[24]
   
   Identification with Sennacherib's gardens at Nineveh
   See also: Nineveh § Sennacherib's Nineveh...', meta: {'url': 'https://en.wikipedia.org/wiki/Hanging_Gardens_of_Babylon', '_split_id': 6}, score: 23.359479434769035), context=None, document_offset=ExtractedAnswer.Span(start=116, end=132), context_offset=None, meta={}),
   ExtractedAnswer(query='Who thought that the walls of Babylon should be a wonder?', score=0.4154881238937378, data='Hellenic culture', document=Document(id=d9d010c8de4b44dade963cb84f936a313a56f96e9aaed19f760cf8538ded0e4f, content: 'The Hanging Gardens of Babylon were one of the Seven Wonders of the Ancient World listed by Hellenic...', meta: {'url': 'https://en.wikipedia.org/wiki/Hanging_Gardens_of_Baby

In [6]:
# The same question text feeds both the embedder and the reader; the retriever
# is asked for 3 documents and the reader for 2 answers.
query = "Where is the Porcelain tower?"
extractive_qa_pipeline.run(
    data={
        "embedder": {"text": query},
        "retriever": {"top_k": 3},
        "reader": {"query": query, "top_k": 2},
    }
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'reader': {'answers': [ExtractedAnswer(query='Where is the Porcelain tower?', score=0.5424820184707642, data="between the upper end of the Grand Gallery and the King's Chamber", document=Document(id=88eb71b56b86e994f66df0cfe5be8b82494d064f730a8f59f2dcd92ee6005307, content: '[171]
   To verify and pinpoint the void, a team from Kyushu University, Tohoku University, the Univers...', meta: {'url': 'https://en.wikipedia.org/wiki/Great_Pyramid_of_Giza', '_split_id': 51}, score: 18.043354178274), context=None, document_offset=ExtractedAnswer.Span(start=518, end=583), context_offset=None, meta={}),
   ExtractedAnswer(query='Where is the Porcelain tower?', score=0.3870896100997925, data='the Bodrum', document=Document(id=72d9ba5c859a4737ea03759f89a6d4fb50cab7f04061de6c33e1835ef7f8eedf, content: 'Because of this, Fergusson concluded that the building was ruined, probably by an earthquake, betwee...', meta: {'url': 'https://en.wikipedia.org/wiki/Mausoleum_at_Halicarnassus', '_split_id': 6}, sco

In [7]:
# The same question text feeds both the embedder and the reader; the retriever
# is asked for 3 documents and the reader for 2 answers.
query = "What was destroyed by fire?"
extractive_qa_pipeline.run(
    data={
        "embedder": {"text": query},
        "retriever": {"top_k": 3},
        "reader": {"query": query, "top_k": 2},
    }
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'reader': {'answers': [ExtractedAnswer(query='What was destroyed by fire?', score=0.804988443851471, data='Palace of Lausus', document=Document(id=60469411be57782b4a518ee430379e5235c5dff6f279fe232fae0adfb840644d, content: 'The 11th-century Byzantine historian Georgios Kedrenos records a tradition that it was carried off t...', meta: {'url': 'https://en.wikipedia.org/wiki/Statue_of_Zeus_at_Olympia', '_split_id': 5}, score: 17.20918225984975), context=None, document_offset=ExtractedAnswer.Span(start=166, end=182), context_offset=None, meta={}),
   ExtractedAnswer(query='What was destroyed by fire?', score=0.6687393188476562, data='the temple', document=Document(id=60469411be57782b4a518ee430379e5235c5dff6f279fe232fae0adfb840644d, content: 'The 11th-century Byzantine historian Georgios Kedrenos records a tradition that it was carried off t...', meta: {'url': 'https://en.wikipedia.org/wiki/Statue_of_Zeus_at_Olympia', '_split_id': 5}, score: 17.20918225984975), context=None, document_offset

In [8]:
# The same question text feeds both the embedder and the reader; the retriever
# is asked for 3 documents and the reader for 2 answers.
query = "What was commissioned by the Eleans?"
extractive_qa_pipeline.run(
    data={
        "embedder": {"text": query},
        "retriever": {"top_k": 3},
        "reader": {"query": query, "top_k": 2},
    }
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'reader': {'answers': [ExtractedAnswer(query='What was commissioned by the Eleans?', score=0.6260069012641907, data='sculptor Phidias', document=Document(id=382f34edec5b00cf78649285c6968c9fefbc8418b410bdd28f13da2226a4c204, content: 'Seeking to outdo their Athenian rivals, the Eleans employed sculptor Phidias, who had previously mad...', meta: {'url': 'https://en.wikipedia.org/wiki/Statue_of_Zeus_at_Olympia', '_split_id': 1}, score: 18.565420468285538), context=None, document_offset=ExtractedAnswer.Span(start=60, end=76), context_offset=None, meta={}),
   ExtractedAnswer(query='What was commissioned by the Eleans?', score=0.5411168932914734, data='a canal', document=Document(id=785075351ac84affb97aca27041e4a412e2e7ac7b3a03c986b5ca6fdd386425c, content: '[36] There was a tradition of Assyrian royal garden building. King Ashurnasirpal II (883–859 BC) had...', meta: {'url': 'https://en.wikipedia.org/wiki/Hanging_Gardens_of_Babylon', '_split_id': 8}, score: 19.397894747677533), context=None

# Conclusion

The results seem poor. I was just browsing the source pages and rephrasing sentences word for word as questions. None of the above results are correct, at least according to the current Wikipedia page on the Seven Wonders and its related pages.

I copied the parameters from the docs.