In [None]:
from haystack.telemetry import tutorial_running

# Calls Haystack's telemetry helper with the value 34.
# NOTE(review): presumably 34 is this tutorial's number and the call only
# records that the tutorial was run — confirm against haystack.telemetry.
tutorial_running(34)

In [1]:
from datasets import load_dataset
from haystack import Document
from haystack import Pipeline
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.readers import ExtractiveReader
from haystack.components.embedders import SentenceTransformersDocumentEmbedder
from haystack.components.writers import DocumentWriter

# Fetch the train split of the dataset and wrap every row in a Haystack
# Document, carrying over the row's text and metadata.
dataset = load_dataset("bilgeyucel/seven-wonders", split="train")
documents = [
    Document(content=row["content"], meta=row["meta"])
    for row in dataset
]

# Embedding model name and the in-memory store the writer fills below.
model = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
document_store = InMemoryDocumentStore()

# Indexing pipeline: embedder -> writer.
indexing_pipeline = Pipeline()
indexing_pipeline.add_component(
    name="embedder",
    instance=SentenceTransformersDocumentEmbedder(model=model),
)
indexing_pipeline.add_component(
    name="writer",
    instance=DocumentWriter(document_store=document_store),
)
# Feed the embedder's output documents into the writer.
indexing_pipeline.connect("embedder.documents", "writer.documents")

# Last expression of the cell: the run result is displayed as the cell output.
indexing_pipeline.run({"documents": documents})

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/212 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/8.71k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

{'writer': {'documents_written': 151}}

In [3]:
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.readers import ExtractiveReader
from haystack.components.embedders import SentenceTransformersTextEmbedder

# Query-time components: a retriever over the document store populated during
# indexing, and an extractive reader that is warmed up before use.
retriever = InMemoryEmbeddingRetriever(document_store=document_store)
reader = ExtractiveReader()
reader.warm_up()

# Query pipeline: embedder -> retriever -> reader.
extractive_qa_pipeline = Pipeline()
extractive_qa_pipeline.add_component(
    name="embedder",
    instance=SentenceTransformersTextEmbedder(model=model),
)
extractive_qa_pipeline.add_component(name="retriever", instance=retriever)
extractive_qa_pipeline.add_component(name="reader", instance=reader)

# The query embedding drives retrieval; retrieved documents go to the reader.
extractive_qa_pipeline.connect(
    "embedder.embedding",
    "retriever.query_embedding",
)
# Kept as the final expression so the cell still displays the pipeline repr.
extractive_qa_pipeline.connect(
    "retriever.documents",
    "reader.documents",
)

<haystack.core.pipeline.pipeline.Pipeline object at 0x000001F3D5D5DF70>
🚅 Components
  - embedder: SentenceTransformersTextEmbedder
  - retriever: InMemoryEmbeddingRetriever
  - reader: ExtractiveReader
🛤️ Connections
  - embedder.embedding -> retriever.query_embedding (List[float])
  - retriever.documents -> reader.documents (List[Document])

In [5]:
query = "What most likely happened in 1402 when the Knights of St John of Jerusalem arrived?"

# Inputs are keyed by component name: the embedder and the reader both receive
# the query text, and top_k is passed as 3 to the retriever and 2 to the reader.
extractive_qa_pipeline.run(
    data={
        "embedder": {"text": query},
        "retriever": {"top_k": 3},
        "reader": {"query": query, "top_k": 2},
    }
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'reader': {'answers': [ExtractedAnswer(query='What most likely happened in 1402 when the Knights of St John of Jerusalem arrived?', score=0.6509622931480408, data='earthquake', document=Document(id=72d9ba5c859a4737ea03759f89a6d4fb50cab7f04061de6c33e1835ef7f8eedf, content: 'Because of this, Fergusson concluded that the building was ruined, probably by an earthquake, betwee...', meta: {'url': 'https://en.wikipedia.org/wiki/Mausoleum_at_Halicarnassus', '_split_id': 6}, score: 19.51354383276873), context=None, document_offset=ExtractedAnswer.Span(start=82, end=92), context_offset=None, meta={}),
   ExtractedAnswer(query='What most likely happened in 1402 when the Knights of St John of Jerusalem arrived?', score=0.29987215995788574, data='The Knights of St John of Rhodes invaded the region and built Bodrum Castle (Castle of Saint Peter). When they decided to fortify it in 1494, they used the stones of the Mausoleum. This is also about when "imaginative reconstructions" of the Mausoleum beg