In [1]:
from haystack import Document, Pipeline
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
from haystack.components.rankers import TransformersSimilarityRanker

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Tiny toy corpus: one Document per sentence, in this fixed order.
docs = [
    Document(content=text)
    for text in (
        "Paris is in France",
        "Berlin is in Germany",
        "Lyon is in France",
    )
]

In [3]:
# Start from a fresh in-memory store on every run of this cell, then index the
# toy corpus. The write call is left as the last expression so the notebook
# displays its return value.
document_store = InMemoryDocumentStore()
document_store.write_documents(documents=docs)

3

In [4]:
# Second stage: similarity ranker built with its default settings.
# warm_up() is called explicitly here, before the ranker is placed in a pipeline
# (presumably this is where the underlying model gets loaded -- TODO confirm).
ranker = TransformersSimilarityRanker()
ranker.warm_up()

# First stage: BM25 retrieval over the documents written to the store.
retriever = InMemoryBM25Retriever(document_store=document_store)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [6]:
# Assemble the two-stage pipeline. The names registered here ("retriever",
# "ranker") are the keys used later when connecting sockets and passing run inputs.
document_ranker_pipeline = Pipeline()
document_ranker_pipeline.add_component(name="retriever", instance=retriever)
document_ranker_pipeline.add_component(name="ranker", instance=ranker)

In [7]:
# Route the retriever's `documents` output into the ranker's `documents` input.
# Kept as the cell's last expression so the pipeline summary is displayed.
document_ranker_pipeline.connect(
    sender="retriever.documents",
    receiver="ranker.documents",
)

<haystack.core.pipeline.pipeline.Pipeline object at 0x000001C27CCFBBF0>
🚅 Components
  - retriever: InMemoryBM25Retriever
  - ranker: TransformersSimilarityRanker
🛤️ Connections
  - retriever.documents -> ranker.documents (List[Document])

In [8]:
# Query text handed to both the retriever and the ranker when the pipeline runs.
query = "Cities in France?"

In [10]:
# Run both stages with the same query. Inputs are grouped by component name;
# each stage gets its own top_k (3 for the retriever, 2 for the ranker).
result = document_ranker_pipeline.run(
    data={
        "retriever": {"query": query, "top_k": 3},
        "ranker": {"query": query, "top_k": 2},
    }
)


In [11]:
# Dump the raw pipeline output: a dict keyed by component name ("ranker"),
# whose "documents" entry holds the ranked Document objects.
print(result)

{'ranker': {'documents': [Document(id=4583a7ddf7396ba413dd877de7b60f44e3512e2f3b1187dd4de32618e03b3d22, content: 'Paris is in France', score: 0.9444293975830078), Document(id=082ef4f19ffd14324bd316902c11d3b44a3cfe820bcc88281c88c92452616300, content: 'Lyon is in France', score: 0.6869028210639954)]}}


In [12]:
# Readable view of the ranking: one line per document, content followed by score.
for doc in result["ranker"]["documents"]:
    print(f"{doc.content} {doc.score}")

Paris is in France 0.9444293975830078
Lyon is in France 0.6869028210639954
