In [3]:
import os
from haystack.document_stores import ElasticsearchDocumentStore

# Connection settings are taken from the environment so the notebook can be
# pointed at a different (or secured) Elasticsearch instance without editing
# code. The defaults reproduce the previous values: localhost, empty credentials.
host = os.environ.get("ELASTICSEARCH_HOST", "localhost")
es_username = os.environ.get("ELASTICSEARCH_USERNAME", "")
es_password = os.environ.get("ELASTICSEARCH_PASSWORD", "")

# Every document handled by this notebook goes into the "ifu" index.
document_store = ElasticsearchDocumentStore(
    host=host,
    username=es_username,
    password=es_password,
    index="ifu",
)

In [4]:
from haystack.utils import clean_wiki_text, convert_files_to_docs

# Directory containing the source files to index (relative to the notebook).
doc_dir = "haystackdata/ifu"

# Convert the files under `doc_dir` into Haystack documents, split by paragraph.
docs = convert_files_to_docs(dir_path=doc_dir, split_paragraphs=True)

# Fail fast when nothing was converted (e.g. a wrong path) rather than
# carrying on with an empty index and getting no answers further down.
if not docs:
    raise ValueError(f"No documents were produced from {doc_dir!r}; check the path.")

# Quick sanity check of what is about to be indexed.
print(docs[:3])
document_store.write_documents(docs)



[<Document: {'content': 'Instructions\nfor Use\nEnglish\n4523 001 01513', 'content_type': 'text', 'score': None, 'meta': {'name': 'azurion_ifu.txt'}, 'embedding': None, 'id': 'e69a0b8d8884c77b55b97ed48f73398'}>, <Document: {'content': 'Philips Azurion\nRelease 2.2\nContents\n1 Introduction', 'content_type': 'text', 'score': None, 'meta': {'name': 'azurion_ifu.txt'}, 'embedding': None, 'id': 'b8ddaeea1d29fcc44e4b76c70a667e1c'}>, <Document: {'content': '17', 'content_type': 'text', 'score': None, 'meta': {'name': 'azurion_ifu.txt'}, 'embedding': None, 'id': 'b9674cccc622d857e0ba3a483c697461'}>]


In [5]:
from haystack.nodes import BM25Retriever

# Retriever that looks up candidate documents in `document_store`.
retriever = BM25Retriever(
    document_store=document_store,
)

In [6]:
from haystack.nodes import FARMReader

# Model identifier handed to the reader; named so it is easy to spot and swap.
reader_model = "deepset/roberta-base-squad2"

reader = FARMReader(
    model_name_or_path=reader_model,
    use_gpu=True,
)

In [7]:
from haystack.pipelines import ExtractiveQAPipeline

# Combine the retriever and the reader into a single question-answering pipeline.
pipe = ExtractiveQAPipeline(reader=reader, retriever=retriever)


In [25]:
from haystack.utils import print_answers

# `top_k` sets how many candidates each node returns. A higher Retriever
# `top_k` tends to give better answers, at the cost of a slower query.
qa_params = {
    "Retriever": {"top_k": 50},
    "Reader": {"top_k": 5},
}
prediction = pipe.run(query="beam limitation", params=qa_params)

# `details` sets how much is printed per answer: `minimum`, `medium`
# (used here) or `all`, in increasing level of detail.
print_answers(prediction, details="medium")


Inferencing Samples: 100%|██████████| 2/2 [00:04<00:00,  2.26s/ Batches]


Query: beam limitation
Answers:
[   {   'answer': 'misaligned',
        'context': 'The X-ray beam is misaligned. The image size is reduced. '
                   'Align the detector to portrait or landscape\n'
                   'orientation.',
        'score': 0.5999181270599365},
    {   'answer': '285',
        'context': '84, 292-294\n'
                   '— Automatic exposure control test 285\n'
                   '— Beam limitation check 285\n'
                   '— Calibration 292\n'
                   '— CBCT calibration 292\n'
                   '— Cleaning the system 275\n'
                   '— Dis',
        'score': 0.33294057846069336},
    {   'answer': 'Never radiate unless absolutely necessary',
        'context': 'ray radiation on the patient (in order\n'
                   'of workflow):\n'
                   '• Never radiate unless absolutely necessary and only '
                   'radiate for as short a time as possible.\n'
                   '• S',
        'score


