In [None]:
import os
from haystack.telemetry import disable_telemetry
from haystack.document_stores import ElasticsearchDocumentStore

# Tell `requests`-based HTTP clients which CA bundle to trust. The path is
# relative, so the certificate file is expected next to the notebook's working
# directory.
os.environ['REQUESTS_CA_BUNDLE'] = 'cisco_umbrella_root_ca.cer'

# Importing the helper alone does nothing; it has to be called for the
# telemetry opt-out to take effect.
disable_telemetry()

# Connect to the Elasticsearch instance on localhost and use the "igt-docs"
# index. Username and password are left empty here.
document_store = ElasticsearchDocumentStore(host="localhost", username="", password="", index="igt-docs")

In [None]:
from haystack.utils import convert_files_to_docs

# Folder containing the source files to index (relative path).
doc_dir = "haystackdata/docxtest"

# Convert the files found under `doc_dir` into documents, with paragraph
# splitting enabled.
docs = convert_files_to_docs(
    dir_path=doc_dir,
    split_paragraphs=True,
)

# Print the first three converted documents as a quick sanity check, then
# write the full list to the document store.
preview = docs[:3]
print(preview)
document_store.write_documents(docs)

In [None]:
from haystack.nodes import BM25Retriever
from haystack.nodes import FARMReader
from haystack.pipelines import ExtractiveQAPipeline

# Same CA bundle setting as in the first cell, repeated here before the
# reader model is created.
os.environ["REQUESTS_CA_BUNDLE"] = "cisco_umbrella_root_ca.cer"

# BM25 retriever working on top of the document store.
retriever = BM25Retriever(document_store=document_store)

# Extractive reader using the "deepset/roberta-base-squad2" model, with GPU
# use requested.
reader = FARMReader(
    model_name_or_path="deepset/roberta-base-squad2",
    use_gpu=True,
)

# Combine both nodes into an extractive question-answering pipeline.
pipe = ExtractiveQAPipeline(reader=reader, retriever=retriever)

In [None]:
from haystack.utils import print_answers

# example queries:
# "how long does it take to reset the wedge"
# "how many TSM's can be connected to the system"
# "when does the screensaver become active"
# "which unit provides the BEFE interface"
# "what is APC"
# "where is image processing implemented"
# "what is the blue color in APC full system"

# Question to ask; swap in any of the examples listed above.
query = "when does the screensaver become active"

# Per-node settings passed to the pipeline: top_k of 10 for the retriever and
# top_k of 5 for the reader.
run_params = {
    "Retriever": {"top_k": 10},
    "Reader": {"top_k": 5},
}

prediction = pipe.run(query=query, params=run_params)

# Print the answers at the "medium" detail level.
print_answers(prediction, details="medium")