In [1]:
import os
from haystack.document_stores import ElasticsearchDocumentStore

# Elasticsearch host: read from ELASTICSEARCH_HOST when set, otherwise "localhost".
host = os.getenv("ELASTICSEARCH_HOST", "localhost")

# Document store on the "got" index; username and password are passed as empty strings.
# NOTE(review): the output saved with this cell is a ModuleNotFoundError
# ("'haystack' is not a package"), which suggests a local module named `haystack`
# is shadowing the installed package -- confirm and re-run.
document_store = ElasticsearchDocumentStore(
    host=host,
    username="",
    password="",
    index="got",
)

ModuleNotFoundError: No module named 'haystack.document_stores'; 'haystack' is not a package

In [2]:
from haystack.utils import clean_wiki_text, convert_files_to_docs, fetch_archive_from_http

# Download the sample corpus we want to query:
# 517 Wikipedia articles about Game of Thrones.
s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/wiki_gameofthrones_txt1.zip"
doc_dir = "haystackdata/tutorial1"
fetch_archive_from_http(url=s3_url, output_dir=doc_dir)

# Convert the downloaded files into documents.
# clean_func is optional; it is applied to each doc (e.g. to remove footers)
# and must take a str as input and return a str.
docs = convert_files_to_docs(
    dir_path=doc_dir,
    clean_func=clean_wiki_text,
    split_paragraphs=True,
)

# `docs` is now a list of documents that can be written to the document store.
# If your texts come from a different source (e.g. a DB), you can skip
# convert_files_to_docs() and build the entries yourself.
# The default format is:
# {
#    'content': "<DOCUMENT_TEXT_HERE>",
#    'meta': {'name': "<DOCUMENT_NAME_HERE>", ...}
# }
# (Optionally, add more key-value pairs here; they are indexed as fields in
# Elasticsearch and can be used later for filtering or shown in the Pipeline's responses.)

# Peek at the first 3 entries.
print(docs[:3])

# Write the documents to the document store.
document_store.write_documents(docs)

[<Document: {'content': "Linda Antonsson and Elio García at Archipelacon on June 28, 2015.\n'''Elio Miguel García Jr.''' (born May 6, 1978) and '''Linda Maria Antonsson''' (born November 18, 1974) are authors known for their contributions and expertise in the ''A Song of Ice and Fire'' series by George R. R. Martin, co-writing in 2014 with Martin ''The World of Ice & Fire'', a companion book for the series. They are also the founders of the fansite Westeros.org, one of the earliest fan websites for ''A Song of Ice and Fire''.", 'content_type': 'text', 'score': None, 'meta': {'name': '145_Elio_M._García_Jr._and_Linda_Antonsson.txt'}, 'embedding': None, 'id': '41655cc804bb07b1569f3118ce70e05'}>, <Document: {'content': '\n==Career==\nElio García was attending the University of Miami, while his partner Linda Antonsson was living in Sweden. At that time, in 1996, Antonsson introduced García to the \'\'A Song of Ice and Fire\'\' book series when it came out on paperback. After the second boo

In [3]:
from haystack.nodes import BM25Retriever

# BM25 retriever wired to the document store created earlier in the notebook.
retriever = BM25Retriever(
    document_store=document_store,
)

In [4]:
from haystack.nodes import FARMReader

# The reader can be loaded from a local model or from any of the QA models
# on Hugging Face's model hub (https://huggingface.co/models).
reader = FARMReader(
    model_name_or_path="deepset/roberta-base-squad2",
    use_gpu=True,
)

In [5]:
from haystack.pipelines import ExtractiveQAPipeline

# Extractive QA pipeline built from the reader and the retriever defined above.
pipe = ExtractiveQAPipeline(reader=reader, retriever=retriever)


In [6]:
# "top_k" sets how many candidates the Retriever and the Reader each return.
# The higher the Retriever's top_k, the better (but also the slower) your answers.
# NOTE(review): "Denairis" looks like a misspelling of "Daenerys" (the spelling that
# appears in the recorded answers); kept verbatim so the saved output still matches.
prediction = pipe.run(
    query="What is the name of Denairis Dragon",
    params={
        "Retriever": {"top_k": 10},
        "Reader": {"top_k": 5},
    },
)


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Inferencing Samples: 100%|██████████| 1/1 [00:02<00:00,  2.47s/ Batches]


In [7]:
from haystack.utils import print_answers

# `details` sets the level of detail of the printed answers:
# change "minimum" to "medium" or "all" to see more.
print_answers(results=prediction, details="minimum")



Query: What is the name of Denairis Dragon
Answers:
[   {   'answer': 'Viserion',
        'context': 'ng, leader of the White Walkers, kills and reanimates '
                   "Daenerys' dragon Viserion. Jon and Daenerys attempt to "
                   'persuade Cersei to join their cause by sh'},
    {   'answer': 'zaldrīzes',
        'context': "acarys'' an independent lexeme; his High Valyrian term for "
                   "dragon is ''zaldrīzes''. The phrases ''valar morghulis'' "
                   "and ''valar dohaeris'', on the oth"},
    {   'answer': 'Vermithrax',
        'context': ' Vhagar) and others invented for the show. Among them a '
                   'dragon called Vermithrax is mentioned, which is an homage '
                   'to Vermithrax Pejorative from the 19'},
    {   'answer': 'Daenerys arrives with her dragons',
        'context': "ry flees to Eastwatch to request Daenerys Targaryen's aid. "
                   'Daenerys arrives with her dragons before the