In [None]:
%%bash
# Make sure GPU hardware acceleration is enabled.
# NOTE: the %%bash cell magic has to be the very first line of the cell,
# so this explanation lives below it as an ordinary shell comment.

nvidia-smi

In [2]:
# Logging setup: the root logger stays at WARNING, while the "haystack"
# logger is raised to INFO so its progress messages are shown.

import logging

logging.basicConfig(
    level=logging.WARNING,
    format="%(levelname)s - %(name)s -  %(message)s",
)

haystack_logger = logging.getLogger("haystack")
haystack_logger.setLevel(logging.INFO)

 

In [None]:
# Create the document store that the indexing pipeline and the retriever
# share; it is constructed with BM25 turned on.
from haystack.document_stores import InMemoryDocumentStore

document_store = InMemoryDocumentStore(
    use_bm25=True,
)


: 

In [5]:
# Fetch the data set archive into `doc_dir`.
from haystack.utils import fetch_archive_from_http

doc_dir = "data/build_your_first_question_answering_system"
archive_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/wiki_gameofthrones_txt1.zip"

# Left as the last expression so the call's return value is displayed.
fetch_archive_from_http(url=archive_url, output_dir=doc_dir)


INFO:haystack.utils.import_utils:Fetching from https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/wiki_gameofthrones_txt1.zip to 'data/build_your_first_question_answering_system'


True

In [None]:
# Index the downloaded files: a TextIndexingPipeline converts them into
# Haystack Document objects and writes them into `document_store`.

import os
from haystack.pipelines.standard_pipelines import TextIndexingPipeline

# Every directory entry of `doc_dir` is passed on, prefixed with the directory.
files_to_index = [f"{doc_dir}/{entry}" for entry in os.listdir(doc_dir)]

indexing_pipeline = TextIndexingPipeline(document_store)
indexing_pipeline.run_batch(file_paths=files_to_index)


In [7]:
# Build the BM25-based retriever on top of the document store created earlier.

from haystack.nodes import BM25Retriever

retriever = BM25Retriever(
    document_store=document_store,
)


In [18]:
# Initialize the answer-producing node.
# Despite the variable name `reader`, this is not a RoBERTa reader model:
# the cell builds an OpenAIAnswerGenerator configured for an Azure OpenAI
# deployment. The name is kept because the pipeline cell refers to it.
import os

from haystack.nodes import OpenAIAnswerGenerator

# Read the key from the environment (never hardcode it) and fail fast with a
# clear message rather than handing `api_key=None` to the generator.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before running this cell."
    )

reader = OpenAIAnswerGenerator(
    api_key=openai_api_key,
    azure_base_url="https://rg-openai-sandbox.openai.azure.com/",
    azure_deployment_name="davinci-003",
    model="text-davinci-003",
)


In [21]:
# Wire the retriever and the answer node into a single query pipeline.
# NOTE(review): `reader` holds an OpenAIAnswerGenerator, not an extractive
# reader; the query cell addresses it under the "Reader" key, so the
# ExtractiveQAPipeline wrapper is kept as-is here.

from haystack.pipelines import ExtractiveQAPipeline

pipe = ExtractiveQAPipeline(reader=reader, retriever=retriever)


In [24]:
# Ask the pipeline a question and print the answers it produces.
from pprint import pprint
from haystack.utils import print_answers

question = "Can you describe characteristics of Arya Stark?"

# Per-node settings, keyed by the names the pipeline nodes are addressed by.
node_params = {
    "Retriever": {"top_k": 10},
    "Reader": {"top_k": 5},
}

answer = pipe.run(query=question, params=node_params)

# For the raw result, call pprint(answer) instead of print_answers.
# `details` can be `minimum`, `medium`, or `all`.
print_answers(answer, details="minimum")



'Query: Can you describe characteristics of Arya Stark?'
'Answers:'
[   {   'answer': ' Arya Stark is a character in the series "A Song of Ice and '
                  'Fire" who is described as brave, determined, and '
                  'resourceful. She is known for chasing cats and for her '
                  'pursuit of justice. She is also a skilled fighter and has'},
    {   'answer': " Arya Stark is a character in the series ''A Song of Ice "
                  "and Fire''. She is a young girl who is brave and "
                  'determined, and is often seen chasing cats in scenes filmed '
                  'at Fort St. Angelo. She is also a skilled fighter and'},
    {   'answer': " Arya Stark is a character in the series ''A Song of Ice "
                  "and Fire''. She is a young girl who is brave, determined, "
                  'and resourceful. She is also a skilled fighter and is often '
                  "seen chasing cats in the streets of King's"},
    {   'answer':