In [1]:
from haystack import Pipeline
from haystack.nodes import FARMReader
from haystack.nodes import DensePassageRetriever
import os
from haystack.document_stores import ElasticsearchDocumentStore
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Elasticsearch host name; taken from ELASTICSEARCH_HOST when set, otherwise "localhost".
host = os.getenv("ELASTICSEARCH_HOST", "localhost")

In [3]:
# Connect to the Elasticsearch index that backs retrieval.
# Credentials are read from the environment, the same way `host` is resolved,
# so a secured cluster can be used without editing the notebook. Both fall back
# to the empty strings that were previously hard-coded.
document_store = ElasticsearchDocumentStore(
    host=host,
    username=os.environ.get("ELASTICSEARCH_USERNAME", ""),
    password=os.environ.get("ELASTICSEARCH_PASSWORD", ""),
    index="document",
    # NOTE(review): similarity function and vector size have to agree with the
    # embeddings produced by the retriever's encoders — re-check if the models change.
    similarity="dot_product",
    embedding_dim=768,
)



In [4]:
# Dense retriever configured with two encoders: one for incoming questions and
# one for the passages held in `document_store`.
retriever = DensePassageRetriever(
    passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
    query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
    document_store=document_store,
)

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizerFast'.


In [5]:
# Reader model that extracts answer spans from the retrieved passages.
# The checkpoint is downloaded on first use (see the progress bars below).
reader = FARMReader(
    model_name_or_path="deepset/roberta-base-squad2",
    use_gpu=True,
)

Downloading (…)lve/main/config.json: 100%|██████████| 571/571 [00:00<00:00, 77.4kB/s]
Downloading pytorch_model.bin: 100%|██████████| 496M/496M [00:01<00:00, 327MB/s] 
Downloading (…)okenizer_config.json: 100%|██████████| 79.0/79.0 [00:00<00:00, 31.1kB/s]
Downloading (…)olve/main/vocab.json: 100%|██████████| 899k/899k [00:00<00:00, 55.8MB/s]
Downloading (…)olve/main/merges.txt: 100%|██████████| 456k/456k [00:00<00:00, 85.0MB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 772/772 [00:00<00:00, 330kB/s]


In [6]:
# Question-answering pipeline: Query -> Retriever -> Reader.
# NOTE(review): the earlier comment said this was "for reddit posts", but the
# documents returned further down are Duke Engineering pages — confirm the corpus.
querying_pipeline = Pipeline()
querying_pipeline.add_node(
    component=retriever,
    name="Retriever",
    inputs=["Query"],
)
querying_pipeline.add_node(
    component=reader,
    name="Reader",
    inputs=["Retriever"],  # the reader consumes the retriever's output
)

In [7]:
# Natural-language question sent through the pipeline.
query = "When was Pratt School of Engineering founded?"

In [8]:
# Run the question through the pipeline: the retriever is asked for 10
# candidates and the reader for its 5 best answers.
prediction = querying_pipeline.run(
    query=query,
    params=dict(
        Retriever=dict(top_k=10),
        Reader=dict(top_k=5),
    ),
)

Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  2.78 Batches/s]


In [9]:
# Show the raw pipeline result; the cells below read its "answers" and "documents" entries.
prediction

{'query': 'When was Pratt School of Engineering founded?',
 'no_ans_gap': 6.074307441711426,
 'answers': [<Answer {'answer': '1851', 'type': 'extractive', 'score': 0.950855016708374, 'context': 'ering at Duke The Pratt School of Engineering traces its history back to 1851 when Normal College, a forerunner of Duke University, advertised a Class', 'offsets_in_document': [{'start': 919, 'end': 923}], 'offsets_in_context': [{'start': 73, 'end': 77}], 'document_ids': ['8ac5c618edf87ef60f6cfb703ad8d98e'], 'meta': {'_split_id': 0}}>,
  <Answer {'answer': '1947', 'type': 'extractive', 'score': 0.6894222497940063, 'context': 'ke as engineering dean in 1999. Later that year the school was named for 1947 electrical engineering graduate and philanthropist Edmund T. Pratt Jr., ', 'offsets_in_document': [{'start': 864, 'end': 868}], 'offsets_in_context': [{'start': 73, 'end': 77}], 'document_ids': ['6ad9a9a0d75efa87f008c3a84094a997'], 'meta': {'_split_id': 3}}>,
  <Answer {'answer': '1937', 'type': 

In [10]:
# Tabulate the answers: one row per answer, columns taken from each answer's dict form.
answers = pd.DataFrame(
    [answer.to_dict() for answer in prediction["answers"]]
)

In [11]:
# Preview the answers table (at this point `document_ids` still holds a list per row).
answers

Unnamed: 0,answer,type,score,context,offsets_in_document,offsets_in_context,document_ids,meta
0,1851,extractive,0.950855,ering at Duke The Pratt School of Engineering traces its history back to 185...,"[{'start': 919, 'end': 923}]","[{'start': 73, 'end': 77}]",[8ac5c618edf87ef60f6cfb703ad8d98e],{'_split_id': 0}
1,1947,extractive,0.689422,ke as engineering dean in 1999. Later that year the school was named for 194...,"[{'start': 864, 'end': 868}]","[{'start': 73, 'end': 77}]",[6ad9a9a0d75efa87f008c3a84094a997],{'_split_id': 3}
2,1937,extractive,0.083638,an of the new college. With engineering alumni active and organized from 193...,"[{'start': 684, 'end': 688}]","[{'start': 73, 'end': 77}]",[9b3f892980e6dae18f7b1373533408d7],{'_split_id': 1}
3,1966,extractive,0.049353,pointed dean in 1963. The college was named the School of Engineering in 196...,"[{'start': 1014, 'end': 1018}]","[{'start': 73, 'end': 77}]",[7bab54b0d21cad535e31cd66d98f755b],{'_split_id': 2}
4,1937,extractive,0.013331,udying engineering. When Duke established the Division of Engineering in 193...,"[{'start': 281, 'end': 285}]","[{'start': 73, 'end': 77}]",[9b3f892980e6dae18f7b1373533408d7],{'_split_id': 1}


In [12]:
def _first_document_id(ids):
    """Return the first entry of a list/tuple of document ids.

    Anything that is not a list/tuple (for example an id string left behind by
    an earlier run of this cell) is returned unchanged, and an empty sequence
    maps to None instead of raising IndexError.
    """
    if isinstance(ids, (list, tuple)):
        return ids[0] if ids else None
    return ids


# Collapse each row's list of ids to a single id so the column can be joined
# against the documents' "id" values.
# A bare `x[0]` is not safe to re-run: on the second execution it would index
# into the id string itself and keep only its first character.
answers["document_ids"] = answers["document_ids"].apply(_first_document_id)

In [13]:
# Tabulate the documents returned alongside the answers, one row per document.
documents = pd.DataFrame(
    [document.to_dict() for document in prediction["documents"]]
)

In [14]:
# Attach each answer to the document it came from.
# Inner join: documents without an answer are dropped, and a document that
# produced several answers appears once per answer.
merge = documents.merge(
    answers,
    how="inner",
    left_on="id",
    right_on="document_ids",
)

In [15]:
# Full text of the first row of the joined table.
merge["content"].iloc[0]

"## You are here Home » Alumni & Giving # Duke Engineering: A History From its beginnings more than 75 years ago, Duke's Pratt School of Engineering has grown into one of the fastest-rising in the nation On June 3, 1939, the Duke University Board of Trustees approved the creation of the College of Engineering—establishing the school as a cornerstone of the young university. From its beginnings as an undergraduate college with just three faculty members in each department and only 201 students, the School of Engineering has grown into one of the nation’s fastest-rising engineering programs, with nearly 3,000 faculty, staff, and students, 15,000 accomplished alumni, a vibrant research engine and a record of trailblazing achievements that benefit people all over the world. View a text version of this timeline. ## A Brief History of Engineering at Duke The Pratt School of Engineering traces its history back to 1851 when Normal College, a forerunner of Duke University, advertised a Classica