In [1]:
# Haystack building blocks used by the cells below. The class names must match
# the names the later cells call (FARMReader, FAISSDocumentStore,
# DensePassageRetriever); misspelled names fail at import time.
from haystack.utils import convert_files_to_dicts, print_answers
from haystack.nodes import FARMReader
from haystack.document_stores import FAISSDocumentStore
from haystack.nodes import DensePassageRetriever
from haystack.pipelines import ExtractiveQAPipeline



In [3]:
# Create a new FAISS-backed document store; "Flat" is passed through as the
# FAISS index factory string. The constructor is used here because
# FAISSDocumentStore.load() restores a previously saved index and needs its
# path, and nothing has been saved yet at this point in the notebook.
document_store = FAISSDocumentStore(faiss_index_factory_str="Flat")

In [None]:
# Disabled leftover: presumably fetched the archive at `s3_url` into `doc_dir`.
# NOTE(review): `fetch_archive_from_http` is not among the names imported in
# the first cell, so it must be imported before this can be re-enabled.
# doc_dir = "Data/article_txt_got"
# s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/wiki_gameofthrones_txt.zip"
# fetch_archive_from_http(url=s3_url, output_dir=doc_dir)

In [None]:
# Convert the files under `doc_dir` into document dicts (with paragraph
# splitting requested) and write them into the document store.
doc_dir = "../../Site Data/Data"
dicts = convert_files_to_dicts(
    dir_path=doc_dir,
    split_paragraphs=True,
)
document_store.write_documents(dicts)

In [None]:
# Build the dense passage retriever on top of the document store, then have
# the store (re)compute its embeddings with that retriever.
retriever = DensePassageRetriever(
    document_store=document_store,
    # Question and passage encoders.
    query_embedding_model='facebook/dpr-question_encoder-single-nq-base',
    passage_embedding_model='facebook/dpr-ctx_encoder-single-nq-base',
    # Sequence-length limits for queries and passages.
    max_seq_len_query=64,
    max_seq_len_passage=256,
    # Runtime options.
    batch_size=16,
    use_gpu=True,
    embed_title=True,
    use_fast_tokenizers=True,
)
document_store.update_embeddings(retriever)

In [None]:
# Reader that extracts answer spans from the retrieved passages.
reader = FARMReader(
    model_name_or_path="deepset/roberta-base-squad2",
    use_gpu=True,
)

In [None]:
# Chain retriever and reader into a single extractive QA pipeline.
pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever)

In [22]:
# Ask a question; both the retriever and the reader are given top_k=10.
prediction = pipeline.run(
    query="Where is mits located?",
    params={
        "Retriever": {"top_k": 10},
        "Reader": {"top_k": 10},
    },
)

Inferencing Samples: 100%|██████████| 1/1 [00:12<00:00, 12.25s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  1.40 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  1.40 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  1.41 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  1.40 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  1.40 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:05<00:00,  5.19s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:10<00:00, 10.53s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.35s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  1.40 Batches/s]


In [23]:
# Pretty-print the answers at the "minimum" detail level.
print_answers(prediction, details="minimum")


Query: Where is mits located?
Answers:
[   {   'answer': 'Ernakulam',
        'context': 'ering (CSE) at the Muthoot Institute of Technology and '
                   'Science (MITS), Ernakulam! As you glance through the '
                   'faculty profile, I am sure that you will a'},
    {   'answer': 'Puthencruz',
        'context': 'Department profile Muthoot Institute of Technology and '
                   'Science (MITS) Puthencruz was established in May 2013 and '
                   'started its academic program in Augus'},
    {   'answer': 'Kochi',
        'context': 'pal Muthoot Institute of Technology & Science Varikoli '
                   'P.O., Puthencruz, Kochi – 682308, Ernakulam District. Ph. '
                   '0484 – 2732100, 2732111, 2733011 Fax:'},
    {   'answer': 'Varikoli overlooking the Kochi-Madurai National Highway',
        'context': '7 kms from the MITS Campus. MITS is situated at Varikoli '
                   'overlooking the Kochi-Madurai National H

In [None]:
# Persist the retriever and the FAISS document store to disk, then reload both
# once as a round-trip check. Each artifact is saved and loaded exactly once;
# repeating the same save/load pair adds I/O without changing the result.
retriever.save("context_model_retriever")
document_store.save("model")
# document_store.save("context_model_store.faiss")

doc_tmp = FAISSDocumentStore.load("model")
ret_tmp = DensePassageRetriever.load("context_model_retriever", doc_tmp)

In [None]:
# Rebuild the QA pipeline from the artifacts saved on disk.
document_store = FAISSDocumentStore.load("model")
# NOTE(review): "roberta_base_squad2" differs from the
# "deepset/roberta-base-squad2" id used for the first reader, and no visible
# cell saves a reader under this name. Presumably a local directory; confirm
# it exists before running on a fresh checkout.
reader = FARMReader(model_name_or_path="roberta_base_squad2", use_gpu=True, num_processes=0)
# Load from the directory the save cell writes ("context_model_retriever");
# nothing in this notebook ever saves a "context_model" directory.
retriever = DensePassageRetriever.load("context_model_retriever", document_store)
pipeline = ExtractiveQAPipeline(reader, retriever)

In [6]:
# Query the reloaded pipeline with its default parameters.
prediction = pipeline.run(query="Where is MITS located")

  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.42s/ Batches]


In [31]:
# Keep a second handle on the pipeline result under a shorter name.
ans = prediction


In [34]:
# Read back the query string stored under the "query" key of the result.
ans["query"]

'Where is MITS located'

In [17]:
# Take the document id from the first answer in the result. `ans` is bound to
# the whole prediction dict (it is subscripted with "query" above), which has
# no `document_id` attribute; that attribute lives on the individual answers.
# NOTE(review): `id` shadows the builtin; the name is kept because the next
# cell reads it.
id = prediction["answers"][0].document_id
# document_store.get_all_documents

In [19]:
# Fetch the stored document for `id` and display its metadata.
source_document = document_store.get_document_by_id(id)
source_document.meta

{'name': 'mits_departments_computer-science-and-engineering_.txt',
 'vector_id': '68'}

In [29]:
import json

# Flatten each answer into a plain dict holding the source file name, the
# answer text and its surrounding context.
# NOTE(review): the key is spelled "documnet_link" (sic); it is kept unchanged
# because downstream consumers may depend on the exact key.
answers = []
for answer in prediction["answers"]:
    answers.append(
        {
            "documnet_link": answer.meta["name"],
            "result": answer.answer,
            "context": answer.context,
        }
    )

answers



[{'documnet_link': 'mits_departments_computer-science-and-engineering_.txt',
  'result': 'Ernakulam',
  'context': 'ering (CSE) at the Muthoot Institute of Technology and Science (MITS), Ernakulam! As you glance through the faculty profile, I am sure that you will a'},
 {'documnet_link': 'mits_contact-us_.txt',
  'result': 'Varikoli',
  'context': ' approximately 8 kms &17 kms from the MITS Campus. MITS is situated at Varikoli overlooking the Kochi-Madurai National Highway about 5 kms from Thiruv'},
 {'documnet_link': 'mits_departments_computer-science-and-engineering_.txt',
  'result': 'Muthoot Institute of Technology and Science (MITS) Puthencruz',
  'context': 'r Science and Engineering Department profile Muthoot Institute of Technology and Science (MITS) Puthencruz was established in May 2013 and started its'},
 {'documnet_link': 'mits_admission_contact_.txt',
  'result': 'Puthencruz, Kochi',
  'context': 'Principal Muthoot Institute of Technology & Science Varikoli P.O., Puthencru

In [22]:
# Display `ans`.
# NOTE(review): the saved output below shows a single Answer object, while the
# visible assignment binds `ans` to the whole prediction. The output is
# presumably left over from an earlier out-of-order run; confirm and re-run.
ans

<Answer {'answer': 'Ernakulam', 'type': 'extractive', 'score': 0.9459921419620514, 'context': 'ering (CSE) at the Muthoot Institute of Technology and Science (MITS), Ernakulam! As you glance through the faculty profile, I am sure that you will a', 'offsets_in_document': [{'start': 2552, 'end': 2561}], 'offsets_in_context': [{'start': 71, 'end': 80}], 'document_id': 'd767f7197da2215466af2d02f5d2a858', 'meta': {'name': 'mits_departments_computer-science-and-engineering_.txt', 'vector_id': '68'}}>