In [1]:
import numpy as np
import pandas as pd
import json
import glob
import os
import re
from bs4 import BeautifulSoup 

from haystack import Finder
from haystack.preprocessor.cleaning import clean_wiki_text
from haystack.preprocessor.utils import convert_files_to_dicts, fetch_archive_from_http
from haystack.reader.farm import FARMReader
from haystack.reader.transformers import TransformersReader
from haystack.utils import print_answers
from haystack.document_store.elasticsearch import ElasticsearchDocumentStore
from haystack.retriever.sparse import ElasticsearchRetriever

11/07/2020 17:23:48 - INFO - faiss -   Loading faiss.


## Define the QA model

In [2]:
document_store = ElasticsearchDocumentStore(host="localhost", username="", password="", index="document")

11/07/2020 17:23:53 - INFO - elasticsearch -   HEAD http://localhost:9200/document [status:200 request:0.050s]
11/07/2020 17:23:53 - INFO - elasticsearch -   GET http://localhost:9200/document [status:200 request:0.005s]
11/07/2020 17:23:53 - INFO - elasticsearch -   PUT http://localhost:9200/document/_mapping [status:200 request:0.096s]
11/07/2020 17:23:53 - INFO - elasticsearch -   HEAD http://localhost:9200/label [status:200 request:0.004s]


In [8]:
document_store.delete_all_documents(index = 'document')

10/11/2020 22:02:48 - INFO - elasticsearch -   POST http://localhost:9200/document/_delete_by_query [status:200 request:0.693s]


In [6]:
document_store.get_document_count()

11/07/2020 13:57:44 - INFO - elasticsearch -   POST http://localhost:9200/document/_count [status:200 request:0.093s]


0

In [10]:
retriever = ElasticsearchRetriever(document_store=document_store)

In [11]:
# use the pretrained bert model on the biology pubmed dataset
reader = FARMReader("ktrapeznikov/biobert_v1.1_pubmed_squad_v2", use_gpu=False)

11/07/2020 17:31:34 - INFO - farm.utils -   device: cpu n_gpu: 0, distributed training: False, automatic mixed precision training: None
11/07/2020 17:31:34 - INFO - farm.infer -   Could not find `ktrapeznikov/biobert_v1.1_pubmed_squad_v2` locally. Try to download from model hub ...
	 We guess it's an *ENGLISH* model ... 
	 If not: Init the language model by supplying the 'language' param.
11/07/2020 17:31:43 - INFO - farm.utils -   device: cpu n_gpu: 0, distributed training: False, automatic mixed precision training: None
11/07/2020 17:31:43 - INFO - farm.infer -   Got ya 7 parallel workers to do inference ...
11/07/2020 17:31:43 - INFO - farm.infer -    0    0    0    0    0    0    0 
11/07/2020 17:31:43 - INFO - farm.infer -   /w\  /w\  /w\  /w\  /w\  /w\  /w\
11/07/2020 17:31:43 - INFO - farm.infer -   /'\  / \  /'\  /'\  / \  / \  /'\
11/07/2020 17:31:43 - INFO - farm.infer -               


In [12]:
finder = Finder(reader, retriever)

## Articles XML files

In [7]:
all_doc_files = glob.glob('../data/HACKXML0000000004/**/*.xml', recursive=True)[1:]

In [8]:
len(all_doc_files)

263

In [9]:
def nf2020toDict():
    paths = "../data/HACKXML0000000004/**/*.xml"
    target_tags = ['simple-para', 'para']

    docs = []
    for path in glob.glob(paths)[1:]:
        with open(path, 'r') as f: 
            data = BeautifulSoup(f.read() , "xml") 
        temp = {}
        temp["meta"] = {}
        temp["meta"]['title'] = data.find('title').getText()
        temp["meta"]["paper_id"] = data.find('aid').getText()
        temp["meta"]["doi"] = data.find('doi').getText()
        temp["meta"]["jid"] = data.find('jid').getText()
        
        paper_text = [t for t in data.find_all(text=True) if t.parent.name in target_tags]
        temp["text"] = ''.join(paper_text)
        
        docs.append(temp)
    return docs

In [10]:
nf_docs = nf2020toDict()

In [37]:
#nf_docs[1]['text']

In [26]:
#dicts = convert_files_to_dicts(dir_path=doc_dir, clean_func=clean_wiki_text, split_paragraphs=True)

In [11]:
# Now, let's write the dicts containing documents to our DB.
document_store.write_documents(nf_docs)

11/07/2020 13:58:06 - INFO - elasticsearch -   POST http://localhost:9200/_bulk?refresh=wait_for [status:200 request:2.294s]


In [15]:
prediction = finder.get_answers(question="What is Neurofibromatosis", top_k_retriever=10, top_k_reader=5)

11/07/2020 13:58:40 - INFO - elasticsearch -   POST http://localhost:9200/document/_search [status:200 request:0.163s]
11/07/2020 13:58:40 - INFO - haystack.retriever.sparse -   Got 10 candidates from retriever
11/07/2020 13:58:40 - INFO - haystack.finder -   Reader is looking for detailed answer in 127755 chars ...
Inferencing Samples: 100%|██████████| 1/1 [00:17<00:00, 17.16s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:04<00:00,  4.09s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:05<00:00,  5.30s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:09<00:00,  9.75s/ Batches]
Inferencing Samples: 100%|██████████| 2/2 [00:52<00:00, 26.19s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:09<00:00,  9.83s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:11<00:00, 11.48s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:12<00:00, 12.15s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:16<00:00, 16.38s/ Batches]
Inferencing Samples: 100%

In [16]:
print_answers(prediction, details="minimal")

[   {   'answer': 'benign neurofibroma',
        'context': 'is muscle in 2001. The pathological diagnoses were '
                   'lipofibroma or benign neurofibroma.When he presented at '
                   'our department in 2005, computed tomography'},
    {   'answer': 'subperiosteal neurofibromas',
        'context': 'e represents only the third case in the medical literature '
                   'of subperiosteal neurofibromas presenting with '
                   'subperiosteal hemorrhage. As in this case, t'},
    {   'answer': 'neurofibromatosis',
        'context': 'to be differentiated from a long list of diagnoses . '
                   'Historically, neurofibromatosis was the most common '
                   'mistaken diagnosis. The most important step i'},
    {   'answer': 'dermal neurofibroma',
        'context': 'ciated MPNST patients.Previously, we established a mouse '
                   'model of dermal neurofibroma (dNF) where we demonstrate '
                   

## Other paper docs only containing the abstract

In [17]:
ctf_hackathon_doc = pd.read_json('../data/ctf-hackathon-upload.json', lines=True)

In [18]:
ctf_hackathon_doc.columns

Index(['Eid', 'abstract', 'affiliation_organization', 'title', 'year',
       'sourcetitle', 'issn_print', 'doi', 'openaccess', 'meta_language',
       'References', 'pmid', 'publishername', 'keywords', 'funding_text'],
      dtype='object')

In [19]:
ctf_hackathon_doc.shape

(5702, 15)

In [20]:
len(ctf_hackathon_doc.doi.unique())

5129

In [25]:
with open('../data/HACKXML0000000004/dataset.xml', 'r') as f: 
    papers_info = f.read() 
papers_info_data = BeautifulSoup(papers_info, "xml") 

In [26]:
np.unique([tag.name for tag in papers_info_data.find_all()])

array(['aid', 'article-number', 'collection-title', 'dataset',
       'dataset-action', 'dataset-content', 'dataset-properties',
       'dataset-unique-ids', 'delayed-sponsored-article', 'doi',
       'dtd-version', 'embargo', 'files-info', 'filesize',
       'funding-body-id', 'issn', 'jid', 'jid-aid', 'journal-item',
       'journal-item-properties', 'journal-item-unique-ids', 'ml',
       'online-publication-date', 'pathname', 'pii', 'pit',
       'production-process', 'production-type', 'profile-code',
       'profile-dataset-id', 'purpose', 'sponsored-access', 'stage',
       'timestamp', 'type', 'version', 'version-number', 'weight'],
      dtype='<U25')

In [27]:
paper_xml_doi_ls = np.unique([t.getText() for t in papers_info_data.find_all('doi')])

In [28]:
# other nf docs
other_nfpaper_docs = ctf_hackathon_doc[~ctf_hackathon_doc.doi.isin(paper_xml_doi_ls)].copy()

In [29]:
other_nfpaper_docs.shape

(5440, 15)

In [38]:
other_nfpaper_docs.sample(5)

Unnamed: 0,Eid,abstract,affiliation_organization,title,year,sourcetitle,issn_print,doi,openaccess,meta_language,References,pmid,publishername,keywords,funding_text
2138,84872359373,Background context: An extraforaminal disc herniation may present as a retro...,"[[Department of Neurologic Surgery, Mayo Clinic], [Department of Radiology, ...",L5-S1 extraforaminal intraneural disc herniation mimicking a malignant perip...,2012,Spine Journal,[15299430],10.1016/j.spinee.2012.10.033,,English,"[Tarlov, I.M., , Spinal perineurial and meningeal cysts. (1970), Journal of ...",23246211.0,Elsevier Inc.,"[Herniated disc, Lumbar, Malignant peripheral nerve sheath tumor, Retroperit...",
3406,84897930288,Gastrointestinal stromal tumors (GIST) are the most common mesenchymatous ne...,"[[Servicios de Genética, Hospital Provincial Neuquén], [Servicios de Oncolog...","Dysphagia, melanosis, gastrointestinal stromal tumors and a germinal mutatio...",2014,Acta Gastroenterologica Latinoamericana,[03009033],,,English,"[Beghini, A., Tibiletti, M., Roversi, G., Chiaravalli, A., Serio, G., Capell...",24847623.0,Sociedad Argentina de Gastroenterologia,"[Dysphagia, GIST, KIT]",
3995,85076330369,Background Neurofibromatosis type 1 (NF1) is a rare genetic disorder with us...,"[[Department of Psychology, University of Oslo], [Frambu Resource Centre for...",Health complaints and work experiences among adults with neurofibromatosis 1,2019,Occupational Medicine,[09627480],10.1093/occmed/kqz134,,English,"[Abramowicz, A., Gos, M., , Neurofibromin in neurofibromatosis type 1 - muta...",31674646.0,Oxford University Press,"[Disability, NF1, Neurofibromatosis type 1, Rare disorders, Subjective healt...",
3827,84923699181,Summary: Malignant peripheral nerve sheath tumour (MPNST) is a rare variety ...,"[[Department of Radiation Oncology, Dr BRA Institute Rotary Cancer Hospital,...",Malignant peripheral nerve sheath tumour of penis,2015,Andrologia,[03034569],10.1111/and.12267,,English,"[Mortell, A., Amjad, B., Breatnach, F., Devaney, D., Puri, P., , Penile mali...",24635732.0,Blackwell Publishing Ltd,"[Malignant peripheral nerve sheath tumour, Multimodality, Penis, Treatment]",
1843,84925385178,We present the first case of pediatric intracranial Mycobacterium abscessus ...,"[[Department of Pediatrics, Georgia Regents University, University of Georgi...",Intracranial Mycobacterium abscessus infection in a healthy toddler,2015,Journal of Pediatric Infectious Diseases,[13057707],10.1097/INF.0000000000000520,,English,"[Kiliç, S., Tezcan, I., Sanal, O., Ersoy, F., , Common variable immunodefici...",25144796.0,IOS Press,"[Brain/pathology, Female, Humans, M. Abscessus, Mycobacterium infections/mic...",


In [40]:
other_nfpaper_docs_dicts = []
for i, row in other_nfpaper_docs.iterrows():
    temp = {}
    if isinstance(row['abstract'], str):
        temp['text'] = row['abstract']
        temp['meta'] = {}
        temp['meta']['title'] = row['title']
        temp['meta']['doi'] = row['doi'] if isinstance(row['doi'], str) else ''
        #temp['meta']['References'] = ', '.join(row['References']) if isinstance(row['References'], list) else ''
        temp['meta']['keywords'] = ' '.join(row['keywords']) if isinstance(row['keywords'], list) and len(row['keywords'])>0 else ''
        other_nfpaper_docs_dicts.append(temp)

In [41]:
len(other_nfpaper_docs_dicts)

5018

In [42]:
other_nfpaper_docs_dicts[0]

{'text': "Introduction. Neurofibromatosis type 1 is an autosomal dominant disorder that occurs across all ethnic groups and affects approximately one in 4000 individuals. One of the most noticeable characteristics of the disease is the development of neurofibromas. Case presentation. A total of 258 patients (131 women, 127 men) with neurofibromatosis type 1 were evaluated between 1994 and 2004 in our hospital's dermatology department. Nine patients (3.45%, 95% confidence limits 1.22 to 5.68) had neurofibromas of the breast. One of these nine patients presented with an extensive congenital plexiform neurofibroma in the outer quadrants of her right breast, extending to the nipple-areolar complex. Meanwhile, three patients had more than one neurofibroma on the nipple-areolar complexes. Three patients had a family history of neurofibroma. Over the years 1994 to 2004, the cutaneous lesions were not associated with any malignancies. Presenting symptoms were related to conditions such as incr

In [43]:
document_store.get_document_count()

11/07/2020 16:55:09 - INFO - elasticsearch -   POST http://localhost:9200/document/_count [status:200 request:0.088s]


262

In [44]:
document_store.write_documents(other_nfpaper_docs_dicts)

11/07/2020 16:55:13 - INFO - elasticsearch -   POST http://localhost:9200/_bulk?refresh=wait_for [status:200 request:1.117s]
11/07/2020 16:55:14 - INFO - elasticsearch -   POST http://localhost:9200/_bulk?refresh=wait_for [status:200 request:1.006s]
11/07/2020 16:55:15 - INFO - elasticsearch -   POST http://localhost:9200/_bulk?refresh=wait_for [status:200 request:1.020s]
11/07/2020 16:55:17 - INFO - elasticsearch -   POST http://localhost:9200/_bulk?refresh=wait_for [status:200 request:1.029s]
11/07/2020 16:55:18 - INFO - elasticsearch -   POST http://localhost:9200/_bulk?refresh=wait_for [status:200 request:1.026s]
11/07/2020 16:55:19 - INFO - elasticsearch -   POST http://localhost:9200/_bulk?refresh=wait_for [status:200 request:1.021s]
11/07/2020 16:55:20 - INFO - elasticsearch -   POST http://localhost:9200/_bulk?refresh=wait_for [status:200 request:1.029s]
11/07/2020 16:55:21 - INFO - elasticsearch -   POST http://localhost:9200/_bulk?refresh=wait_for [status:200 request:1.021s]


In [45]:
document_store.get_document_count()

11/07/2020 16:55:24 - INFO - elasticsearch -   POST http://localhost:9200/document/_count [status:200 request:0.007s]


5280

In [46]:
other_nfpaper_docs.sample(4)['abstract']

2528    We report a case of synchronous multiple colon adenocarcinomas in a patient ...
5481                                                                                NaN
4221    Neurofibromin regulates cell motility via three distinct GTPase pathways act...
4120    Optic pathway gliomas (OPGs) occur in 1520 of children with neurofibromatosi...
Name: abstract, dtype: object

In [47]:
%timeit
prediction = finder.get_answers(question="Will NF1 turn into NF2", top_k_retriever=10, top_k_reader=5)

11/07/2020 16:55:46 - INFO - elasticsearch -   POST http://localhost:9200/document/_search [status:200 request:0.031s]
11/07/2020 16:55:46 - INFO - haystack.retriever.sparse -   Got 10 candidates from retriever
11/07/2020 16:55:46 - INFO - haystack.finder -   Reader is looking for detailed answer in 12810 chars ...
Inferencing Samples: 100%|██████████| 1/1 [00:03<00:00,  3.20s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.74s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.19s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.70s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.17s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.67s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  1.72 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.66s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  1.71 Batches/s]
Inferencing Samples: 100%|

In [102]:
print_answers(prediction, details="minimal")

[   {   'answer': 'There is no targeted drug therapy',
        'context': ' Purpose: There is no targeted drug therapy for NF2 '
                   'patients, and surgery or radiosurgery is not always '
                   'effective. Therefore, the exploration of new t'},
    {   'answer': 'This will generate new ideas about the biological '
                  'mechanisms involved in autism, which in turn may provide '
                  'new leads for the development of novel pharmaceutical '
                  'compounds.',
        'context': ' This will generate new ideas about the biological '
                   'mechanisms involved in autism, which in turn may provide '
                   'new leads for the development of novel pharmaceutical '
                   'compounds.'},
    {   'answer': 'neurofibromatosis type 2',
        'context': ' and researchers interested in neurofibromatosis type 1 '
                   '(NF1), neurofibromatosis type 2 (NF2), and schwannomatosis '
    

## Compiled PDF docs from the NF Registry website

In [3]:
document_store.get_document_count()

11/07/2020 17:24:07 - INFO - elasticsearch -   POST http://localhost:9200/document/_count [status:200 request:0.025s]


5280

In [4]:
pdf_dicts = convert_files_to_dicts(dir_path='../data/nf_info_pdfs/', split_paragraphs=True)

11/07/2020 17:24:13 - INFO - haystack.preprocessor.utils -   Converting ../data/nf_info_pdfs/Genetics of NF - Children-s Tumor Foundation.pdf
11/07/2020 17:24:13 - INFO - haystack.preprocessor.utils -   Converting ../data/nf_info_pdfs/Schwannomatosis - Children-s Tumor Foundation.pdf
11/07/2020 17:24:13 - INFO - haystack.preprocessor.utils -   Converting ../data/nf_info_pdfs/CTF_About_NF2_Info_Sheet-2019.pdf
11/07/2020 17:24:13 - INFO - haystack.preprocessor.utils -   Converting ../data/nf_info_pdfs/NF1 - Children-s Tumor Foundation.pdf
11/07/2020 17:24:13 - INFO - haystack.preprocessor.utils -   Converting ../data/nf_info_pdfs/About_NF_120315.pdf
11/07/2020 17:24:13 - INFO - haystack.preprocessor.utils -   Converting ../data/nf_info_pdfs/NF2 - Children-s Tumor Foundation.pdf
11/07/2020 17:24:13 - INFO - haystack.preprocessor.utils -   Converting ../data/nf_info_pdfs/About_Schwannomatosis_Fact_Sheet_2020-2.pdf


In [5]:
for p in pdf_dicts:
    p['text'] = re.sub(r'For more information on.*', '', ' '.join(p['text'].split('\n')))

In [6]:
pdf_docs = [p for p in pdf_dicts if not p['text'].startswith('Help end NF by joining the confidential NF Registry') and len(p['text'])>50]

In [7]:
len(pdf_docs)

7

In [13]:
document_store.write_documents(pdf_docs)

11/07/2020 17:32:15 - INFO - elasticsearch -   POST http://localhost:9200/_bulk?refresh=wait_for [status:200 request:0.119s]


In [14]:
document_store.get_document_count()

11/07/2020 17:32:25 - INFO - elasticsearch -   POST http://localhost:9200/document/_count [status:200 request:0.007s]


5287

In [43]:
nfregistry_docs = pd.DataFrame.from_dict(pdf_docs)
nfregistry_docs['document_identifie'] = nfregistry_docs['meta'].map(lambda x:x['name'].replace('.pdf', ''))
nfregistry_docs.rename(columns = {'text': 'document_text'}, inplace = True)
nfregistry_docs.drop(columns = 'meta', inplace=True)

In [44]:
nfregistry_docs

Unnamed: 0,document_text,document_identifie
0,Genetics of NF NF can occur in any family. Affected individuals can inhe fro...,Genetics of NF - Children-s Tumor Foundation
1,"Children's Tumor Foundat About Schwannomatosis Neurofibromatosis, also known...",Schwannomatosis - Children-s Tumor Foundation
2,"ABOUT NEUROFIBROMATOSIS TYPE 2 (NF2) Neurofibromatosis, or NF, is an under-...",CTF_About_NF2_Info_Sheet-2019
3,"Children's Tumor Foundation About NF1 Neurofibromatosis, also known as NF, i...",NF1 - Children-s Tumor Foundation
4,"N eurofibromatosis, or NF, is an under-recognized genetic disorder that can...",About_NF_120315
5,Common Signs of NF2 The appearance of the most common signs of NF2 usually v...,NF2 - Children-s Tumor Foundation
6,"ABOUT SCHWANNOMATOSIS Neurofibromatosis, or NF, is an under-recognized gene...",About_Schwannomatosis_Fact_Sheet_2020-2


In [45]:
nfregistry_docs.to_csv('../data/nfregistry_docs/nfregistry_info.csv')

## QA model validation 

In [15]:
prediction = finder.get_answers(question="how many types of NF", top_k_retriever=10, top_k_reader=5)

11/07/2020 17:33:08 - INFO - elasticsearch -   POST http://localhost:9200/document/_search [status:200 request:0.021s]
11/07/2020 17:33:08 - INFO - haystack.finder -   Got 10 candidates from retriever
11/07/2020 17:33:08 - INFO - haystack.finder -   Reader is looking for detailed answer in 18868 chars ...
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.17s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:02<00:00,  2.26s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.68s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.68s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.71s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  1.76 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:06<00:00,  6.92s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  1.64 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.13s/ Batches]
Inferencing Samples: 100%|██████████

In [16]:
print_answers(prediction, details="minimal")

[   {   'answer': 'three',
        'context': 'NF, is a genetic disorde that affects 1 in every 3,000 '
                   'people. There are three types NF: NF1, NF2, and '
                   'schwannomatosis. Neurofibromatosis type (NF1), '},
    {   'answer': 'two',
        'context': 'l-dominant disease with an incidence of 1/3,000 in live '
                   'births. There are two types of NF, NF 1 and NF 2, and NF 1 '
                   'is the most common type. This study'},
    {   'answer': 'two',
        'context': 'al-dominant disease with an incidence of 1 in 3000 live '
                   'births. There are two types of NF, NF 1 and NF 2, and NF 1 '
                   'is the most common. This study repo'},
    {   'answer': 'two or more neurofibromas of any type',
        'context': ' or 15 mm or larg post-pubertal individuals) Presence of '
                   'two or more neurofibromas of any type, or on more '
                   'plexiform neurofibromas Freckling under

In [141]:
prediction = finder.get_answers(question="what are some symtoms of NF2", top_k_retriever=10, top_k_reader=5)

10/13/2020 17:27:42 - INFO - elasticsearch -   POST http://localhost:9200/document/_search [status:200 request:0.095s]
10/13/2020 17:27:42 - INFO - haystack.retriever.sparse -   Got 10 candidates from retriever
10/13/2020 17:27:42 - INFO - haystack.finder -   Reader is looking for detailed answer in 17740 chars ...
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  1.82 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  2.07 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  2.16 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  2.20 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  2.74 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.57s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  1.95 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:02<00:00,  2.53s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  1.27 Batches/s]
Inferencing Samples: 100%|

In [142]:
print_answers(prediction, details="minimal")

[   {   'answer': 'neurofibromatosis type 2',
        'context': 'r: (1) a malignant peripheral nerve sheath tumour (MPNST); '
                   '(2) neurofibromatosis type 2 (NF2); or (3) schwannomatosis '
                   '(SWNTS). The progressively enlar'},
    {   'answer': 'ocular, dermatological, and neurological symptoms',
        'context': 'as, children with NF2 most frequently present with ocular, '
                   'dermatological, and neurological symptoms. Arteriopathy, a '
                   'well-established feature in neur'},
    {   'answer': 'neurofibromatosis type 2',
        'context': 'for preoperative planning. QUESTION 6 How should patients '
                   'with neurofibromatosis type 2 (NF2) and vestibular '
                   'schwannoma be imaged and over what follow'},
    {   'answer': 'malformation of midline guideposts',
        'context': ' and prevent ectopic Slit2 expression. Loss of Nf2 causes '
                   'malformation of midline gui

In [17]:
prediction = finder.get_answers(question="how many mutations related to NF1", top_k_retriever=5, top_k_reader=3)

11/07/2020 17:34:24 - INFO - elasticsearch -   POST http://localhost:9200/document/_search [status:200 request:0.030s]
11/07/2020 17:34:24 - INFO - haystack.finder -   Got 5 candidates from retriever
11/07/2020 17:34:24 - INFO - haystack.finder -   Reader is looking for detailed answer in 7989 chars ...
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.16s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.15s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.68s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.15s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.71s/ Batches]


In [18]:
print_answers(prediction, details="minimal")

[   {   'answer': 'very similar mutations',
        'context': 'isease causing mutations." We show that the splicing '
                   'outcome of very similar mutations can be very different '
                   'due to variations in trans-acting factor('},
    {   'answer': 'KRAS mutants that strongly interacted with and '
                  'competitively inhibited NF1 drove wild-type RAS activation '
                  'in an EGFR-independent manner, whereas KRASG13D',
        'context': ' KRAS mutants that strongly interacted with and '
                   'competitively inhibited NF1 drove wild-type RAS activation '
                   'in an EGFR-independent manner, whereas KRASG13D'},
    {   'answer': 'neurofibromatosis type 1',
        'context': 'ts with tumours due to multiple endocrine neoplasia type 2 '
                   'and neurofibromatosis type 1 (NF1) showed similar '
                   'catecholamine metabolite and secretory pr'}]


In [21]:
prediction = finder.get_answers(question="what are the clinical manifestations for mutation Y489C in NF1 ", top_k_retriever=10, top_k_reader=5)

11/07/2020 17:35:30 - INFO - elasticsearch -   POST http://localhost:9200/document/_search [status:200 request:0.024s]
11/07/2020 17:35:30 - INFO - haystack.finder -   Got 10 candidates from retriever
11/07/2020 17:35:30 - INFO - haystack.finder -   Reader is looking for detailed answer in 15015 chars ...
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  1.34 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.72s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.67s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.66s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.72s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  1.68 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.66s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.17s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.71s/ Batches]
Inferencing Samples: 100%|██████████

In [22]:
print_answers(prediction, details="minimal")

[   {   'answer': 'neurofibromas only and a combination of neurofibromas and '
                  'pigmentary changes',
        'context': 'linical manifestations are reported: neurofibromas only '
                   'and a combination of neurofibromas and pigmentary changes. '
                   'Segmental NF1 is an example of mosa'},
    {   'answer': 'intracranial hypotension',
        'context': ' They describe the clinical presentation and '
                   'manifestations of intracranial hypotension, as well as the '
                   'pertinent investigations. They also review the'},
    {   'answer': 'also had bilateral pheochromocytomas and mucosal neuromas',
        'context': 'was disease free after surgical resection. She also had '
                   'bilateral pheochromocytomas and mucosal neuromas leading '
                   'to the clinical diagnosis of MEN-2B. '},
    {   'answer': 'café-au-lait macules and skinfold freckling',
        'context': 'c criteria for ne