In [1]:
%load_ext autoreload
%autoreload 2
import os
import re
import json
import copy
import sys
from tqdm import tqdm
from pprint import pprint

import matplotlib.pyplot as plt
import seaborn as sns

import pytrec_eval
import numpy as np
import pandas as pd
from elasticsearch import Elasticsearch
from haystack.pipelines import Pipeline
from haystack.nodes import BM25Retriever, ElasticsearchRetriever
from haystack.document_stores import ElasticsearchDocumentStore
from haystack.nodes import BM25Retriever, SentenceTransformersRanker
from haystack.nodes.reader import FARMReader
from haystack.utils import print_answers
import warnings
warnings.filterwarnings('ignore')

sys.path.append('../../')
import globals
from elastic_search_utils import elastic_utils
from haystack_utils.retrievers import BioASQ_Retriever
import bioasq_eval
import haystack_util

working_folder = globals.PATH.home + '/data/working_folder'
eval_home = globals.PATH.eval_home + '/'
gs_google_docs = eval_home + '/examples/aueb_google_docs/aueb_nlp-bioasq6b-submissions/'
index_name = globals.BIOASQ.index + 'working_folder'
model_id = 'doc_retrieval_test'

es = Elasticsearch(globals.ES.server)

# set document store
document_store = ElasticsearchDocumentStore()

Home path : /opt/bioasq/col-un-bioasq11
Eval path : /opt/bioasq/Evaluation-Measures


In [3]:
test_batch_doc = f'{globals.PATH.home}/data/11b_testset/BioASQ-task11bPhaseA-testset2.json'
test_batch_json = json.load(open(test_batch_doc))

In [4]:
# set document store
document_store = ElasticsearchDocumentStore()
# create the retriever
retriever = BioASQ_Retriever(document_store = document_store)

# create the Sentence Transformer Ranker
ranker = SentenceTransformersRanker(model_name_or_path="cross-encoder/ms-marco-MiniLM-L-12-v2")

# create the Query Pipeline
pipeline = Pipeline()

# add bm25 retriever
pipeline.add_node(component=retriever, name="BM25Retriever", inputs=["Query"])
pipeline.add_node(component=ranker, name="Ranker", inputs=["BM25Retriever"])
my_model = "deepset/roberta-base-squad2"
reader = FARMReader(model_name_or_path=my_model, use_gpu=True)
pipeline.add_node(component=reader, name="Reader", inputs=["Ranker"])

# run the pipeline
prediction = pipeline.run(query="Which factors drive replisome disassembly during DNA replication termination and mitosis", params={"BM25Retriever": {"top_k": 100}})

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

In [5]:
prediction

{'query': 'Which factors drive replisome disassembly during DNA replication termination and mitosis',
 'no_ans_gap': 8.580459594726562,
 'answers': [<Answer {'answer': 'Cullin ubiquitin ligases', 'type': 'extractive', 'score': 0.9014731049537659, 'context': 'Cullin ubiquitin ligases drive replisome disassembly during DNA replication termination. In worm, frog and mouse cells, CUL2LRR1 is required to ubiqui', 'offsets_in_document': [{'start': 0, 'end': 24}], 'offsets_in_context': [{'start': 0, 'end': 24}], 'document_ids': ['34195792'], 'meta': {'title': 'Reconstitution of human CMG helicase ubiquitylation by CUL2LRR1 and multiple E2 enzymes.', 'issue': '478(14)', 'pages': '2825-2842', 'abstract': 'Cullin ubiquitin ligases drive replisome disassembly during DNA replication termination. In worm, frog and mouse cells, CUL2LRR1 is required to ubiquitylate the MCM7 subunit of the CMG helicase. Here, we show that cullin ligases also drive CMG-MCM7 ubiquitylation in human cells, thereby making

In [6]:
def get_snipet_from_answer(pred):
    is_extractive = pred.type == 'extractive'
    answer = {}
    answer['document'] =  globals.BIOASQ.doc_relative_url + pred.document_ids[0]
    answer['text'] = pred.context
    answer['offsetInBeginSection'] = pred.meta['abstract'].rfind(pred.context)
    answer['offsetInEndSection'] = answer['offsetInBeginSection'] + len(pred.context)
    answer["beginSection"] = "abstract",
    answer["endSection"] = "abstract"
    return answer
    
for sample in tqdm(test_batch_json['questions'], position=0):
    prediction = pipeline.run(query=sample['body'], params={"BM25Retriever": {"top_k": 100}, "Ranker": {"top_k": 30}, "Reader": {"top_k": 30} })
    snippets = []
    doc_list_by_snnipets = []
    for ans in prediction['answers']:
        if ans.score > 0.3:
            snippet = get_snipet_from_answer(ans)
            doc_list_by_snnipets.append(snippet['document'])
            snippets.append(snippet)
            
    sample['snippets'] = snippets
    doc_list = [ globals.BIOASQ.doc_relative_url + d.id for d in prediction['documents'] ]
    #sample['documents'] = doc_list[0:10]
    sample['documents'] = list(set(doc_list_by_snnipets))
    
# mindlab base ans.score > 0.3:
# mindlab tns ans.score > 0.4 and only snnipets docs

  0%|                                                                                                                    | 0/75 [00:00<?, ?it/s]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

  1%|█▍                                                                                                          | 1/75 [00:05<06:26,  5.23s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

  3%|██▉                                                                                                         | 2/75 [00:10<06:10,  5.07s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

  4%|████▎                                                                                                       | 3/75 [00:15<06:26,  5.37s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

  5%|█████▊                                                                                                      | 4/75 [00:21<06:23,  5.40s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

  7%|███████▏                                                                                                    | 5/75 [00:25<05:51,  5.02s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

  8%|████████▋                                                                                                   | 6/75 [00:30<05:44,  5.00s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

  9%|██████████                                                                                                  | 7/75 [00:36<06:04,  5.36s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 11%|███████████▌                                                                                                | 8/75 [00:42<06:05,  5.46s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 12%|████████████▉                                                                                               | 9/75 [00:47<05:58,  5.43s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 13%|██████████████▎                                                                                            | 10/75 [00:54<06:10,  5.69s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 15%|███████████████▋                                                                                           | 11/75 [01:00<06:13,  5.83s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 16%|█████████████████                                                                                          | 12/75 [01:05<06:01,  5.73s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 17%|██████████████████▌                                                                                        | 13/75 [01:11<06:01,  5.84s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 19%|███████████████████▉                                                                                       | 14/75 [01:16<05:39,  5.56s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 20%|█████████████████████▍                                                                                     | 15/75 [01:22<05:28,  5.48s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 21%|██████████████████████▊                                                                                    | 16/75 [01:28<05:32,  5.63s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 23%|████████████████████████▎                                                                                  | 17/75 [01:33<05:24,  5.59s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 24%|█████████████████████████▋                                                                                 | 18/75 [01:39<05:30,  5.80s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 25%|███████████████████████████                                                                                | 19/75 [01:44<05:02,  5.39s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 27%|████████████████████████████▌                                                                              | 20/75 [01:48<04:40,  5.09s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 28%|█████████████████████████████▉                                                                             | 21/75 [01:54<04:52,  5.41s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 29%|███████████████████████████████▍                                                                           | 22/75 [01:59<04:39,  5.26s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 31%|████████████████████████████████▊                                                                          | 23/75 [02:06<04:51,  5.60s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 32%|██████████████████████████████████▏                                                                        | 24/75 [02:12<04:59,  5.87s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 33%|███████████████████████████████████▋                                                                       | 25/75 [02:18<04:51,  5.83s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 35%|█████████████████████████████████████                                                                      | 26/75 [02:23<04:32,  5.56s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 36%|██████████████████████████████████████▌                                                                    | 27/75 [02:29<04:40,  5.84s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 37%|███████████████████████████████████████▉                                                                   | 28/75 [02:36<04:52,  6.22s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 39%|█████████████████████████████████████████▎                                                                 | 29/75 [02:43<04:51,  6.33s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 40%|██████████████████████████████████████████▊                                                                | 30/75 [02:49<04:38,  6.20s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 41%|████████████████████████████████████████████▏                                                              | 31/75 [02:54<04:25,  6.03s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 43%|█████████████████████████████████████████████▋                                                             | 32/75 [02:58<03:41,  5.14s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 44%|███████████████████████████████████████████████                                                            | 33/75 [03:03<03:43,  5.31s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 45%|████████████████████████████████████████████████▌                                                          | 34/75 [03:09<03:45,  5.51s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 47%|█████████████████████████████████████████████████▉                                                         | 35/75 [03:15<03:43,  5.60s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 48%|███████████████████████████████████████████████████▎                                                       | 36/75 [03:20<03:35,  5.53s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 49%|████████████████████████████████████████████████████▊                                                      | 37/75 [03:25<03:14,  5.11s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 51%|██████████████████████████████████████████████████████▏                                                    | 38/75 [03:31<03:21,  5.45s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 52%|███████████████████████████████████████████████████████▋                                                   | 39/75 [03:36<03:15,  5.42s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 53%|█████████████████████████████████████████████████████████                                                  | 40/75 [03:42<03:11,  5.47s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 55%|██████████████████████████████████████████████████████████▍                                                | 41/75 [03:47<03:06,  5.48s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 56%|███████████████████████████████████████████████████████████▉                                               | 42/75 [03:52<02:53,  5.27s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 57%|█████████████████████████████████████████████████████████████▎                                             | 43/75 [03:58<02:52,  5.39s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 59%|██████████████████████████████████████████████████████████████▊                                            | 44/75 [04:04<02:53,  5.60s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 60%|████████████████████████████████████████████████████████████████▏                                          | 45/75 [04:09<02:42,  5.42s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 61%|█████████████████████████████████████████████████████████████████▋                                         | 46/75 [04:16<02:51,  5.92s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 63%|███████████████████████████████████████████████████████████████████                                        | 47/75 [04:22<02:43,  5.85s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 64%|████████████████████████████████████████████████████████████████████▍                                      | 48/75 [04:25<02:16,  5.05s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 65%|█████████████████████████████████████████████████████████████████████▉                                     | 49/75 [04:29<02:07,  4.92s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 67%|███████████████████████████████████████████████████████████████████████▎                                   | 50/75 [04:35<02:11,  5.24s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 68%|████████████████████████████████████████████████████████████████████████▊                                  | 51/75 [04:41<02:07,  5.32s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 69%|██████████████████████████████████████████████████████████████████████████▏                                | 52/75 [04:46<02:03,  5.36s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 71%|███████████████████████████████████████████████████████████████████████████▌                               | 53/75 [04:52<02:00,  5.46s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 72%|█████████████████████████████████████████████████████████████████████████████                              | 54/75 [04:58<01:56,  5.56s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 73%|██████████████████████████████████████████████████████████████████████████████▍                            | 55/75 [05:04<01:52,  5.65s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 75%|███████████████████████████████████████████████████████████████████████████████▉                           | 56/75 [05:09<01:44,  5.48s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 76%|█████████████████████████████████████████████████████████████████████████████████▎                         | 57/75 [05:13<01:32,  5.15s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 77%|██████████████████████████████████████████████████████████████████████████████████▋                        | 58/75 [05:20<01:36,  5.69s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 79%|████████████████████████████████████████████████████████████████████████████████████▏                      | 59/75 [05:25<01:29,  5.61s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 80%|█████████████████████████████████████████████████████████████████████████████████████▌                     | 60/75 [05:32<01:30,  6.04s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 81%|███████████████████████████████████████████████████████████████████████████████████████                    | 61/75 [05:39<01:25,  6.14s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 83%|████████████████████████████████████████████████████████████████████████████████████████▍                  | 62/75 [05:44<01:16,  5.89s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 84%|█████████████████████████████████████████████████████████████████████████████████████████▉                 | 63/75 [05:50<01:09,  5.75s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 85%|███████████████████████████████████████████████████████████████████████████████████████████▎               | 64/75 [05:54<01:00,  5.47s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 87%|████████████████████████████████████████████████████████████████████████████████████████████▋              | 65/75 [06:00<00:54,  5.45s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 88%|██████████████████████████████████████████████████████████████████████████████████████████████▏            | 66/75 [06:05<00:47,  5.28s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 89%|███████████████████████████████████████████████████████████████████████████████████████████████▌           | 67/75 [06:11<00:45,  5.72s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 91%|█████████████████████████████████████████████████████████████████████████████████████████████████          | 68/75 [06:17<00:39,  5.67s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 92%|██████████████████████████████████████████████████████████████████████████████████████████████████▍        | 69/75 [06:24<00:35,  5.99s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 93%|███████████████████████████████████████████████████████████████████████████████████████████████████▊       | 70/75 [06:30<00:29,  5.96s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 95%|█████████████████████████████████████████████████████████████████████████████████████████████████████▎     | 71/75 [06:37<00:25,  6.41s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 96%|██████████████████████████████████████████████████████████████████████████████████████████████████████▋    | 72/75 [06:43<00:18,  6.18s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 97%|████████████████████████████████████████████████████████████████████████████████████████████████████████▏  | 73/75 [06:47<00:11,  5.55s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 74/75 [06:52<00:05,  5.53s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 75/75 [06:58<00:00,  5.58s/it]


In [7]:
test_batch_json

{'questions': [{'id': '63f03006f36125a426000018',
   'type': 'summary',
   'body': 'What were the results of the DESTINY-Breast04 Trial?',
   'snippets': [{'document': 'http://www.ncbi.nlm.nih.gov/pubmed/35665782',
     'text': "st cancer, trastuzumab deruxtecan resulted in significantly longer progression-free and overall survival than the physician's choice of chemotherapy. ",
     'offsetInBeginSection': 2265,
     'offsetInEndSection': 2415,
     'beginSection': ('abstract',),
     'endSection': 'abstract'},
    {'document': 'http://www.ncbi.nlm.nih.gov/pubmed/36386213',
     'text': 'er (ABC). DESTINY-Breast04 (NCT03734029) has demonstrated the antitumor activity of trastuzumab deruxtecan (T-DXd). However, the balance between the e',
     'offsetInBeginSection': 162,
     'offsetInEndSection': 312,
     'beginSection': ('abstract',),
     'endSection': 'abstract'},
    {'document': 'http://www.ncbi.nlm.nih.gov/pubmed/31022475',
     'text': 'fter evaluating the results of these 6 