In [2]:
%load_ext autoreload
%autoreload 2
import os
import re
import json
import copy
import sys
from tqdm import tqdm
from pprint import pprint

import matplotlib.pyplot as plt
import seaborn as sns

import pytrec_eval
import numpy as np
import pandas as pd
from elasticsearch import Elasticsearch
from haystack.pipelines import Pipeline
from haystack.nodes import BM25Retriever, ElasticsearchRetriever
from haystack.document_stores import ElasticsearchDocumentStore
from haystack.nodes import BM25Retriever, SentenceTransformersRanker
from haystack.nodes.reader import FARMReader
from haystack.utils import print_answers
import warnings
warnings.filterwarnings('ignore')

sys.path.append('../../')
import globals
from elastic_search_utils import elastic_utils
from haystack_utils.retrievers import BioASQ_Retriever
import bioasq_eval
import haystack_util

# Filesystem locations derived from the project-level `globals` settings.
working_folder = f'{globals.PATH.home}/data/working_folder'
eval_home = f'{globals.PATH.eval_home}/'
# NOTE: eval_home already ends with '/', so this path contains a double slash.
gs_google_docs = f'{eval_home}/examples/aueb_google_docs/aueb_nlp-bioasq6b-submissions/'

# Identifiers for the Elasticsearch index and for this run.
index_name = f'{globals.BIOASQ.index}working_folder'
model_id = 'doc_retrieval_test'

# Elasticsearch client created from the configured server setting.
es = Elasticsearch(globals.ES.server)

# Haystack document store created with its default arguments.
document_store = ElasticsearchDocumentStore()

Home path : /opt/bioasq/col-un-bioasq11
Eval path : /opt/bioasq/Evaluation-Measures


In [3]:
# Path of the test-set JSON file that this notebook answers.
test_batch_doc = f'{globals.PATH.home}/data/11b_testset/BioASQ-task11bPhaseA-testset3.json'
# Read through a context manager so the file handle is closed right after parsing
# (the previous json.load(open(...)) form left it open).
with open(test_batch_doc) as test_batch_file:
    test_batch_json = json.load(test_batch_file)

In [4]:
# Document store and the project's retriever that reads from it.
document_store = ElasticsearchDocumentStore()
retriever = BioASQ_Retriever(document_store=document_store)

# Ranker node backed by a cross-encoder checkpoint.
ranker = SentenceTransformersRanker(model_name_or_path="cross-encoder/ms-marco-MiniLM-L-12-v2")

# Reader checkpoint in use; alternatives that were tried:
#   "deepset/roberta-base-squad2"
#   "twielema/biobert_v1.1_pubmed_squad_v2_entericdisease"
my_model = "deepset/roberta-base-squad2-covid"
reader = FARMReader(model_name_or_path=my_model, use_gpu=True)

# Wire the query pipeline: Query -> BM25Retriever -> Ranker -> Reader.
pipeline = Pipeline()
for stage, stage_name, stage_inputs in (
    (retriever, "BM25Retriever", ["Query"]),
    (ranker, "Ranker", ["BM25Retriever"]),
    (reader, "Reader", ["Ranker"]),
):
    pipeline.add_node(component=stage, name=stage_name, inputs=stage_inputs)

# Smoke test: run the pipeline once on a single question.
prediction = pipeline.run(
    query="Which gene is most frequently mutated in hereditary angioedema ?",
    params={"BM25Retriever": {"top_k": 100}},
)

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

In [5]:
# Display the result of the single-question pipeline run above.
prediction

{'query': 'Which gene is most frequently mutated in hereditary angioedema ?',
 'no_ans_gap': 17.294288128614426,
 'answers': [<Answer {'answer': 'type 2', 'type': 'extractive', 'score': 0.8722091317176819, 'context': 'used by either a C1 esterase inhibitor deficiency (type 1) or mutation (type 2). This leads to overproduction of bradykinin resulting in vasodilation,', 'offsets_in_document': [{'start': 152, 'end': 158}], 'offsets_in_context': [{'start': 72, 'end': 78}], 'document_ids': ['33780405'], 'meta': {'title': 'Recognition, Evaluation, and Management of Pediatric Hereditary Angioedema.', 'issue': '37(4)', 'pages': '218-223', 'abstract': "Hereditary angioedema (HAE) is a rare, often underrecognized genetic disorder caused by either a C1 esterase inhibitor deficiency (type 1) or mutation (type 2). This leads to overproduction of bradykinin resulting in vasodilation, vascular leakage, and transient nonpitting angioedema occurring most frequently in the face, neck, upper airway, abdo

In [6]:
def get_snipet_from_answer(pred):
    """Convert one reader answer into a BioASQ-style snippet dictionary.

    Args:
        pred: answer object exposing ``document_ids`` (first entry is used),
            ``context`` (the text window around the answer) and
            ``meta['abstract']`` (text the context is located in).

    Returns:
        dict with the keys ``document``, ``text``, ``offsetInBeginSection``,
        ``offsetInEndSection``, ``beginSection`` and ``endSection``, inserted
        in that order.

    Notes:
        * ``beginSection`` is deliberately kept as the 1-tuple
          ``("abstract",)``. The original code produced it through a trailing
          comma, and a later notebook cell unwraps it with ``[0]``; returning
          a plain string here would make that cell store ``"a"``.
        * If the context does not occur in the abstract, ``rfind`` yields -1,
          so the offsets become ``-1`` and ``len(context) - 1``. Callers that
          need valid offsets must check for a negative begin offset.
    """
    context = pred.context
    # rfind: start of the LAST occurrence of the context inside the abstract.
    begin = pred.meta['abstract'].rfind(context)
    return {
        'document': globals.BIOASQ.doc_relative_url + pred.document_ids[0],
        'text': context,
        'offsetInBeginSection': begin,
        'offsetInEndSection': begin + len(context),
        'beginSection': ("abstract",),
        'endSection': "abstract",
    }

In [21]:
# Answer-score cut-offs used per run configuration:
#   mindlab base -> ans.score > 0.3
#   mindlab tns  -> ans.score > 0.4, documents taken only from the kept snippets
MIN_ANSWER_SCORE = 0.4

for sample in tqdm(test_batch_json['questions'], position=0):
    prediction = pipeline.run(
        query=sample['body'],
        params={"BM25Retriever": {"top_k": 100}, "Ranker": {"top_k": 50}, "Reader": {"top_k": 50}},
    )

    # Keep only the answers the reader scored above the cut-off.
    snippets = [
        get_snipet_from_answer(ans)
        for ans in prediction['answers']
        if ans.score > MIN_ANSWER_SCORE
    ]
    sample['snippets'] = snippets

    # De-duplicate the snippet documents with dict.fromkeys instead of set():
    # a set has arbitrary order, so a later "first 10" cut would pick arbitrary
    # documents. dict.fromkeys keeps the order of prediction['answers'].
    # (Alternative that was tried and disabled: the first 10 entries of
    # prediction['documents'].)
    sample['documents'] = list(dict.fromkeys(snippet['document'] for snippet in snippets))

  0%|                                                                                                                    | 0/90 [00:00<?, ?it/s]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

  1%|█▏                                                                                                          | 1/90 [00:06<09:46,  6.59s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

  2%|██▍                                                                                                         | 2/90 [00:14<11:10,  7.62s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

  3%|███▌                                                                                                        | 3/90 [00:22<10:48,  7.46s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

  4%|████▊                                                                                                       | 4/90 [00:25<08:28,  5.91s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

  6%|██████                                                                                                      | 5/90 [00:33<09:06,  6.43s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

  7%|███████▏                                                                                                    | 6/90 [00:41<09:58,  7.13s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

  8%|████████▍                                                                                                   | 7/90 [00:49<10:02,  7.26s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

  9%|█████████▌                                                                                                  | 8/90 [00:55<09:23,  6.87s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 10%|██████████▊                                                                                                 | 9/90 [01:04<10:12,  7.56s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 11%|███████████▉                                                                                               | 10/90 [01:11<09:52,  7.41s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 12%|█████████████                                                                                              | 11/90 [01:18<09:32,  7.25s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 13%|██████████████▎                                                                                            | 12/90 [01:24<09:04,  6.98s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 14%|███████████████▍                                                                                           | 13/90 [01:31<08:58,  7.00s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 16%|████████████████▋                                                                                          | 14/90 [01:36<07:56,  6.27s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 17%|█████████████████▊                                                                                         | 15/90 [01:43<08:08,  6.51s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 18%|███████████████████                                                                                        | 16/90 [01:48<07:34,  6.14s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 19%|████████████████████▏                                                                                      | 17/90 [01:54<07:32,  6.20s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 20%|█████████████████████▍                                                                                     | 18/90 [02:00<07:24,  6.18s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 21%|██████████████████████▌                                                                                    | 19/90 [02:05<06:41,  5.65s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 22%|███████████████████████▊                                                                                   | 20/90 [02:11<06:46,  5.81s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 23%|████████████████████████▉                                                                                  | 21/90 [02:17<06:47,  5.91s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 24%|██████████████████████████▏                                                                                | 22/90 [02:25<07:24,  6.53s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 26%|███████████████████████████▎                                                                               | 23/90 [02:34<07:54,  7.09s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 27%|████████████████████████████▌                                                                              | 24/90 [02:42<08:08,  7.41s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 28%|█████████████████████████████▋                                                                             | 25/90 [02:48<07:40,  7.08s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 29%|██████████████████████████████▉                                                                            | 26/90 [02:55<07:30,  7.03s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 30%|████████████████████████████████                                                                           | 27/90 [03:03<07:34,  7.22s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 31%|█████████████████████████████████▎                                                                         | 28/90 [03:10<07:30,  7.27s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 32%|██████████████████████████████████▍                                                                        | 29/90 [03:16<06:58,  6.86s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 33%|███████████████████████████████████▋                                                                       | 30/90 [03:22<06:41,  6.70s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 34%|████████████████████████████████████▊                                                                      | 31/90 [03:28<06:12,  6.31s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 36%|██████████████████████████████████████                                                                     | 32/90 [03:32<05:39,  5.85s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 37%|███████████████████████████████████████▏                                                                   | 33/90 [03:42<06:31,  6.87s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 38%|████████████████████████████████████████▍                                                                  | 34/90 [03:48<06:23,  6.84s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 39%|█████████████████████████████████████████▌                                                                 | 35/90 [03:55<06:11,  6.76s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 40%|██████████████████████████████████████████▊                                                                | 36/90 [04:02<06:15,  6.96s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 41%|███████████████████████████████████████████▉                                                               | 37/90 [04:09<06:02,  6.84s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 42%|█████████████████████████████████████████████▏                                                             | 38/90 [04:15<05:44,  6.63s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 43%|██████████████████████████████████████████████▎                                                            | 39/90 [04:23<05:57,  7.00s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 44%|███████████████████████████████████████████████▌                                                           | 40/90 [04:31<06:04,  7.29s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 46%|████████████████████████████████████████████████▋                                                          | 41/90 [04:39<06:06,  7.49s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 47%|█████████████████████████████████████████████████▉                                                         | 42/90 [04:46<05:57,  7.44s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 48%|███████████████████████████████████████████████████                                                        | 43/90 [04:55<06:06,  7.81s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 49%|████████████████████████████████████████████████████▎                                                      | 44/90 [05:01<05:38,  7.36s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 50%|█████████████████████████████████████████████████████▌                                                     | 45/90 [05:09<05:40,  7.58s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 51%|██████████████████████████████████████████████████████▋                                                    | 46/90 [05:16<05:24,  7.37s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 52%|███████████████████████████████████████████████████████▉                                                   | 47/90 [05:24<05:26,  7.60s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 53%|█████████████████████████████████████████████████████████                                                  | 48/90 [05:31<05:03,  7.23s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 54%|██████████████████████████████████████████████████████████▎                                                | 49/90 [05:39<05:08,  7.51s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 56%|███████████████████████████████████████████████████████████▍                                               | 50/90 [05:45<04:43,  7.09s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 57%|████████████████████████████████████████████████████████████▋                                              | 51/90 [05:53<04:46,  7.35s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 58%|█████████████████████████████████████████████████████████████▊                                             | 52/90 [06:00<04:34,  7.23s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 59%|███████████████████████████████████████████████████████████████                                            | 53/90 [06:06<04:11,  6.80s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 60%|████████████████████████████████████████████████████████████████▏                                          | 54/90 [06:12<04:02,  6.74s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 61%|█████████████████████████████████████████████████████████████████▍                                         | 55/90 [06:21<04:19,  7.42s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 62%|██████████████████████████████████████████████████████████████████▌                                        | 56/90 [06:27<03:54,  6.90s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 63%|███████████████████████████████████████████████████████████████████▊                                       | 57/90 [06:35<03:59,  7.24s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 64%|████████████████████████████████████████████████████████████████████▉                                      | 58/90 [06:44<04:06,  7.69s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 66%|██████████████████████████████████████████████████████████████████████▏                                    | 59/90 [06:55<04:30,  8.73s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 67%|███████████████████████████████████████████████████████████████████████▎                                   | 60/90 [07:01<04:02,  8.07s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 68%|████████████████████████████████████████████████████████████████████████▌                                  | 61/90 [07:10<03:56,  8.14s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 69%|█████████████████████████████████████████████████████████████████████████▋                                 | 62/90 [07:18<03:50,  8.24s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 70%|██████████████████████████████████████████████████████████████████████████▉                                | 63/90 [07:25<03:30,  7.78s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 71%|████████████████████████████████████████████████████████████████████████████                               | 64/90 [07:35<03:39,  8.46s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 72%|█████████████████████████████████████████████████████████████████████████████▎                             | 65/90 [07:43<03:25,  8.21s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 73%|██████████████████████████████████████████████████████████████████████████████▍                            | 66/90 [07:52<03:24,  8.51s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 74%|███████████████████████████████████████████████████████████████████████████████▋                           | 67/90 [07:58<03:01,  7.88s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 76%|████████████████████████████████████████████████████████████████████████████████▊                          | 68/90 [08:05<02:49,  7.69s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 77%|██████████████████████████████████████████████████████████████████████████████████                         | 69/90 [08:13<02:42,  7.76s/it]

Inferencing Samples:   0%|          | 0/4 [00:00<?, ? Batches/s]

 78%|███████████████████████████████████████████████████████████████████████████████████▏                       | 70/90 [08:24<02:51,  8.55s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 79%|████████████████████████████████████████████████████████████████████████████████████▍                      | 71/90 [08:31<02:32,  8.01s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 80%|█████████████████████████████████████████████████████████████████████████████████████▌                     | 72/90 [08:33<01:57,  6.50s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 81%|██████████████████████████████████████████████████████████████████████████████████████▊                    | 73/90 [08:40<01:49,  6.43s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 82%|███████████████████████████████████████████████████████████████████████████████████████▉                   | 74/90 [08:44<01:31,  5.71s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 83%|█████████████████████████████████████████████████████████████████████████████████████████▏                 | 75/90 [08:51<01:32,  6.18s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 84%|██████████████████████████████████████████████████████████████████████████████████████████▎                | 76/90 [08:58<01:28,  6.30s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 86%|███████████████████████████████████████████████████████████████████████████████████████████▌               | 77/90 [09:04<01:21,  6.26s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 87%|████████████████████████████████████████████████████████████████████████████████████████████▋              | 78/90 [09:10<01:13,  6.16s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 88%|█████████████████████████████████████████████████████████████████████████████████████████████▉             | 79/90 [09:17<01:10,  6.44s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 89%|███████████████████████████████████████████████████████████████████████████████████████████████            | 80/90 [09:25<01:08,  6.81s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

 90%|████████████████████████████████████████████████████████████████████████████████████████████████▎          | 81/90 [09:33<01:04,  7.22s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 91%|█████████████████████████████████████████████████████████████████████████████████████████████████▍         | 82/90 [09:40<00:58,  7.33s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 92%|██████████████████████████████████████████████████████████████████████████████████████████████████▋        | 83/90 [09:47<00:50,  7.15s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 93%|███████████████████████████████████████████████████████████████████████████████████████████████████▊       | 84/90 [09:53<00:41,  6.94s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 94%|█████████████████████████████████████████████████████████████████████████████████████████████████████      | 85/90 [10:00<00:33,  6.77s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 96%|██████████████████████████████████████████████████████████████████████████████████████████████████████▏    | 86/90 [10:05<00:24,  6.22s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 97%|███████████████████████████████████████████████████████████████████████████████████████████████████████▍   | 87/90 [10:11<00:18,  6.33s/it]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 98%|████████████████████████████████████████████████████████████████████████████████████████████████████████▌  | 88/90 [10:18<00:13,  6.55s/it]

Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 89/90 [10:23<00:05,  5.89s/it]

Inferencing Samples:   0%|          | 0/3 [00:00<?, ? Batches/s]

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 90/90 [10:30<00:00,  7.01s/it]


In [32]:
# get_snipet_from_answer stores 'beginSection' as a one-element tuple; unwrap it
# to the plain string expected in the submission.
# The isinstance guard makes this cell safe to re-run: indexing an already
# unwrapped "abstract" with [0] would silently truncate it to "a".
for q in test_batch_json['questions']:
    for s in q['snippets']:
        if isinstance(s['beginSection'], (list, tuple)):
            s['beginSection'] = s['beginSection'][0]

In [33]:
# Name of this run; it becomes part of the submission file name.
model_id = 'bm25_ranker_reader_bioqa_ans_score_0_4_sentence_sim'

# <home>/data/processed/<test-set file name without ".json">_model_<model_id>.json
test_batch_stem = test_batch_doc.split("/")[-1].replace(".json", "")
submission_file_name = f'{globals.PATH.home}/data/processed/{test_batch_stem}_model_{model_id}.json'

# Write the current state of test_batch_json as the submission.
with open(submission_file_name, "w") as submission_file:
    json.dump(test_batch_json, submission_file, indent=2)

## Improve Passages

In [34]:
# Extra dependencies for the passage-improvement cells.
# NOTE(review): editdistance is not referenced in the cells visible here.
import editdistance
import nltk.data
import src.elastic_search_utils.elastic_utils as es_util

# Load the English Punkt tokenizer from NLTK's data files; its tokenize()
# method is used below to split abstracts into sentences.
tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')

In [35]:
from rapidfuzz.distance import Levenshtein
# Sanity check: two identical strings give a normalized similarity of 1.0.
Levenshtein.normalized_similarity("levenshtein", "levenshtein")

1.0

In [36]:
# Reload the submission from disk. The context manager closes the file handle,
# which the previous json.load(open(...)) form left open.
with open(submission_file_name) as submission_file:
    test_batch_json = json.load(submission_file)

In [37]:
# Replace each snippet text by the abstract sentence that matches it best.
# Minimum normalized Levenshtein similarity for a sentence that does not
# literally contain the snippet text.
MIN_SENTENCE_SIMILARITY = 0.5

for sample in tqdm(test_batch_json['questions'], position=0):
    for s in sample['snippets']:
        text_raw = s['text']
        doc_id = s['document'].replace(globals.BIOASQ.doc_relative_url, '')
        res = es_util.search_doc_by_id(doc_id)
        doc_abstract = res[doc_id]['abstract']
        sentences = tokenizer.tokenize(doc_abstract)

        # Pick exactly ONE sentence per snippet. The earlier version kept
        # looping after a hit, so the last sentence that passed either test
        # overwrote earlier (and possibly better) matches.
        # 1) Prefer the first sentence that literally contains the snippet.
        new_text = next((sentence for sentence in sentences if text_raw in sentence), None)
        # 2) Otherwise take the most similar sentence, if it is similar enough.
        if new_text is None and sentences:
            best_sentence = max(
                sentences,
                key=lambda sentence: Levenshtein.normalized_similarity(sentence, text_raw),
            )
            if Levenshtein.normalized_similarity(best_sentence, text_raw) > MIN_SENTENCE_SIMILARITY:
                new_text = best_sentence

        # Snippets without a matching sentence are left unchanged.
        if new_text:
            s['text'] = new_text
            s['offsetInBeginSection'] = doc_abstract.find(new_text)
            s['offsetInEndSection'] = s['offsetInBeginSection'] + len(new_text)
            s["beginSection"] = "abstract"
            s["endSection"] = "abstract"
        # TODO: add title search

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 90/90 [00:01<00:00, 54.40it/s]


In [38]:
import copy

In [39]:
# Keep an independent snapshot before the lists are shortened in place.
copy_test_batch_json = copy.deepcopy(test_batch_json)

# Cut both per-question lists down to their first ten entries.
for question in tqdm(test_batch_json['questions'], position=0):
    for field in ('documents', 'snippets'):
        question[field] = question[field][:10]

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 90/90 [00:00<00:00, 389162.23it/s]


In [40]:
# Write the current state of test_batch_json back to the submission file.
with open(submission_file_name, "w") as submission_file:
    json.dump(test_batch_json, submission_file, indent=2)

In [41]:
# Show the path the submission was written to.
submission_file_name

'/opt/bioasq/col-un-bioasq11/data/processed/BioASQ-task11bPhaseA-testset3_model_bm25_ranker_reader_bioqa_ans_score_0_4_sentence_sim.json'

# ChatGPT search

In [37]:
# JSON file with the ChatGPT output for the test-set questions.
chat_gpt_fn = f'{globals.PATH.home}/data/11b_testset/ChatGPT.json'
# Context manager closes the handle that json.load(open(...)) used to leak.
with open(chat_gpt_fn, "r") as chat_gpt_file:
    chat_gpt = json.load(chat_gpt_file)

In [1]:
# Index the ChatGPT records by question id (needs `chat_gpt` from the loading cell).
chatgpt_questions = {entry['id']: entry for entry in chat_gpt['questions']}

NameError: name 'chat_gpt' is not defined

In [70]:
# Keep only the snippets whose text is similar enough to ChatGPT's short answer.
# Minimum normalized Levenshtein similarity for a snippet to be kept.
MIN_CHATGPT_SIMILARITY = 0.1

for sample in tqdm(test_batch_json['questions'], position=0):
    candidates = sample['snippets']
    snippet_list = []
    if candidates:
        # The short answer depends only on the question, so look it up once per
        # question instead of once per snippet. It is still skipped for
        # questions without snippets, as before.
        query = chatgpt_questions[sample['id']]['chatgpt-out']['short-ans']
        snippet_list = [
            s for s in candidates
            if Levenshtein.normalized_similarity(query, s['text']) > MIN_CHATGPT_SIMILARITY
        ]
    sample['snippets'] = snippet_list
    # dict.fromkeys removes duplicate documents while keeping snippet order;
    # the previous set() round-trip returned them in arbitrary order.
    sample['documents'] = list(dict.fromkeys(s['document'] for s in snippet_list))

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████| 75/75 [00:00<00:00, 8160.76it/s]


In [71]:
# Display the current contents of test_batch_json.
# NOTE(review): this dumps the whole structure; a slice would keep the output short.
test_batch_json

{'questions': [{'id': '63f03006f36125a426000018',
   'type': 'summary',
   'body': 'What were the results of the DESTINY-Breast04 Trial?',
   'snippets': [{'document': 'http://www.ncbi.nlm.nih.gov/pubmed/35665782',
     'text': "CONCLUSIONS\nIn this trial involving patients with HER2-low metastatic breast cancer, trastuzumab deruxtecan resulted in significantly longer progression-free and overall survival than the physician's choice of chemotherapy.",
     'offsetInBeginSection': 2191,
     'offsetInEndSection': 2414,
     'beginSection': 'abstract',
     'endSection': 'abstract'},
    {'document': 'http://www.ncbi.nlm.nih.gov/pubmed/36386213',
     'text': 'DESTINY-Breast04 (NCT03734029) has demonstrated the antitumor activity of trastuzumab deruxtecan (T-DXd).',
     'offsetInBeginSection': 172,
     'offsetInEndSection': 277,
     'beginSection': 'abstract',
     'endSection': 'abstract'},
    {'document': 'http://www.ncbi.nlm.nih.gov/pubmed/31022475',
     'text': 'After evaluating

In [72]:
# Write the current state of test_batch_json back to the submission file.
with open(submission_file_name, "w") as submission_file:
    json.dump(test_batch_json, submission_file, indent=2)

In [None]:
# Display the deep-copied snapshot of the batch.
copy_test_batch_json

In [48]:
from sentence_transformers import SentenceTransformer, util

# NOTE(review): the load log for this checkpoint reports unused QA-head weights
# and newly initialized pooler weights; a model trained for sentence similarity
# may be a better fit - confirm.
model = SentenceTransformer('deepset/roberta-base-squad2')

# Minimum cosine similarity between the ChatGPT short answer and a snippet.
MIN_COSINE_SIMILARITY = 0.98

for sample in tqdm(test_batch_json['questions'], position=0):
    query = chatgpt_questions[sample['id']]['chatgpt-out']['short-ans']
    en_query = model.encode(query)

    # Keep the snippets whose embedding is close enough to the short answer.
    snippet_list = [
        s for s in sample['snippets']
        if util.cos_sim(en_query, model.encode(s['text'])) > MIN_COSINE_SIMILARITY
    ]

    # Several snippets can come from the same document; de-duplicate in snippet
    # order so 'documents' does not list one URL more than once.
    doc_list = list(dict.fromkeys(s['document'] for s in snippet_list))
    sample['documents'] = doc_list[0:10]
    sample['snippets'] = snippet_list[0:10]

Some weights of the model checkpoint at /home/andresr/.cache/torch/sentence_transformers/deepset_roberta-base-squad2 were not used when initializing RobertaModel: ['qa_outputs.bias', 'qa_outputs.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at /home/andresr/.cache/torch/sentence_transformers/deepset_roberta-base-squad2 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictio

In [None]:
from sentence_transformers import SentenceTransformer, util

# NOTE(review): this cell has no execution count and repeats the filtering
# done by the In [48] cell - presumably one of the two can be removed.
#
# Keep only the snippets whose embedding is close to the ChatGPT short answer,
# then rebuild each question's document list from the surviving snippets.
# The questions inside test_batch_json are updated in place.

# A snippet is kept only when its cosine similarity is above this value.
SIMILARITY_THRESHOLD = 0.98
# Maximum number of documents / snippets stored per question.
MAX_ITEMS_PER_QUESTION = 10

model = SentenceTransformer('deepset/roberta-base-squad2')

for sample in tqdm(test_batch_json['questions'], position=0):
    query = chatgpt_questions[sample['id']]['chatgpt-out']['short-ans']
    en_query = model.encode(query)

    snippet_list = []
    for s in sample['snippets']:
        en_snippet = model.encode(s['text'])
        # cos_sim yields a single-element tensor for two single vectors;
        # .item() makes the comparison an explicit float comparison.
        similarity = util.cos_sim(en_query, en_snippet).item()
        if similarity > SIMILARITY_THRESHOLD:
            snippet_list.append(s)

    # Several snippets can come from the same article: de-duplicate the
    # document URLs while preserving the order in which they were first seen,
    # so the same document is never listed twice for a question.
    doc_list = list(dict.fromkeys(s['document'] for s in snippet_list))

    sample['documents'] = doc_list[:MAX_ITEMS_PER_QUESTION]
    sample['snippets'] = snippet_list[:MAX_ITEMS_PER_QUESTION]

In [49]:
# Show the whole test batch as the cell output.
# NOTE(review): this prints every question; a slice such as
# test_batch_json['questions'][:3] would keep the output short.
test_batch_json

{'questions': [{'id': '63f03006f36125a426000018',
   'type': 'summary',
   'body': 'What were the results of the DESTINY-Breast04 Trial?',
   'snippets': [],
   'documents': []},
  {'id': '641ad941690f196b5100003d',
   'type': 'yesno',
   'body': 'Are LOQ and LOD the same?',
   'snippets': [{'document': 'http://www.ncbi.nlm.nih.gov/pubmed/21603916',
     'text': 'Real-time PCR (qPCR) is the principal technique for the quantification of pathogen biomass in host tissue, yet no generic methods exist for the determination of the limit of quantification (LOQ) and the limit of detection (LOD) in qPCR.',
     'offsetInBeginSection': 0,
     'offsetInEndSection': 236,
     'beginSection': 'abstract',
     'endSection': 'abstract'}],
   'documents': ['http://www.ncbi.nlm.nih.gov/pubmed/21603916']},
  {'id': '642a029d57b1c7a315000011',
   'type': 'summary',
   'body': 'Histone acetyltransferases (HATs) transfer acetyl groups from acetyl CoA to lysine residues on histones. What is the purpose of 

In [16]:
# Run the query pipeline for every question and store the resulting snippets
# and documents on the question itself (test_batch_json is updated in place).

# Minimum answer score for an answer to be converted into a snippet.
# Run configurations noted by the author:
#   mindlab base: ans.score > 0.3
#   mindlab tns : ans.score > 0.4 and only documents backed by snippets
ANSWER_SCORE_THRESHOLD = 0.4

for sample in tqdm(test_batch_json['questions'], position=0):
    expanded_queries = chatgpt_questions[sample['id']]['chatgpt-out']['queries']
    # The original question is used unless more than one expanded query is
    # available, in which case the expanded queries are joined into one string.
    if len(expanded_queries) > 1:
        query = ' '.join(expanded_queries)
    else:
        query = sample['body']

    prediction = pipeline.run(query=query, params={"BM25Retriever": {"top_k": 50}, "Ranker": {"top_k": 30}, "Reader": {"top_k": 30} })
    print(query)

    snippets = [
        get_snipet_from_answer(ans)
        for ans in prediction['answers']
        if ans.score > ANSWER_SCORE_THRESHOLD
    ]
    sample['snippets'] = snippets

    # Only documents that contributed a snippet are reported. They are
    # de-duplicated with dict.fromkeys so the order in which the answers were
    # returned is kept and the result is the same on every run (a set would
    # shuffle the URLs between kernel sessions).
    # Disabled alternative from the original cell: report the first 10 of
    #   [globals.BIOASQ.doc_relative_url + d.id for d in prediction['documents']]
    sample['documents'] = list(dict.fromkeys(snippet['document'] for snippet in snippets))

  0%|                                                                                                                    | 0/75 [00:00<?, ?it/s]

Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

  1%|█▍                                                                                                          | 1/75 [00:06<07:39,  6.20s/it]

DESTINY-Breast04 trial results Trastuzumab deruxtecan efficacy in breast cancer Phase III study of trastuzumab deruxtecan in HER2-positive breast cancer Clinical trial of trastuzumab deruxtecan in breast cancer DESTINY-Breast04 trial design and outcomes HER2-targeted therapy in metastatic breast cancer Trastuzumab deruxtecan safety profile in breast cancer Antibody-drug conjugate therapy for breast cancer


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

  3%|██▉                                                                                                         | 2/75 [00:10<06:10,  5.07s/it]

LOQ vs. LOD Limit of quantification vs. limit of detection Analytical chemistry LOQ and LOD Clinical laboratory LOQ and LOD Biomedical assay sensitivity and specificity LOQ and LOD in medical research


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

  4%|████▎                                                                                                       | 3/75 [00:15<05:48,  4.84s/it]

Function of histone acetylation Role of HATs in gene expression Epigenetic regulation by histone acetylation Histone acetylation and chromatin remodeling Lysine acetylation and protein function Acetylation of non-histone proteins


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

  5%|█████▊                                                                                                      | 4/75 [00:19<05:33,  4.70s/it]

Gene mutations in hereditary angioedema C1 inhibitor gene mutations in HAE SERPING1 mutations and HAE F12 gene mutations and HAE Angioedema and genetic mutations Hereditary angioedema genetics


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

  7%|███████▏                                                                                                    | 5/75 [00:23<05:22,  4.60s/it]

Casimersen MOA Antisense oligonucleotides and exon skipping Duchenne muscular dystrophy and casimersen Pharmacokinetics and pharmacodynamics of casimersen Clinical trials of casimersen RNA splicing modulation by casimersen


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

  8%|████████▋                                                                                                   | 6/75 [00:28<05:26,  4.73s/it]

tiragolumab AND mechanism of action tiragolumab AND PD-1 tiragolumab AND immunotherapy tiragolumab AND cancer tiragolumab AND clinical trials


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

  9%|██████████                                                                                                  | 7/75 [00:33<05:27,  4.82s/it]

NMOSD serum biomarkers disease activity aquaporin-4 antibody NMOSD biomarkers glial fibrillary acidic protein NMOSD biomarkers myelin oligodendrocyte glycoprotein NMOSD biomarkers C-reactive protein NMOSD biomarkers


Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 11%|███████████▌                                                                                                | 8/75 [00:37<04:56,  4.43s/it]

COVID-19 thrombosis complications COVID-19 venous thromboembolism COVID-19 arterial thrombosis COVID-19 coagulopathy COVID-19 hypercoagulability


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 12%|████████████▉                                                                                               | 9/75 [00:41<04:51,  4.41s/it]

Cinryze indication Cinryze mechanism of action Cinryze clinical trials Cinryze safety Cinryze efficacy


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 13%|██████████████▎                                                                                            | 10/75 [00:46<04:57,  4.58s/it]

Machine learning survival analysis Statistical survival analysis Machine learning vs statistical methods for survival analysis Comparison of machine learning and statistical methods for survival analysis


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 15%|███████████████▋                                                                                           | 11/75 [00:51<04:44,  4.45s/it]

ZF2001 is used for which disease?


Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 16%|█████████████████                                                                                          | 12/75 [00:54<04:25,  4.22s/it]

Mammal with lowest chromosome number Karyotype of mammals Chromosome number in mammals Animal genome sequencing


Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 17%|██████████████████▌                                                                                        | 13/75 [00:58<04:08,  4.01s/it]

Histone modifications amino acids Histone acetyltransferases targets Histone methyltransferases targets Epigenetic modifications Histone code


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 19%|███████████████████▉                                                                                       | 14/75 [01:02<04:05,  4.02s/it]

Ruconest manufacturer Rhucin manufacturer C1 esterase inhibitor manufacturer Plasma-derived therapies Hereditary angioedema treatment


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 20%|█████████████████████▍                                                                                     | 15/75 [01:06<04:03,  4.06s/it]

Duchenne muscular dystrophy diagnosis age Clinical presentation Duchenne muscular dystrophy Motor milestones Duchenne muscular dystrophy Creatine kinase levels Duchenne muscular dystrophy Genetic testing Duchenne muscular dystrophy


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 21%|██████████████████████▊                                                                                    | 16/75 [01:10<04:04,  4.14s/it]

TONE trial erythropoietin optic neuritis Erythropoietin optic neuritis clinical trial Optic neuritis treatment TONE trial Visual function outcomes erythropoietin optic neuritis Randomized controlled trial erythropoietin optic neuritis


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 23%|████████████████████████▎                                                                                  | 17/75 [01:15<04:15,  4.41s/it]

hepatotoxicity R value R value liver toxicity correlation R value hepatotoxicity hepatotoxicity Pearson correlation coefficient R squared value liver toxicity


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 24%|█████████████████████████▋                                                                                 | 18/75 [01:21<04:28,  4.72s/it]

COVID-19 Omicron severity Omicron variant clinical outcomes COVID-19 variant comparison severity Omicron vs other variants disease severity SARS-CoV-2 Omicron severity


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 25%|███████████████████████████                                                                                | 19/75 [01:24<04:06,  4.41s/it]

hereditary angioedema overview genetic basis of angioedema clinical presentation of hereditary angioedema treatment of hereditary angioedema pathophysiology of angioedema


Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 27%|████████████████████████████▌                                                                              | 20/75 [01:28<03:48,  4.16s/it]

viltolarsen mechanism of action how does viltolarsen work viltolarsen pharmacodynamics mode of action of viltolarsen viltolarsen molecular mechanism


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 28%|█████████████████████████████▉                                                                             | 21/75 [01:32<03:48,  4.22s/it]

bexarotene multiple sclerosis bexarotene relapsing-remitting multiple sclerosis efficacy of bexarotene in multiple sclerosis safety of bexarotene in multiple sclerosis bexarotene clinical trials multiple sclerosis


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 29%|███████████████████████████████▍                                                                           | 22/75 [01:37<03:42,  4.20s/it]

hepcidin regulation hepcidin function role of hepcidin in iron metabolism hepcidin and erythropoiesis hepcidin and inflammation


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 31%|████████████████████████████████▊                                                                          | 23/75 [01:41<03:49,  4.40s/it]

analgesics antipyretics COVID-19 vaccine side effects management of COVID-19 vaccine side effects acetaminophen COVID-19 vaccine ibuprofen COVID-19 vaccine NSAIDs COVID-19 vaccine


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 32%|██████████████████████████████████▏                                                                        | 24/75 [01:47<03:55,  4.63s/it]

recombinant human C1 inhibitor protein HAE first drug for acute HAE attacks C1INH for HAE treatment rhC1INH for HAE


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 33%|███████████████████████████████████▋                                                                       | 25/75 [01:50<03:41,  4.42s/it]

viltolarsen AND DMD viltolarsen AND Duchenne muscular dystrophy viltolarsen AND gene therapy viltolarsen AND exon skipping


Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 35%|█████████████████████████████████████                                                                      | 26/75 [01:53<03:15,  3.99s/it]

Cat eye syndrome AND triad Cat eye syndrome AND chromosomal abnormalities Cat eye syndrome AND ocular abnormalities Cat eye syndrome AND preauricular tags Cat eye syndrome AND genetics


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 36%|██████████████████████████████████████▌                                                                    | 27/75 [01:58<03:24,  4.27s/it]

OXLUMO AND lumasiran AND primary hyperoxaluria lumasiran AND RNAi AND primary hyperoxaluria lumasiran AND ALNYLAM-117 AND primary hyperoxaluria lumasiran AND clinical trials AND primary hyperoxaluria lumasiran AND mechanism of action AND primary hyperoxaluria


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 37%|███████████████████████████████████████▉                                                                   | 28/75 [02:04<03:34,  4.57s/it]

inhaled glucocorticoids AND COVID-19 asthma AND COVID-19 AND glucocorticoids COPD AND COVID-19 AND glucocorticoids COVID-19 AND steroids AND respiratory disease COVID-19 AND corticosteroids AND respiratory disease


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 39%|█████████████████████████████████████████▎                                                                 | 29/75 [02:08<03:32,  4.63s/it]

Zanubrutinib AND BTK inhibitor AND FDA first-generation BTK inhibitor AND Zanubrutinib Zanubrutinib AND FDA approval AND BTK inhibitor Zanubrutinib AND clinical trials AND BTK inhibitor Zanubrutinib AND mechanism of action AND BTK inhibitor


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 40%|██████████████████████████████████████████▊                                                                | 30/75 [02:13<03:30,  4.67s/it]

eteplirsen AND Duchenne muscular dystrophy eteplirsen AND dystrophin eteplirsen AND exon skipping eteplirsen AND clinical trials eteplirsen AND mechanism of action


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 41%|████████████████████████████████████████████▏                                                              | 31/75 [02:19<03:37,  4.95s/it]

Cinpanemab AND Parkinson's Disease Cinpanemab AND clinical trials Cinpanemab AND efficacy Cinpanemab AND safety Cinpanemab AND mechanism of action


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 43%|█████████████████████████████████████████████▋                                                             | 32/75 [02:23<03:27,  4.82s/it]

Ashwell receptor AND organ Ashwell receptor AND liver Ashwell receptor AND glycoprotein clearance Ashwell receptor AND hepatocytes


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 44%|███████████████████████████████████████████████                                                            | 33/75 [02:28<03:16,  4.68s/it]

enhancers AND chromatin landscape AND histone marks histone marks AND enhancer activity histone modifications AND gene regulation chromatin structure AND enhancers


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 45%|████████████████████████████████████████████████▌                                                          | 34/75 [02:32<03:11,  4.67s/it]

CRISPR/Cas12a AND EGFR mutations AND circulating DNA CRISPR diagnostics AND EGFR mutations CRISPR/Cas12a AND liquid biopsy AND cancer CRISPR/Cas12a AND sensitivity AND specificity


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 47%|█████████████████████████████████████████████████▉                                                         | 35/75 [02:36<02:55,  4.38s/it]

Casimersen AND DMD Casimersen AND Duchenne muscular dystrophy Casimersen AND exon skipping therapy


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 48%|███████████████████████████████████████████████████▎                                                       | 36/75 [02:41<02:52,  4.41s/it]

CIS43LS AND target CIS43LS complement-dependent cytotoxicity CIS43LS acute myeloid leukemia


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 49%|████████████████████████████████████████████████████▊                                                      | 37/75 [02:46<02:57,  4.68s/it]

Cal-light AND function Cal-light fluorescence microscopy Cal-light synaptic transmission Cal-light T-cell activation


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 51%|██████████████████████████████████████████████████████▏                                                    | 38/75 [02:50<02:48,  4.56s/it]

RNA polymerase binding site gene transcription Promoter region RNA polymerase gene transcription Transcription initiation site RNA polymerase RNA polymerase initiation complex gene transcription Transcription factor RNA polymerase binding gene


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 52%|███████████████████████████████████████████████████████▋                                                   | 39/75 [02:54<02:41,  4.49s/it]

PRP-40 microexon splicing Role of PRP-40 in microexon splicing PRP-40 and alternative splicing Microexon splicing and splicing factors PRP-40 and pre-mRNA splicing


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 53%|█████████████████████████████████████████████████████████                                                  | 40/75 [02:59<02:36,  4.47s/it]

Age of loss of independent ambulation in Duchenne muscular dystrophy Duchenne muscular dystrophy and ambulation loss Mobility loss in Duchenne muscular dystrophy Age at wheelchair dependence in Duchenne muscular dystrophy Duchenne muscular dystrophy and motor function decline


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 55%|██████████████████████████████████████████████████████████▍                                                | 41/75 [03:03<02:33,  4.52s/it]

Ocrelizumab and primary progressive multiple sclerosis Effectiveness of ocrelizumab in primary progressive multiple sclerosis Clinical trials of ocrelizumab in primary progressive multiple sclerosis Ocrelizumab and disease-modifying therapy for primary progressive multiple sclerosis Ocrelizumab and outcomes in primary progressive multiple sclerosis


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 56%|███████████████████████████████████████████████████████████▉                                               | 42/75 [03:08<02:26,  4.45s/it]

Indocyanine green (ICG) imaging Clinical applications of indocyanine green (ICG) Indocyanine green (ICG) fluorescence angiography Indocyanine green (ICG) and liver function assessment Indocyanine green (ICG) and lymphatic imaging


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 57%|█████████████████████████████████████████████████████████████▎                                             | 43/75 [03:13<02:27,  4.62s/it]

Alternative splicing and gene expression Mechanisms of alternative splicing Splice variants and gene regulation Transcript diversity and gene function Post-transcriptional regulation of gene expression


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 59%|██████████████████████████████████████████████████████████████▊                                            | 44/75 [03:19<02:40,  5.17s/it]

SRRM4 and tumor growth Silencing SRRM4 and cancer progression SRRM4 knockdown and cancer cell proliferation Role of SRRM4 in cancer Therapeutic potential of SRRM4 inhibition in cancer


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 60%|████████████████████████████████████████████████████████████████▏                                          | 45/75 [03:23<02:27,  4.90s/it]

Life expectancy in Duchenne muscular dystrophy Prognosis in Duchenne muscular dystrophy Survival rates in Duchenne muscular dystrophy Clinical outcomes in Duchenne muscular dystrophy Mortality in Duchenne muscular dystrophy


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 61%|█████████████████████████████████████████████████████████████████▋                                         | 46/75 [03:29<02:30,  5.18s/it]

Daridorexant and insomnia Efficacy of daridorexant in treating insomnia Daridorexant clinical trials for insomnia Comparative effectiveness of daridorexant for insomnia Safety and tolerability of daridorexant in insomnia patients


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 63%|███████████████████████████████████████████████████████████████████                                        | 47/75 [03:35<02:25,  5.19s/it]

Erythroferrone-producing cells Cellular sources of erythroferrone Erythroferrone expression in different cell types Regulation of erythroferrone production in erythroid cells Erythroferrone and erythropoiesis


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 64%|████████████████████████████████████████████████████████████████████▍                                      | 48/75 [03:40<02:19,  5.16s/it]

Mediator complex and gene expression Mediator role in transcription Mechanisms of Mediator-mediated gene regulation Mediator function in RNA polymerase II activity Mediator and enhancer-promoter interactions


Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 65%|█████████████████████████████████████████████████████████████████████▉                                     | 49/75 [03:43<02:00,  4.63s/it]

Definition of FBDD FBDD acronym meaning FBDD and drug discovery Applications of FBDD in drug development Comparison of FBDD with other drug discovery approaches


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 67%|███████████████████████████████████████████████████████████████████████▎                                   | 50/75 [03:48<01:59,  4.79s/it]

Deucravacitinib and psoriasis treatment Efficacy of deucravacitinib in psoriasis patients Clinical trials of deucravacitinib for psoriasis Safety profile of deucravacitinib in psoriasis patients Mechanisms of action of deucravacitinib in psoriasis


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 68%|████████████████████████████████████████████████████████████████████████▊                                  | 51/75 [03:53<01:53,  4.71s/it]

Functions of orexin/hypocretin neurons Regulation of sleep-wake cycle by orexin/hypocretin neurons Orexin/hypocretin and feeding behavior Orexin/hypocretin and reward processing Role of orexin/hypocretin in addiction and drug abuse


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 69%|██████████████████████████████████████████████████████████████████████████▏                                | 52/75 [03:58<01:53,  4.93s/it]

Epigenetic modification of introns and gene expression Role of intronic DNA methylation in gene regulation Chromatin structure and intron-mediated gene expression Histone modifications in introns and gene expression


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 71%|███████████████████████████████████████████████████████████████████████████▌                               | 53/75 [04:02<01:42,  4.66s/it]

Keytruda FDA approval history Pembrolizumab (Keytruda) indications FDA approved uses of Keytruda


Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 72%|█████████████████████████████████████████████████████████████████████████████                              | 54/75 [04:05<01:27,  4.18s/it]

Achard-Thiers syndrome clinical features Diabetes mellitus with hirsutism and virilism (Achard-Thiers syndrome) Achard-Thiers syndrome symptoms


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 73%|██████████████████████████████████████████████████████████████████████████████▍                            | 55/75 [04:10<01:26,  4.34s/it]

Luminopsins structure Luminopsins composition Fusion proteins of luminol and Rhodopsin Luminopsins function


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 75%|███████████████████████████████████████████████████████████████████████████████▉                           | 56/75 [04:13<01:17,  4.09s/it]

What are coactivators?


Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 76%|█████████████████████████████████████████████████████████████████████████████████▎                         | 57/75 [04:17<01:08,  3.81s/it]

PROTACs definition Mechanism of action of PROTACs Applications of PROTACs Examples of successful PROTACs


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 77%|██████████████████████████████████████████████████████████████████████████████████▋                        | 58/75 [04:22<01:12,  4.29s/it]

ALBI grade components Variables in ALBI score Calculation of ALBI grade Use of ALBI score in liver disease


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 79%|████████████████████████████████████████████████████████████████████████████████████▏                      | 59/75 [04:26<01:07,  4.24s/it]

Feline Infectious Peritonitis treatment drugs FIP therapy options in cats Antiviral therapy for FIP


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 80%|█████████████████████████████████████████████████████████████████████████████████████▌                     | 60/75 [04:32<01:11,  4.78s/it]

Differentiating COVID-19 from influenza clinically Clinical features distinguishing COVID-19 from other respiratory illnesses Diagnosis of COVID-19 versus influenza COVID-19 and influenza differential diagnosis Comparing clinical characteristics of COVID-19 and influenza


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 81%|███████████████████████████████████████████████████████████████████████████████████████                    | 61/75 [04:38<01:09,  4.98s/it]

Paclitaxel and cancer treatment Docetaxel and cancer treatment Doxorubicin and cancer treatment Epirubicin and cancer treatment Chemotherapy drugs and approved cancer indications


Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 83%|████████████████████████████████████████████████████████████████████████████████████████▍                  | 62/75 [04:42<01:01,  4.75s/it]

Talquetamab clinical trials Talquetamab mechanism of action Talquetamab pharmacokinetics Talquetamab safety and efficacy Talquetamab and disease indication


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 84%|█████████████████████████████████████████████████████████████████████████████████████████▉                 | 63/75 [04:46<00:53,  4.43s/it]

Feline Spongiform Encephalopathy clinical features Transmission of Feline Spongiform Encephalopathy Feline Spongiform Encephalopathy diagnosis Pathology of Feline Spongiform Encephalopathy Control of Feline Spongiform Encephalopathy


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 85%|███████████████████████████████████████████████████████████████████████████████████████████▎               | 64/75 [04:50<00:49,  4.51s/it]

Untranslated regions and gene expression Role of UTRs in post-transcriptional regulation UTRs and mRNA stability Translation initiation sites and UTRs UTRs and microRNA-mediated gene regulation


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 87%|████████████████████████████████████████████████████████████████████████████████████████████▋              | 65/75 [04:55<00:46,  4.68s/it]

Alternative splicing and heart disease Cardiac-specific alternative splicing events Splice variants and cardiac function Alternative splicing in cardiovascular development Dysregulation of alternative splicing in heart disease


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 88%|██████████████████████████████████████████████████████████████████████████████████████████████▏            | 66/75 [05:01<00:43,  4.87s/it]

Intepirdine and Alzheimer's disease Efficacy of Intepirdine in Alzheimer's disease Clinical trials of Intepirdine for Alzheimer's disease Mechanism of action of Intepirdine in Alzheimer's disease Safety profile of Intepirdine in Alzheimer's disease


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 89%|███████████████████████████████████████████████████████████████████████████████████████████████▌           | 67/75 [05:06<00:40,  5.08s/it]

Gal-Nac conjugated siRNA and human cells Targeting liver cells with Gal-Nac conjugated siRNA or ASO Gal-Nac conjugated siRNA delivery to hepatocytes Gal-Nac conjugated ASO therapy for liver diseases


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 91%|█████████████████████████████████████████████████████████████████████████████████████████████████          | 68/75 [05:11<00:33,  4.85s/it]

Rebound COVID-19 Reinfection COVID-19 COVID-19 recurrence SARS-CoV-2 reactivation COVID-19 relapse


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 92%|██████████████████████████████████████████████████████████████████████████████████████████████████▍        | 69/75 [05:16<00:30,  5.03s/it]

Zanubrutinib clinical applications Zanubrutinib FDA approval Zanubrutinib indications in the United States Zanubrutinib uses in cancer therapy


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 93%|███████████████████████████████████████████████████████████████████████████████████████████████████▊       | 70/75 [05:21<00:24,  4.97s/it]

Rilonacept pericarditis Rilonacept efficacy in pericarditis Rilonacept clinical trials pericarditis Rilonacept mechanism of action pericarditis


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 95%|█████████████████████████████████████████████████████████████████████████████████████████████████████▎     | 71/75 [05:26<00:20,  5.02s/it]

1p19q co-deletion tumors 1p19q co-deletion and glioma 1p19q co-deletion and oligodendroglioma 1p19q co-deletion and anaplastic oligodendroglioma 1p19q co-deletion and mixed oligoastrocytoma


Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

 96%|██████████████████████████████████████████████████████████████████████████████████████████████████████▋    | 72/75 [05:30<00:13,  4.60s/it]

Xist gene function X-chromosome inactivation mechanism Xist gene product Xist RNA structure and function


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 97%|████████████████████████████████████████████████████████████████████████████████████████████████████████▏  | 73/75 [05:34<00:08,  4.41s/it]

Microexon definition Microexon function Microexon splicing Microexon and neurodevelopmental disorders


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 74/75 [05:38<00:04,  4.32s/it]

Tebentafusp clinical trials uveal melanoma Tebentafusp efficacy uveal melanoma Immunotherapy uveal melanoma Antigen-specific T-cell therapy uveal melanoma


Inferencing Samples:   0%|          | 0/2 [00:00<?, ? Batches/s]

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 75/75 [05:42<00:00,  4.57s/it]

Fenton reaction mechanism Hydroxyl radical generation in Fenton reaction Fenton reaction and oxidative stress Iron-catalyzed reactions in biological systems





In [18]:
# Inspect the test-batch dict that was read near the top of the notebook with
# json.load(open(test_batch_doc)). As the last expression of the cell it is
# echoed as the cell output. In the saved output, the entries under
# 'questions' carry the keys 'id', 'type', 'body', 'snippets' and 'documents'.
# NOTE(review): this echoes the whole structure; consider previewing a slice
# (e.g. test_batch_json['questions'][:5]) to keep the notebook output short.
test_batch_json

{'questions': [{'id': '63f03006f36125a426000018',
   'type': 'summary',
   'body': 'What were the results of the DESTINY-Breast04 Trial?',
   'snippets': [],
   'documents': []},
  {'id': '641ad941690f196b5100003d',
   'type': 'yesno',
   'body': 'Are LOQ and LOD the same?',
   'snippets': [],
   'documents': []},
  {'id': '642a029d57b1c7a315000011',
   'type': 'summary',
   'body': 'Histone acetyltransferases (HATs) transfer acetyl groups from acetyl CoA to lysine residues on histones. What is the purpose of this transfer?',
   'snippets': [],
   'documents': []},
  {'id': '6432fc0457b1c7a31500001f',
   'type': 'factoid',
   'body': 'Which gene is most frequently mutated in hereditary angioedema ?',
   'snippets': [],
   'documents': []},
  {'id': '64178edb690f196b51000025',
   'type': 'summary',
   'body': "What is casimersen's mechanism of action?",
   'snippets': [],
   'documents': []},
  {'id': '6402bf2b201352f04a000007',
   'type': 'summary',
   'body': 'What is the mechanism of

Downloading:   0%|          | 0.00/1.43k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/4.45k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/691 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/124 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/767 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/433M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/300 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/669k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/412 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/229 [00:00<?, ?B/s]

[[-0.7673091  -0.02201538  0.21489295 ...  0.4305086   0.9445836
   0.3237879 ]
 [ 0.04580967  0.2503939   1.1280602  ...  0.34196827  0.4122522
   0.14661177]]
