In [None]:
!pip install jupyter pandas tqdm transformers elasticsearch openpyxl telepot python-telegram-bot datasets sentencepiece

In [1]:
import json
import glob
import numpy as np
import os
import pandas as pd
from pandas import ExcelWriter
import re
import torch
import argparse
import concurrent.futures
from elasticsearch import Elasticsearch,helpers
from tqdm import tqdm
from transformers import BertForQuestionAnswering,BertTokenizer
from transformers import BartTokenizer, BartForConditionalGeneration
from xml.etree.ElementTree import parse
from IPython.core.display import display, HTML
from transformers import PegasusTokenizer, BigBirdPegasusForConditionalGeneration, BigBirdPegasusConfig, AutoTokenizer
from transformers import ElectraTokenizer, ElectraForQuestionAnswering

In [2]:
TORCH_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
QA_MODEL = None
QA_TOKENIZER = None
SUMMARY_TOKENIZER = None
SUMMARY_MODEL = None
tqdm.pandas()
SEP_TOKEN = "[SEP]"
CLS_TOKEN = "[CLS]"
PIECE_MAX_SIZE = 512

In [3]:
!docker run -dt -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" docker.elastic.co/elasticsearch/elasticsearch:7.12.1

68c399188c0cf1cbcfedfdf60bce61e9b1a6f2cc38f5806d3091e8164ae71989
docker: Error response from daemon: driver failed programming external connectivity on endpoint epic_chaum (01898ad015c20b15718f4f02600814ac6274134f8577358e46531d22932afe32): Bind for 0.0.0.0:9300 failed: port is already allocated.


In [8]:
ES_CLIENT = Elasticsearch(http_compress=True)
INDEX_NAME = "rgbm"
# INDEX_NAME = "krog"


def generator_doc(df):
    for index, document in df.iterrows():
        yield {
            "_index": INDEX_NAME,
            "_type": "_doc",
            "_id": index,
            "_source": document.to_dict()
        }
#    raise StopIteration


def load():
    global QA_TOKENIZER, QA_MODEL, SUMMARY_TOKENIZER, SUMMARY_MODEL
    #QA_TOKENIZER = BertTokenizer.from_pretrained("E:\pubmedbert_fine_tuning\src\model")
    #QA_MODEL = BertForQuestionAnswering.from_pretrained("E:\pubmedbert_fine_tuning\src\model")
    #QA_TOKENIZER = BertTokenizer.from_pretrained('bioasq-biobert/')
    #QA_MODEL = BertForQuestionAnswering.from_pretrained('bioasq-biobert/')
    
    #QA_TOKENIZER = ElectraTokenizer.from_pretrained('/mnt/g/2021_NLP_env/BioM-ELECTRA-Large-SQuAD2-BioASQ8B')
    #QA_MODEL = ElectraForQuestionAnswering.from_pretrained('/mnt/g/2021_NLP_env/BioM-ELECTRA-Large-SQuAD2-BioASQ8B')

    #QA_TOKENIZER = BertTokenizer.from_pretrained('gdario/biobert_bioasq')
    #QA_MODEL = BertForQuestionAnswering.from_pretrained('gdario/biobert_bioasq')

    QA_TOKENIZER = BertTokenizer.from_pretrained('./sapbert-squad2-checkpoint-4088')
    QA_MODEL = BertForQuestionAnswering.from_pretrained('./sapbert-squad2-checkpoint-4088')
    
    #QA_TOKENIZER = BertTokenizer.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')
    #QA_MODEL = BertForQuestionAnswering.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')

    #QA_TOKENIZER = BertTokenizer.from_pretrained('ktrapeznikov/biobert_v1.1_pubmed_squad_v2')
    #QA_MODEL = BertForQuestionAnswering.from_pretrained('ktrapeznikov/biobert_v1.1_pubmed_squad_v2')

    #pip install git + https://github.com/vasudevgupta7/transformers.git@ add_big_bird
    QA_MODEL.to(TORCH_DEVICE)
    QA_MODEL.eval()

    bart_model = "bart-large-cnn/"
    # bart_model = "E:/2021 NLP/bart-large-cnn"
    SUMMARY_TOKENIZER = BartTokenizer.from_pretrained(bart_model)
    SUMMARY_MODEL = BartForConditionalGeneration.from_pretrained(bart_model)
    SUMMARY_MODEL.to(TORCH_DEVICE)
    SUMMARY_MODEL.eval()
    
    
#     SUMMARY_TOKENIZER = AutoTokenizer.from_pretrained("google/bigbird-pegasus-large-pubmed", truncation=True)
#     # by default encoder-attention is `block_sparse` with num_random_blocks=3, block_size=64
#     SUMMARY_MODEL = BigBirdPegasusForConditionalGeneration.from_pretrained("google/bigbird-pegasus-large-pubmed")
#     SUMMARY_MODEL.to(TORCH_DEVICE)
#     SUMMARY_MODEL.eval()

    print("QA and BART model Loaded")



def get_text(INDEX_NAME):

    if INDEX_NAME=="rgbm":
        df = pd.read_excel('fulltext_rGBM.xlsx', engine='openpyxl')
        df = df[["title", "text", "abstract", "url"]]
        df = df.replace(np.nan, '', regex=True)

    elif INDEX_NAME=="krog":
        df = pd.read_excel('fulltext_KROG.xlsx', engine='openpyxl')
        df = df[["title", "text", "abstract", "url"]]
        df = df.replace(np.nan, '', regex=True)

    #df = df.replace(["\r", r"Purpose", r"Background", r"Objective", r"Objectives", "Simple Summary", r"Abstract", r"Keyword",
    #     r"Method", r"Methods", r"Results", r"Conclusions", r"Result", r"Conclusion"], value="", regex=True)

    print("No. of ",INDEX_NAME, " paragraphs: ", df.shape[0])
    
    return df

def reconstruct_text(tokens, start=0, stop=-1):
    tokens = tokens[start: stop]
    if SEP_TOKEN in tokens:
        tokens = tokens[tokens.index(SEP_TOKEN) + 1:]
    txt = " ".join(tokens)
    txt = txt.replace(" ##", '')
    txt = txt.replace("##", '')
    txt = txt.strip()
    txt = txt.replace(" .", ".")
    txt = txt.replace("( ", "(")
    txt = txt.replace(" )", ")")
    txt = txt.replace(" - ", "-")
    txt_list = txt.split(" , ")
    token_count = len(txt_list)
    if token_count == 1:
        return txt_list[0]
    new_txt = []
    for i, t in enumerate(txt_list):
        if i < token_count - 1:
            if t[-1].isdigit() and txt_list[i + 1][0].isdigit():
                new_txt += [t, ',']
            else:
                new_txt += [t, ', ']
        else:
            new_txt += [t]
    return "".join(new_txt)

def get_input_ids(piece_max_size, words, question, input_ids_all):
    word_count = len(words)
    overlap = 1.1
    value = len(input_ids_all) * overlap
    # TODO(ANDREW) max를 4로 제한 할 필요가 있는지 확인 필요
    multiple = min(int(np.floor(value / piece_max_size)), 4)
    if multiple == 0:
        return [input_ids_all]

    search_word_count = int(np.ceil(word_count / (multiple + 1)))
    piece_size = int(np.ceil(word_count / multiple))
    so = search_word_count * overlap
    half_of_so = int(so / 2)
    half_of_po = int(piece_size * overlap / 2)
    input_ids = list()
    for i in range(multiple + 1):
        piece = None
        if i == 0:
            piece = " ".join(words[:int(so)])
        elif i == multiple:
            piece = " ".join(words[-int(so):])
        else:
            boundary = piece_size * i
            if multiple == 4:
                piece = " ".join(words[boundary - half_of_so: boundary + half_of_po])
            else:
                piece = " ".join(words[boundary - half_of_so: boundary + half_of_so])
        input_ids.append(QA_TOKENIZER.encode(question, piece))
    return input_ids

def make_bert_squad_prediction(document, question):
    words = document.split()
    input_ids_all = QA_TOKENIZER.encode(question, document)
    tokens_all = QA_TOKENIZER.convert_ids_to_tokens(input_ids_all)

    input_ids = get_input_ids(PIECE_MAX_SIZE, words, question, input_ids_all)

    absTooLong = False
    answers = []
    cons = []
    for id in input_ids:
        tokens = QA_TOKENIZER.convert_ids_to_tokens(id)
        sep_index = id.index(QA_TOKENIZER.sep_token_id)
        num_seg_a = sep_index + 1
        n_ids = len(id)
        num_seg_b = n_ids - num_seg_a
        segment_ids = [0] * num_seg_a + [1] * num_seg_b

        if n_ids >= PIECE_MAX_SIZE:
            # this cuts off the text if its more than 512 words so it fits in model space
            # need run multiple inferences for longer text. add to the todo
            print(
                f"****** Document is {len(words)} words long. There are {n_ids} tokens")
            absTooLong = True
            id = id[:PIECE_MAX_SIZE]
            segment_ids = segment_ids[:PIECE_MAX_SIZE]
        output = QA_MODEL(torch.tensor([id]).to(TORCH_DEVICE),
                          token_type_ids=torch.tensor([segment_ids]).to(TORCH_DEVICE))

        start_scores = output.start_logits[:, 1:-1]
        end_scores = output.end_logits[:, 1:-1]
        answer_start = torch.argmax(start_scores)
        answer_end = torch.argmax(end_scores)
        answer = reconstruct_text(tokens, answer_start, answer_end + 2)

        if answer.startswith(". ") or answer.startswith(", "):
            answer = answer[2:]

        c = start_scores[0, answer_start].item() + end_scores[0, answer_end].item()
        answers.append(answer)
        cons.append(c)

    max_confidence = max(cons)
    index = [i for i, j in enumerate(cons) if j == max_confidence][0]

    confidence = cons[index]
    answer = answers[index]

    sep_index = tokens_all.index(SEP_TOKEN)
    full_txt_tokens = tokens_all[sep_index + 1:]

    abs_returned = reconstruct_text(full_txt_tokens)

    ans = dict()
    ans["answer"] = answer
    if answer.startswith(CLS_TOKEN) or answer_end.item() < sep_index or answer.endswith(SEP_TOKEN):
        ans["confidence"] = -1000000
    else:
        ans["confidence"] = confidence
    ans["abstract_bert"] = abs_returned
    ans["abs_too_long"] = absTooLong
    return ans

def search_abstracts(df, question):
    abstract_results = {}
    k = 0
    
## Abstract or text?
    for abstract in df["text"]:
        if not abstract:
            continue
        k += 1
        ans = make_bert_squad_prediction(abstract, question)

        if not ans["answer"]:
            continue
        confidence = ans["confidence"]
        # TODO(ANDREW) 동일한 confidence 를 가지는 대답이 이미 있다면 어떻게 처리 해야 하는지?
        abstract_results[confidence] = {}
        abstract_results[confidence]["answer"] = ans["answer"]
        abstract_results[confidence]["abstract_bert"] = ans["abstract_bert"]
        abstract_results[confidence]["idx"] = k
        abstract_results[confidence]["abs_too_long"] = ans["abs_too_long"]

        confidences = list(abstract_results.keys())

    if not confidences:
        return abstract_results

    max_score = max(confidences)
    exp_scores = []
    for c in confidences:
        s = np.exp(c - max_score)
        exp_scores.append(s)
    total = sum(exp_scores)
    abstract_normalization_results = dict()
    for i, c in enumerate(confidences):
        abstract_normalization_results[exp_scores[i] / total] = abstract_results[c]
    return abstract_normalization_results
    #return abstract_normalization_results[confidences]

def display_results(output_path, selected_df, answers, question):
    question_HTML = f'<div style="font-family: Times New Roman; font-size: 28px; padding-bottom:28px"><b>Query</b>: {question}</div>'

    #confidence=BM25 score
    confidence = list(selected_df['score'])

    df = pd.DataFrame(columns=["Answer with Highlights", "BM25 Score", "Title/Link"])
    ranked_aswers = []

    for index in answers:
        answer = answers[index]
        idx = answer["idx"] - 1
        doi = f'<a href="http://{selected_df.loc[idx, "url"]}" target="_blank">{selected_df.loc[idx, "title"]}</a>'
        full_abs = answer["abstract_bert"]
        bert_ans = answer["answer"]

        split_abs = full_abs.split(bert_ans)
        sentance_beginning = split_abs[0][split_abs[0].rfind('.') + 1:]
        if len(split_abs) == 1:
            sentance_end = ""
        else:
            sentance_end_pos = split_abs[1].find('. ') + 1
            if sentance_end_pos == 0:
                sentance_end = split_abs[1]
            else:
                sentance_end = split_abs[1][:sentance_end_pos]

        sentance_html = f"<div>{sentance_beginning} <font color='red'>{bert_ans}</font> {sentance_end}</div>"
        df = df.append({"Answer with Highlights": sentance_html, "BM25 Score": {selected_df.loc[idx, "score"]},
                        "Title/Link": doi}, ignore_index=True)

        ranked_aswers.append(" ".join([sentance_beginning + bert_ans + sentance_end]))

     ## try generating an exacutive summary with extractive summarizer
    number_to_sum = 10
    all_answers = " ".join(ranked_aswers[:number_to_sum]).replace("\n", "")

    answers_input_ids = SUMMARY_TOKENIZER.batch_encode_plus([all_answers], return_tensors='pt',
                                                            max_length=PIECE_MAX_SIZE)["input_ids"].to(TORCH_DEVICE)
    summary_ids = SUMMARY_MODEL.generate(answers_input_ids,
                                         num_beams=2,
                                         length_penalty=1.5,
                                         max_length=PIECE_MAX_SIZE,early_stopping=True,
                                         min_length=128)
    exec_sum = SUMMARY_TOKENIZER.decode(summary_ids.squeeze(), skip_special_tokens=True)
    execSum_HTML = f'<div style="font-family: Arial; font-size: 15px; margin-bottom:1pt"><b>BART Abstractive Summary:</b>: {exec_sum}</div>'
    warning_HTML = '<div style="font-family: Arial; font-size: 12px; padding-bottom:12px; color:#CCCC00; margin-top:1pt"> This is an autogenerated summary. Please review the references.</div>'

                                         
    html_text = f"{question_HTML}\n{execSum_HTML}\n{warning_HTML}\n{df.to_html(render_links=True, escape=False)}"
    with open(os.path.join(output_path, re.sub('[\/:*?"<>|]','',question)+".html"), "w", -1, "utf-8",) as html_file:
        html_file.write(html_text)

    # to make powerbi result                                                                   
    powerbi_text = pd.DataFrame(data={'q': [question_HTML], 'a': [execSum_HTML], 'ref': [df.to_html(render_links=True, escape=False)]})
    with ExcelWriter(os.path.join(output_path, re.sub('[\/:*?"<>|]','',question)+".xlsx")) as writer: 
        powerbi_text.to_excel(writer) 
                                         
    display(HTML(html_text))

def make_db(INDEX_NAME):
    df = get_text(INDEX_NAME)
    helpers.bulk(ES_CLIENT, generator_doc(df))

def service(output_path, query):

    os.makedirs(output_path, exist_ok=True)
    response = ES_CLIENT.search(index=INDEX_NAME, body={"size": 10, "query": {"match": {"text": query}}})['hits']['hits']
    score = [result['_score'] for result in response]
    response = [result['_source'] for result in response]

    df = pd.DataFrame(response)
    #BM25 score to score
    df['score'] = score

    answers = search_abstracts(df, query)
    display_results(output_path, df, answers, query)

In [5]:
## For rGBM DB
make_db(INDEX_NAME)

No. of  rgbm  paragraphs:  9950




In [9]:
load()

  f"Please make sure the config includes `forced_bos_token_id={self.bos_token_id}` in future versions."


QA and BART model Loaded


In [10]:
output_path="./QA_rGBM"

query = "what is a reirradiation dose/fractions for recurrent glioblastoma?"
service(output_path,query)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Unnamed: 0,Answer with Highlights,BM25 Score,Title/Link
0,"the most common approach involves the use of fractionated stereotactic radiotherapy with or without intensity modulation and a median total dose of 30-36 gy. in contrast, stereotactic radiosurgery (the administration of one single fraction), which has the theoretical advantage of sparing normal tissue, is rarely used in glioblastoma because of the poorly defined target volume. interestingly, none of the reirradiation schedules has ever been looked at in a prospective or controlled fashion. in fact, the recent apg101 trial provided no sign of efficacy for reirradiation at × 2 gy in recurrent glioblastoma patients commonly deemed best candidates for that intervention.",{18.899527},3064_Standards of care for treatment of recurrent glioblastoma-are we there yet?
1,"moreover, in the retrospective studies, hypofractionated stereotactic re-irradiation therapy alone also demonstrated comparable survival benefit (a median survival time of about 9-11 months from recurrence).",{16.261627},4785_Recent Advances in Oncolytic Virotherapy and Immunotherapy for Glioblastoma: A Glimmer of Hope in the Search for an Effective Therapy?
2,bevacizumab was also studied in combination with hypofractionated stereotactic radiotherapy in a small pilot study.,{15.838635},3094_Standards of care for treatment of recurrent glioblastoma-are we there yet?
3,"in a pilot trial of 25 patients with recurrent malignant glioma, 20 of whom had glioblastoma, bevacizumab was combined with hypofractionated re-irradiation with 30 gy delivered in 5 fractions.",{15.576691},2446_Current Role of Anti-Angiogenic Strategies for Glioblastoma
4,"specifically, increasing the dose of bpa and administering it over a longer time period or combining bnct with a photon boost, as has been carried out in japan, have resulted in the best survival data obtained to date using bnct to treat patients with gliomas.",{14.455765},4390_Current status of boron neutron capture therapy of high grade gliomas and recurrent head and neck cancer
5,"treated recurrent hgg using srt combined with gefitinib and reported that a dose of 36 gy in three fractions was well tolerated, with gefitinib at a daily dose of 250 mg.",{14.184645},6036_Advances in radiotherapy and comprehensive treatment of high-grade glioma: immunotherapy and tumor-treating fields
6,"although srs as a boost to a standard course of fractionated radiation was not found to be beneficial in patients with newly diagnosed malignant gliomas, its use in the recurrent disease setting produces responses and may lengthen the time to disease progression in selected patients.",{14.007039},9440_Exciting New Advances in Neuro-Oncology The Avenue to a Cure for Malignant Glioma
7,investigated survival outcomes and safety in patients with recurrent hgg using hfsrt .,{13.175507},6031_Advances in radiotherapy and comprehensive treatment of high-grade glioma: immunotherapy and tumor-treating fields
8,"temsirolimus was well tolerated in patients with recurrent glioblastoma multiform (gbm) when given a weekly dose of mg, albeit there was no sign of efficacy in these patients. temsirolimus administered weekly at the dose of mg / m 2 also did not show efficacy in children with high-grade gliomas. despite the lack of clinical efficacy, the high tolerability of temsirolimus has made it desirable for studies in combination with chemotherapeutics, vascular endothelial growth factor (vegf) inhibitors, and other molecular targeted therapies. however, clinical trials have not shown promising combinational therapies of temsirolimus with bevacizumab (vegf inhibitor), sorafenib (raf inhibitor), erlotinib (egfr inhibitor), or radiation therapy. most of these studies have failed on account that temsirolimus doses in combination with other therapies have a lower maximum tolerated dose then what is clinically advantageous.",{13.021731},4419_Recent insights into the pathophysiology of mTOR pathway dysregulation
9,"what is the current role of nitrosoureas, alone or in combination ? does efficacy outweigh their toxicity profile ? we will also address the efficacy of the varied metronomic tmz dosing regimens for rechallenge (ie, patients re-exposed to tmz who had been previously treated, or patients switched to alternative dosing regimens of tmz following signs of relapse or progression on standard tmz therapeutic regimens) as well as for tmz-naive patients.",{12.423975},3060_Standards of care for treatment of recurrent glioblastoma-are we there yet?


In [11]:
query = "what is median overall survival in patients with recurrent glioblastoma?"
service(output_path,query)

Unnamed: 0,Answer with Highlights,BM25 Score,Title/Link
0,"in different randomized trials, patients with recurrent glioblastoma receiving bevacizumab alone had a median overall survival of about 7-10 months from recurrence, and this efficacy was comparable to lomustine monotherapy.",{20.114025},4785_Recent Advances in Oncolytic Virotherapy and Immunotherapy for Glioblastoma: A Glimmer of Hope in the Search for an Effective Therapy?
1,"for patients with recurrent glioblastoma, chemotherapy regimens are associated with overall response rates of 4-9 %, 6-month progression-free survival (pfs) of 10-19 %, and median overall survival durations of 5-10 months .",{19.42046},4645_Antibody-drug conjugates in glioblastoma therapy: the right drugs to the right cells
2,"5 and 7. 5 months (6), respectively.",{18.46055},4250_Efficacy and safety of bevacizumab for the treatment of glioblastoma (Review)
3,"preliminary results from the phase i study were highly exciting, demonstrating a median a median overall survival of 38. 4 months and 5-year overall survival rate was 50 %.",{18.196026},275_Investigational new drugs for brain cancer
4,fda-approved treatment options remain few and the prognosis remains dismal with a median survival of 14. 6 months and a 5-year-survival rate of 9.,{17.97022},"1801_A phase 2 study of the first imipridone ONC201, a selective DRD2 antagonist for oncology, administered every three weeks in recurrent glioblastoma"
5,"different oncolytic viruses have been tested in progressive / recurrent glioblastoma / glioma and proved feasibility and safety, but not efficacy, in terms of median overall survival in randomized trials until now.",{17.304094},4789_Recent Advances in Oncolytic Virotherapy and Immunotherapy for Glioblastoma: A Glimmer of Hope in the Search for an Effective Therapy?
6,8 months and median overall survival of 9 months .,{17.251062},"3392_Bevacizumab for glioblastoma: current indications, surgical implications, and future directions"
7,the median survival time of boron neutron capture therapy (bnct) group (blue line) is 15. 6 months .,{16.879725},4385_Current status of boron neutron capture therapy of high grade gliomas and recurrent head and neck cancer
8,"even with these treatments, median overall survival after recurrence is 6. 2 months .",{16.8114},9130_Current Challenges and Opportunities in Treating Glioblastoma


In [12]:
query = "Are immune checkpoint inhibtors are available for patients with recurrent glioblastoma?"
service(output_path,query)

Unnamed: 0,Answer with Highlights,BM25 Score,Title/Link
0,"several ongoing clinical trials are evaluating immune checkpoint inhibitors in glioblastoma, including trials in recurrent glioblastoma.",{24.822868},4042_Prospective Feasibility Trial for Genomics-Informed Treatment in Recurrent and Progressive Glioblastoma
1,there are many ongoing clinical trials with immune checkpoint inhibitors in patients with primary and recurrent glioma / glioblastoma.,{22.17886},4786_Recent Advances in Oncolytic Virotherapy and Immunotherapy for Glioblastoma: A Glimmer of Hope in the Search for an Effective Therapy?
2,"challenges in the design and conduct of clinical trials for immunotherapies are numerous, particularly in trials involving patients with glioblastoma : different measures of response are required for checkpoint inhibitors, and the management of immune-related adverse events in the cns are a concern. no standardized and validated assays to measure immune response exist , and the current standard of care for glioblastoma-radiotherapy, chemotherapy and supportive steroid use-can have immunosuppressive effects that could counteract the stimulatory effects of checkpoint inhibitors and thereby confound findings.",{20.275654},1951_Prospects of immune checkpoint modulators in the treatment of glioblastoma
3,"although initial clinical results in patients with glioblastoma (gbm) were disappointing, recently published results have demonstrated a potential survival benefit in patients with recurrent gbm treated with neoadjuvant programmed cell death protein 1 blockade.",{20.208551},8765_T lymphocyte-targeted immune checkpoint modulation in glioma
4,"as immunotherapy becomes more widely available, the potential increases for both synergies and adverse interactions between conventional glioblastoma therapies and immune checkpoint inhibitors. thus, questions yet to be resolved include how to combine checkpoint inhibitors with current standards of care for glioblastoma-radiotherapy, temozolomide, bevacizumab and corticosteroids-and whether the use of these agents is associated with positive or negative interactions.",{20.033327},1945_Prospects of immune checkpoint modulators in the treatment of glioblastoma
5,"despite their successes in other solid malignancies, immune-checkpoint inhibitors such as those targeting the pd-1 / pd-l1 axis have largely failed in large-scale clinical trials for glioblastoma.",{19.97369},1461_Molecular Mechanisms of Treatment Resistance in Glioblastoma
6,"given that checkpoint inhibitors target immune responses and are critical in maintenance of the tumor ecosystem, checkpoint inhibitors and immunostimulatory cytokines are appealing as treatments for gbm in conjunction with chemotherapy, radiation therapy and a myriad of receptor-targeted therapies like cytotoxins and viral gene therapy.",{19.74332},366_Receptor-Targeted Glial Brain Tumor Therapies
7,"unfortunately, there are currently limited data on immune checkpoint inhibitors in other types of glioma such as oligodendroglioma or astrocytoma.",{19.475023},8789_T lymphocyte-targeted immune checkpoint modulation in glioma


In [13]:
query = "What molecular targets are potentially promising for recurrent glioblastoma?"
service(output_path,query)

Unnamed: 0,Answer with Highlights,BM25 Score,Title/Link
0,"similarly, targeting c-met overexpression (frequency of 13 % in glioblastoma) and amplification (5 %) with two phase ii trials in recurrent glioblastoma, failed to demonstrate improved outcomes.",{16.614988},3032_The Landscape of Novel Therapeutics and Challenges in Glioblastoma Multiforme: Contemporary State and Future Directions
1,molecular therapies that targeted rtks are promising therapeutic strategies for glioblastoma tumors.,{15.944869},665_In Vitro and In Vivo Analysis of RTK Inhibitor Efficacy and Identification of Its Novel Targets in Glioblastomas
2,both egfr and egfrviii strongly promote gliomagenesis and are promising potential targets for therapy.,{14.733889},7082_Blockade of Glioma Proliferation Through Allosteric Inhibition of JAK2
3,"thus, the molecular target expression status, as determined at the time of primary resection, may not necessarily present rational treatment clues for the care of recurrent gbm that occurs 6-9 months later.",{14.724231},6977_Longitudinal heterogeneity in glioblastoma: moving targets in recurrent versus primary tumors
4,"a simulation example of how one might perform such a task is illustrated by a recent paper by the group 107 in which they propose how to take a "" canonical "" computational model of the disease biochemistry, adjust it to represent (to the best of one ' s ability) what is known about the particular patient, analyze the model to understand points of fragility in the biochemical network, which correspond to good drug targets , and then simulate millions of drug combinations, doses, and timings to predict the best regimens.",{14.199294},2805_CNS Anticancer Drug Discovery and Development Conference White Paper
5,"however, clinical trials have not shown promising combinational therapies of temsirolimus with bevacizumab (vegf inhibitor), sorafenib (raf inhibitor), erlotinib (egfr inhibitor ), or radiation therapy.",{14.119795},4419_Recent insights into the pathophysiology of mTOR pathway dysregulation
6,"ttfields interferes with α-/ β-tubulin and septin 2,6,7 heterotrimer function in tumor cells during mitosis.",{14.014896},2534_An Overview of Alternating Electric Fields Therapy (NovoTTF Therapy) for the Treatment of Malignant Glioma
7,"as previously mentioned, the lpa 1 antagonist ki16425 (kirin brewery co., takasaki, japan) effectively suppresses the lpa-induced motility of glioblastoma cells.",{13.791601},8822_The autotaxin-lysophosphatidic acid-lysophosphatidic acid receptor cascade: proposal of a novel potential therapeutic target for treating glioblastoma multiforme
8,"this study was designed to (i) provide in vivo pharmacokinetic data for nonenhancing and enhancing tumor tissue, (ii) identify the molecular effects of ribociclib in glioblastoma patients, and (iii) interrogate putative mechanisms of resistance in tumor recurrences .",{13.486381},2459_A Phase 0 Trial of Ribociclib in Recurrent Glioblastoma Patients Incorporating a Tumor Pharmacodynamic- and Pharmacokinetic-Guided Expansion Cohort
9,"among the promising therapeutic targets in early clinical development are met, fibroblast growth factor receptor (fgfr), heat shock protein-90 (hsp-90), hypoxia-inducible factor [UNK] ([UNK]), cyclin-dependent kinases, and many others .",{13.480846},9399_Exciting New Advances in Neuro-Oncology The Avenue to a Cure for Malignant Glioma


In [77]:
query = "Is MGMT status associated with the incidence of recurrent glioblastoma?"
service(output_path,query)

Unnamed: 0,Answer with Highlights,BM25 Score,Title/Link
0,two important issues are evident regarding mgmt status and recurrent glioblastoma : (i) whether changes in status occur between primary and recurrent glioblastoma and (ii) whether positive status correlates with better outcome following recurrent disease .,{20.188717},3096_Standards of care for treatment of recurrent glioblastoma-are we there yet?
1,the predictive value of mgmt in recurrent glioblastoma is not yet established.,{20.098183},3855_Using the molecular classification of glioblastoma to inform personalized treatment
2,"in contrast, several other studies describe the absence of significant pfs and os differences with regard to the methylation status of the mgmt promoter in patients with recurrent disease.",{19.802711},3100_Standards of care for treatment of recurrent glioblastoma-are we there yet?
3,"how is recurrence best determined ? which patients qualify for second surgery or repeat radiotherapy ? which patients should not be retreated at all ? how should efficacy of treatment for recurrent glioblastoma be assessed in clinical trials ? is the 6-month pfs rate (pfs6) the optimal end point ? also, the prognostic value of the mgmt status in patients with recurrent glioblastoma is not well defined .",{18.667671},3061_Standards of care for treatment of recurrent glioblastoma-are we there yet?
4,"forthcoming is further insight regarding which patients should undergo a second resection or radiotherapy procedure, how to best use tmz and bevacizumab therapy, and the value of mgmt status assessment in the recurrent setting .",{18.23239},3109_Standards of care for treatment of recurrent glioblastoma-are we there yet?
5,"only few studies in pediatric populations have been reported, showing a similar methylation status in children and adults and significant correlation between the methylation status and clinical outcome .",{16.874937},9274_Molecular Biology in Pediatric High-Grade Glioma: Impact on Prognosis and Treatment
6,"clinical pathology evaluation demonstrated recurrent glioblastoma, methylation of the mgmt promoter, idh1 mutation (p.",{16.680695},4039_Prospective Feasibility Trial for Genomics-Informed Treatment in Recurrent and Progressive Glioblastoma
7,"in a prospective report conducted from 2005 to 2007 that included 22 patients who had recurrent glioblastoma and underwent surgery with carmustine wafer implantation, methylated mgmt status determined by msp was correlated with better outcome. median pfs and os rates in methylated patients were 8. 9 and 14. 2 months, respectively, vs 2. 7 and 9. 2 months in unmethylated patients (p ≤. 031 for both end points). notably, this small study also found that mgmt status did not appear to change between primary and recurrent tumors.",{16.10975},3099_Standards of care for treatment of recurrent glioblastoma-are we there yet?
8,"it is positively associated with glioblastoma sensitivity to alkylating agents, such as temozolomide.",{15.745333},9756_Molecular Heterogeneity of Glioblastoma and its Clinical Relevance


In [14]:
query = "What is the pattern of care in recurrent glioblastoma?"
service(output_path,query)

Unnamed: 0,Answer with Highlights,BM25 Score,Title/Link
0,"although several treatment strategies have been explored, there is no consensus standard of care to improve outcomes for patients with recurrent glioblastoma, and participation in clinical trials is encouraged.",{13.203476},4017_Prospective Feasibility Trial for Genomics-Informed Treatment in Recurrent and Progressive Glioblastoma
1,"how is recurrence best determined ? which patients qualify for second surgery or repeat radiotherapy ? which patients should not be retreated at all ? how should efficacy of treatment for recurrent glioblastoma be assessed in clinical trials ? is the 6-month pfs rate (pfs6) the optimal end point ? also, the prognostic value of the mgmt status in patients with recurrent glioblastoma is not well defined.",{12.910081},3061_Standards of care for treatment of recurrent glioblastoma-are we there yet?
2,"despite definitive data, standard of care guidance for managing patients with recurrent or progressive glioblastoma is evolving .",{12.374693},3109_Standards of care for treatment of recurrent glioblastoma-are we there yet?
3,the primary purpose of this paper is to discuss the role of second-line monotherapy and combination therapies for patients with recurrent or progressive glioblastoma.,{12.368051},3060_Standards of care for treatment of recurrent glioblastoma-are we there yet?
4,"recurrent glioblastoma is mainly treated with resection (if applicable) and chemotherapeutics (tmz, nitrosoureas, bevacizumab), achieving a 12-month overall survival in approximately 14 % of patients.",{11.748512},88_The Diagnostic and Therapeutic Role of Leptin and Its Receptor ObR in Glioblastoma Multiforme
5,"in summary, radiotherapy remains an important part of the standard-of-care treatment for patients with malignant gliomas.",{11.676434},9441_Exciting New Advances in Neuro-Oncology The Avenue to a Cure for Malignant Glioma
6,"vegf receptor (vegfr) inhibition by the tyrosine kinase inhibitor cediranib did also fail to meet the expectations and to prolong pfs and os compared to standard lomustine chemotherapy in recurrent glioblastoma, although some clinical benefits like reduction of steroid use could be detected in this phase iii study, not unlike what has been seen with bevacizumab.",{11.529398},9116_Anti-Angiogenics: Their Role in the Treatment of Glioblastoma
7,"the diffusely infiltrative pattern of progression, however, might be associated with a slower cause of the disease, as it has been suggested by radiological patterns of recurrence of glioblastomas treated with bevacizumab .",{11.215737},9117_Anti-Angiogenics: Their Role in the Treatment of Glioblastoma
8,", glioblastoma c6 cells lines) and some clinical studies have led to randomized-controlled trials incorporating inhibitors of the leptin / obr axis as adjunctive therapy with established protocols in newly diagnosed and recurrent glioblastoma.",{10.977602},100_The Diagnostic and Therapeutic Role of Leptin and Its Receptor ObR in Glioblastoma Multiforme
9,bevacizumab for glioblastoma what can we learn from patterns of progression ?,{10.924695},6747_Bevacizumab for glioblastoma What can we learn from patterns of progression?


In [15]:
!pip install xlrd 

import glob  
import sys

all_data = pd.DataFrame()  

for f in glob.glob(output_path + '/*.xlsx'): 
    df = pd.read_excel(f, engine='openpyxl')  
    all_data = all_data.append(df, ignore_index=True)

#데이터갯수확인  
print(all_data.shape)

#데이터 잘 들어오는지 확인  
all_data.head()

#파일저장  
all_data.to_excel(output_path+'/combined_results.xlsx', header=True, index=False)

Collecting xlrd
  Downloading xlrd-2.0.1-py2.py3-none-any.whl (96 kB)
[K     |████████████████████████████████| 96 kB 591 kB/s eta 0:00:01
[?25hInstalling collected packages: xlrd
Successfully installed xlrd-2.0.1
You should consider upgrading via the '/home/bigwiz83/miniconda3/envs/nlp/bin/python -m pip install --upgrade pip' command.[0m
(5, 4)


In [16]:
glob.glob(output_path + '/*.xlsx')

['./QA_rGBM/what is a reirradiation dosefractions for recurrent glioblastoma.xlsx',
 './QA_rGBM/what is median overall survival in patients with recurrent glioblastoma.xlsx',
 './QA_rGBM/Are immune checkpoint inhibtors are available for patients with recurrent glioblastoma.xlsx',
 './QA_rGBM/What molecular targets are potentially promising for recurrent glioblastoma.xlsx',
 './QA_rGBM/What is the pattern of care in recurrent glioblastoma.xlsx',
 './QA_rGBM/combined_results.xlsx']