In [1]:
import json
import pandas as pd
import requests

from bs4 import BeautifulSoup
from random import randint, random, seed

In [22]:
DATA_PATH = "../data/BioASQ-training8b/training8b.json"
with open(DATA_PATH, "r") as f:
    data = json.load(f)

bioasq_pos = pd.read_pickle("../data/bioasq_pos.pickle")

In [23]:
bioasq_pos.head()

Unnamed: 0,qa_id,type,answer_type,question,answer,label,answer_len
0,55031181e9bde69634000014,summary,ideal_answer,Is Hirschsprung disease a mendelian or a multi...,"Coding sequence mutations in RET, GDNF, EDNRB,...",2,407
1,55031181e9bde69634000014_0,summary,snippet_answer,Is Hirschsprung disease a mendelian or a multi...,Hirschsprung disease (HSCR) is a multifactoria...,1,227
2,55031181e9bde69634000014_1,summary,snippet_answer,Is Hirschsprung disease a mendelian or a multi...,"In this study, we review the identification of...",1,438
3,55031181e9bde69634000014_2,summary,snippet_answer,Is Hirschsprung disease a mendelian or a multi...,"Coding sequence mutations in e.g. RET, GDNF, E...",1,542
4,55031181e9bde69634000014_3,summary,snippet_answer,Is Hirschsprung disease a mendelian or a multi...,For almost all of the identified HSCR genes in...,1,338


In [4]:
data["questions"][:5]

[{'body': 'Is Hirschsprung disease a mendelian or a multifactorial disorder?',
  'documents': ['http://www.ncbi.nlm.nih.gov/pubmed/15858239',
   'http://www.ncbi.nlm.nih.gov/pubmed/15829955',
   'http://www.ncbi.nlm.nih.gov/pubmed/20598273',
   'http://www.ncbi.nlm.nih.gov/pubmed/6650562',
   'http://www.ncbi.nlm.nih.gov/pubmed/12239580',
   'http://www.ncbi.nlm.nih.gov/pubmed/21995290',
   'http://www.ncbi.nlm.nih.gov/pubmed/15617541',
   'http://www.ncbi.nlm.nih.gov/pubmed/23001136',
   'http://www.ncbi.nlm.nih.gov/pubmed/8896569'],
  'ideal_answer': ["Coding sequence mutations in RET, GDNF, EDNRB, EDN3, and SOX10 are involved in the development of Hirschsprung disease. The majority of these genes was shown to be related to Mendelian syndromic forms of Hirschsprung's disease, whereas the non-Mendelian inheritance of sporadic non-syndromic Hirschsprung disease proved to be complex; involvement of multiple loci was demonstrated in a multiplicative model."],
  'concepts': ['http://www.d

In [5]:
bsec = set()
esec = set()
for i in range(len(data["questions"])):
    for snip in data["questions"][i]["snippets"]:
#         print("------")
#         print(snip["beginSection"])
#         print(snip["endSection"])
        bsec.add(snip["beginSection"])
        esec.add(snip["endSection"])
#         print("------")
print(bsec)
print(esec)
print("The data from snippets was used to generate the training data.")

{'abstract', 'title', 'sections.0'}
{'abstract', 'title', 'sections.0'}
The data from snippets was used to generate the training data.


In [6]:
url = "https://www.ncbi.nlm.nih.gov/pubmed/21815707"
onset = 178
offset = 475

In [42]:
def get_abstract(url):
    page = requests.get(url)
    print(page)
    soup = BeautifulSoup(page.content, 'html.parser')
    
    div_with_abstr = soup.find(class_='abstr')
    # Need to reach inside (you can do it with a for-loop but as long as the
    # structure is unchanged, then this will be fine.)
    abstract = div_with_abstr.div.p.text
    
    return abstract

def eliminate_relevant_sent(abstract, onset, offset):
    return abstract[:onset] + abstract[offset:]

def split_into_sent(abstract):
    sent = abstract.split(". ")
    sent = [s+"." for s in sent]
    return sent

def create_neg_data_point(url, onset, offset):
    abstract = get_abstract(url)
    abstract = eliminate_relevant_sent(
        abstract, 
        onset, 
        offset
    )
    abstr_sent = split_into_sent(abstract)
    abstr_sent = list(filter(lambda sent: sent != ".", abstr_sent))
    abstr_sent = list(filter(lambda sent: sent != "..", abstr_sent))
    abstr_sent = list(filter(lambda sent: sent != "...", abstr_sent))
    
    ub = len(abstr_sent) - 1
    print(f"ub: {ub}")
    print(abstr_sent)
    return abstr_sent[randint(0, ub)]

def create_neg_dataset(bioasq_path):
    with open(bioasq_path, "r") as f:
        data = json.load(f)
        
    neg_label = 0
    strong_neg_label = -1
    data_size = len(data["questions"])

    neg_data_pts = []
    for i in range(len(data["questions"][:10])):
        id_counter = 1000
        data_id = data["questions"][i]["id"]
        data_type = data["questions"][i]["type"]
        question = data["questions"][i]["body"]
        
        for snip in data["questions"][i]["snippets"]:
            neg_sent = create_neg_data_point(
                snip["document"],
                snip["offsetInBeginSection"],
                snip["offsetInEndSection"]
            )
            data_id = data["questions"][i]["id"]
            neg_data_pts.append((
                data_id, f"{data_id}_{id_counter}", data_type, 
                "snippet_neg", question, neg_sent, neg_label
            ))
            id_counter += 1
    
        min_span = max(0, i - 50)
        max_span = min(data_size - 1, 50 + i)
        print(f"min_span = {min_span}")
        print(f"max_span = {max_span}")
        for _ in range(5):
            j = randint(i + 1, max_span) if (round(random()) or i == 0) else randint(min_span, i - 1)
            print(f"j: {j}")
            
            snip_external = data["questions"][j]["snippets"]
            snip_idx = randint(0, len(snip_external) - 1)
            
            print(f"snip_idx: {snip_idx}")
            
            strong_neg_sent = create_neg_data_point(
                snip_external[snip_idx]["document"],
                snip_external[snip_idx]["offsetInBeginSection"],
                snip_external[snip_idx]["offsetInEndSection"]
            )
            neg_data_pts.append((
                data_id, f"{data_id}_{id_counter}", data_type, 
                "strong_snippet_neg", question, strong_neg_sent, strong_neg_label
            ))
            id_counter += 1
            
    return neg_data_pts

In [43]:
data["questions"][25]["snippets"]

[{'offsetInBeginSection': 0,
  'offsetInEndSection': 246,
  'text': 'Calsequestrin (CSQ) is a Ca(2+) storage protein that interacts with triadin (TRN), the ryanodine receptor (RyR), and junctin (JUN) to form a macromolecular tetrameric Ca(2+) signaling complex in the cardiac junctional sarcoplasmic reticulum (SR).',
  'beginSection': 'abstract',
  'document': 'http://www.ncbi.nlm.nih.gov/pubmed/22427521',
  'endSection': 'abstract'},
 {'offsetInBeginSection': 402,
  'offsetInEndSection': 645,
  'text': 'The decrease of CASQ2 is associated with a reduction in the levels of Triadin (TrD) and Junctin (JnC), two proteins that form, with CASQ2 and RyR2, a macromolecular complex devoted to control of calcium release from the sarcoplasmic reticulum.',
  'beginSection': 'abstract',
  'document': 'http://www.ncbi.nlm.nih.gov/pubmed/22298808',
  'endSection': 'abstract'},
 {'offsetInBeginSection': 0,
  'offsetInEndSection': 246,
  'text': 'Triadin and junctin are integral sarcoplasmic reticulum 

In [44]:
neg_data_pts = create_neg_dataset(DATA_PATH)
neg_data_pts

<Response [200]>
ub: 4
['The identification of common variants that contribute to the genesis of human inherited disorders remains a significant challenge.', 'We have used family-based association studies to identify a disease interval, and integrated this with comparative and functional genomic analysis to prioritize conserved and functional elements within which mutations can be sought.', 'We now show that a common non-coding RET variant within a conserved enhancer-like sequence in intron 1 is significantly associated with HSCR susceptibility and makes a 20-fold greater contribution to risk than rare alleles do.', 'This mutation reduces in vitro enhancer activity markedly, has low penetrance, has different genetic effects in males and females, and explains several features of the complex inheritance pattern of HSCR.', 'Thus, common low-penetrance variants, identified by association studies, can underlie both common and rare diseases..']
<Response [200]>
ub: 4
["Hirschsprung's disease

<Response [200]>
ub: 4
['Hirschsprung disease is a congenital disorder with the incidence of 1 per 5000 live births, characterized by the absence of intestinal ganglion cells.', ' Diverse models of inheritance, co-existence of numerous genetic disorders and detection of numerous chromosomal aberrations together with involvement of various genes confirm the genetic heterogeneity of Hirschsprung disease.', 'Hirschsprung disease might well serve as a model for many complex disorders in which the search for responsible genes has only just been initiated.', 'It seems that the most important role in its genetic etiology plays the RET gene, which is involved in the etiology of at least four diseases.', 'This review focuses on recent advances of the importance of RET gene in the etiology of Hirschsprung disease..']
<Response [200]>
ub: 10
['in children.', 'It is characterized as a sex-linked heterogonous disorder with variable severity and incomplete penetrance giving rise to a variable patter

<Response [200]>
ub: 6
[' It occurs between the 6th and 45th year of life.', 'An early and reliable diagnosis, if possible in the preclinical stage, is the prerequisite for starting therapy in time.', 'By the treatment the quality of life and the expectation of life are raised considerably.', "If consistent treatment is given, there will be no objections to pregnancy in Wilson's disease.", 'It should be interrupted only in case of marked portal hypertension and in the presence of oesophageal varices.', "The examination of the breast milk of a patient suffering from Wilson's disease showed a reduction in the trace elements copper and zinc.", 'It may be necessary to think of copper and zinc substitution in children fed with breast milk only..']
j: 2
snip_idx: 7
<Response [200]>
ub: 15
[' Affinity purified antibodies against this protein localized it primarily to the basement membranes of embryos.', 'The antibodies cross-reacted with another material which was not sulfated and appeared to

<Response [200]>
ub: 0
['The aim of this study was to investigate the biological and clinical significance of epidermal growth factor receptor (EGFR) signaling pathway in follicular dendritic cell sarcoma (FDC-S)..']
<Response [200]>
ub: 6
["This study was conducted to describe the modulation of plasma epidermal growth factor receptor (EGFR) ligands in EGFR-positive metastatic colorectal cancer (mCRC) patients during treatment with cetuximab and irinotecan and to explore the clinical implication of plasma levels' variations as potential biomarkers of benefit, treated with cetuximab and irinotecan.", 'Plasma levels were measured before and 1\xa0h after the first administration of cetuximab, before and 1\xa0h after the second administration, and before the third and the fifth cycles.', 'KRAS and BRAF mutational status were determined.', "EGFR ligands' levels were differently modulated according to tumor KRAS and BRAF mutational status.", 'In KRAS wild-type patients (n\u2009=\u200934), AR

<Response [200]>
ub: 3
['Although KRAS mutation has been identified as a negative predictive biomarker of anti-EGFR antibodies in metastatic colorectal cancer (mCRC), the efficacy in mCRC patients with KRAS wild-type status remains limited.', 'Anti-EGFR antibodies work by blocking ligand binding, but the significance of EGFR ligands in mCRC has not been completely described.', 'This study was conducted to identify the correlation between all seven EGFR ligands and clinical outcomes in mCRC treated with anti-EGFR antibodies.', 'Furthermore, we determined an appropriate predictive strategy for anti-EGFR antibodies using these EGFR ligands..']
<Response [200]>
ub: 9
['Here, we elucidate functional differences among EGFR ligands and mechanisms underlying these distinctions.', 'In 32D/EGFR myeloid and MCF10A breast cells, soluble amphiregulin (AR), transforming growth factor alpha (TGFα), neuregulin 2 beta, and epigen stimulate greater EGFR coupling to cell proliferation and DNA synthesis t

<Response [200]>
ub: 7
['We describe an infant with Hirschsprung disease (congenital aganglionosis of the intestine) involving the colon and terminal ileum.', 'Midtrimester prenatal diagnosis of this disorder in this infant was attempted utilizing amniotic fluid disaccharidase analyses, ultrasound, and amniography.', 'Decreased disaccharidase activities in amniotic fluid have been reported previously in association with other forms of intestinal obstruction.', "At 15 weeks' gestation, normal amniotic fluid disaccharidase levels were obtained.", 'Serial ultrasound evaluations did not indicate any pathology, and the results from amniography were inconclusive.', "The implication of the normal disaccharidase values is that Hirschsprung disease may in some cases result from degeneration of intestinal ganglia after 16 weeks' gestation rather than from faulty migration of neural crest cells.", 'The case we report has a family history of three affected first- and second-degree relatives.', 'Au

<Response [200]>
ub: 6
["Hirschsprung's disease is characterized by the absence of ganglion cells in the myenteric and submucosal plexuses of the gastrointestinal tract.", "Genetic dissection was successful as nine genes and four loci for Hirschsprung's disease susceptibility were identified.", 'Different approaches were used to find these loci such as classical linkage in large families, identity by descent mapping in an inbred kindred, candidate gene approaches based on naturally occurring mutant mice models, and finally the use of model-free linkage and association analyzes.', "In this study, we review the identification of genes and loci involved in the non-syndromic common form and syndromic Mendelian forms of Hirschsprung's disease.", "The non-Mendelian inheritance of sporadic non-syndromic Hirschsprung's disease proved to be complex; involvement of multiple loci was demonstrated in a multiplicative model.", "We discuss the practical implications of the elucidation of genes assoc

<Response [200]>
ub: 5
['Thousands of long noncoding RNAs (lncRNAs) have been found in vertebrate animals, a few of which have known biological roles.', 'To better understand the genomics and features of lncRNAs in invertebrates, we used available RNA-seq, poly(A)-site, and ri.', 'Compared to protein-coding genes, the lncRNA genes tended to be expressed in a stage-dependent manner.', 'Approximately 25% of the newly identified lincRNAs showed little signal for sequence conservation and mapped antisense to clusters of endogenous siRNAs, as would be expected if they serve as templates and targets for these siRNAs.', 'The other 75% tended to be more conserved and included lincRNAs with intriguing expression and sequence features associating them with processes such as dauer formation, male identity, sperm formation, and interaction with sperm-specific mRNAs.', 'Our study provides a glimpse into the lncRNA content of a nonvertebrate animal and a resource for future studies of lncRNA functio

<Response [200]>
ub: 1
['Although elevated resting brain natriuretic peptide (BNP) concentrations reflect heart disease, the meaning of exercise-induced increases is poorly understood and has been examined in small groups only.', 'Therefore, the present study aimed to examine the increase in N-terminal pro-brain natriuretic peptide (NT-proBNP) and relations to cardiac troponin I and T (cTnI, cTnT) elevations after prolonged strenuous exercise in a large cohort of athletes..']
<Response [200]>
ub: 6
['Resistance to apoptosis is a major problem in ovarian cancer (OC) and correlates with poor prognosis.', 'Osteoprotegecrosis factor-related apoptosis-inducing ligand (TRAIL).', 'OPG has been reported to attenuate TRAIL-induced apoptosis in a variety of cancer cells, including OC cells.', 'OPG-mediated protection against TRAIL has been attributed to its decoy receptor function.', 'However, OPG activates integrin/focal adhesion kinase (FAK) signaling in endothelial cells.', 'In OC cells, acti

<Response [200]>
ub: 6
['Pulsed electromagnetic field (PEMF) has been shown to increase bone mineral density in osteoporosis patients and prevent bone loss in ovariectomized rats.', 'But the mechanisms through which PEMF elicits these favorable biological responses are still not fully understood.', 'The purpose of this study was to investigate the effects of PEMF on RANKL and OPG expression in ovariectomized rats.', 'Thirty 3-month-old female Sprague-Dawley rats were randomly divided into three groups: sham-operated control (Sham), ovariectomy control (OVX), and ovariectomy with PEMF treatment (PEMF).', 'After 12-week interventions, the results showed that PEMF increased serum 17β-estradiol level, reduced serum tartrate-resistant acid phosphatase level, increased bone mineral density, and inhibited deterioration of bone microarchitecture and strength in OVX rats.', 'Furthermore, PEMF could suppress RANKL expression and improve OPG expression in bone marrow cells of OVX rats.', 'In conc

<Response [200]>
ub: 6
['After fertilization, maternal factors direct development and trigger zygotic genome activation (ZGA) at the maternal-to-zygotic transition (MZT).', 'In zebrafish, ZGA is required for gastrulation and clearance of maternal messenger RNAs, which is in part regulated by the conserved microRNA miR-430.', 'However, the factors that activate the zygotic program in vertebrates are unknown.', ' We identified several hundred genes directly activated by maternal factors, constituting the first wave of zygotic transcription.', 'Ribosome profiling revealed that nanog, sox19b and pou5f1 are the most highly translated transcription factors pre-MZT.', 'Combined loss of these factors resulted in developmental arrest before gastrulation and a failure to activate >75% of zygotic genes, including miR-430.', 'Our results demonstrate that maternal Nanog, Pou5f1 and SoxB1 are required to initiate the zygotic developmental program and induce clearance of the maternal program by activ

<Response [200]>
ub: 8
['Despite advances in detection and therapy, epithelial ovarian cancer (EOC) still represents the most lethal gynecologic malignancy in women worldwide.', 'The high mortality of EOC is mainly due to late-stage diagnosis for more than 70% of patients.', 'There is an urgent need to search for specific and sensitive biomarkers for early diagnosis of EOC.', 'Recently, the cumulative data indicated an essential role for microRNA (miRNA), a class of small non-coding RNAs targeting multiple mRNAs and triggering translation repression and/or RNA degradation, in ovarian caner carcinogenesis and progression.', 'Here, we reviewed the published miRNA expression profiling studies that compared the miRNA expression profiles between EOC tissues or cell lines and normal ovarian tissues or benign ovarian tumor or human primary cultured ovarian surface epithelial cells.', 'A miRNA ranking system that takes the number of comparisons in agreement and direction of differential expres

<Response [200]>
ub: 8
['One of the best prognostic predictors for patients with epithelial ovarian cancer is the Federation of Obstetrics and Gynecology (FIGO) stage at diagnosis.', 'Advanced-stage ovarian serous carcinoma (OSC) generally have poor prognosis.', 'The goal of this study is to develop and validate a miRNA expression profile that can differentiate the OSC at early and advanced stages and study its correlation with the prognosis of OSC.', 'To identify a unique microRNA (miRNA) pattern associated with the progression of OSC at early and advanced stages, a miRNA microarray was performed using Chinese tumor bank specimens of patients with OSC stage I or III in a retrospective analysis.', 'The expression of four dysregulated miRNAs was validated using quantitative real-time polymerase chain reaction (qRT-PCR) in an external cohort of 51 cases of OSC samples at stages I and III.', 'Kaplan-Meier analysis was performed to analyze the correlation between the expression of some miR

<Response [200]>
ub: 2
['rol important cellular activities.', 'Let-7 is shown in vitro to sensitize cancer cells to platinum, but induce ovarian cancer resistance to paclitaxel.', 'This study aims to investigate the effect of let-7a expression on survival outcomes of epithelial ovarian cancer (EOC) patients treated with different chemotherapy..']
<Response [200]>
ub: 2
['Let-7 is a family of small non-coding RNAs regulating the expression of many genes that control important cellular activities.', 'Let-7 is shown in vitro to sensitize cancer cells to platinum, but induce ovarian cancer resistance to paclitaxel.', 'This study aims to investigate the effect of let-7a expression on survival outcomes of epithelial ovarian cancer (EOC) patients treated with different chemotherapy..']
<Response [200]>
ub: 4
['International Federation of Gynecology and Obstetrics stage I epithelial ovarian cancer (EOC) has a significantly better prognosis than stage III/IV EOC, with about 80% of patients surv

<Response [200]>
ub: 12
[' Transglutaminases-2 (Tgase-2) is a group of multifunctional enzymes that plays a role in cancer cell metastasis and bone formation.', 'However, relationship between OPG and Tgase-2 is not studied.', 'Therefore, we investigated the involvement of 12-O-Tetradecanoylphorbol 13-acetate in the expression of OPG in MG-63 osteosarcoma cells.', 'Interleukin-1β time-dependently induced OPG and Tgase-2 expression in cell lysates and media of the MG-63 cells by a Western blot.', 'Additional 110 kda band was found in the media of MG-63 cells.', '12-O-Tetradecanoylphorbol 13-acetate also induced OPG and Tgase-2 expression.', 'However, an 110 kda band was not found in TPA-treated media of MG-63 cells.', 'Cystamine, a Tgase-2 inhibitor, dose-dependently suppressed the expression of OPG in MG-63 cells.', 'Gene silencing of Tgase-2 also signifi cantly suppressed the expression of OPG in MG-63 cells.', 'Next, we examined whether a band of 110 kda of OPG contains an isopeptide 

<Response [200]>
ub: 1
["In myasthenia gravis, antibody-mediated blockade of acetylcholine receptors at the neuromuscular junction abolishes the naturally occurring 'safety factor' of synaptic transmission.", 'Acetylcholinesterase inhibitors provide temporary symptomatic treatment of muscle weakness, but there is controversy about their long-term efficacy, dosage and side effects..']
<Response [200]>
ub: 7
['Acquired myasthenia gravis (MG) is a chronic autoimmune disorder of the neuromuscular junction, characterized clinically by muscle weakness and abnormal fatigability on exertion.', 'This review focuses on treatment of MG, mainly on the use of the AChE inhibitor pyridostigmine.', 'Despite a lack of data from well controlled clinical trials to support their use, AChE inhibitors, of which pyridostigmine is the most commonly used, are recommended as first-line therapy for MG.', 'Pyridostigmine has been used as a treatment for MG for over 50 years and is generally considered safe.', 'It

<Response [200]>
ub: 2
['Levothyroxine (LT4) absorption is affected by concomitant ingestion of certain minerals, medications, and foods.', 'It has been hypothesized that metformin may suppress serum thyrotropin (TSH) concentrations by enhancing LT4 absorption or by directly affecting the hypothalamic-pituitary axis.', 'This study examined the effect of metformin ingestion on LT4 absorption, as assessed by serum total thyroxine (TT4) concentrations..']
<Response [200]>
ub: 1
['Denosumab (Xskeletal fractures in patients with bone metastases from solid tumors.', 'Although there is a widespread use of such drug in patients under risk of pathological fractures, the compatibility of denosumab therapy with percutaneous vertebroplasty (an interventional procedure commonly used for pain control in such population) has not yet been established..']
<Response [200]>
ub: 1
['Denosumab (XGeva) is a receptor activator of nuclear factor-κB ligand (RANKL)-antibody that was approved by the Food and Dru

<Response [200]>
ub: 3
['In 2007, the Agency for Healthcare Research and Quality(AHRQ) published a systematic review on the comparative effectiveness of treatments for osteoporosis.', 'The review included studies on the benefits and risks of medications and therapies used to prevent fractures in postmenopausal women and men with low bone density (osteopenia) or osteoporosis.', 'Factors that may affect adherence to treatment, and monitoring for the identification of those most likely to benefit from treatment were also included in this review.', 'AHRQ publish dietary and supplemental calcium and vitamin D, as well as weight-bearing exercise, for the preservation of bone mass and the decrease of fracture risk in patients with osteoporosis, were evaluated..']
<Response [200]>
ub: 5
['Prostate cancer (PC) is the leading cause of cancer and the second leading cause of cancer-death among men in the Western world.', 'About 10-20% of men with PC present with metastatic disease at diagnosis, wh

<Response [200]>
ub: 5
['Osteoporosis in men is finally receiving some attention; it has been realized that men are more likely to die after hip fracture.', 'Methods for screening men for osteoporosis include dual energy x-ray absorptiometry and use of fracture risk calculators such as FRAX (World Health Organization) and the Garvan nomogram.', 'Evaluation of men will often identify secondary causes of osteoporosis as well as multiple risk factors.', 'Alendronate, risedronate, zoledronic acid, and teriparatide are US Food and Drug Administration (FDA)--approved therapy for men.', 'Men on androgen deprivation therapy (ADT) are at high risk for bone loss and fracture, and all the bisphosphonates have been shown to increase bone density.', 'Thus, there is great progress in osteoporosis in men, and recognition of its importance is increasing..']
<Response [200]>
ub: 4
['Fully human monoclonal antibodies (mAbs) are a promising and rapidly growing category of targeted therapeutic agents.', '

<Response [200]>
ub: 6
[' Similarity searches of protein and DNA data bases revealed that Dvl-1 encodes an otherwise novel polypeptide.', 'While no functional motifs were identified, one region of Dvl-1 was found to be similar to a domain of discs large-1 (dlg), a Drosophila tumor suppressor gene.', 'In the embryo, Dvl-1 is expressed in most tissues, with uniformly high levels in the central nervous system.', 'From 7.5 days postcoitum Dvl-1 is expressed throughout the developing brain and spinal cord, including those regions expressing Wnt-1 and En.', 'Expression of Dvl-1 in adult mice was found to be widespread, with brain and testis exhibiting the highest levels.', 'The majority of Dvl-1 expression in the adult cerebellum is in the granular cell layer, similar to the pattern seen for engrailed-2 (En-2).', 'Throughout postnatal development of the brain Dvl-1 is highly expressed in areas of high neuronal cell density..']
<Response [200]>
ub: 4
[' the latter suppresses beta-catenin by p

[('55031181e9bde69634000014',
  '55031181e9bde69634000014_1000',
  'summary',
  'snippet_neg',
  'Is Hirschsprung disease a mendelian or a multifactorial disorder?',
  'This mutation reduces in vitro enhancer activity markedly, has low penetrance, has different genetic effects in males and females, and explains several features of the complex inheritance pattern of HSCR.',
  0),
 ('55031181e9bde69634000014',
  '55031181e9bde69634000014_1001',
  'summary',
  'snippet_neg',
  'Is Hirschsprung disease a mendelian or a multifactorial disorder?',
  "We discuss the practical implications of the elucidation of genes associated with Hirschsprung's disease susceptibility for genetic counseling.",
  0),
 ('55031181e9bde69634000014',
  '55031181e9bde69634000014_1002',
  'summary',
  'snippet_neg',
  'Is Hirschsprung disease a mendelian or a multifactorial disorder?',
  'For almost all of the identified HSCR genes incomplete penetrance of the HSCR phenotype has been reported, probably due to modif

In [45]:
neg_data_pts

[('55031181e9bde69634000014',
  '55031181e9bde69634000014_1000',
  'summary',
  'snippet_neg',
  'Is Hirschsprung disease a mendelian or a multifactorial disorder?',
  'This mutation reduces in vitro enhancer activity markedly, has low penetrance, has different genetic effects in males and females, and explains several features of the complex inheritance pattern of HSCR.',
  0),
 ('55031181e9bde69634000014',
  '55031181e9bde69634000014_1001',
  'summary',
  'snippet_neg',
  'Is Hirschsprung disease a mendelian or a multifactorial disorder?',
  "We discuss the practical implications of the elucidation of genes associated with Hirschsprung's disease susceptibility for genetic counseling.",
  0),
 ('55031181e9bde69634000014',
  '55031181e9bde69634000014_1002',
  'summary',
  'snippet_neg',
  'Is Hirschsprung disease a mendelian or a multifactorial disorder?',
  'For almost all of the identified HSCR genes incomplete penetrance of the HSCR phenotype has been reported, probably due to modif

In [47]:
col_names = [
    "qa_id", "qa_snap_id", "type", "answer_type",
    "question", "answer", "label"
]
bioasq_neg = pd.DataFrame(neg_data_pts, columns=col_names)
bioasq_neg["answer_len"] = bioasq_neg["answer"].apply(len)
bioasq_neg.head()

Unnamed: 0,qa_id,qa_snap_id,type,answer_type,question,answer,label,answer_len
0,55031181e9bde69634000014,55031181e9bde69634000014_1000,summary,snippet_neg,Is Hirschsprung disease a mendelian or a multi...,This mutation reduces in vitro enhancer activi...,0,204
1,55031181e9bde69634000014,55031181e9bde69634000014_1001,summary,snippet_neg,Is Hirschsprung disease a mendelian or a multi...,We discuss the practical implications of the e...,0,143
2,55031181e9bde69634000014,55031181e9bde69634000014_1002,summary,snippet_neg,Is Hirschsprung disease a mendelian or a multi...,For almost all of the identified HSCR genes in...,0,137
3,55031181e9bde69634000014,55031181e9bde69634000014_1003,summary,snippet_neg,Is Hirschsprung disease a mendelian or a multi...,"Hirschsprung disease (HSCR), or congenital int...",0,223
4,55031181e9bde69634000014,55031181e9bde69634000014_1004,summary,snippet_neg,Is Hirschsprung disease a mendelian or a multi...,The identification of common variants that con...,0,130


In [48]:
bioasq_neg.to_pickle("../data/bioasq_neg_sample.pickle")

In [34]:
bioasq_neg.iloc[:, :2]

Unnamed: 0,qa_id,type
0,55031181e9bde69634000014_1000,summary
1,55031181e9bde69634000014_1001,summary
2,55031181e9bde69634000014_1002,summary
3,55031181e9bde69634000014_1003,summary
4,55031181e9bde69634000014_1004,summary
...,...,...
76,55262a9787ecba3764000009_1011,yesno
77,55262a9787ecba3764000009_1012,yesno
78,55262a9787ecba3764000009_1013,yesno
79,55262a9787ecba3764000009_1014,yesno


In [27]:
pd.concat([bioasq_pos, bioasq_neg], ignore_index=True)

Unnamed: 0,qa_id,type,answer_type,question,answer,label,answer_len
0,55031181e9bde69634000014,summary,ideal_answer,Is Hirschsprung disease a mendelian or a multi...,"Coding sequence mutations in RET, GDNF, EDNRB,...",2,407
1,55031181e9bde69634000014_0,summary,snippet_answer,Is Hirschsprung disease a mendelian or a multi...,Hirschsprung disease (HSCR) is a multifactoria...,1,227
2,55031181e9bde69634000014_1,summary,snippet_answer,Is Hirschsprung disease a mendelian or a multi...,"In this study, we review the identification of...",1,438
3,55031181e9bde69634000014_2,summary,snippet_answer,Is Hirschsprung disease a mendelian or a multi...,"Coding sequence mutations in e.g. RET, GDNF, E...",1,542
4,55031181e9bde69634000014_3,summary,snippet_answer,Is Hirschsprung disease a mendelian or a multi...,For almost all of the identified HSCR genes in...,1,338
...,...,...,...,...,...,...,...
45208,55262a9787ecba3764000009_1011,yesno,strong_snippet_neg,Is RANKL secreted from the cells?,"We studied a recombinant, humanized anti-Her-2...",-1,119
45209,55262a9787ecba3764000009_1012,yesno,strong_snippet_neg,Is RANKL secreted from the cells?,"After fertilization, maternal factors direct d...",-1,145
45210,55262a9787ecba3764000009_1013,yesno,strong_snippet_neg,Is RANKL secreted from the cells?,The tumor suppressor NF2 is disrupted in appro...,-1,143
45211,55262a9787ecba3764000009_1014,yesno,strong_snippet_neg,Is RANKL secreted from the cells?,"that we have found to be involved in, (1) thin...",-1,224


In [None]:
break

In [109]:
round(random())

0

In [102]:
abstr = get_abstract(url)
abstr

<Response [200]>


'Acquired myasthenia gravis (MG) is a chronic autoimmune disorder of the neuromuscular junction, characterized clinically by muscle weakness and abnormal fatigability on exertion. Current guidelines and recommendations for MG treatment are based largely on clinical experience, retrospective analyses and expert consensus. Available therapies include oral acetylcholinesterase (AChE) inhibitors for symptomatic treatment, and short- and long-term disease-modifying treatments. This review focuses on treatment of MG, mainly on the use of the AChE inhibitor pyridostigmine. Despite a lack of data from well controlled clinical trials to support their use, AChE inhibitors, of which pyridostigmine is the most commonly used, are recommended as first-line therapy for MG. Pyridostigmine has been used as a treatment for MG for over 50 years and is generally considered safe. It is suitable as a long-term treatment in patients with generalized non-progressive milder disease, and as an adjunctive therap

In [75]:
abstr[178:475]

' Current guidelines and recommendations for MG treatment are based largely on clinical experience, retrospective analyses and expert consensus. Available therapies include oral acetylcholinesterase (AChE) inhibitors for symptomatic treatment, and short- and long-term disease-modifying treatments.'

In [74]:
abstract = eliminate_relevant_sent(abstr, onset, offset)
abstract

'Acquired myasthenia gravis (MG) is a chronic autoimmune disorder of the neuromuscular junction, characterized clinically by muscle weakness and abnormal fatigability on exertion. This review focuses on treatment of MG, mainly on the use of the AChE inhibitor pyridostigmine. Despite a lack of data from well controlled clinical trials to support their use, AChE inhibitors, of which pyridostigmine is the most commonly used, are recommended as first-line therapy for MG. Pyridostigmine has been used as a treatment for MG for over 50 years and is generally considered safe. It is suitable as a long-term treatment in patients with generalized non-progressive milder disease, and as an adjunctive therapy in patients with severe disease who are also receiving immunotherapy. Novel AChE inhibitors with oral antisense oligonucleotides have been developed and preliminary results appear to be promising. In general, however, AChE inhibitors provide only partial benefit and most patients eventually swi

In [80]:
sent_abstr = split_into_sent(abstract)
sent_abstr

['Acquired myasthenia gravis (MG) is a chronic autoimmune disorder of the neuromuscular junction, characterized clinically by muscle weakness and abnormal fatigability on exertion.',
 'This review focuses on treatment of MG, mainly on the use of the AChE inhibitor pyridostigmine.',
 'Despite a lack of data from well controlled clinical trials to support their use, AChE inhibitors, of which pyridostigmine is the most commonly used, are recommended as first-line therapy for MG.',
 'Pyridostigmine has been used as a treatment for MG for over 50 years and is generally considered safe.',
 'It is suitable as a long-term treatment in patients with generalized non-progressive milder disease, and as an adjunctive therapy in patients with severe disease who are also receiving immunotherapy.',
 'Novel AChE inhibitors with oral antisense oligonucleotides have been developed and preliminary results appear to be promising.',
 'In general, however, AChE inhibitors provide only partial benefit and mos

In [59]:
abstr.div.p.text

'The identification of common variants that contribute to the genesis of human inherited disorders remains a significant challenge. Hirschsprung disease (HSCR) is a multifactorial, non-mendelian disorder in which rare high-penetrance coding sequence mutations in the receptor tyrosine kinase RET contribute to risk in combination with mutations at other genes. We have used family-based association studies to identify a disease interval, and integrated this with comparative and functional genomic analysis to prioritize conserved and functional elements within which mutations can be sought. We now show that a common non-coding RET variant within a conserved enhancer-like sequence in intron 1 is significantly associated with HSCR susceptibility and makes a 20-fold greater contribution to risk than rare alleles do. This mutation reduces in vitro enhancer activity markedly, has low penetrance, has different genetic effects in males and females, and explains several features of the complex inh

In [57]:
for an in abstr:
    print('----')
    print(an.find('p'))

----
None
----
<p>The identification of common variants that contribute to the genesis of human inherited disorders remains a significant challenge. Hirschsprung disease (HSCR) is a multifactorial, non-mendelian disorder in which rare high-penetrance coding sequence mutations in the receptor tyrosine kinase RET contribute to risk in combination with mutations at other genes. We have used family-based association studies to identify a disease interval, and integrated this with comparative and functional genomic analysis to prioritize conserved and functional elements within which mutations can be sought. We now show that a common non-coding RET variant within a conserved enhancer-like sequence in intron 1 is significantly associated with HSCR susceptibility and makes a 20-fold greater contribution to risk than rare alleles do. This mutation reduces in vitro enhancer activity markedly, has low penetrance, has different genetic effects in males and females, and explains several features o

In [79]:
from random import randint

In [92]:
len(sent_abstr)

8

In [93]:
randint(0, len(sent_abstr) - 1)

5

In [129]:
data_size = len(data["questions"])
print(data_size)

i = randint(0, data_size)
i = 3223

min_span = max(0, i - 50)
max_span = min(data_size - 1, 50 + i)
print(i)
print(f"min_span = {min_span}")
print(f"max_span = {max_span}")

3243
3223
min_span = 3173
max_span = 3242


In [53]:
data["questions"][34]["snippets"][6]

{'offsetInBeginSection': 0,
 'offsetInEndSection': 87,
 'text': 'Recurrent mutations in SMO and AKT1 are mutually exclusive with NF2 loss in meningioma.',
 'beginSection': 'abstract',
 'document': 'http://www.ncbi.nlm.nih.gov/pubmed/23475883',
 'endSection': 'abstract'}

In [None]:
# j: 34
# snip_idx: 6
# <Response [200]>
# ub: -1
# []
# Traceback (most recent call last):
#   File "local_parser/build_bioasq_df.py", line 174, in <module>
#     bioasq_neg = build_bioasq_neg(DATA_PATH, COL_NAMES)
#   File "local_parser/build_bioasq_df.py", line 155, in build_bioasq_neg
#     neg_data_arr = create_neg_dataset(data_path)
#   File "local_parser/build_bioasq_df.py", line 144, in create_neg_dataset
#     snip_external[snip_idx]["offsetInEndSection"]
#   File "local_parser/build_bioasq_df.py", line 98, in create_neg_data_point
#     return abstr_sent[randint(0, ub)]
#   File "/Users/xu081/Documents/trec_t2/venv/lib/python3.7/random.py", line 222, in randint
#     return self.randrange(a, b+1)
#   File "/Users/xu081/Documents/trec_t2/venv/lib/python3.7/random.py", line 200, in randrange
#     raise ValueError("empty range for randrange() (%d,%d, %d)" % (istart, istop, width))
# ValueError: empty range for randrange() (0,0, 0)

In [55]:
x[0:0] + x[21:21]

''

In [8]:
neg_backup_today = pd.read_pickle("../data/bioasq_neg_sample_backup.pickle")
print(neg_backup_today.shape)
neg_backup_today.head()

(11477, 8)


Unnamed: 0,qa_id,qa_snap_id,type,answer_type,question,answer,label,answer_len
0,514a0a57d24251bc05000051,514a0a57d24251bc05000051_1000,factoid,snippet_neg,Which drug should be used as an antidote in be...,Benzodiazeps undertaken to examine the frequen...,0,130
1,514a0a57d24251bc05000051,514a0a57d24251bc05000051_1001,factoid,snippet_neg,Which drug should be used as an antidote in be...,Flumazenil is an effective antidote but there ...,0,138
2,514a0a57d24251bc05000051,514a0a57d24251bc05000051_1002,factoid,snippet_neg,Which drug should be used as an antidote in be...,Benzodiazepine (BZD) overdose (OD) continues t...,0,100
3,514a0a57d24251bc05000051,514a0a57d24251bc05000051_1003,factoid,snippet_neg,Which drug should be used as an antidote in be...,"We present a 75-year-old woman, who was brough...",0,125
4,514a0a57d24251bc05000051,514a0a57d24251bc05000051_1004,factoid,snippet_neg,Which drug should be used as an antidote in be...,The actions of benzodiazepines are due to the ...,0,135


In [7]:
neg_backup = pd.read_pickle("../data/bioasq_neg_sample_backup_25864.pickle")
print(neg_backup.shape)
neg_backup.head()

(25864, 8)


Unnamed: 0,qa_id,qa_snip_id,type,answer_type,question,answer,label,answer_len
0,55031181e9bde69634000014,55031181e9bde69634000014_1000,summary,snippet_neg,Is Hirschsprung disease a mendelian or a multi...,The identification of common variants that con...,0,130
1,55031181e9bde69634000014,55031181e9bde69634000014_1001,summary,snippet_neg,Is Hirschsprung disease a mendelian or a multi...,Hirschsprung's disease is characterized by the...,0,144
2,55031181e9bde69634000014,55031181e9bde69634000014_1002,summary,snippet_neg,Is Hirschsprung disease a mendelian or a multi...,"Therefore, HSCR has become a model for a compl...",0,202
3,55031181e9bde69634000014,55031181e9bde69634000014_1003,summary,snippet_neg,Is Hirschsprung disease a mendelian or a multi...,"RET, GDNF, EDNRB, EDN3, and SOX10 lead to long...",0,179
4,55031181e9bde69634000014,55031181e9bde69634000014_1004,summary,snippet_neg,Is Hirschsprung disease a mendelian or a multi...,The identification of common variants that con...,0,130


In [131]:
neg_backup.qa_id.nunique()

1301

In [130]:
print(neg_backup.tail()["question"][6347])

KeyError: 6347

In [79]:
print(neg_backup.tail()["answer"][6347])

The direct dopamine agonists, including ropinirole, pramipexole, and rotigotine patch, are also effective, although side effects, including daytime sleepiness, impulse control disorders, and augmentation, may limit usefulness.r.


In [81]:
print(neg_backup.qa_id.nunique())

321


In [69]:
pos = pd.read_pickle("../data/bioasq_pos.pickle")
print(pos.shape)
pos.head()

(45132, 8)


Unnamed: 0,qa_id,qa_snip_id,type,answer_type,question,answer,label,answer_len
0,55031181e9bde69634000014,55031181e9bde69634000014_0,summary,ideal_answer,Is Hirschsprung disease a mendelian or a multi...,"Coding sequence mutations in RET, GDNF, EDNRB,...",2,407
1,55031181e9bde69634000014,55031181e9bde69634000014_1,summary,snippet_answer,Is Hirschsprung disease a mendelian or a multi...,Hirschsprung disease (HSCR) is a multifactoria...,1,227
2,55031181e9bde69634000014,55031181e9bde69634000014_2,summary,snippet_answer,Is Hirschsprung disease a mendelian or a multi...,"In this study, we review the identification of...",1,438
3,55031181e9bde69634000014,55031181e9bde69634000014_3,summary,snippet_answer,Is Hirschsprung disease a mendelian or a multi...,"Coding sequence mutations in e.g. RET, GDNF, E...",1,542
4,55031181e9bde69634000014,55031181e9bde69634000014_4,summary,snippet_answer,Is Hirschsprung disease a mendelian or a multi...,For almost all of the identified HSCR genes in...,1,338


In [83]:
pos.qa_id.nunique()

3243

In [85]:
45132 + 3243 * 5

61347

In [115]:
train = pd.read_pickle("../data/bioasq_df_train_sample.pickle")
print(train.shape)
train.head()

(9675, 8)


Unnamed: 0,qa_id,type,answer_type,question,answer,label,answer_len,qa_snap_id
0,55031181e9bde69634000014,summary,ideal_answer,Is Hirschsprung disease a mendelian or a multi...,"Coding sequence mutations in RET, GDNF, EDNRB,...",1,407,55031181e9bde69634000014_0
1,55031181e9bde69634000014,summary,snippet_answer,Is Hirschsprung disease a mendelian or a multi...,Hirschsprung disease (HSCR) is a multifactoria...,1,227,55031181e9bde69634000014_1
2,55031181e9bde69634000014,summary,snippet_answer,Is Hirschsprung disease a mendelian or a multi...,"In this study, we review the identification of...",1,438,55031181e9bde69634000014_2
3,55031181e9bde69634000014,summary,snippet_answer,Is Hirschsprung disease a mendelian or a multi...,"Coding sequence mutations in e.g. RET, GDNF, E...",1,542,55031181e9bde69634000014_3
4,55031181e9bde69634000014,summary,snippet_answer,Is Hirschsprung disease a mendelian or a multi...,For almost all of the identified HSCR genes in...,1,338,55031181e9bde69634000014_4


In [116]:
train_subset = train[train["qa_id"].isin(train.qa_id.unique()[:1])].reset_index(drop=True)
train_subset.shape

(38, 8)

In [117]:
train_subset.to_pickle("../data/bioasq_df_train_tiny_sample.pickle")

In [122]:
test = pd.read_pickle("../data/bioasq_df_test_sample.pickle")
print(test.shape)
test.head()

(2244, 8)


Unnamed: 0,qa_id,type,answer_type,question,answer,label,answer_len,qa_snap_id
0,51406dd123fec90375000008,summary,ideal_answer,What is the treatment of subacute thyroiditis?,Common treatment of subacute thyroiditis is w...,1,103,51406dd123fec90375000008_0
1,51406dd123fec90375000008,summary,snippet_answer,What is the treatment of subacute thyroiditis?,Oral glucocorticoids are administered in moder...,1,96,51406dd123fec90375000008_1
2,51406dd123fec90375000008,summary,snippet_answer,What is the treatment of subacute thyroiditis?,he treatment protocol that we employed had 15 ...,1,143,51406dd123fec90375000008_2
3,56e45bc651531f7e33000018,summary,ideal_answer,What are the effects of BMAL1 deficiency?,BMAL1 deficiency is associated with premature ...,1,236,56e45bc651531f7e33000018_0
4,56e45bc651531f7e33000018,summary,snippet_answer,What are the effects of BMAL1 deficiency?,BMAL1 deficiency is associated with premature ...,1,72,56e45bc651531f7e33000018_1


In [119]:
test_subset = test[test["qa_id"].isin(test.qa_id.unique()[:1])].reset_index(drop=True)
test_subset.shape

(10, 8)

In [120]:
test_subset

Unnamed: 0,qa_id,type,answer_type,question,answer,label,answer_len,qa_snap_id
0,51406dd123fec90375000008,summary,ideal_answer,What is the treatment of subacute thyroiditis?,Common treatment of subacute thyroiditis is w...,1,103,51406dd123fec90375000008_0
1,51406dd123fec90375000008,summary,snippet_answer,What is the treatment of subacute thyroiditis?,Oral glucocorticoids are administered in moder...,1,96,51406dd123fec90375000008_1
2,51406dd123fec90375000008,summary,snippet_answer,What is the treatment of subacute thyroiditis?,he treatment protocol that we employed had 15 ...,1,143,51406dd123fec90375000008_2
3,51406dd123fec90375000008,summary,snippet_neg,What is the treatment of subacute thyroiditis?,"However, there have been no reports regarding ...",0,106,51406dd123fec90375000008_1000
4,51406dd123fec90375000008,summary,snippet_neg,What is the treatment of subacute thyroiditis?,"In this study, we used 15 mg/day of PSL as the...",0,99,51406dd123fec90375000008_1001
5,51406dd123fec90375000008,summary,strong_snippet_neg,What is the treatment of subacute thyroiditis?,"Sequence variants, including the ε4 allele of ...",0,152,51406dd123fec90375000008_1002
6,51406dd123fec90375000008,summary,strong_snippet_neg,What is the treatment of subacute thyroiditis?,uk/spldb/SpliceDB.html and at http://www.softb...,0,51,51406dd123fec90375000008_1003
7,51406dd123fec90375000008,summary,strong_snippet_neg,What is the treatment of subacute thyroiditis?,"Moreover, MG-132, a proteasome inhibitor, prev...",0,67,51406dd123fec90375000008_1004
8,51406dd123fec90375000008,summary,strong_snippet_neg,What is the treatment of subacute thyroiditis?,"Here, we report on 2 boys, aged 10 and 5 years...",0,90,51406dd123fec90375000008_1005
9,51406dd123fec90375000008,summary,strong_snippet_neg,What is the treatment of subacute thyroiditis?,The gene copy number of human junctin and hAS...,0,157,51406dd123fec90375000008_1006


In [121]:
test_subset.to_pickle("../data/bioasq_df_test_tiny_sample.pickle")

In [128]:
df_tt = pd.read_pickle("../data/trials_topics_combined_all_years_qe_paper.pickle")
df_tt.head()

Unnamed: 0,score,id,brief_summary,brief_title,minimum_age,gender,primary_outcome,detailed_description,keywords,official_title,...,exclusion,topic,_,label,disease,gene,age,year,d_and_g,qe_all
0,1.0,NCT01774162,Endoscopic ultrasound (EUS) is a well-establis...,EUS-guided Fine Needle Biopsy With a New Core ...,6570,male,Sampling Adequacy at time of procedure The abi...,Background: Endoscopic ultrasound (EUS) is a w...,Endoscopic Ultrasound Fine needle aspiration F...,Endoscopic Ultrasound Guided Fine Needle Biops...,...,- No detectable lesion - lesion inaccessible t...,18,0,0,Pancreatic cancer,CDK6 Amplification,,2017,"Pancreatic cancer, CDK6 Amplification","[Pancreatic cancer, CDK6 Amplification, MCPH12..."
1,1.0,NCT01774162,Endoscopic ultrasound (EUS) is a well-establis...,EUS-guided Fine Needle Biopsy With a New Core ...,6570,female,Sampling Adequacy at time of procedure The abi...,Background: Endoscopic ultrasound (EUS) is a w...,Endoscopic Ultrasound Fine needle aspiration F...,Endoscopic Ultrasound Guided Fine Needle Biops...,...,- No detectable lesion - lesion inaccessible t...,27,0,0,Pancreatic adenocarcinoma,"KRAS, TP53",,2017,"Pancreatic adenocarcinoma, KRAS, TP53","[Pancreatic adenocarcinoma, KRAS, TP53, 'C-K-R..."
2,1.0,NCT01774162,Endoscopic ultrasound (EUS) is a well-establis...,EUS-guided Fine Needle Biopsy With a New Core ...,6570,female,Sampling Adequacy at time of procedure The abi...,Background: Endoscopic ultrasound (EUS) is a w...,Endoscopic Ultrasound Fine needle aspiration F...,Endoscopic Ultrasound Guided Fine Needle Biops...,...,- No detectable lesion - lesion inaccessible t...,28,0,0,Pancreatic ductal adenocarcinoma,ERBB3,,2017,"Pancreatic ductal adenocarcinoma, ERBB3","[Pancreatic ductal adenocarcinoma, ERBB3, ErbB..."
3,1.0,NCT01774162,Endoscopic ultrasound (EUS) is a well-establis...,EUS-guided Fine Needle Biopsy With a New Core ...,6570,female,Sampling Adequacy at time of procedure The abi...,Background: Endoscopic ultrasound (EUS) is a w...,Endoscopic Ultrasound Fine needle aspiration F...,Endoscopic Ultrasound Guided Fine Needle Biops...,...,- No detectable lesion - lesion inaccessible t...,30,0,0,Pancreatic adenocarcinoma,"RB1, TP53, KRAS",,2017,"Pancreatic adenocarcinoma, RB1, TP53, KRAS","[Pancreatic adenocarcinoma, RB1, TP53, KRAS, O..."
4,1.0,NCT01226147,An open-label study to evaluate the efficacy a...,Efficacy and Safety of Tamibarotene(AM80) for ...,7300,female,Renal Function 24 weeks Urinary Protein values...,Tamibarotene is a synthetic retinoid presently...,Lupus Nephritis SLE retinoid tamibarotene,,...,- Pregnant or breastfeeding female patients - ...,7,0,0,Lung cancer,EGFR (L858R),,2017,"Lung cancer, EGFR (L858R)","[Lung cancer, EGFR (L858R), ERBB ERBB1 HER1 NI..."
