# Auto-labeling of a corpus of clinical trials for later use in retrieving search results

In [611]:
import csv
import gensim
import numpy as np
import matplotlib.pyplot as plt
import multiprocessing
import os
import re
import seaborn as sns
import spacy
import sys

from collections import OrderedDict
from gensim import corpora
from gensim.models.phrases import Phrases, Phraser
from gensim.models.ldamulticore import LdaMulticore
from gensim.models.coherencemodel import CoherenceModel
from gensim.parsing.preprocessing import STOPWORDS
from gensim.utils import simple_preprocess
from multiprocessing import Pool

nlp = spacy.load("en_core_web_lg")


Add a ton of stopwords relating to procedural things to make room for the medical conditions we're looking to classify.

In [889]:
STOPWORDS = set(STOPWORDS).union(set(['patient', 'patients', 'study', 'disease',
                                      'treatment', 'randomized', 'statistical', 
                                      'analysis', 'group', 'trial', 'clinical', 
                                      'controlled', 'safety', 'associated', 'risk', 
                                      'intervention', 'care', 'health', 'therapy', 
                                      'participants', 'method', 'monitor', 'studies',
                                      'cohorts', 'percent', 'prospective', 'efficacy', 
                                      'days', 'months', 'evaluate', 'subjects', 
                                      'data', 'outcomes', 'research', 'function', 
                                      'effects', 'investigators', 'use', 'population', 
                                      'compared', 'quality', 'results', 'improve',
                                      'term', 'groups', 'weeks', 'week', 'groups', 
                                      'test', 'control', 'time', 'period', 'placebo', 
                                      'stimulation', 'symptoms', 'mortality', 'failure',
                                      'non', 'interventional', 'observational', 'assess', 
                                      'relative', 'positive', 'develop', 'signs', 
                                      'enrolled', 'randomly', 'assigned', 'ratio', 
                                      'stratified', 'multicenter', 'open', 'phase', 
                                      'stage', 'iiib', 'eligible', 'criteria', 
                                      'inclusion', 'exclustion', 'year', 'years',
                                      'before', 'after', 'therapy', 'therapies',
                                      'interventions', 'controlling', 'terms', 'risks',
                                      'enroll', 'studies', 'diseases', 'enrolling',
                                      'evaluating', 'evaluated', 'evaluates', 'effect',
                                      'invesetigate', 'investigator', 'investigation',
                                      'investigations', 'studying', 'eligibility', "dose", 
                                      "screening", "history", "drug", "active", "including",
                                      "significant", "day", 'days', "potential", "female", 
                                      "mg", "hepatitis", "investigational", 'prior', 'known',
                                      'clinically', 'clincal', 'clinic', 'period', 'following',
                                      'subject', 'visit', 'subjects', 'willing', 
                                      'participation', 'lab', 'laboratory', 'medical',
                                      'response', 'diagnosis', 'stages', 'staged',
                                      'diagnoses', 'treat', 'treats', 'treated', 'program',
                                      'self', 'based', 'life', 'participate', 'english',
                                      'participates', 'able', 'community', 'support',
                                      'ability', 'coummunities', 'supports', 'supported',
                                      'consent', 'age', 'informed', 'consents', 'consented',
                                      'provide', 'provided', 'condition', 'conditions',
                                      'conditioned', 'compliance', 'enrollment',
                                      'accept', 'accepted', 'accepting', 'enrolment',
                                      'accepts', 'current', 'currently', 'controlled',
                                      'uncontrolled', 'status', 'recieved', 'times', 
                                      'limit', 'count', 'disorder', 'disorders', 'follow',
                                      'follows', 'followed', 'participant', 'related',
                                      'probability', 'probabilities', 'sample', 'samples',
                                      'practice', 'individual', 'individuals', 
                                      'individually', 'specific', 'specify', 'prevalence', 
                                      'limited', 'procedure', 'procedures', 'write',
                                      'obtain', 'practice', 'practices', 'practicing',
                                      'diagnostic', 'mg', 'mcg', 'ml', 'qday', 'tid',
                                      'qid', 'bid', 'po', 'pr', 'ac', 'prn', 'am', 'pm',
                                      'market', 'receive', 'received', 'receives', 
                                      'require', 'requires', 'required', 'start', 'end',
                                      'starts', 'ends', 'starting', 'ending', 'allow',
                                      'allows', 'allowed', 'define', 'defines', 'defined',
                                      'evaluation', 'ongoing', 'examination', 'evaluations',
                                      'examinations', 'evidence', 'upper','lower', 'normal',
                                      'people', 'person', 'exclusion', 'hour', 'hours', 'hr',
                                      'hrs', 'min', 'minute', 'minutes', 'include', 'equal',
                                      'equals', 'double', 'undergo', 'level', 'dl', 
                                      'diagnose', 'increase', 'increases', 'decrease', 
                                      'decreases', 'facility' 'implementation', 'center',
                                      'training', 'provider', 'providers', 'centers',
                                      'facilities', 'trainings', 'previous', 'assess',
                                      'assesses', 'assessment', 'assessments', 'assessing',
                                      'cause', 'service', 'services', 'project', 'projects',
                                      'identify', 'live', 'design', 'designs', 'survey',
                                      'surveys', 'surveying', 'implementation', 'implementations',
                                      
                                      ]))


In [890]:
def replace_csv_missing(row):
    text = (row[2] + ' ' +  # 'brief_title'
            row[3] + ' ' +  # 'condition'
            row[5] + ' ' +  # 'brief_summary'
            row[8][:500] + ' ' +  # 'eligibility' Keep first 500, tends to be exclusion criteria if it's very long.
            row[10] + ' ' +  # 'keyword'
            row[11] + ' ' +  # 'mesh_term'
            row[12]  # 'official_title
           )
    text = text.lower()
    new = re.sub('missing', '', text)
    new = re.sub('-', ' ', new)
    new = re.sub('\(\S*\)', ' ', new)
    new = re.sub(' \s+', ' ', new)
    return u"{}".format(new)

In [891]:
def replace_text_missing(text):
    text = text.lower().strip('\n')
    new = re.sub(' \s+', ' ', text)
    return u"{}".format(new)

In [892]:
def import_text_files(directory):
    bad_lines = ('service :',
                 'allergies :',
                 'chief complaint :',
                 'major surgical or invasive procedure :',
                 'history of present illness :',
                 'past medical history :',
                 'social history :',
                 'family history :',
                 'physical exam :',
                 'pertinent results :',
                 'brief hospital course :',
                 'medications on admission :',
                 'impression :',
                 'final diagnosis :',
                 'underlying medical condition :',
                 'discharge disposition :',
                 'discharge condition :',
                 'discharge instructions :',
                 'discharge medications :',
                 'followup instructions',
                 'signed',
                 '( ',
                 'job',
                 't :',
                 'y :',
                 'd :',
                 'med',
                 'dictated',
                 )
    lines = []
    for file in os.listdir(directory):
        if file.endswith('txt'):
            with open(directory + file, 'r') as f:
                for i, line in enumerate(f):
                    # Skip first 7 lines.
                    if i < 8:
                        pass
                    # Skip section header lines.
                    elif line.lower().startswith(bad_lines):
                        pass
                    else:
                        out =  replace_text_missing(line)
                        lines.append(out)
    return lines

In [893]:
def import_csv_files(file):
    all_rows = []
    with open(file, 'r') as csvinfile:
        csv_reader = csv.reader(csvinfile, delimiter=',')
        for row in csv_reader:
            all_rows.append(row)
    pool = Pool(processes=multiprocessing.cpu_count())    
    transformed_rows = pool.map(replace_csv_missing, all_rows)
    pool.close() 
    pool.join()
    return transformed_rows
    

Import data.

In [894]:
search_terms = import_csv_files('../frontend_dataset_final.csv')[1:]  # Discard header row.

In [895]:
len(search_terms)

22471

In [896]:
search_terms += import_text_files('beth_docs/')

In [897]:
len(search_terms)

34302

In [898]:
search_terms += import_text_files('partner_docs/')
len(search_terms)

43832

In [899]:
search_terms += import_text_files('beth_txt/')
len(search_terms)

50984

In [900]:
search_terms += import_text_files('partner_txt/')
len(search_terms)

57328

Split into training and test sets.

In [901]:
test_terms = search_terms.copy()[:1000]
search_terms = search_terms[-56328:]

In [902]:
len(search_terms), len(test_terms)

(56328, 1000)

In [903]:
search_terms[0]

'vestibulopathy with vestibulo ocular reflex gain deficit the study is examine the eye movements characteristics of patients with vor gain deficits (overt and covert saccades) before and after physical therapy intervention program and examine the most effective physical therapy treatment program for patients with vestibulopathy. vestibulopathy, acute peripheral inclusion criteria: diagnosed with unilateral or bilateral vestibulopathy living independently in the community exclusion criteria: cognitive state decline neurological disorder disease or disorders that can affect balance all 40 years n/a no dizziness vestibular neuronitis vestibulopathy with vestibulo ocular reflex gain deficit characteristics of overt and covert saccadic eye movements measured by the video head impulse test '

In [904]:
test_terms[0]

'prevalence of anti ccp positivity and subclinical signs of inflammation in patients with new onset of non specific musculoskeletal symptoms non interventional, prospective, observational study to assess the relative risk of anti ccp positive patients to develop signs of inflammation in accordance with early rheumatoid arthritis in a population without pre classified ra but new1 onset of non specific musculoskeletal symptoms in general practices in germany and subsequent 36 months follow up by rheumatologists rheumatoid arthritis population without pre classified ra but new onset of non specific musculoskeletal symptoms non probability sample inclusion criteria: new onset of non specific msk symptoms, including, but not limited to, arthralgia of the hands and the large joints such as wrists, knees, and shoulders written informed consent obtained prior to the initiation of any study protocol required procedures general understanding of study procedure and informed consent age ≥ 18 and ≤

Create lemmatization and tokenization functions.

In [905]:
def lemmatize(text):
    """Return new string of lemmatized words from text."""
    doc = nlp(text)
    return " ".join([token.lemma_ for token in doc if token.text not in STOPWORDS])

In [906]:
def tokenize(text):
    """Return list of tokens from text."""
    return [token for token in simple_preprocess(text) if token not in STOPWORDS]

Apply functions element-wise with multiprocessing.

In [907]:
pool = Pool(processes=multiprocessing.cpu_count())
lemma = pool.map(lemmatize, search_terms)
pool.close()
pool.join()

pool = Pool(processes=multiprocessing.cpu_count())
keyword_tokens = pool.map(tokenize, lemma)
pool.close()
pool.join()

Apply Phraser to detect bi-grams and more-grams.

In [908]:
# token_ = [doc.split(" ") for doc in keyword_tokens]
bigram = Phrases(keyword_tokens,
                 min_count=10,
                 threshold=5,
                 delimiter=b' ')

bigram_phraser = Phraser(bigram)

bigram_tokens = []
for sent in keyword_tokens:
    bigram_tokens.append(bigram_phraser[sent])

Check bigrams for meaningfullness.

In [909]:
bigram_tokens[0]

['vestibulopathy',
 'vestibulo',
 'ocular',
 'reflex',
 'gain',
 'deficit',
 'examine',
 'eye movement',
 'characteristic',
 'vor',
 'gain',
 'deficit',
 'overt',
 'covert',
 'saccade',
 'physical',
 'examine',
 'effective',
 'physical',
 'vestibulopathy',
 'vestibulopathy',
 'acute',
 'peripheral',
 'unilateral bilateral',
 'vestibulopathy',
 'independently',
 'cognitive',
 'state',
 'decline',
 'neurological',
 'affect',
 'balance',
 'dizziness',
 'vestibular',
 'neuronitis',
 'vestibulopathy',
 'vestibulo',
 'ocular',
 'reflex',
 'gain',
 'deficit',
 'characteristic',
 'overt',
 'covert',
 'saccadic',
 'eye movement',
 'measure',
 'video',
 'head',
 'impulse']

Looks nice.

Create a gensim dictionary of our bigram tokens.

In [964]:
id2word = corpora.Dictionary(bigram_tokens)

In [965]:
len(id2word.keys())

55258

Filter our dictionary to, hopefully, more useful words.

In [966]:
id2word.filter_extremes(no_below=15, no_above=0.015)
len(id2word.keys())

10699

Create corpus for modeling.

In [967]:
corpus = [id2word.doc2bow(text) for text in bigram_tokens]

Fit an LDA model.

In [968]:
lda = LdaMulticore(corpus=corpus,
                   id2word=id2word,
                   chunksize=10000,
                   passes=15,
                   workers=multiprocessing.cpu_count(),
                   )

Test out our model.

In [994]:
def get_search_terms(index):
    """Return topics for a given index in the test_tems list."""
    # Preprocess new document.
    lemma = list(map(lemmatize, [test_terms[index]]))
    other_texts = list(map(tokenize, lemma))
    other_corpus = []
    for sent in other_texts:
        other_corpus.append(bigram_phraser[sent])
    corpus = [id2word.doc2bow(text) for text in other_corpus]
    unseen_doc = corpus[0]
    # Feed processed document to the model.
    vector = lda[unseen_doc]  # Get topic probability distribution for a document
    # Choose the set of topics the model gives the highest probability to.
    terms = [topic.split('*') for topic in lda.print_topic(vector[np.argmax(vector, axis=0)[1]][0], 30).split(' + ')]
             #if float(topic.split('*')[0]) >= .005]  # Set a model probability cut-off for topic inclusion.
    return terms

In [995]:
index_to_search = 949

In [996]:
test_terms[index_to_search]

'comparison of two strategies for endotracheal tube cuff underinflation prevention during invasive mechanical ventilation during invasive mechanical ventilation, maintaining endotracheal tube cuff pressure around 25 cmh2o is recommended for sealing the upper airways. the continuous control of pcuff with a simple mechanical device, the tracoe smart cuffmanagertm, has never been assessed. the investigators hypothesize that the tracoe smart cuffmanagertm would allow a reduction of the incidence of underinflation episodes, as compared with the intermittent strategy of pcuff control. acute brain injury inclusion criteria: with severe acute brain damage admitted in the icu for less than 48 hours and expected to receive invasive mechanical ventilation (through orotracheal tube with a low pressure high volume cuff) for at least 48 hours after inclusion. exclusion criteria: change in the upper airway management within the 48 hours following the inclusion (extubation or change in the tracheal tu

In [997]:
get_search_terms(index_to_search)

[['0.024', '"ventilation"'],
 ['0.022', '"respiratory"'],
 ['0.021', '"oxygen"'],
 ['0.019', '"intubation"'],
 ['0.014', '"flow"'],
 ['0.012', '"needle"'],
 ['0.011', '"high flow"'],
 ['0.009', '"nasal"'],
 ['0.009', '"pressure"'],
 ['0.008', '"tube"'],
 ['0.008', '"invasive"'],
 ['0.008', '"ventilator"'],
 ['0.007', '"aspiration"'],
 ['0.007', '"esophageal"'],
 ['0.007', '"nasal cannula"'],
 ['0.006', '"membrane"'],
 ['0.006', '"mask"'],
 ['0.006', '"pancreatic"'],
 ['0.006', '"intubate"'],
 ['0.005', '"oxygenation"'],
 ['0.005', '"progesterone"'],
 ['0.005', '"invasive ventilation"'],
 ['0.005', '"prevention"'],
 ['0.004', '"mass"'],
 ['0.004', '"pneumothorax"'],
 ['0.004', '"airway"'],
 ['0.004', '"endoscopic ultrasound"'],
 ['0.004', '"solid"'],
 ['0.004', '"place"'],
 ['0.004', '"eus"']]

Pretty good.

In [998]:
index_to_search = 244

In [999]:
test_terms[index_to_search]

'community based intervention effects on older adults` physical activity the research team will conduct a 2 x 2 factorial experiment testing the individual and combined effects of two empirically and theoretically relevant sets of behavior change strategies on community dwelling older adults` physical activity. to do this the investigators will randomize participants >= 70 years old (n = 308) to 1 of 4 experimental conditions. all conditions include an evidence based physical activity protocol endorsed by centers for disease control and prevention for use by all older adults, including those with frailty and multiple co morbidities and the commercially available physical activity monitor (e.g., fitbit) to augment intervention delivery. intervention components that are experimental and vary by condition are the sets of behavior change strategies which will be combined with the physical activity protocol and the physical activity monitor. condition 1 has no specific behavior change strat

In [1000]:
get_search_terms(index_to_search)

[['0.014', '"youth"'],
 ['0.012', '"app"'],
 ['0.011', '"behavior"'],
 ['0.010', '"adhd"'],
 ['0.009', '"access"'],
 ['0.009', '"social"'],
 ['0.009', '"mobile"'],
 ['0.008', '"smartphone"'],
 ['0.008', '"african american"'],
 ['0.007', '"speak"'],
 ['0.007', '"address"'],
 ['0.007', '"internet"'],
 ['0.007', '"behavioral"'],
 ['0.006', '"technology"'],
 ['0.006', '"hyperactivity"'],
 ['0.006', '"focus"'],
 ['0.006', '"engage"'],
 ['0.006', '"tailor"'],
 ['0.006', '"feasibility"'],
 ['0.006', '"attention deficit"'],
 ['0.006', '"team"'],
 ['0.006', '"tool"'],
 ['0.005', '"goal"'],
 ['0.005', '"engagement"'],
 ['0.005', '"promote"'],
 ['0.005', '"communication"'],
 ['0.005', '"phone"'],
 ['0.005', '"mhealth"'],
 ['0.005', '"barrier"'],
 ['0.005', '"parent"']]

In [1001]:
index_to_search = 847

In [1002]:
test_terms[index_to_search]

'web based orthopaedic sports medicine registry the purpose of this study is to establish an international, web based clinical registry to collect baseline characteristics of patients undergoing orthopaedic, sports medicine, arthroscopy, and related surgery, and the subsequent outcomes and cost effectiveness associated with the surgical procedures and nonoperative treatments. degenerative and traumatic pathology of the knee cohort will be selected from clinic patients scheduling surgery or nonoperative treatment non probability sample inclusion criteria: all patients electing to schedule routine and medically indicated orthopedic, sports medicine, arthroscopy and related surgery or nonoperative treatment exclusion criteria: non english or spanish speaking vulnerable populations, excluding minors all 12 years n/a no orthopedic surgery orthopaedic sports medicine, arthroscopy, and related surgery registry using the web based orthoillustrated surgical outcomes system'

In [1003]:
get_search_terms(index_to_search)

[['0.046', '"injection"'],
 ['0.029', '"limb"'],
 ['0.028', '"extremity"'],
 ['0.028', '"muscle"'],
 ['0.023', '"hand"'],
 ['0.020', '"movement"'],
 ['0.014', '"rehabilitation"'],
 ['0.013', '"lumbar"'],
 ['0.013', '"virtual reality"'],
 ['0.011', '"physiotherapy"'],
 ['0.011', '"fusion"'],
 ['0.011', '"low pain"'],
 ['0.011', '"botulinum toxin"'],
 ['0.010', '"functional"'],
 ['0.008', '"joint"'],
 ['0.008', '"unilateral"'],
 ['0.007', '"bilateral"'],
 ['0.006', '"degenerative"'],
 ['0.006', '"strength"'],
 ['0.006', '"spasticity"'],
 ['0.006', '"range motion"'],
 ['0.006', '"spinal"'],
 ['0.005', '"body"'],
 ['0.005', '"dystonia"'],
 ['0.005', '"conservative"'],
 ['0.005', '"finger"'],
 ['0.005', '"mitochondrial"'],
 ['0.005', '"neurological"'],
 ['0.004', '"mechanical"'],
 ['0.004', '"neuromuscular"']]

Also pretty good.

In [1004]:
index_to_search = 682

In [1005]:
test_terms[index_to_search]

'pan asia united states prevention of sudden cardiac death catheter ablation trial the current standard of care for ventricular tachycardia includes the use of medicine called anti arrhythmic drugs and implantable cardioverter defibrillator therapy. these treatments are used to terminate the irregular heartbeats and bring the heart back to a normal rhythm. catheter ablation is a procedure used to eliminate the heart cells causing the arrhythmia. patients eligible for this may benefit from an ablation procedure in addition to an icd to treat their vt condition or risk of developing vt. this study aims to show that treating vt with catheter ablation, if performed preemptively at the time of icd implantation, will reduce subsequent recurrent vt, icd shocks, and lead to improved survival. ventricular tachycardia inclusion criteria: patient is receiving a new implantable cardioverter defibrillator or cardiac resynchronization therapy device (crt d) implant that has study required programing

In [1006]:
get_search_terms(index_to_search)

[['0.155', '"breast cancer"'],
 ['0.033', '"breast neoplasm"'],
 ['0.026', '"metastatic breast"'],
 ['0.020', '"survivor"'],
 ['0.020', '"endocrine"'],
 ['0.016', '"icd"'],
 ['0.015', '"edema"'],
 ['0.010', '"er"'],
 ['0.010', '"breast"'],
 ['0.010', '"hormone receptor"'],
 ['0.009', '"cancer survivor"'],
 ['0.009', '"estrogen receptor"'],
 ['0.009', '"postmenopausal woman"'],
 ['0.008', '"death"'],
 ['0.008', '"aromatase inhibitor"'],
 ['0.008', '"fulvestrant"'],
 ['0.008', '"early breast"'],
 ['0.008', '"optical coherence"'],
 ['0.007', '"tamoxifen"'],
 ['0.006', '"tomography"'],
 ['0.006', '"hormonal"'],
 ['0.006', '"sudden cardiac"'],
 ['0.006', '"iii"'],
 ['0.005', '"postmenopausal"'],
 ['0.005', '"defibrillator"'],
 ['0.005', '"adjuvant"'],
 ['0.005', '"palbociclib"'],
 ['0.005', '"diabetic macular"'],
 ['0.005', '"implantable cardioverter"'],
 ['0.005', '"oct"']]

In [1007]:
index_to_search = 724

In [1008]:
test_terms[index_to_search]

'single agent and combined inhibition after allogeneic stem cell transplant the purpose of the study is to determine the safety and benefit of nivolumab, ipilimumab or the combination of nivolumab with ipilimumab given after bone marrow transplant for patients with acute myelogenous leukemia and myelodysplastic syndrome. acute myeloid leukemia and myelodysplastic syndrome inclusion criteria: 1. voluntary signed and dated irb/iec approved written informed consent form in accordance with regulatory and local guidelines. 2. be 18 years or older and 70 years or younger on the day of signing consent 3. have a confirmed diagnosis of non m3 acute myeloid leukemia (intermediate ii is high risk. our population will consist of intermediate ii and high risk patients or any flt3+ aml) or ipss intermediate 2 or high risk myelodysplastic syndrome (appendice leukemia, myeloid, acute phase i study of single agent and combined checkpoint inhibition after allogeneic hematopoietic stem cell transplantati

In [1009]:
get_search_terms(index_to_search)

[['0.056', '"leukemia"'],
 ['0.036', '"transplant"'],
 ['0.035', '"stem cell"'],
 ['0.032', '"transplantation"'],
 ['0.028', '"donor"'],
 ['0.024', '"bone marrow"'],
 ['0.023', '"acute myeloid"'],
 ['0.016', '"aml"'],
 ['0.013', '"cell transplantation"'],
 ['0.011', '"acute lymphoblastic"'],
 ['0.010', '"relapse"'],
 ['0.010', '"recipient"'],
 ['0.010', '"hematopoietic stem"'],
 ['0.009', '"host"'],
 ['0.009', '"myelodysplastic syndrome"'],
 ['0.008', '"mds"'],
 ['0.008', '"graft versus"'],
 ['0.007', '"cell transplant"'],
 ['0.007', '"peripheral blood"'],
 ['0.007', '"acute leukemia"'],
 ['0.006', '"allogeneic hematopoietic"'],
 ['0.006', '"graft"'],
 ['0.006', '"hsct"'],
 ['0.006', '"newly"'],
 ['0.005', '"allogeneic"'],
 ['0.005', '"cord blood"'],
 ['0.005', '"relapse refractory"'],
 ['0.005', '"induction"'],
 ['0.005', '"post transplant"'],
 ['0.005', '"autologous"']]

In [1010]:
index_to_search = 754

In [1011]:
test_terms[index_to_search]

'adapting project uplift for blacks in georgia specific aim 8. evaluate the efficacy of project uplift for reducing symptoms of anxiety, depression, and ptsd among african americans at immediate posttest, after 3 months, and after 5 months. depressive symptoms inclusion criteria: identify as african american or black; diagnosed by a healthcare provider with epilepsy or seizure disorder at least 3 months ago; is mildly to moderately depressed or mildly to moderately anxious as determined by the phq 4; willing to be audio recorded during uplift sessions exclusion criteria: does not self identify as african american or black; is not depressed or anxious as determined by the phq 4; is severely depressed or anxious as determined by the phq 4; r epilepsy depression adapting evidence based epilepsy self management programs for blacks in georgia'

In [1012]:
get_search_terms(index_to_search)

[['0.072', '"depression"'],
 ['0.023', '"depressive"'],
 ['0.022', '"major depressive"'],
 ['0.020', '"cardiac arrest"'],
 ['0.017', '"episode"'],
 ['0.013', '"antidepressant"'],
 ['0.011', '"mood"'],
 ['0.011', '"bipolar"'],
 ['0.011', '"psychiatric"'],
 ['0.010', '"ketamine"'],
 ['0.010', '"fibromyalgia"'],
 ['0.009', '"dsm"'],
 ['0.009', '"depressed"'],
 ['0.009', '"mdd"'],
 ['0.008', '"major depression"'],
 ['0.007', '"psychosis"'],
 ['0.007', '"item"'],
 ['0.006', '"scid"'],
 ['0.006', '"mental"'],
 ['0.006', '"eeg"'],
 ['0.006', '"rating scale"'],
 ['0.006', '"interview"'],
 ['0.006', '"version"'],
 ['0.006', '"psychotic"'],
 ['0.005', '"cognitive behavioral"'],
 ['0.005', '"illness"'],
 ['0.005', '"resistant depression"'],
 ['0.005', '"methamphetamine"'],
 ['0.004', '"resistant"'],
 ['0.004', '"mechanism"']]

In [1013]:
index_to_search = 74

In [1014]:
test_terms[index_to_search]

'evaluation of short term outcome of different bifurcation stenting techniques at assuit university cath. lab primary aim: evaluation of the short term outcome of different techniques used in bifurcational coronary arteries intervention regarding major adverse cardiac event : cardiac death, myocardial infarction, target vessel revascularization, or stent thrombosis and occurrence of unstable angina with ecg changes and echo findings in the same target vessel in assiut university cath. lab. secondary aim: calculation of the percentage of bifurcational coronary arteries intervention in assiut university cath.lab stemi st elevation myocardial infarction this study will include patients diagnosed as bifurcational coronary arteries iesion underwent intervention non probability sample inclusion criteria: all patients with true bifurcational coronary arteries lesion defined as (lesions in which there is more than 50 percent diameter stenosis in both the parent vessel and the ostium of the sid

In [1015]:
get_search_terms(index_to_search)

[['0.033', '"coronary artery"'],
 ['0.028', '"myocardial infarction"'],
 ['0.027', '"stent"'],
 ['0.027', '"acute coronary"'],
 ['0.022', '"guide"'],
 ['0.020', '"cardiac surgery"'],
 ['0.017', '"pci"'],
 ['0.017', '"coronary"'],
 ['0.012', '"percutaneous coronary"'],
 ['0.012', '"acs"'],
 ['0.012', '"myocardial"'],
 ['0.009', '"cabg"'],
 ['0.009', '"infarction"'],
 ['0.008', '"indication"'],
 ['0.008', '"vessel"'],
 ['0.008', '"st elevation"'],
 ['0.008', '"cc"'],
 ['0.007', '"complex"'],
 ['0.007', '"unstable angina"'],
 ['0.007', '"cardiogenic shock"'],
 ['0.007', '"volume"'],
 ['0.007', '"ischemic heart"'],
 ['0.006', '"bypass graft"'],
 ['0.006', '"bypass grafting"'],
 ['0.006', '"postoperative"'],
 ['0.006', '"troponin"'],
 ['0.006', '"stenting"'],
 ['0.005', '"elute stent"'],
 ['0.005', '"mi"'],
 ['0.005', '"colchicine"']]

In [1016]:
index_to_search = 679

In [1017]:
test_terms[index_to_search]

'diet for induction and maintenance of remission and re biosis in crohn`s disease the modified exclusive enteral nutrition is an open label randomized controlled trial in mild to severe crohn`s disease patients. the purpose of this study is to determine whether induction of remission and maintenance of remission can be achieved with a new dietary strategy that involves only 2 weeks of exclusive enteral nutrition with modulen and 12 weeks of an exclusion diet involving selected table foods. this novel approach will be compared to the gold standard dietary regime involving 8 weeks of een. crohn`s disease inclusion criteria: 1. established diagnosis of crohn`s disease. 2. patients with mild to severe active crohn`s disease 3. ages 8 18 4. duration of disease ≤ 36 months 5. active inflammation (crp≥>0.6 mg /dl or esr≥>20 or calprotectin≥>200 mcg/gr within the past 3 weeks) during screening 6. patients with b1, p0 uncomplicated disease at enrollment 7. patients with disease defined as l1, l

In [1018]:
get_search_terms(index_to_search)

[['0.031', '"trauma"'],
 ['0.021', '"diet"'],
 ['0.019', '"supplementation"'],
 ['0.016', '"supplement"'],
 ['0.014', '"intake"'],
 ['0.013', '"inflammation"'],
 ['0.012', '"dietary"'],
 ['0.012', '"gut"'],
 ['0.011', '"gut microbiota"'],
 ['0.010', '"intestinal"'],
 ['0.010', '"acid"'],
 ['0.009', '"microbiota"'],
 ['0.009', '"consumption"'],
 ['0.007', '"food"'],
 ['0.007', '"oxidative stress"'],
 ['0.007', '"probiotic"'],
 ['0.007', '"product"'],
 ['0.007', '"composition"'],
 ['0.007', '"carbohydrate"'],
 ['0.007', '"role"'],
 ['0.006', '"sugar"'],
 ['0.006', '"consume"'],
 ['0.006', '"nutritional"'],
 ['0.006', '"inflammatory"'],
 ['0.006', '"antioxidant"'],
 ['0.005', '"fatty acid"'],
 ['0.005', '"extract"'],
 ['0.005', '"metabolism"'],
 ['0.005', '"bacteria"'],
 ['0.005', '"red"']]