In [8]:
#############################################################################################################
# Extracting chemical-disease associations from the biological literature
# R214: Main Practical
# Jan Ondras (jo356), Trinity College
#############################################################################################################
# Check agreement between NE tags and matched concept types (target PubMed)
# Label mismatch count:  4261
# example: sed -n '1025017,1025037p' < pubmed-abstracts.conll
# ALSO CHECKED consistency of NE tags within the NE => consistent!
#############################################################################################################
# 2. cell: check concepts without beginning (on target PubMed)

import numpy as np
import matplotlib.pyplot as plt
import time
import glob

# ---------------------------------------------------------------------------
# Diagnostics for the label-agreement check
# ---------------------------------------------------------------------------
# labelMismatchCnt: how many matched concepts had an NE tag type that differs
#                   from the type stored in the grounding dictionary
# NEtagLabels:      tag types of the tokens collected for the current concept
labelMismatchCnt, NEtagLabels = 0, []

# ---------------------------------------------------------------------------
# Corpus-wide tallies (a code that is absent is taken to have a zero count)
# ---------------------------------------------------------------------------
# chemCnts / diseCnts: key=chemicalCode / diseaseCode, value=count
chemCnts, diseCnts = {}, {}
# cooc: key=chemicalCode+diseaseCode, value=count of cooccurrences
cooc = dict()

# Same layout as chemCnts / diseCnts, but holding only the current sentence
TMP_chemCnts, TMP_diseCnts = {}, {}

# Text of the concept that is currently being assembled token by token
tmpStr = str()

########################################################################################
# HELPER FUNCTIONS 

# Abstract the given full concept name (c)
def abstract_concept(c):
    """Return the abstracted (normalised) form of the concept name ``c``.

    Spaces, quotes and the punctuation characters listed in ``rmv_chars``
    are deleted and the remaining text is lower-cased, e.g.
    "Alzheimer's Disease" -> "alzheimersdisease".

    The characters are filtered one by one rather than with
    ``c.translate(None, rmv_chars)``: that two-argument form only exists on
    Python 2 byte strings and raises TypeError for ``unicode`` values and on
    Python 3.  The filter below gives the same result for byte strings and
    also works for the other string types.

    Parameters
    ----------
    c : str
        Full concept name.

    Returns
    -------
    str
        ``c`` without the removed characters, in lower case.
    """
    # Characters to delete.  The backslash is escaped explicitly (the old
    # literal relied on the unrecognised escape "\:") and the double quote is
    # part of the same set, so a single pass over the text is enough.
    rmv_chars = " '-,/*+.!@#$%^&()_={}[];\\:<>?\""
    kept = [ch for ch in c if ch not in rmv_chars]
    return ''.join(kept).lower()

def _report(*items):
    """Print ``items`` on one line, separated by single spaces.

    A single parenthesised argument makes the call behave the same whether
    ``print`` is the Python 2 statement or the Python 3 function.
    """
    print(' '.join(str(item) for item in items))


# End of concept check & update: update counts once a whole concept is parsed
def eoc():
    """Close the concept accumulated in ``tmpStr`` and update the tallies.

    When ``tmpStr`` is non-empty its abstracted form is looked up in
    ``ground_dict``:

    * found     -> the NE tag type is compared with the type stored in the
                   dictionary (a disagreement is reported and counted in
                   ``labelMismatchCnt``), the tags collected in
                   ``NEtagLabels`` are checked for agreement with each other,
                   the per-sentence count of the matched code is incremented
                   under the dictionary's type, and ``cnt_matched`` grows;
    * not found -> ``cnt_NOT_matched`` grows.

    Afterwards ``tmpStr`` is emptied.  ``NEtagLabels`` is emptied on every
    call, also when no concept was pending, so labels collected for tokens
    with empty text cannot leak into the next concept.

    Raises
    ------
    ValueError
        If the matched dictionary entry is neither 'Disease' nor 'Chemical'.
    """
    global labelMismatchCnt
    global NEtagLabels
    global tmpStr
    global cnt_matched
    global cnt_NOT_matched

    if tmpStr != '':  # a concept is pending
        # Simple approximate string matching
        ac = abstract_concept(tmpStr)
        # Is this abstracted concept in our grounding dictionary ?
        if ac in ground_dict:
            matched_MESH_concept = ground_dict[ac]
            mLabel = matched_MESH_concept[1]  # matched label = chemical / disease
            mCode = matched_MESH_concept[2]   # matched code of chemical / disease

            # CHECK LABEL AGREEMENT NE ~ CONCEPT
            if NEtagLabel != mLabel:
                _report(LINE_NUM, "Label mismatch: ", NEtagLabel, NEtagLabels[-1], mLabel)
                labelMismatchCnt += 1
                # Sanity check: the last tag seen should be the last one recorded
                if NEtagLabel != NEtagLabels[-1]:
                    _report(LINE_NUM, NEtagLabel, NEtagLabels)

            # CHECK LABEL AGREEMENT WITHIN NE
            for iii in NEtagLabels[1:]:
                if iii != NEtagLabels[0]:
                    _report(LINE_NUM, "Label mismatch of NE items within NE:", NEtagLabels, tmpStr)

            # UPDATE COUNTS for this concept.  dict.get avoids the list that
            # "in d.keys()" builds on every call under Python 2.
            if mLabel == 'Disease':
                TMP_diseCnts[mCode] = TMP_diseCnts.get(mCode, 0) + 1
            elif mLabel == 'Chemical':
                TMP_chemCnts[mCode] = TMP_chemCnts.get(mCode, 0) + 1
            else:
                raise ValueError('Unknown concept!')

            cnt_matched += 1
        else:
            cnt_NOT_matched += 1

        tmpStr = ''

    # Always start the next concept with an empty label list
    NEtagLabels = []
        
# End of sentence update: update total counts based on temporary counts from this sentence, once a whole sentence is parsed
def eos():
    """Fold the counts of the sentence just parsed into the corpus totals.

    Reads ``TMP_chemCnts`` and ``TMP_diseCnts`` and updates ``chemCnts``,
    ``diseCnts`` and ``cooc`` in place:

    * every chemical code present in the sentence adds 1 to ``chemCnts``;
    * every disease code present in the sentence adds 1 to ``diseCnts``;
    * every (chemical, disease) pair present in the sentence adds 1 to
      ``cooc`` under the key ``chemicalCode+diseaseCode``.

    The disease totals are updated in their own loop.  They used to be
    updated inside the loop over chemicals, which counted a disease once per
    chemical in the sentence and not at all when the sentence contained no
    chemical.

    Counts are updated with ``dict.get`` rather than ``key in d.keys()``,
    which on Python 2 builds and scans a list of all keys for every lookup.
    """
    # Update chemical counts
    for chem in TMP_chemCnts:
        chemCnts[chem] = chemCnts.get(chem, 0) + 1

    # Update disease counts
    for dise in TMP_diseCnts:
        diseCnts[dise] = diseCnts.get(dise, 0) + 1

    # Update co-occurrences - minimum of 2x in total will be considered by filtering later
    for chem in TMP_chemCnts:
        for dise in TMP_diseCnts:
            # key into dictionary of cooccurrences: chemicalCode+diseaseCode, always this ordering
            k = chem + '+' + dise
            cooc[k] = cooc.get(k, 0) + 1

# END OF HELPER FUNCTIONS 
########################################################################################

# Read the MeSH concept table; each row is <name, label (Chemical or Disease), code>
# NOTE(review): np.loadtxt treats '#' as a comment marker by default - confirm
# that no concept name in the file contains '#'.
MESH = np.loadtxt('./../Dataset/CDR_MeSH.tsv', dtype=str, delimiter='\t', skiprows=0)

# Build the grounding dictionary:
#   abstracted concept name -> row [full concept name, type (Chemical or Disease), code]
# Rows are inserted in file order, so when several names abstract to the same
# key the last such row is the one that is kept.
ground_dict = dict((abstract_concept(row[0]), row) for row in MESH)
    
########################################################################################
########################################################################################
                    
# Iterate over PubMed abstracts and associated tags (produced by named entity recogniser)

st = time.time()
N_sentences = 0      # to count number of sentences
LINE_NUM = 0
cnt_matched = 0      # count matched concepts
cnt_NOT_matched = 0  # count not-matched concepts
cnt_cwb = 0          # count concepts without beginning tag B-...

with open('/home/janciovec/Desktop/pubmed-abstracts.conll') as f_NE:     # named entities;  301,084,933 lines, using grep -c '' pubmed-abstracts_tags
    with open('/home/janciovec/Desktop/pubmed-abstracts_tags') as f_TA:  # associated tags; 301,084,933 lines
        
        # Iterate over all lines
        while True: 
            
            LINE_NUM += 1

            # Read line - named entity
            rl = f_NE.readline()
            rlwn = rl.split('\n')[0] # read line skipping the new line character

            # End of file
            if rl == '':
                if f_TA.readline() != '':
                    raise ValueError("File with tags does not match the file with named entities!")
                # End of concept check & update
                eoc()
                # End of sentence update
                eos()
                break

            # End of sentence
            elif rlwn == '':
                if f_TA.readline() != '\n':
                    raise ValueError("File with tags does not match the file with named entities!")
                # End of concept check & update
                eoc()
                # End of sentence update
                eos()
                # Reset temporary counters for next sentence
                TMP_chemCnts = {}
                TMP_diseCnts = {}
                N_sentences += 1

            # Beginning or within sentence
            else:
                # Read associated tag
                tag = f_TA.readline().split('\n')[0]
                # Named entity - first item at the line
                NE = rlwn.split('\t')[0]
                
                # Beginning of concept
                if tag[0] == 'B': 

                    # End of concept check & update - since there could be a concept ending right now
                    eoc()
                    # Start new concept
                    tmpStr = NE
                    NEtagLabel = tag[2:]
                    NEtagLabels.append( NEtagLabel )
                    
                # Continuation of the concept name
                elif tag[0] == 'I': 
                    
                    if tmpStr == '': # Concept without beginning: start new one from this string
                        if TMP_chemCnts != {}:
                            #print LINE_NUM, "Concept without beginning! NOT BEGIN OF SENTENCE"
                            cnt_cwb += 1
                            
#                     else:
#                         # from NE item to NE item, check if consecutive NE item tags agree
#                         if NEtagLabel != tag[2:]:
#                             print "NE item tag change within NE:", LINE_NUM, tmpStr
                    
                    tmpStr = tmpStr + NE # concatenate words without a space between them
        
                    NEtagLabel = tag[2:]
                    NEtagLabels.append( NEtagLabel )
                    
                elif tag[0] == 'O':
                    # End of concept check & update
                    eoc()
                    
                    NEtagLabel = tag[0]
#                     NEtagLabels.append( NEtagLabel )
                else:
                    raise ValueError("Unknown tag!")
                    
            if LINE_NUM % 30000000 == 0:
                print 100.*LINE_NUM/301084933, " % processed; time so far: ", time.time()-st, " s"

print "Time taken ...", time.time()-st, " s"

print "Total # concepts: ", cnt_NOT_matched + cnt_matched
print "Matched concepts: ", cnt_matched

print "Concepts without beginning tag (B-...): ", cnt_cwb

print "Total # sentences: ", N_sentences

print "Label mismatch count: ", labelMismatchCnt

##############################################################################################################
# Total time: 1568.43784904 seconds = 26.14 minutes (with printing), 20.42 min
# Total # concepts:   9,105,005
# Matched concepts:   5,940,715
# Concepts without beginning tag (B-...):  452
# Total # sentences: 10,573,978

# To give examples of concepts without beginning
# sed -n '1750573,1750577p' < pubmed-abstracts_tags
# sed -n '1750573,1750577p' < pubmed-abstracts.conll

53462 Label mismatch:  Disease Disease Chemical
113231 Label mismatch:  Chemical Chemical Disease
211427 Label mismatch:  Chemical Chemical Disease
211539 Label mismatch:  Chemical Chemical Disease
211561 Label mismatch:  Chemical Chemical Disease
224549 Label mismatch:  Chemical Chemical Disease
332022 Label mismatch:  Chemical Chemical Disease
590789 Label mismatch:  Chemical Chemical Disease
622167 Label mismatch:  Chemical Chemical Disease
1025027 Label mismatch:  Chemical Chemical Disease
1026812 Label mismatch:  Chemical Chemical Disease
1027011 Label mismatch:  Chemical Chemical Disease
1091093 Label mismatch:  Chemical Chemical Disease
1091625 Label mismatch:  Chemical Chemical Disease
1091666 Label mismatch:  Chemical Chemical Disease
1091676 Label mismatch:  Chemical Chemical Disease
1091697 Label mismatch:  Chemical Chemical Disease
1091917 Label mismatch:  Chemical Chemical Disease
1266307 Label mismatch:  Chemical Chemical Disease
1357835 Label mismatch:  Chemical Chemical

13393092 Label mismatch:  Chemical Chemical Disease
13449879 Label mismatch:  Chemical Chemical Disease
13450039 Label mismatch:  Chemical Chemical Disease
13500433 Label mismatch:  Chemical Chemical Disease
13692340 Label mismatch:  Disease Disease Chemical
13732976 Label mismatch:  Chemical Chemical Disease
13914234 Label mismatch:  Chemical Chemical Disease
13914324 Label mismatch:  Chemical Chemical Disease
13914333 Label mismatch:  Chemical Chemical Disease
13914385 Label mismatch:  Chemical Chemical Disease
13914422 Label mismatch:  Chemical Chemical Disease
13914433 Label mismatch:  Chemical Chemical Disease
13914478 Label mismatch:  Chemical Chemical Disease
13994028 Label mismatch:  Chemical Chemical Disease
13994046 Label mismatch:  Chemical Chemical Disease
13994069 Label mismatch:  Chemical Chemical Disease
13994109 Label mismatch:  Chemical Chemical Disease
14475662 Label mismatch:  Chemical Chemical Disease
14475676 Label mismatch:  Chemical Chemical Disease
14479617 Labe

25721316 Label mismatch:  Chemical Chemical Disease
25770213 Label mismatch:  Chemical Chemical Disease
26114289 Label mismatch:  Chemical Chemical Disease
26238607 Label mismatch:  Chemical Chemical Disease
26310401 Label mismatch:  Chemical Chemical Disease
26346605 Label mismatch:  Chemical Chemical Disease
26395890 Label mismatch:  Chemical Chemical Disease
26395901 Label mismatch:  Chemical Chemical Disease
26395948 Label mismatch:  Chemical Chemical Disease
26395980 Label mismatch:  Chemical Chemical Disease
26396129 Label mismatch:  Chemical Chemical Disease
26396179 Label mismatch:  Chemical Chemical Disease
26475209 Label mismatch:  Chemical Chemical Disease
26659991 Label mismatch:  Chemical Chemical Disease
26660057 Label mismatch:  Chemical Chemical Disease
26660336 Label mismatch:  Chemical Chemical Disease
26660400 Label mismatch:  Chemical Chemical Disease
26867237 Label mismatch:  Chemical Chemical Disease
26868955 Label mismatch:  Chemical Chemical Disease
26959512 Lab

36095557 Label mismatch:  Chemical Chemical Disease
36161195 Label mismatch:  Chemical Chemical Disease
36213314 Label mismatch:  Chemical Chemical Disease
36563865 Label mismatch:  Disease Disease Chemical
36584205 Label mismatch:  Chemical Chemical Disease
36590899 Label mismatch:  Chemical Chemical Disease
36626678 Label mismatch:  Disease Disease Chemical
36640008 Label mismatch:  Chemical Chemical Disease
36748715 Label mismatch:  Chemical Chemical Disease
36755322 Label mismatch:  Chemical Chemical Disease
36789671 Label mismatch:  Chemical Chemical Disease
36883952 Label mismatch:  Chemical Chemical Disease
36900930 Label mismatch:  Chemical Chemical Disease
36967188 Label mismatch:  Disease Disease Chemical
37036796 Label mismatch:  Chemical Chemical Disease
37073139 Label mismatch:  Chemical Chemical Disease
37177603 Label mismatch:  Chemical Chemical Disease
37320891 Label mismatch:  Chemical Chemical Disease
37320912 Label mismatch:  Chemical Chemical Disease
37412074 Label 

45280671 Label mismatch:  Chemical Chemical Disease
45307113 Label mismatch:  Chemical Chemical Disease
45307169 Label mismatch:  Chemical Chemical Disease
45307412 Label mismatch:  Chemical Chemical Disease
45402496 Label mismatch:  Chemical Chemical Disease
45404055 Label mismatch:  Chemical Chemical Disease
45404082 Label mismatch:  Chemical Chemical Disease
45505289 Label mismatch:  Chemical Chemical Disease
45544323 Label mismatch:  Chemical Chemical Disease
45575273 Label mismatch:  Chemical Chemical Disease
45575426 Label mismatch:  Chemical Chemical Disease
45575586 Label mismatch:  Chemical Chemical Disease
45583027 Label mismatch:  Chemical Chemical Disease
45710157 Label mismatch:  Chemical Chemical Disease
45858341 Label mismatch:  Chemical Chemical Disease
45870931 Label mismatch:  Chemical Chemical Disease
45909122 Label mismatch:  Chemical Chemical Disease
45911694 Label mismatch:  Chemical Chemical Disease
45933624 Label mismatch:  Disease Disease Chemical
45941738 Labe

56807578 Label mismatch:  Chemical Chemical Disease
56807592 Label mismatch:  Chemical Chemical Disease
56907712 Label mismatch:  Chemical Chemical Disease
57152974 Label mismatch:  Chemical Chemical Disease
57241573 Label mismatch:  Chemical Chemical Disease
57295411 Label mismatch:  Chemical Chemical Disease
57499424 Label mismatch:  Chemical Chemical Disease
57559121 Label mismatch:  Chemical Chemical Disease
57559168 Label mismatch:  Chemical Chemical Disease
57587408 Label mismatch:  Chemical Chemical Disease
57591591 Label mismatch:  Chemical Chemical Disease
57636392 Label mismatch:  Chemical Chemical Disease
57783459 Label mismatch:  Chemical Chemical Disease
57783489 Label mismatch:  Chemical Chemical Disease
57839111 Label mismatch:  Chemical Chemical Disease
57853736 Label mismatch:  Chemical Chemical Disease
58073421 Label mismatch:  Chemical Chemical Disease
58475261 Label mismatch:  Chemical Chemical Disease
58577472 Label mismatch:  Chemical Chemical Disease
58680026 Lab

68058031 Label mismatch:  Chemical Chemical Disease
68229985 Label mismatch:  Chemical Chemical Disease
68384233 Label mismatch:  Chemical Chemical Disease
68384316 Label mismatch:  Chemical Chemical Disease
68386439 Label mismatch:  Chemical Chemical Disease
68518253 Label mismatch:  Chemical Chemical Disease
68637947 Label mismatch:  Chemical Chemical Disease
68652519 Label mismatch:  Chemical Chemical Disease
68772469 Label mismatch:  Chemical Chemical Disease
68772492 Label mismatch:  Chemical Chemical Disease
68775447 Label mismatch:  Chemical Chemical Disease
69029816 Label mismatch:  Chemical Chemical Disease
69082390 Label mismatch:  Chemical Chemical Disease
69140229 Label mismatch:  Chemical Chemical Disease
69192373 Label mismatch:  Chemical Chemical Disease
69192662 Label mismatch:  Chemical Chemical Disease
69388062 Label mismatch:  Chemical Chemical Disease
69388231 Label mismatch:  Chemical Chemical Disease
69430717 Label mismatch:  Chemical Chemical Disease
69451879 Lab

79785004 Label mismatch:  Disease Disease Chemical
80059433 Label mismatch:  Chemical Chemical Disease
80059642 Label mismatch:  Chemical Chemical Disease
80067855 Label mismatch:  Chemical Chemical Disease
80140711 Label mismatch:  Chemical Chemical Disease
80149798 Label mismatch:  Chemical Chemical Disease
80149827 Label mismatch:  Chemical Chemical Disease
80198381 Label mismatch:  Chemical Chemical Disease
80297400 Label mismatch:  Chemical Chemical Disease
80297513 Label mismatch:  Chemical Chemical Disease
80320235 Label mismatch:  Chemical Chemical Disease
80366431 Label mismatch:  Chemical Chemical Disease
80402993 Label mismatch:  Disease Disease Chemical
80597443 Label mismatch:  Disease Disease Chemical
80630961 Label mismatch:  Chemical Chemical Disease
80716240 Label mismatch:  Chemical Chemical Disease
80716328 Label mismatch:  Chemical Chemical Disease
80858762 Label mismatch:  Chemical Chemical Disease
80858917 Label mismatch:  Chemical Chemical Disease
80889193 Label 

91116274 Label mismatch:  Chemical Chemical Disease
91142052 Label mismatch:  Chemical Chemical Disease
91157599 Label mismatch:  Chemical Chemical Disease
91222970 Label mismatch:  Chemical Chemical Disease
91230648 Label mismatch:  Chemical Chemical Disease
91283093 Label mismatch:  Chemical Chemical Disease
91349281 Label mismatch:  Chemical Chemical Disease
91417912 Label mismatch:  Chemical Chemical Disease
91555251 Label mismatch:  Chemical Chemical Disease
91576649 Label mismatch:  Chemical Chemical Disease
91576667 Label mismatch:  Chemical Chemical Disease
91600205 Label mismatch:  Chemical Chemical Disease
91695615 Label mismatch:  Chemical Chemical Disease
91703172 Label mismatch:  Chemical Chemical Disease
91703210 Label mismatch:  Chemical Chemical Disease
92034058 Label mismatch:  Chemical Chemical Disease
92034071 Label mismatch:  Chemical Chemical Disease
92034183 Label mismatch:  Chemical Chemical Disease
92034231 Label mismatch:  Chemical Chemical Disease
92065493 Lab

100235030 Label mismatch:  Chemical Chemical Disease
100245111 Label mismatch:  Chemical Chemical Disease
100280263 Label mismatch:  Chemical Chemical Disease
100317719 Label mismatch:  Chemical Chemical Disease
100438649 Label mismatch:  Chemical Chemical Disease
100450132 Label mismatch:  Chemical Chemical Disease
100494325 Label mismatch:  Chemical Chemical Disease
100548531 Label mismatch:  Chemical Chemical Disease
100548540 Label mismatch:  Chemical Chemical Disease
100548589 Label mismatch:  Chemical Chemical Disease
100548640 Label mismatch:  Chemical Chemical Disease
100548660 Label mismatch:  Chemical Chemical Disease
100548700 Label mismatch:  Chemical Chemical Disease
100548742 Label mismatch:  Chemical Chemical Disease
100548763 Label mismatch:  Chemical Chemical Disease
100548787 Label mismatch:  Chemical Chemical Disease
100567210 Label mismatch:  Chemical Chemical Disease
100586296 Label mismatch:  Chemical Chemical Disease
100671564 Label mismatch:  Chemical Chemical D

110811882 Label mismatch:  Chemical Chemical Disease
110811943 Label mismatch:  Chemical Chemical Disease
110811984 Label mismatch:  Chemical Chemical Disease
110812019 Label mismatch:  Chemical Chemical Disease
110812039 Label mismatch:  Chemical Chemical Disease
110812075 Label mismatch:  Chemical Chemical Disease
110812129 Label mismatch:  Chemical Chemical Disease
111233190 Label mismatch:  Chemical Chemical Disease
111245242 Label mismatch:  Chemical Chemical Disease
111248760 Label mismatch:  Chemical Chemical Disease
111262667 Label mismatch:  Chemical Chemical Disease
111262774 Label mismatch:  Chemical Chemical Disease
111262860 Label mismatch:  Chemical Chemical Disease
111394440 Label mismatch:  Disease Disease Chemical
111421977 Label mismatch:  Chemical Chemical Disease
111534993 Label mismatch:  Chemical Chemical Disease
111577880 Label mismatch:  Chemical Chemical Disease
111579832 Label mismatch:  Disease Disease Chemical
111825076 Label mismatch:  Chemical Chemical Dis

124447681 Label mismatch:  Chemical Chemical Disease
124447726 Label mismatch:  Chemical Chemical Disease
124447791 Label mismatch:  Chemical Chemical Disease
124447834 Label mismatch:  Chemical Chemical Disease
124447981 Label mismatch:  Chemical Chemical Disease
124448107 Label mismatch:  Chemical Chemical Disease
124502218 Label mismatch:  Chemical Chemical Disease
124502255 Label mismatch:  Chemical Chemical Disease
124502323 Label mismatch:  Chemical Chemical Disease
124502398 Label mismatch:  Chemical Chemical Disease
124502437 Label mismatch:  Chemical Chemical Disease
124581122 Label mismatch:  Chemical Chemical Disease
124672743 Label mismatch:  Chemical Chemical Disease
124672760 Label mismatch:  Chemical Chemical Disease
124687314 Label mismatch:  Chemical Chemical Disease
124746340 Label mismatch:  Chemical Chemical Disease
124754933 Label mismatch:  Chemical Chemical Disease
124776702 Label mismatch:  Chemical Chemical Disease
124939539 Label mismatch:  Chemical Chemical D

133169178 Label mismatch:  Chemical Chemical Disease
133234709 Label mismatch:  Chemical Chemical Disease
133234816 Label mismatch:  Chemical Chemical Disease
133308543 Label mismatch:  Chemical Chemical Disease
133308599 Label mismatch:  Chemical Chemical Disease
133444904 Label mismatch:  Chemical Chemical Disease
133581140 Label mismatch:  Chemical Chemical Disease
133624689 Label mismatch:  Chemical Chemical Disease
133672129 Label mismatch:  Chemical Chemical Disease
133672188 Label mismatch:  Chemical Chemical Disease
133705018 Label mismatch:  Chemical Chemical Disease
133794037 Label mismatch:  Chemical Chemical Disease
133819730 Label mismatch:  Chemical Chemical Disease
133845400 Label mismatch:  Chemical Chemical Disease
134030902 Label mismatch:  Chemical Chemical Disease
134049458 Label mismatch:  Chemical Chemical Disease
134433649 Label mismatch:  Chemical Chemical Disease
134513190 Label mismatch:  Chemical Chemical Disease
134610617 Label mismatch:  Chemical Chemical D

147233170 Label mismatch:  Chemical Chemical Disease
147355967 Label mismatch:  Chemical Chemical Disease
147392029 Label mismatch:  Chemical Chemical Disease
147523570 Label mismatch:  Chemical Chemical Disease
147523596 Label mismatch:  Chemical Chemical Disease
147523625 Label mismatch:  Chemical Chemical Disease
147523657 Label mismatch:  Chemical Chemical Disease
147523720 Label mismatch:  Chemical Chemical Disease
147564309 Label mismatch:  Chemical Chemical Disease
147778869 Label mismatch:  Chemical Chemical Disease
147815228 Label mismatch:  Chemical Chemical Disease
148110089 Label mismatch:  Chemical Chemical Disease
148186015 Label mismatch:  Chemical Chemical Disease
148317470 Label mismatch:  Chemical Chemical Disease
148357093 Label mismatch:  Chemical Chemical Disease
148360079 Label mismatch:  Chemical Chemical Disease
148375620 Label mismatch:  Chemical Chemical Disease
148416356 Label mismatch:  Chemical Chemical Disease
148416378 Label mismatch:  Chemical Chemical D

158965465 Label mismatch:  Chemical Chemical Disease
158965521 Label mismatch:  Chemical Chemical Disease
158990318 Label mismatch:  Chemical Chemical Disease
159034813 Label mismatch:  Chemical Chemical Disease
159049268 Label mismatch:  Chemical Chemical Disease
159063450 Label mismatch:  Chemical Chemical Disease
159150272 Label mismatch:  Chemical Chemical Disease
159176653 Label mismatch:  Chemical Chemical Disease
159176729 Label mismatch:  Chemical Chemical Disease
159217442 Label mismatch:  Chemical Chemical Disease
159245706 Label mismatch:  Disease Disease Chemical
159361060 Label mismatch:  Chemical Chemical Disease
159365358 Label mismatch:  Disease Disease Chemical
159564071 Label mismatch:  Disease Disease Chemical
159636677 Label mismatch:  Chemical Chemical Disease
159636689 Label mismatch:  Chemical Chemical Disease
159636955 Label mismatch:  Chemical Chemical Disease
159675754 Label mismatch:  Chemical Chemical Disease
159881381 Label mismatch:  Chemical Chemical Dise

171346047 Label mismatch:  Disease Disease Chemical
171346107 Label mismatch:  Disease Disease Chemical
171369966 Label mismatch:  Chemical Chemical Disease
171466153 Label mismatch:  Chemical Chemical Disease
171566516 Label mismatch:  Chemical Chemical Disease
171688043 Label mismatch:  Chemical Chemical Disease
171688102 Label mismatch:  Chemical Chemical Disease
171737725 Label mismatch:  Chemical Chemical Disease
171882292 Label mismatch:  Chemical Chemical Disease
171890179 Label mismatch:  Chemical Chemical Disease
171890423 Label mismatch:  Chemical Chemical Disease
171890526 Label mismatch:  Chemical Chemical Disease
171998654 Label mismatch:  Chemical Chemical Disease
172136791 Label mismatch:  Chemical Chemical Disease
172204286 Label mismatch:  Chemical Chemical Disease
172204530 Label mismatch:  Chemical Chemical Disease
172212195 Label mismatch:  Chemical Chemical Disease
172265066 Label mismatch:  Chemical Chemical Disease
172381502 Label mismatch:  Chemical Chemical Dis

180632763 Label mismatch:  Chemical Chemical Disease
180779576 Label mismatch:  Disease Disease Chemical
180972399 Label mismatch:  Chemical Chemical Disease
181014601 Label mismatch:  Chemical Chemical Disease
181116187 Label mismatch:  Disease Disease Chemical
181192057 Label mismatch:  Chemical Chemical Disease
181304158 Label mismatch:  Disease Disease Chemical
181347245 Label mismatch:  Chemical Chemical Disease
181500968 Label mismatch:  Chemical Chemical Disease
181673814 Label mismatch:  Chemical Chemical Disease
181714375 Label mismatch:  Chemical Chemical Disease
181862073 Label mismatch:  Chemical Chemical Disease
181875747 Label mismatch:  Chemical Chemical Disease
182051339 Label mismatch:  Disease Disease Chemical
182114237 Label mismatch:  Chemical Chemical Disease
182153795 Label mismatch:  Chemical Chemical Disease
182242470 Label mismatch:  Chemical Chemical Disease
182339531 Label mismatch:  Chemical Chemical Disease
182339623 Label mismatch:  Chemical Chemical Disea

192993738 Label mismatch:  Chemical Chemical Disease
193249652 Label mismatch:  Chemical Chemical Disease
193289248 Label mismatch:  Chemical Chemical Disease
193552115 Label mismatch:  Chemical Chemical Disease
193566547 Label mismatch:  Chemical Chemical Disease
193737478 Label mismatch:  Chemical Chemical Disease
193879723 Label mismatch:  Disease Disease Chemical
193946887 Label mismatch:  Chemical Chemical Disease
193947117 Label mismatch:  Chemical Chemical Disease
193986487 Label mismatch:  Chemical Chemical Disease
193986534 Label mismatch:  Chemical Chemical Disease
193986558 Label mismatch:  Chemical Chemical Disease
193986597 Label mismatch:  Chemical Chemical Disease
193986619 Label mismatch:  Chemical Chemical Disease
193986646 Label mismatch:  Chemical Chemical Disease
193986708 Label mismatch:  Chemical Chemical Disease
194176163 Label mismatch:  Chemical Chemical Disease
194606505 Label mismatch:  Chemical Chemical Disease
194647362 Label mismatch:  Chemical Chemical Di

206674361 Label mismatch:  Chemical Chemical Disease
206795252 Label mismatch:  Chemical Chemical Disease
206800179 Label mismatch:  Chemical Chemical Disease
207045195 Label mismatch:  Disease Disease Chemical
207169692 Label mismatch:  Chemical Chemical Disease
207287640 Label mismatch:  Disease Disease Chemical
207296340 Label mismatch:  Chemical Chemical Disease
207424093 Label mismatch:  Chemical Chemical Disease
207424114 Label mismatch:  Chemical Chemical Disease
207427086 Label mismatch:  Chemical Chemical Disease
207595857 Label mismatch:  Chemical Chemical Disease
207817236 Label mismatch:  Chemical Chemical Disease
207891505 Label mismatch:  Chemical Chemical Disease
207892173 Label mismatch:  Disease Disease Chemical
207899455 Label mismatch:  Chemical Chemical Disease
208179326 Label mismatch:  Chemical Chemical Disease
208280848 Label mismatch:  Disease Disease Chemical
208601328 Label mismatch:  Chemical Chemical Disease
208743551 Label mismatch:  Chemical Chemical Disea

221358705 Label mismatch:  Chemical Chemical Disease
221409530 Label mismatch:  Chemical Chemical Disease
221449792 Label mismatch:  Chemical Chemical Disease
221678033 Label mismatch:  Chemical Chemical Disease
221820359 Label mismatch:  Chemical Chemical Disease
221870836 Label mismatch:  Chemical Chemical Disease
221878851 Label mismatch:  Disease Disease Chemical
221942543 Label mismatch:  Chemical Chemical Disease
221982964 Label mismatch:  Disease Disease Chemical
222164505 Label mismatch:  Chemical Chemical Disease
222164550 Label mismatch:  Chemical Chemical Disease
222402555 Label mismatch:  Chemical Chemical Disease
222415730 Label mismatch:  Disease Disease Chemical
222617027 Label mismatch:  Chemical Chemical Disease
222617190 Label mismatch:  Chemical Chemical Disease
222820990 Label mismatch:  Chemical Chemical Disease
222848010 Label mismatch:  Chemical Chemical Disease
222859677 Label mismatch:  Disease Disease Chemical
222859974 Label mismatch:  Disease Disease Chemica

234542060 Label mismatch:  Chemical Chemical Disease
234620482 Label mismatch:  Chemical Chemical Disease
234992918 Label mismatch:  Chemical Chemical Disease
235225663 Label mismatch:  Chemical Chemical Disease
235284557 Label mismatch:  Chemical Chemical Disease
235284622 Label mismatch:  Chemical Chemical Disease
235284783 Label mismatch:  Chemical Chemical Disease
235318221 Label mismatch:  Chemical Chemical Disease
235323786 Label mismatch:  Chemical Chemical Disease
235391167 Label mismatch:  Chemical Chemical Disease
235553878 Label mismatch:  Chemical Chemical Disease
235693447 Label mismatch:  Chemical Chemical Disease
236070103 Label mismatch:  Disease Disease Chemical
236076659 Label mismatch:  Chemical Chemical Disease
236118736 Label mismatch:  Chemical Chemical Disease
236247416 Label mismatch:  Chemical Chemical Disease
236257550 Label mismatch:  Chemical Chemical Disease
236299053 Label mismatch:  Chemical Chemical Disease
236299219 Label mismatch:  Chemical Chemical Di

243477862 Label mismatch:  Chemical Chemical Disease
244023762 Label mismatch:  Chemical Chemical Disease
244082670 Label mismatch:  Chemical Chemical Disease
244302379 Label mismatch:  Chemical Chemical Disease
244302485 Label mismatch:  Chemical Chemical Disease
244302542 Label mismatch:  Chemical Chemical Disease
244303472 Label mismatch:  Chemical Chemical Disease
244303665 Label mismatch:  Chemical Chemical Disease
244389043 Label mismatch:  Chemical Chemical Disease
244513288 Label mismatch:  Chemical Chemical Disease
244611801 Label mismatch:  Chemical Chemical Disease
244611839 Label mismatch:  Chemical Chemical Disease
244611956 Label mismatch:  Chemical Chemical Disease
244844427 Label mismatch:  Chemical Chemical Disease
244845443 Label mismatch:  Chemical Chemical Disease
244845517 Label mismatch:  Chemical Chemical Disease
244845568 Label mismatch:  Chemical Chemical Disease
244845617 Label mismatch:  Chemical Chemical Disease
244845665 Label mismatch:  Chemical Chemical D

252858586 Label mismatch:  Chemical Chemical Disease
252912320 Label mismatch:  Chemical Chemical Disease
252925770 Label mismatch:  Chemical Chemical Disease
253113840 Label mismatch:  Chemical Chemical Disease
253144909 Label mismatch:  Chemical Chemical Disease
253231924 Label mismatch:  Chemical Chemical Disease
253244671 Label mismatch:  Chemical Chemical Disease
253352676 Label mismatch:  Chemical Chemical Disease
253352684 Label mismatch:  Chemical Chemical Disease
253352702 Label mismatch:  Chemical Chemical Disease
253352718 Label mismatch:  Chemical Chemical Disease
253352731 Label mismatch:  Chemical Chemical Disease
253395754 Label mismatch:  Chemical Chemical Disease
253409806 Label mismatch:  Chemical Chemical Disease
253477818 Label mismatch:  Chemical Chemical Disease
253477840 Label mismatch:  Chemical Chemical Disease
253477854 Label mismatch:  Chemical Chemical Disease
253477905 Label mismatch:  Chemical Chemical Disease
253536853 Label mismatch:  Chemical Chemical D

260381031 Label mismatch:  Chemical Chemical Disease
260381062 Label mismatch:  Chemical Chemical Disease
260381090 Label mismatch:  Chemical Chemical Disease
260381099 Label mismatch:  Chemical Chemical Disease
260381109 Label mismatch:  Chemical Chemical Disease
260381152 Label mismatch:  Chemical Chemical Disease
260381159 Label mismatch:  Chemical Chemical Disease
260381208 Label mismatch:  Chemical Chemical Disease
260430108 Label mismatch:  Chemical Chemical Disease
260434018 Label mismatch:  Chemical Chemical Disease
260470638 Label mismatch:  Chemical Chemical Disease
260470737 Label mismatch:  Chemical Chemical Disease
260481163 Label mismatch:  Chemical Chemical Disease
260481633 Label mismatch:  Chemical Chemical Disease
260497612 Label mismatch:  Chemical Chemical Disease
260497679 Label mismatch:  Chemical Chemical Disease
260581824 Label mismatch:  Chemical Chemical Disease
260585628 Label mismatch:  Chemical Chemical Disease
260596780 Label mismatch:  Chemical Chemical D

266650757 Label mismatch:  Chemical Chemical Disease
266650883 Label mismatch:  Chemical Chemical Disease
266650919 Label mismatch:  Chemical Chemical Disease
266777273 Label mismatch:  Chemical Chemical Disease
266937290 Label mismatch:  Chemical Chemical Disease
267092530 Label mismatch:  Chemical Chemical Disease
267112889 Label mismatch:  Chemical Chemical Disease
267196696 Label mismatch:  Chemical Chemical Disease
267196783 Label mismatch:  Chemical Chemical Disease
267196831 Label mismatch:  Chemical Chemical Disease
267196944 Label mismatch:  Chemical Chemical Disease
267246800 Label mismatch:  Chemical Chemical Disease
267504697 Label mismatch:  Chemical Chemical Disease
267721705 Label mismatch:  Chemical Chemical Disease
267819920 Label mismatch:  Chemical Chemical Disease
267849328 Label mismatch:  Chemical Chemical Disease
267911577 Label mismatch:  Chemical Chemical Disease
267966781 Label mismatch:  Chemical Chemical Disease
267966912 Label mismatch:  Chemical Chemical D

277176573 Label mismatch:  Chemical Chemical Disease
277179418 Label mismatch:  Chemical Chemical Disease
277300636 Label mismatch:  Chemical Chemical Disease
277364520 Label mismatch:  Chemical Chemical Disease
277364589 Label mismatch:  Chemical Chemical Disease
277508430 Label mismatch:  Chemical Chemical Disease
277816122 Label mismatch:  Chemical Chemical Disease
277885154 Label mismatch:  Chemical Chemical Disease
277907524 Label mismatch:  Chemical Chemical Disease
278094020 Label mismatch:  Chemical Chemical Disease
278097929 Label mismatch:  Chemical Chemical Disease
278218524 Label mismatch:  Chemical Chemical Disease
278238427 Label mismatch:  Chemical Chemical Disease
278516634 Label mismatch:  Chemical Chemical Disease
278516668 Label mismatch:  Chemical Chemical Disease
278516735 Label mismatch:  Chemical Chemical Disease
278516885 Label mismatch:  Chemical Chemical Disease
278516887 Label mismatch:  Chemical Chemical Disease
278611683 Label mismatch:  Chemical Chemical D

286681971 Label mismatch:  Chemical Chemical Disease
286741075 Label mismatch:  Chemical Chemical Disease
286834001 Label mismatch:  Chemical Chemical Disease
286847677 Label mismatch:  Chemical Chemical Disease
286953194 Label mismatch:  Chemical Chemical Disease
286968225 Label mismatch:  Chemical Chemical Disease
287217857 Label mismatch:  Chemical Chemical Disease
287290248 Label mismatch:  Chemical Chemical Disease
287369137 Label mismatch:  Chemical Chemical Disease
287369167 Label mismatch:  Chemical Chemical Disease
287369204 Label mismatch:  Chemical Chemical Disease
287369224 Label mismatch:  Chemical Chemical Disease
287369231 Label mismatch:  Chemical Chemical Disease
287369239 Label mismatch:  Chemical Chemical Disease
287369287 Label mismatch:  Chemical Chemical Disease
287770585 Label mismatch:  Chemical Chemical Disease
287834446 Label mismatch:  Chemical Chemical Disease
287834513 Label mismatch:  Chemical Chemical Disease
288099608 Label mismatch:  Chemical Chemical D

299029976 Label mismatch:  Chemical Chemical Disease
299030142 Label mismatch:  Chemical Chemical Disease
299200183 Label mismatch:  Chemical Chemical Disease
299282233 Label mismatch:  Chemical Chemical Disease
299396349 Label mismatch:  Chemical Chemical Disease
299396380 Label mismatch:  Chemical Chemical Disease
299414337 Label mismatch:  Chemical Chemical Disease
299603774 Label mismatch:  Chemical Chemical Disease
299603895 Label mismatch:  Chemical Chemical Disease
299604088 Label mismatch:  Chemical Chemical Disease
299614063 Label mismatch:  Chemical Chemical Disease
299614093 Label mismatch:  Chemical Chemical Disease
299614125 Label mismatch:  Chemical Chemical Disease
299614149 Label mismatch:  Chemical Chemical Disease
299639038 Label mismatch:  Chemical Chemical Disease
299685814 Label mismatch:  Chemical Chemical Disease
299710966 Label mismatch:  Chemical Chemical Disease
299750623 Label mismatch:  Chemical Chemical Disease
99.6396588201  % processed; time so far:  1413

In [10]:
#############################################################################################################
# Check concepts without beginning (on target PubMed)
# Concepts without a beginning tag (B-...): 452 in total;
# only 5 of them occur somewhere other than at the beginning of a sentence.
#############################################################################################################

import numpy as np
import matplotlib.pyplot as plt
import time
import glob

# Corpus-wide counts, keyed by chemicalCode / diseaseCode (value = count).
# A code that is absent from a dictionary is taken to have a count of zero.
chemCnts, diseCnts = {}, {}
# Co-occurrence counts, keyed by the string chemicalCode+diseaseCode
cooc = {}

# Same layout as chemCnts / diseCnts, but holding the current sentence only
TMP_chemCnts, TMP_diseCnts = {}, {}

# Text of the concept currently being parsed (empty when none is pending)
tmpStr = ''

########################################################################################
# HELPER FUNCTIONS 

# Characters dropped from a concept name before dictionary lookup: the space,
# single and double quotes, the backslash and the listed punctuation.
# Letters, digits and any character not listed here are kept.
_CONCEPT_RMV_CHARS = frozenset(" '-,/*+.!@#$%^&*()_={}[];\\:<>?\"")


# Abstract the given full concept name (c)
def abstract_concept(c):
    """Return the abstracted (normalised) form of the concept name `c`.

    Every character in _CONCEPT_RMV_CHARS is removed and the remainder is
    converted to lower case, so that e.g. "Alpha-Beta 1,2" and "alphabeta12"
    map to the same key.

    The filtering is done character by character instead of with the
    two-argument ``str.translate(None, chars)`` form, because that form only
    exists for Python 2 byte strings: it raises TypeError for unicode objects
    and for every string on Python 3.
    """
    kept = "".join(ch for ch in c if ch not in _CONCEPT_RMV_CHARS)
    # Convert to lower case
    return kept.lower()

# End of concept check & update: update counts once a whole concept is parsed
def eoc():
    """Close the concept accumulated in `tmpStr`, if there is one.

    The pending text is abstracted and looked up in `ground_dict`.  On a hit
    the per-sentence counter for the matched code is incremented (in
    `TMP_diseCnts` or `TMP_chemCnts`, depending on the matched label) and
    `cnt_matched` is increased; on a miss `cnt_NOT_matched` is increased.
    `tmpStr` is then cleared.  Nothing happens when `tmpStr` is empty.

    Raises ValueError if the matched label is neither 'Disease' nor
    'Chemical'; in that case `tmpStr` is left as it was.
    """
    global tmpStr
    global TMP_diseCnts
    global TMP_chemCnts
    global cnt_matched
    global cnt_NOT_matched

    # No concept is pending
    if tmpStr == '':
        return

    # Simple approximate string matching via the abstracted form
    key = abstract_concept(tmpStr)

    if key not in ground_dict:
        cnt_NOT_matched += 1
    else:
        entry = ground_dict[key]
        label = entry[1]  # matched label = chemical / disease
        code = entry[2]   # matched code of chemical / disease

        # Update the per-sentence count of this concept
        if label == 'Disease':
            TMP_diseCnts[code] = TMP_diseCnts.get(code, 0) + 1
        elif label == 'Chemical':
            TMP_chemCnts[code] = TMP_chemCnts.get(code, 0) + 1
        else:
            raise ValueError('Unknown concept!')

        cnt_matched += 1

    tmpStr = ''
        
# End of sentence update: update total counts based on temporary counts from this sentence, once a whole sentence is parsed
def eos():
    """Fold the per-sentence counters into the corpus-wide dictionaries.

    Every chemical code in `TMP_chemCnts` and every disease code in
    `TMP_diseCnts` adds 1 to `chemCnts` / `diseCnts` respectively, i.e. a
    concept is counted once per sentence however often it was mentioned.
    Every (chemical, disease) pair present in the sentence adds 1 to `cooc`
    under the key chemicalCode+diseaseCode.

    Disease counts are updated independently of the chemicals: nesting that
    update inside the chemical loop would count a disease once per
    co-occurring chemical and skip it entirely in sentences without any
    chemical.

    The temporary dictionaries are only read here; resetting them is left to
    the caller.
    """
    global chemCnts
    global diseCnts
    global cooc

    # Update disease counts
    for dise in TMP_diseCnts:
        diseCnts[dise] = diseCnts.get(dise, 0) + 1

    for chem in TMP_chemCnts:
        # Update chemical counts
        chemCnts[chem] = chemCnts.get(chem, 0) + 1

        # Update co-occurrences - minimum of 2x in total will be considered by filtering later
        for dise in TMP_diseCnts:
            # Key into the co-occurrence dictionary: chemicalCode+diseaseCode, always this ordering
            k = chem + '+' + dise
            cooc[k] = cooc.get(k, 0) + 1

# END OF HELPER FUNCTIONS 
########################################################################################

# Read the MeSH concept table; each row is <name, type (Chemical or Disease), code>
MESH = np.loadtxt(
    './../Dataset/CDR_MeSH.tsv',
    delimiter='\t',
    skiprows=0,
    dtype=str,
)

# Build the grounding dictionary: abstracted concept name -> full table row
# [full concept name, type (Chemical or Disease), code].
# When several names abstract to the same key, the last row in the file wins.
ground_dict = {abstract_concept(row[0]): row for row in MESH}
    
########################################################################################
########################################################################################
                    
# Iterate over PubMed abstracts and associated tags (produced by named entity recogniser)
# The two files are read in lock-step, one line from each per iteration: the
# first tab-separated field of the .conll line is the token and the matching
# line of the tags file is its tag.  Only the first character of the tag
# (B / I / O) drives the state machine below.  Blank lines separate sentences.

st = time.time()     # start time, used for progress / total time reporting
N_sentences = 0      # to count number of sentences
LINE_NUM = 0         # 1-based number of the line currently being processed
cnt_matched = 0      # count matched concepts
cnt_NOT_matched = 0  # count not-matched concepts
cnt_cwb = 0          # count concepts without beginning tag B-...

with open('/home/janciovec/Desktop/pubmed-abstracts.conll') as f_NE:     # named entities;  301,084,933 lines, using grep -c '' pubmed-abstracts_tags
    with open('/home/janciovec/Desktop/pubmed-abstracts_tags') as f_TA:  # associated tags; 301,084,933 lines
        
        # Iterate over all lines
        while True: 
            
            LINE_NUM += 1

            # Read line - named entity
            rl = f_NE.readline()
            rlwn = rl.split('\n')[0] # read line skipping the new line character

            # End of file (readline() returns '' only at EOF; a blank line is '\n')
            if rl == '':
                # The tags file must be exhausted at the same point
                if f_TA.readline() != '':
                    raise ValueError("File with tags does not match the file with named entities!")
                # End of concept check & update
                eoc()
                # End of sentence update
                eos()
                # NOTE: N_sentences is not incremented here, so a final sentence
                # that is not followed by a blank line is not counted.
                break

            # End of sentence
            elif rlwn == '':
                # The tags file must have a blank line at the same position
                if f_TA.readline() != '\n':
                    raise ValueError("File with tags does not match the file with named entities!")
                # End of concept check & update
                eoc()
                # End of sentence update
                eos()
                # Reset temporary counters for next sentence
                TMP_chemCnts = {}
                TMP_diseCnts = {}
                N_sentences += 1

            # Beginning or within sentence
            else:
                # Read associated tag
                # NOTE(review): if the tag line is empty while the token line is not,
                # tag[0] below raises IndexError instead of the mismatch ValueError.
                tag = f_TA.readline().split('\n')[0]
                # Named entity - first item at the line
                NE = rlwn.split('\t')[0]
                
                # Beginning of concept
                if tag[0] == 'B': 
                    # End of concept check & update - since there could be a concept ending right now
                    eoc()
                    # Start new concept
                    tmpStr = NE
                    
                # Continuation of the concept name
                elif tag[0] == 'I': 
        
                    # An I-tag with no pending concept text means no B-tag opened
                    # this concept; it is reported and counted, then treated as
                    # the start of a new concept.
                    if tmpStr == '': # Concept without beginning: start new one from this string
#                         if TMP_chemCnts != {}:
#                             print LINE_NUM, "Concept without beginning! NOT BEGIN OF SENTENCE"
#                             cnt_cwb += 1
                        print LINE_NUM, "Concept without beginning! ALL CASES", tag
                        cnt_cwb += 1
                    
                    tmpStr = tmpStr + NE # concatenate words without a space between them
                    
                # Outside any concept: close whatever concept was pending
                elif tag[0] == 'O':
                    # End of concept check & update
                    eoc()
                else:
                    raise ValueError("Unknown tag!")
                    
            # Progress report every 30,000,000 lines; the denominator is the
            # hard-coded total line count of the input files.
            if LINE_NUM % 30000000 == 0:
                print 100.*LINE_NUM/301084933, " % processed; time so far: ", time.time()-st, " s"

# Summary statistics for the whole run
print "Time taken ...", time.time()-st, " s"

print "Total # concepts: ", cnt_NOT_matched + cnt_matched
print "Matched concepts: ", cnt_matched

print "Concepts without beginning tag (B-...): ", cnt_cwb

print "Total # sentences: ", N_sentences

##############################################################################################################
# Total time: 1568.43784904 seconds = 26.14 minutes (with printing), 20.42 min
# Total # concepts:   9,105,005
# Matched concepts:   5,940,715
# Concepts without beginning tag (B-...):  452
# Total # sentences: 10,573,978

# To give examples of concepts without a beginning tag
# sed -n '1750573,1750577p' < pubmed-abstracts_tags
# sed -n '1750573,1750577p' < pubmed-abstracts.conll

1750575 Concept without beginning! ALL CASES I-Disease
2788688 Concept without beginning! ALL CASES I-Chemical
3387645 Concept without beginning! ALL CASES I-Disease
3701988 Concept without beginning! ALL CASES I-Chemical
4262071 Concept without beginning! ALL CASES I-Disease
4430739 Concept without beginning! ALL CASES I-Disease
5522210 Concept without beginning! ALL CASES I-Disease
6175482 Concept without beginning! ALL CASES I-Chemical
6175655 Concept without beginning! ALL CASES I-Chemical
7790449 Concept without beginning! ALL CASES I-Disease
7965579 Concept without beginning! ALL CASES I-Chemical
8317729 Concept without beginning! ALL CASES I-Disease
9115904 Concept without beginning! ALL CASES I-Disease
9323695 Concept without beginning! ALL CASES I-Chemical
10325323 Concept without beginning! ALL CASES I-Chemical
10604145 Concept without beginning! ALL CASES I-Chemical
10752818 Concept without beginning! ALL CASES I-Chemical
11628326 Concept without beginning! ALL CASES I-Chemi

102794818 Concept without beginning! ALL CASES I-Chemical
102807207 Concept without beginning! ALL CASES I-Chemical
103729368 Concept without beginning! ALL CASES I-Disease
104777377 Concept without beginning! ALL CASES I-Chemical
105331266 Concept without beginning! ALL CASES I-Chemical
105648098 Concept without beginning! ALL CASES I-Disease
107101231 Concept without beginning! ALL CASES I-Disease
107300485 Concept without beginning! ALL CASES I-Disease
107472315 Concept without beginning! ALL CASES I-Chemical
107713280 Concept without beginning! ALL CASES I-Chemical
107913119 Concept without beginning! ALL CASES I-Chemical
107913156 Concept without beginning! ALL CASES I-Chemical
109648628 Concept without beginning! ALL CASES I-Disease
109648678 Concept without beginning! ALL CASES I-Disease
110369449 Concept without beginning! ALL CASES I-Chemical
110808432 Concept without beginning! ALL CASES I-Chemical
111142531 Concept without beginning! ALL CASES I-Chemical
111961888 Concept wi

187517233 Concept without beginning! ALL CASES I-Chemical
187594382 Concept without beginning! ALL CASES I-Chemical
187688064 Concept without beginning! ALL CASES I-Disease
189014163 Concept without beginning! ALL CASES I-Disease
189515511 Concept without beginning! ALL CASES I-Chemical
189609348 Concept without beginning! ALL CASES I-Disease
190004631 Concept without beginning! ALL CASES I-Disease
190887521 Concept without beginning! ALL CASES I-Chemical
192175944 Concept without beginning! ALL CASES I-Disease
192303686 Concept without beginning! ALL CASES I-Disease
192348547 Concept without beginning! ALL CASES I-Chemical
192348585 Concept without beginning! ALL CASES I-Chemical
193415809 Concept without beginning! ALL CASES I-Chemical
193753127 Concept without beginning! ALL CASES I-Disease
194037951 Concept without beginning! ALL CASES I-Disease
194399353 Concept without beginning! ALL CASES I-Chemical
195231539 Concept without beginning! ALL CASES I-Disease
195332457 Concept witho

284736929 Concept without beginning! ALL CASES I-Disease
285586130 Concept without beginning! ALL CASES I-Chemical
286071526 Concept without beginning! ALL CASES I-Disease
286196961 Concept without beginning! ALL CASES I-Chemical
286668365 Concept without beginning! ALL CASES I-Chemical
287261024 Concept without beginning! ALL CASES I-Disease
287473215 Concept without beginning! ALL CASES I-Chemical
287493237 Concept without beginning! ALL CASES I-Chemical
288367511 Concept without beginning! ALL CASES I-Disease
289733315 Concept without beginning! ALL CASES I-Chemical
290087129 Concept without beginning! ALL CASES I-Disease
291171784 Concept without beginning! ALL CASES I-Chemical
291600334 Concept without beginning! ALL CASES I-Chemical
292310442 Concept without beginning! ALL CASES I-Disease
293915053 Concept without beginning! ALL CASES I-Disease
293943886 Concept without beginning! ALL CASES I-Chemical
294582644 Concept without beginning! ALL CASES I-Chemical
294869662 Concept wit