In [1]:
################################################################################################
# Extracting chemical-disease associations from the biological literature
# R214: Main Practical
# Jan Ondras (jo356), Trinity College
################################################################################################
# Grounding of named entities to MESH concepts 
# using Simple approximate string matching (SASM)
################################################################################################

import numpy as np
import matplotlib.pyplot as plt
import time
import glob

# Set which partition to use for evaluation.
dataset_type = 'test'  # one of: 'train', 'devel', 'test'

# Load tagged named entities, preserving sentence segmentation.
# Every token line is stored as the list of its tab-separated columns;
# every blank line (end of sentence) is stored as an empty list.
NE = []
with open('./bmip-2018-master/BC5-CDR/conll/' + dataset_type + '.tsv') as f:
    for raw_line in f:
        line = raw_line.split('\n')[0]  # drop the trailing newline character
        NE.append(line.split('\t') if line != '' else [])

# Load MESH concepts: <name, type (Chemical or Disease), code>
# NOTE(review): np.loadtxt treats '#' as a comment marker by default, so a
# concept name containing '#' would be cut short -- confirm the file has none.
MESH = np.loadtxt('./../Dataset/CDR_MeSH.tsv', delimiter='\t', skiprows=0, dtype=str)

In [2]:
# Show the first 300 rows of the loaded MeSH table as a quick check of the load
print(MESH[:300])

[['11-deoxycortisol' 'Chemical' 'D003350']
 ['1,1-dichloro-2,2,2-trifluoroethane' 'Chemical' 'C067411']
 ['1-[2-(3,4-dichlorophenyl)ethyl]-4-methylpiperazine' 'Chemical' 'C093337']
 ['1,2-dimethylhydrazine' 'Chemical' 'D019813']
 ['1,2-DMH' 'Chemical' 'D019813']
 ['1-(2-methoxyphenyl)-4-[4-(2-phthalimido)butyl]piperazine' 'Chemical'
  'C058895']
 ['1,3-bis-(2-chloroethyl)-1-nitrosourea' 'Chemical' 'D002330']
 ['17beta-estradiol' 'Chemical' 'D004958']
 ['17beta-Estradiol' 'Chemical' 'D004958']
 ['1-bromo-1-chloro-2,2,2-trifluoroethane' 'Chemical' 'D006221']
 ['1-chloro-1,2,2,2-tetrafluoroethane' 'Chemical' 'C072959']
 ['2-acetylaminofluorene' 'Chemical' 'D015073']
 ['2-chloroprocaine-CE' 'Chemical' 'C004616']
 ['2-methoxy-4-amino-5-chlorobenzoic acid 2-(diethylamino)ethyl ester'
  'Chemical' 'C072790']
 ['2-methyl-6-(phenylethynyl)pyridine' 'Chemical' 'C121465']
 ['2PAM' 'Chemical' 'D011220']
 ['3,4-dihydroxyphenylacetic acid' 'Chemical' 'D015102']
 ['3,4-methylenedioxymethamphetamine' 

In [3]:
################################################
# ABSTRACTION FUNCTION
# Abstract the given full concept name (c)
def abstract_concept(c, rmv_chars=" '-,/*+.!@#$%^&*()_={}[];\\:<>?\"", lowercase=True):
    """Return the abstracted (normalised) form of the concept name `c`.

    Every character of `c` that occurs in `rmv_chars` is dropped and, when
    `lowercase` is true, the remainder is converted to lower case.

    Parameters:
        c         -- concept name (byte string or unicode string).
        rmv_chars -- string holding the characters to delete. The default is
                     the space, the double quote and the punctuation
                     ' - , / * + . ! @ # $ % ^ & ( ) _ = { } [ ] ; \\ : < > ?
                     Pass " " to strip spaces only.
        lowercase -- set to False to keep the original letter case.

    Returns:
        The abstracted string ('' for an empty input).
    """
    # Filtering character by character (instead of the two-argument
    # str.translate(None, chars), which exists only for Python 2 byte strings)
    # keeps the function usable with unicode input and under Python 3.
    kept = ''.join(ch for ch in c if ch not in rmv_chars)
    return kept.lower() if lowercase else kept

################################################
# Construct the Grounding dictionary:
# maps abstracted concept name -> MESH row [full concept name, type (Chemical or Disease), code]
# Rows are inserted in table order, so when several names abstract to the
# same key the row that appears last in MESH is the one that is kept.
ground_dict = {abstract_concept(mesh_row[0]): mesh_row for mesh_row in MESH}

In [4]:
################################################################################################
# Iterate over tagged concepts and find matches with MESH concepts
# 5 experiments for each set: methods E1,E2,A1,A2,A3

#######################
# on training set ---- total 9385 concepts

# 7113 matches with exact matching, concatenate words without a space between them, on training set
# 8779 matches with exact matching, concatenate words with a space between them,    on training set
# 9309 matches with approx match: remove spaces
# 9309 matches with approx match: remove spaces + to lower case ---- no improvement as the MESH dictionary already handles this
# 9309 matches with approx match: above + tried to remove all these '-,/*+.!@#$%^&*()_={}[];\:<>?" but without improvement

#######################
# on development set ---- total 9591 concepts

# 5037 matches with exact matching, concatenate words without a space between them, on development set
# 5682 matches with exact matching, concatenate words with a space between them,    on development set
# 5848 matches with approx match: remove spaces
# 6085 matches with approx match: remove spaces + to lower case ---- improvement!!! => in training set the lower and upper case concepts match the possibilities present in MESH; but this is not the case for testing set => better to convert to lower case!
# 6098 matches with approx match: all above + remove all these '-,/*+.!@#$%^&*()_={}[];\:<>?" with improvement

###################
# on testing set ---- total 9809 concepts

# 5014 matches with exact matching, concatenate words without a space between them, on testing set
# 5682 matches with exact matching, concatenate words with a space between them,    on testing set
# 5845 matches with approx match: remove spaces
# 6101 matches with approx match: remove spaces + to lower case ---- improvement!!! => in training set the lower and upper case concepts match the possibilities present in MESH; but this is not the case for testing set => better to convert to lower case!
# 6134 matches with approx match: all above + remove all these '-,/*+.!@#$%^&*()_={}[];\:<>?" with improvement
#        31 of the improvement caused by char -

# Wall-clock start; the elapsed time is reported once the matching has finished
st = time.time()

# Occurrence counters: tagged concepts found / not found in the grounding dictionary
cnt_matched, cnt_NOT_matched = 0, 0

# Distinct abstracted concepts: every one encountered vs. only the matched ones
all_AC, matched_AC = set(), set()

# If the character ch is present in one of the strings a,b but not both; used to generate some examples
def check(a, b, ch='('):
    """Return True when `ch` occurs in exactly one of the strings `a` and `b`.

    Used to pick out example matches in which the tagged text and the MESH
    name differ by a particular character.

    Parameters:
        a, b -- the two strings to compare.
        ch   -- the character (or substring) to look for; defaults to '('.

    Returns:
        True if `ch` is in `a` or in `b` but not in both, otherwise False.
    """
    return (ch in a) != (ch in b)

def _iter_tagged_concepts(tokens, sep='', tag_col=5):
    """Yield the surface string of every tagged concept found in `tokens`.

    Parameters:
        tokens  -- list with one entry per input line: a list of columns for a
                   token, or an empty list at the end of a sentence.
        sep     -- string placed between the words of a multi-word concept
                   ('' concatenates them directly, ' ' keeps a space).
        tag_col -- index of the column that holds the tag.

    A concept starts at a token whose tag begins with 'B' and extends over the
    directly following tokens whose tag begins with 'I'. It stops at the first
    other token, at the end of the sentence, or at the end of `tokens`.
    """
    n = len(tokens)
    pos = 0
    while pos < n:
        row = tokens[pos]
        if row and row[tag_col][0] == 'B':
            words = [row[0]]
            pos += 1
            # The bounds test matters when the very last token belongs to a
            # concept and no blank line follows it (previously an IndexError).
            while pos < n and tokens[pos] and tokens[pos][tag_col][0] == 'I':
                words.append(tokens[pos][0])
                pos += 1
            yield sep.join(words)
        else:
            # End of sentence, an 'I' without a preceding 'B', or any other tag
            pos += 1


# Ground every tagged concept: abstract its text and look it up in ground_dict.
# Only the abstracted string is compared; the rest of the tag is not used.
for tmpStr in _iter_tagged_concepts(NE):  # pass sep=' ' to join the words WITH a space

    # Simple approximate string matching
    ac = abstract_concept(tmpStr)
    all_AC.add(ac)

    # Is this abstracted concept in our grounding dictionary ?
    if ac in ground_dict:
        matched_MESH_concept = ground_dict[ac]
        # To list only matches that differ by one particular character:
        #     if check(tmpStr, matched_MESH_concept[0]): ...
        # One line per match is printed, i.e. several thousand lines per run.
        print("YES:  %s <==> %s \t ***  %s" % (tmpStr, matched_MESH_concept, ac))
        cnt_matched += 1
        matched_AC.add(ac)
    else:
        cnt_NOT_matched += 1

# Summary. Each message is formatted into a single string so that the output
# is the same under the Python 2 print statement and the print() function.
print("---------------------------------------------------\nDATASET %s" % dataset_type)

print("Total  :  %s" % (cnt_NOT_matched + cnt_matched))
print("Matched:  %s" % cnt_matched)

print("Unique concept counts:")
print("Total  :  %s" % len(all_AC))
print("Matched:  %s" % len(matched_AC))

print("Time taken:  %s sec" % (time.time() - st))

YES:  ventriculartachycardia <==> ['ventricular tachycardia' 'Disease' 'D017180'] 	 ***  ventriculartachycardia
YES:  dobutamine <==> ['Dobutamine' 'Chemical' 'D004280'] 	 ***  dobutamine
YES:  dilatedcardiomyopathy <==> ['dilated cardiomyopathy' 'Disease' 'D002311'] 	 ***  dilatedcardiomyopathy
YES:  congestiveheartfailure <==> ['congestive heart failure' 'Disease' 'D006333'] 	 ***  congestiveheartfailure
YES:  heartfailure <==> ['heart failure' 'Disease' 'D006333'] 	 ***  heartfailure
YES:  dilatedcardiomyopathy <==> ['dilated cardiomyopathy' 'Disease' 'D002311'] 	 ***  dilatedcardiomyopathy
YES:  ventriculararrhythmias <==> ['ventricular arrhythmias' 'Disease' 'D001145'] 	 ***  ventriculararrhythmias
YES:  QTprolongation <==> ['Q-T prolongation' 'Disease' 'D008133'] 	 ***  qtprolongation
YES:  ventriculartachycardia <==> ['ventricular tachycardia' 'Disease' 'D017180'] 	 ***  ventriculartachycardia
YES:  dobutamine <==> ['Dobutamine' 'Chemical' 'D004280'] 	 ***  dobutamine
YES:  vent

YES:  stressincontinence <==> ['stress incontinence' 'Disease' 'D014550'] 	 ***  stressincontinence
YES:  headache <==> ['Headache' 'Disease' 'D006261'] 	 ***  headache
YES:  stressincontinence <==> ['stress incontinence' 'Disease' 'D014550'] 	 ***  stressincontinence
YES:  glucose <==> ['glucose' 'Chemical' 'D005947'] 	 ***  glucose
YES:  glucosuria <==> ['glucosuria' 'Disease' 'D006030'] 	 ***  glucosuria
YES:  glutamate <==> ['glutamate' 'Chemical' 'D018698'] 	 ***  glutamate
YES:  methionine <==> ['methionine' 'Chemical' 'D008715'] 	 ***  methionine
YES:  heartfailure <==> ['heart failure' 'Disease' 'D006333'] 	 ***  heartfailure
YES:  Lisinopril <==> ['lisinopril' 'Chemical' 'D017706'] 	 ***  lisinopril
YES:  angiotensin-convertingenzyme(ACE)inhibitors <==> ['angiotensin converting enzyme (ACE) inhibitors' 'Chemical' 'D000806'] 	 ***  angiotensinconvertingenzymeaceinhibitors
YES:  heartfailure <==> ['heart failure' 'Disease' 'D006333'] 	 ***  heartfailure
YES:  CHF <==> ['CHF' 'Di

YES:  estradiol <==> ['Estradiol' 'Chemical' 'D004958'] 	 ***  estradiol
YES:  bupivacaine <==> ['bupivacaine' 'Chemical' 'D002045'] 	 ***  bupivacaine
YES:  epinephrine <==> ['Epinephrine' 'Chemical' 'D004837'] 	 ***  epinephrine
YES:  progesterone <==> ['progesterone' 'Chemical' 'D011374'] 	 ***  progesterone
YES:  bupivacaine <==> ['bupivacaine' 'Chemical' 'D002045'] 	 ***  bupivacaine
YES:  pentobarbital <==> ['pentobarbital' 'Chemical' 'D010424'] 	 ***  pentobarbital
YES:  arrhythmia <==> ['arrhythmia' 'Disease' 'D001145'] 	 ***  arrhythmia
YES:  progesterone <==> ['progesterone' 'Chemical' 'D011374'] 	 ***  progesterone
YES:  bupivacaine <==> ['bupivacaine' 'Chemical' 'D002045'] 	 ***  bupivacaine
YES:  bupivacaine <==> ['bupivacaine' 'Chemical' 'D002045'] 	 ***  bupivacaine
YES:  arrhythmia <==> ['arrhythmia' 'Disease' 'D001145'] 	 ***  arrhythmia
YES:  acuteliverfailure <==> ['Acute liver failure' 'Disease' 'D017114'] 	 ***  acuteliverfailure
YES:  paracetamol <==> ['Paracetamo

YES:  stroke <==> ['Stroke' 'Disease' 'D020521'] 	 ***  stroke
YES:  breastcancer <==> ['breast cancer' 'Disease' 'D001943'] 	 ***  breastcancer
YES:  gallbladderdisease <==> ['gallbladder disease' 'Disease' 'D005705'] 	 ***  gallbladderdisease
YES:  stroke <==> ['Stroke' 'Disease' 'D020521'] 	 ***  stroke
YES:  gallbladderdisease <==> ['gallbladder disease' 'Disease' 'D005705'] 	 ***  gallbladderdisease
YES:  fractures <==> ['fractures' 'Disease' 'D050723'] 	 ***  fractures
YES:  dementia <==> ['dementia' 'Disease' 'D003704'] 	 ***  dementia
YES:  cardiovasculardisease <==> ['cardiovascular disease' 'Disease' 'D002318'] 	 ***  cardiovasculardisease
YES:  venousthromboembolism <==> ['venous thromboembolism' 'Disease' 'D054556'] 	 ***  venousthromboembolism
YES:  venousthromboembolism <==> ['venous thromboembolism' 'Disease' 'D054556'] 	 ***  venousthromboembolism
YES:  liverdamage <==> ['liver damage' 'Disease' 'D056486'] 	 ***  liverdamage
YES:  cholestatic <==> ['cholestatic' 'Diseas

YES:  cognitivedeficits <==> ['cognitive deficits' 'Disease' 'D003072'] 	 ***  cognitivedeficits
YES:  Caffeine <==> ['Caffeine' 'Chemical' 'D002110'] 	 ***  caffeine
YES:  panicdisorder <==> ['panic disorder' 'Disease' 'D016584'] 	 ***  panicdisorder
YES:  depression <==> ['Depression' 'Disease' 'D003866'] 	 ***  depression
YES:  panicdisorder <==> ['panic disorder' 'Disease' 'D016584'] 	 ***  panicdisorder
YES:  PD <==> ['PD' 'Disease' 'D010300'] 	 ***  pd
YES:  majordepression <==> ['major depression' 'Disease' 'D003865'] 	 ***  majordepression
YES:  caffeine <==> ['Caffeine' 'Chemical' 'D002110'] 	 ***  caffeine
YES:  PD <==> ['PD' 'Disease' 'D010300'] 	 ***  pd
YES:  majordepression <==> ['major depression' 'Disease' 'D003865'] 	 ***  majordepression
YES:  caffeine <==> ['Caffeine' 'Chemical' 'D002110'] 	 ***  caffeine
YES:  caffeine <==> ['Caffeine' 'Chemical' 'D002110'] 	 ***  caffeine
YES:  anxiety <==> ['anxiety' 'Disease' 'D001008'] 	 ***  anxiety
YES:  PD <==> ['PD' 'Disease

YES:  NSCLC <==> ['NSCLC' 'Disease' 'D002289'] 	 ***  nsclc
YES:  non-small-celllungcancer <==> ['non-small cell lung cancer' 'Disease' 'D002289'] 	 ***  nonsmallcelllungcancer
YES:  NSCLC <==> ['NSCLC' 'Disease' 'D002289'] 	 ***  nsclc
YES:  rapamycin <==> ['rapamycin' 'Chemical' 'D020123'] 	 ***  rapamycin
YES:  NSCLC <==> ['NSCLC' 'Disease' 'D002289'] 	 ***  nsclc
YES:  NSCLC <==> ['NSCLC' 'Disease' 'D002289'] 	 ***  nsclc
YES:  tyrosine <==> ['tyrosine' 'Chemical' 'D014443'] 	 ***  tyrosine
YES:  toxicity <==> ['Toxicity' 'Disease' 'D064420'] 	 ***  toxicity
YES:  tumor <==> ['tumor' 'Disease' 'D009369'] 	 ***  tumor
YES:  fatigue <==> ['fatigue' 'Disease' 'D005221'] 	 ***  fatigue
YES:  dyspnea <==> ['dyspnea' 'Disease' 'D004417'] 	 ***  dyspnea
YES:  stomatitis <==> ['stomatitis' 'Disease' 'D013280'] 	 ***  stomatitis
YES:  anemia <==> ['anemia' 'Disease' 'D000740'] 	 ***  anemia
YES:  thrombocytopenia <==> ['thrombocytopenia' 'Disease' 'D013921'] 	 ***  thrombocytopenia
YES:  Pn

YES:  vomiting <==> ['vomiting' 'Disease' 'D014839'] 	 ***  vomiting
YES:  fever <==> ['fever' 'Disease' 'D005334'] 	 ***  fever
YES:  Syncope <==> ['syncope' 'Disease' 'D013575'] 	 ***  syncope
YES:  hyperkalemia <==> ['Hyperkalemia' 'Disease' 'D006947'] 	 ***  hyperkalemia
YES:  angiotensin <==> ['Angiotensin' 'Chemical' 'D000809'] 	 ***  angiotensin
YES:  spironolactone <==> ['Spironolactone' 'Chemical' 'D013148'] 	 ***  spironolactone
YES:  myocardialinfarction <==> ['Myocardial infarction' 'Disease' 'D009203'] 	 ***  myocardialinfarction
YES:  lossofconsciousness <==> ['Loss of consciousness' 'Disease' 'D014474'] 	 ***  lossofconsciousness
YES:  bradycardia <==> ['Bradycardia' 'Disease' 'D001919'] 	 ***  bradycardia
YES:  hyperkalemia <==> ['Hyperkalemia' 'Disease' 'D006947'] 	 ***  hyperkalemia
YES:  potassium <==> ['Potassium' 'Chemical' 'D011188'] 	 ***  potassium
YES:  potassium <==> ['Potassium' 'Chemical' 'D011188'] 	 ***  potassium
YES:  hyperkalemia <==> ['Hyperkalemia' 'D

YES:  encephalopathy <==> ['Encephalopathy' 'Disease' 'D001927'] 	 ***  encephalopathy
YES:  nephropathy <==> ['nephropathy' 'Disease' 'D007674'] 	 ***  nephropathy
YES:  cancer <==> ['Cancer' 'Disease' 'D009369'] 	 ***  cancer
YES:  nephropathy <==> ['nephropathy' 'Disease' 'D007674'] 	 ***  nephropathy
YES:  cancer <==> ['Cancer' 'Disease' 'D009369'] 	 ***  cancer
YES:  acuterenalfailure <==> ['acute renal failure' 'Disease' 'D058186'] 	 ***  acuterenalfailure
YES:  creatinine <==> ['creatinine' 'Chemical' 'D003404'] 	 ***  creatinine
YES:  Cr <==> ['Cr' 'Chemical' 'D002857'] 	 ***  cr
YES:  Cr <==> ['Cr' 'Chemical' 'D002857'] 	 ***  cr
YES:  hypertension <==> ['Hypertension' 'Disease' 'D006973'] 	 ***  hypertension
YES:  cancer <==> ['Cancer' 'Disease' 'D009369'] 	 ***  cancer
YES:  Hypertension <==> ['Hypertension' 'Disease' 'D006973'] 	 ***  hypertension
YES:  nephropathy <==> ['nephropathy' 'Disease' 'D007674'] 	 ***  nephropathy
YES:  Hypertension <==> ['Hypertension' 'Disease' 

YES:  Paroxetine <==> ['paroxetine' 'Chemical' 'D017374'] 	 ***  paroxetine
YES:  Terfenadine <==> ['terfenadine' 'Chemical' 'D016593'] 	 ***  terfenadine
YES:  Citalopram <==> ['citalopram' 'Chemical' 'D015283'] 	 ***  citalopram
YES:  Terfenadine <==> ['terfenadine' 'Chemical' 'D016593'] 	 ***  terfenadine
YES:  Citalopram <==> ['citalopram' 'Chemical' 'D015283'] 	 ***  citalopram
YES:  TdP <==> ['TDP' 'Disease' 'D016171'] 	 ***  tdp
YES:  TdP <==> ['TDP' 'Disease' 'D016171'] 	 ***  tdp
YES:  toxicity <==> ['Toxicity' 'Disease' 'D064420'] 	 ***  toxicity
YES:  toxicity <==> ['Toxicity' 'Disease' 'D064420'] 	 ***  toxicity
YES:  toxicity <==> ['Toxicity' 'Disease' 'D064420'] 	 ***  toxicity
YES:  toxicity <==> ['Toxicity' 'Disease' 'D064420'] 	 ***  toxicity
YES:  Toxicity <==> ['Toxicity' 'Disease' 'D064420'] 	 ***  toxicity
YES:  ventricularseptaldefect <==> ['ventricular septal defect' 'Disease' 'D006345'] 	 ***  ventricularseptaldefect
YES:  RenalToxicity <==> ['renal toxicity' 'D

YES:  enflurane <==> ['enflurane' 'Chemical' 'D004737'] 	 ***  enflurane
YES:  isoflurane <==> ['isoflurane' 'Chemical' 'D007530'] 	 ***  isoflurane
YES:  labetalol <==> ['Labetalol' 'Chemical' 'D007741'] 	 ***  labetalol
YES:  hypotensive <==> ['hypotensive' 'Disease' 'D007022'] 	 ***  hypotensive
YES:  halothane <==> ['halothane' 'Chemical' 'D006221'] 	 ***  halothane
YES:  enflurane <==> ['enflurane' 'Chemical' 'D004737'] 	 ***  enflurane
YES:  isoflurane <==> ['isoflurane' 'Chemical' 'D007530'] 	 ***  isoflurane
YES:  halothane <==> ['halothane' 'Chemical' 'D006221'] 	 ***  halothane
YES:  H <==> ['H' 'Chemical' 'D006859'] 	 ***  h
YES:  enflurane <==> ['enflurane' 'Chemical' 'D004737'] 	 ***  enflurane
YES:  isoflurane <==> ['isoflurane' 'Chemical' 'D007530'] 	 ***  isoflurane
YES:  H <==> ['H' 'Chemical' 'D006859'] 	 ***  h
YES:  hypotension <==> ['Hypotension' 'Disease' 'D007022'] 	 ***  hypotension
YES:  fentanyl <==> ['Fentanyl' 'Chemical' 'D005283'] 	 ***  fentanyl
YES:  labe

YES:  seizures <==> ['Seizures' 'Disease' 'D012640'] 	 ***  seizures
YES:  seizures <==> ['Seizures' 'Disease' 'D012640'] 	 ***  seizures
YES:  drowsiness <==> ['Drowsiness' 'Disease' 'D006970'] 	 ***  drowsiness
YES:  ataxia <==> ['ataxia' 'Disease' 'D001259'] 	 ***  ataxia
YES:  timolol <==> ['timolol' 'Chemical' 'D013999'] 	 ***  timolol
YES:  hypertensive <==> ['Hypertensive' 'Disease' 'D006973'] 	 ***  hypertensive
YES:  timolol <==> ['timolol' 'Chemical' 'D013999'] 	 ***  timolol
YES:  hypertension <==> ['Hypertension' 'Disease' 'D006973'] 	 ***  hypertension
YES:  fatigue <==> ['fatigue' 'Disease' 'D005221'] 	 ***  fatigue
YES:  dizziness <==> ['Dizziness' 'Disease' 'D004244'] 	 ***  dizziness
YES:  weakness <==> ['weakness' 'Disease' 'D018908'] 	 ***  weakness
YES:  nephropathy <==> ['nephropathy' 'Disease' 'D007674'] 	 ***  nephropathy
YES:  prostaglandins <==> ['prostaglandins' 'Chemical' 'D011453'] 	 ***  prostaglandins
YES:  prostaglandins <==> ['prostaglandins' 'Chemical' 

YES:  visualloss <==> ['visual loss' 'Disease' 'D014786'] 	 ***  visualloss
YES:  chorioretinalatrophy <==> ['chorioretinal atrophy' 'Disease' 'C566236'] 	 ***  chorioretinalatrophy
YES:  quinine <==> ['quinine' 'Chemical' 'D011803'] 	 ***  quinine
YES:  quinine <==> ['quinine' 'Chemical' 'D011803'] 	 ***  quinine
YES:  toxicity <==> ['Toxicity' 'Disease' 'D064420'] 	 ***  toxicity
YES:  Suxamethonium <==> ['Suxamethonium' 'Chemical' 'D013390'] 	 ***  suxamethonium
YES:  myalgia <==> ['myalgia' 'Disease' 'D063806'] 	 ***  myalgia
YES:  halothane <==> ['halothane' 'Chemical' 'D006221'] 	 ***  halothane
YES:  nitrousoxide <==> ['nitrous oxide' 'Chemical' 'D009609'] 	 ***  nitrousoxide
YES:  oxygen <==> ['oxygen' 'Chemical' 'D010100'] 	 ***  oxygen
YES:  suxamethonium <==> ['Suxamethonium' 'Chemical' 'D013390'] 	 ***  suxamethonium
YES:  apnoea <==> ['apnoea' 'Disease' 'D001049'] 	 ***  apnoea
YES:  myalgia <==> ['myalgia' 'Disease' 'D063806'] 	 ***  myalgia
YES:  myalgia <==> ['myalgia' 

YES:  pilocarpine <==> ['pilocarpine' 'Chemical' 'D010862'] 	 ***  pilocarpine
YES:  Scopolamine <==> ['scopolamine' 'Chemical' 'D012601'] 	 ***  scopolamine
YES:  pentobarbital <==> ['pentobarbital' 'Chemical' 'D010424'] 	 ***  pentobarbital
YES:  pilocarpine <==> ['pilocarpine' 'Chemical' 'D010862'] 	 ***  pilocarpine
YES:  seizure <==> ['Seizure' 'Disease' 'D012640'] 	 ***  seizure
YES:  MK-801 <==> ['MK801' 'Chemical' 'D016291'] 	 ***  mk801
YES:  seizure <==> ['Seizure' 'Disease' 'D012640'] 	 ***  seizure
YES:  pilocarpine <==> ['pilocarpine' 'Chemical' 'D010862'] 	 ***  pilocarpine
YES:  Scopolamine <==> ['scopolamine' 'Chemical' 'D012601'] 	 ***  scopolamine
YES:  pentobarbital <==> ['pentobarbital' 'Chemical' 'D010424'] 	 ***  pentobarbital
YES:  pilocarpine <==> ['pilocarpine' 'Chemical' 'D010862'] 	 ***  pilocarpine
YES:  seizure <==> ['Seizure' 'Disease' 'D012640'] 	 ***  seizure
YES:  MK-801 <==> ['MK801' 'Chemical' 'D016291'] 	 ***  mk801
YES:  seizure <==> ['Seizure' 'Dis

In [12]:
######################################################################################################
# Print table with results: # matched NEs; several table variants
######################################################################################################

import numpy as np
import time
import glob
from tabulate import tabulate

# Row labels: one per experiment (E1, E2 = exact matching; A1-A3 = approximate matching)
methods = ['E1', 'E2', 'A1', 'A2', 'A3']
# Column labels: one per dataset partition
sets = ['Training set', 'Development set', 'Test set']
TOTAL_LABEL = 'Total # NEs'

# Number of matched NEs, rows = methods, columns = sets.
# The values are copied by hand from the runs of the matching cell.
MATCH_COUNTS = np.array([
    [7113, 5037, 5014],
    [8779, 5682, 5682],
    [9309, 5848, 5845],
    [9309, 6085, 6101],
    [9309, 6098, 6134],
])
# Total number of tagged NEs in each set
TOTAL_NES = np.array([9385, 9591, 9809])


def _print_result_tables(values, row_labels, floatfmt=None):
    """Print `values` twice: as a 'fancy_grid' table, then as 'latex_booktabs'.

    Parameters:
        values     -- 2-D array with one column per entry of `sets`.
        row_labels -- labels shown in the first column, one per row of `values`.
        floatfmt   -- format applied to float cells (e.g. ".4f"); when None,
                      tabulate's own default is used.
    """
    options = dict(headers=['Abstraction method'] + sets,
                   showindex=row_labels, numalign='center')
    if floatfmt is not None:
        options['floatfmt'] = floatfmt
    for table_format in ('fancy_grid', 'latex_booktabs'):
        print(tabulate(values, tablefmt=table_format, **options))


# Fraction of NEs matched. The totals are cast to float so that the division
# is a true division under Python 2 as well.
match_rates = MATCH_COUNTS / TOTAL_NES.astype(float)

# Variant 1: absolute counts, with the totals as the last row
_print_result_tables(np.vstack((MATCH_COUNTS, TOTAL_NES)), methods + [TOTAL_LABEL])

# Variant 2: match rates only
_print_result_tables(match_rates, methods, floatfmt=".4f")

# Variant 3: match rates, with the (unnormalised) totals as the last row
_print_result_tables(np.vstack((match_rates, TOTAL_NES.astype(float))),
                     methods + [TOTAL_LABEL], floatfmt=".3f")


╒══════════════════════╤════════════════╤═══════════════════╤════════════╕
│ Abstraction method   │  Training set  │  Development set  │  Test set  │
╞══════════════════════╪════════════════╪═══════════════════╪════════════╡
│ E1                   │      7113      │       5037        │    5014    │
├──────────────────────┼────────────────┼───────────────────┼────────────┤
│ E2                   │      8779      │       5682        │    5682    │
├──────────────────────┼────────────────┼───────────────────┼────────────┤
│ A1                   │      9309      │       5848        │    5845    │
├──────────────────────┼────────────────┼───────────────────┼────────────┤
│ A2                   │      9309      │       6085        │    6101    │
├──────────────────────┼────────────────┼───────────────────┼────────────┤
│ A3                   │      9309      │       6098        │    6134    │
├──────────────────────┼────────────────┼───────────────────┼────────────┤
│ Total # NEs          │ 