## Lab 3: Drug-Drug Interaction (DDI) detection

### Imports

In [1]:
import os
import nltk
import re
from nltk.tokenize import word_tokenize
from nltk.parse.corenlp import CoreNLPDependencyParser
from nltk.corpus import stopwords 
import xml.etree.ElementTree as ET
from nltk.tree import Tree

### Variables

In [2]:
# Relative paths to the three data splits; each is passed as `inputdir`
# to main_function(), which parses every file it finds in that directory.
devel_path = '../../data/Devel'
test_path = '../../data/Test-DDI'
train_path = '../../data/Train'
# File that main_function() writes its predictions to (one line per pair).
outputfile = 'task9.2_develGoal_1.txt'

# Dependency-parser client; requires a CoreNLP server reachable at this URL.
my_parser = CoreNLPDependencyParser(url="http://localhost:9000")

### Functions

In [61]:
def analyze(sent, parser=None):
    """Parse a sentence and return its dependency nodes with character offsets.

    The sentence is sent to a dependency parser and the resulting graph's
    ``nodes`` mapping is trimmed in place to the keys used by the rules
    ('address', 'head', 'lemma', 'rel', 'word', 'tag'). Every node except
    the one at index 0 additionally receives 'start' and 'end' (inclusive)
    character offsets of its word inside ``sent``.

    Args:
        sent: raw sentence text.
        parser: object exposing ``raw_parse(text)`` that yields exactly one
            graph with a ``nodes`` mapping. Defaults to the module-level
            ``my_parser``.

    Returns:
        The trimmed node mapping, or None when ``sent`` is empty or None.
        Tokens whose surface form cannot be located in ``sent`` get
        ``start == end == -1``.
    """
    if not sent:
        return None

    if parser is None:
        parser = my_parser

    # raw_parse is expected to yield exactly one graph for the sentence.
    graph, = parser.raw_parse(sent)
    tree = graph.nodes

    kept_keys = ('address', 'head', 'lemma', 'rel', 'word', 'tag')
    cursor = 0  # position in `sent` right after the last located token
    for k in range(len(tree)):
        node = tree[k]
        # Drop parser fields the rules never look at.
        for key in list(node):
            if key not in kept_keys:
                del node[key]

        # Index 0 gets no offsets (presumably the artificial root node).
        if k == 0:
            continue

        word = node['word']
        found = sent.find(word, cursor) if word else -1
        if found < 0:
            # The parser may rewrite tokens (brackets, quotes, ...), so the
            # word may not occur verbatim. Keep the cursor where it is so
            # the following tokens are still searched from the right place,
            # and mark this token with offsets that match no entity.
            node['start'] = -1
            node['end'] = -1
            continue

        node['start'] = found
        cursor = found + len(word)
        node['end'] = cursor - 1

    return tree

def _offset_sets(parts):
    """Return (starts, ends) string sets for an entity's character offsets.

    ``parts`` is either the raw ``charOffset`` string or that string split
    on '-' (the form the caller stores). A discontinuous entity such as
    "10-15;20-25" arrives as ['10', '15;20', '25'], so the original string
    is rebuilt first and then cut into ';'-separated ``start-end`` spans.
    """
    raw = parts if isinstance(parts, str) else '-'.join(parts)
    starts, ends = set(), set()
    for span in raw.split(';'):
        first, _, last = span.partition('-')
        starts.add(first.strip())
        ends.add(last.strip())
    return starts, ends


def get_entity_nodes(tree, entities, e1, e2):
    """Collect the parsed tokens that sit on the boundaries of two entities.

    A token belongs to an entity when its 'start' equals the start of one of
    the entity's spans or its 'end' equals the end of one of them. A token
    that matches the first entity is never also assigned to the second.

    Args:
        tree: mapping of node index to node dict; only nodes that carry a
            'start' key are considered.
        entities: dict mapping entity id to its ``charOffset`` value split
            on '-'.
        e1, e2: ids of the two entities to look up in ``entities``.

    Returns:
        A pair ``(entity1, entity2)`` of node lists, in ``tree`` order.
    """
    starts1, ends1 = _offset_sets(entities[e1])
    starts2, ends2 = _offset_sets(entities[e2])

    entity1, entity2 = [], []
    for node in tree.values():
        if 'start' not in node:
            continue
        # Offsets from the XML are text, so compare in string form.
        start, end = str(node['start']), str(node['end'])
        if start in starts1 or end in ends1:
            entity1.append(node)
        elif start in starts2 or end in ends2:
            entity2.append(node)
    return entity1, entity2

def check_interaction(analysis, entities, e1, e2):
    """Classify one candidate entity pair with hand-written dependency rules.

    Rules are tried in this order and the first one that fires wins:

    1. a token of e1 whose head is a token of e2 -> no interaction;
    2. a token of e1 and a token of e2 share the same head: no interaction
       if the head's tag starts with neither 'v' nor 'n', "advise" if it
       starts with 'v';
    3. the head lemma of a token of e1 is a clue lemma for effect /
       mechanism / int / advise, unless a sentence-level blocking word for
       that label is present;
    4. the head lemma of a token of e2 is a clue lemma for effect /
       mechanism / advise.

    Args:
        analysis: node mapping produced by ``analyze`` (index -> node dict),
            or None when the sentence had no text.
        entities: dict mapping entity id to its ``charOffset`` split on '-'.
        e1, e2: ids of the two entities of the pair.

    Returns:
        ``(1, label)`` with label "effect", "mechanism", "int" or "advise"
        when a rule fires, ``(0, "null")`` otherwise.
    """
    no_ddi = (0, "null")

    # analyze() yields None for an empty sentence: nothing to inspect.
    if analysis is None:
        return no_ddi

    entity1, entity2 = get_entity_nodes(analysis, entities, e1, e2)
    if not entity1 or not entity2:
        return no_ddi

    # Surface forms of all real tokens (nodes that were given offsets).
    sentence_words = [node['word'] for node in analysis.values() if 'start' in node]

    def mentions(*clues):
        return any(word in clues for word in sentence_words)

    # Sentence-level words that veto the corresponding label in rule 3.
    int_flag = mentions("administration", 'None', 'inhibitor')
    effect_flag = mentions('drug', 'administer', 'effect', 'use', 'dose')
    mechanism_flag = mentions('drug', 'administer', 'dose', 'use', 'effect', 'concentration')
    advise_flag = mentions('drug', 'use', 'effect')

    # Rule 1: e1 directly under e2.
    for ent1 in entity1:
        if analysis[ent1['head']] in entity2:
            return no_ddi

    # Rule 2: e1 and e2 under the same word.
    for ent1 in entity1:
        for ent2 in entity2:
            if ent1['head'] != ent2['head']:
                continue
            tag_initial = analysis[ent1['head']]['tag'].lower()[0]
            if tag_initial not in ('v', 'n'):
                return no_ddi
            if tag_initial == 'v':
                return (1, "advise")
            # Shared noun head: undecided, keep looking.

    # Rule 3: clue lemma governing e1, subject to the veto flags.
    for ent in entity1:
        head_lemma = analysis[ent['head']]['lemma']
        if head_lemma in ('response', 'diminish', 'enhance') and not effect_flag:
            return (1, "effect")
        if head_lemma in ('absorption', 'metabolism', 'presence') and not mechanism_flag:
            return (1, "mechanism")
        if head_lemma in ('interact', 'interaction') and not int_flag:
            return (1, "int")
        if (head_lemma in ('take', 'adjustment', 'avoid', 'recommend', 'contraindicate')
                and not advise_flag):
            return (1, "advise")

    # Rule 4: clue lemma governing e2 (no veto flags here).
    for ent in entity2:
        head_lemma = analysis[ent['head']]['lemma']
        if head_lemma in ('effect',):
            return (1, "effect")
        if head_lemma in ('absorption', 'metabolism', 'level', 'clearance'):
            return (1, "mechanism")
        if head_lemma in ('take', 'caution'):
            return (1, "advise")

    # NOTE: an earlier experiment matched clue lemmas located between the
    # two entities (e.g. 'potentiate', 'concentrations', 'interact',
    # 'should'); it was disabled and is intentionally not applied.
    return no_ddi
    

def evaluate(inputdir, outputfile):
    """Run the evaluateDDI.jar scorer on a predictions file.

    The jar at ``../../eval/evaluateDDI.jar`` is invoked through ``java``
    with ``inputdir`` and ``outputfile`` as its two arguments; whatever it
    prints goes straight to this process's stdout/stderr.

    Arguments are passed as a list (no shell), so paths containing spaces
    or shell metacharacters are handed over verbatim. Like the shell-based
    call it replaces, this is best-effort: a failing or missing ``java``
    does not raise.

    Returns:
        None.
    """
    import subprocess
    import sys

    command = [
        "java", "-jar", "../../eval/evaluateDDI.jar",
        str(inputdir), str(outputfile),
    ]
    try:
        subprocess.call(command)
    except OSError as err:
        # e.g. no `java` executable on PATH
        sys.stderr.write("Could not run the DDI evaluator: {}\n".format(err))
    return

def main_function(inputdir):
    """Predict interactions for every pair in ``inputdir`` and score them.

    Every file in ``inputdir`` is parsed as XML. For each ``sentence``
    element the entities are collected, the text is analysed once, and each
    ``pair`` element is classified. One line per pair is written to the
    module-level ``outputfile`` in the form
    ``sentence_id|e1|e2|is_ddi|ddi_type``. After the file has been closed,
    ``evaluate`` is run on it.

    Args:
        inputdir: directory containing the XML files to process.
    """
    # `with` guarantees the predictions file is closed (and flushed) even if
    # a file fails to parse, and always before the evaluator reads it.
    with open(outputfile, "w") as outf:
        for filename in os.listdir(inputdir):
            root = ET.parse(os.path.join(inputdir, filename)).getroot()

            for sentence in root.findall('sentence'):
                sent_id = sentence.get('id')
                sent_text = sentence.get('text')

                # entity id -> charOffset split on '-'
                entities = {}
                for ent in sentence.findall('entity'):
                    entities[ent.get('id')] = ent.get('charOffset').split('-')

                # Parse once per sentence; a sentence without text cannot
                # be analysed, so its pairs are reported as non-interacting.
                analysis = analyze(sent_text) if sent_text else None

                for pair in sentence.findall('pair'):
                    id_e1 = pair.get('e1')
                    id_e2 = pair.get('e2')
                    if analysis is None:
                        is_ddi, ddi_type = 0, "null"
                    else:
                        is_ddi, ddi_type = check_interaction(analysis, entities, id_e1, id_e2)
                    fields = (sent_id, id_e1, id_e2, is_ddi, ddi_type)
                    outf.write('|'.join(str(field) for field in fields) + "\n")

    # get performance score
    evaluate(inputdir, outputfile)

In [64]:
# Run the whole pipeline on the Test-DDI split: predictions are written to
# `outputfile` and then scored by the evaluator jar.
main_function(test_path)