In [1]:
import spacy
from spacy import displacy
import scispacy

from negspacy.negation import Negex
from negspacy.termsets import termset

import networkx as nx

from tasks_python_files.Task1_Negation_cue_detection import *

import re



#### Some sentences with their scope:
`She does not like Steve Jobs`
scope: like Steve Jobs

`There is no negation`
scope: There is negation

`Jim never likes to go to supermarket in the morning`
scope: likes to go to supermarket in the morning

`Sam is unlikely to give his notebook to you`
scope: Sam is to give his notebook

`Sam is unlikely to give his notebook to you because he does not like you`
scope: to give his notebook

`The production has not grown, but we are hopeful for the future`
scope: grown (one word after neg cue is affected)

### Different rules to identify negation scope

###### `Rule 1:` all words after the negation cue

In [2]:
def get_indices_neg_cues(document, negation_cues):
    """Locate one token position in ``document`` for each negation cue.

    Cues are processed in list order. For every cue the document is scanned
    from left to right, and the first position that has not been claimed by
    an earlier cue and whose lower-cased text equals the cue is recorded.
    A cue without a matching token contributes nothing, so the result may be
    shorter than ``negation_cues``.

    Args:
        document: indexable token sequence; each token exposes ``.text``.
        negation_cues: cue strings compared against lower-cased token text.

    Returns:
        list[int]: claimed token positions, in the order cues were matched.
    """
    claimed = []
    for cue in negation_cues:
        # First free position whose lower-cased text is exactly this cue.
        hit = next(
            (
                position
                for position in range(len(document))
                if position not in claimed
                and document[position].text.lower() == cue
            ),
            None,
        )
        if hit is not None:
            claimed.append(hit)
    return claimed

In [3]:
def covert_parse_tree_svg_to_png(document,
                                 svg_path="sentence_dep_parse_tree.svg",
                                 png_path="sentence_dep_parse_tree.png"):
    """Render the dependency parse of ``document`` to SVG and convert it to PNG.

    The SVG markup produced by displaCy is written to ``svg_path``; that file
    is then read back with ``svg2rlg`` and drawn to ``png_path`` as a PNG.

    Args:
        document: parsed spaCy document handed to ``displacy.render``.
        svg_path: where the intermediate SVG is written (default keeps the
            original hard-coded file name).
        png_path: where the PNG is written (default keeps the original
            hard-coded file name).

    Returns:
        None. The function only writes the two files.
    """
    # Local import: ``pathlib`` is not imported in the notebook's import cell.
    from pathlib import Path

    # jupyter=False forces displaCy to *return* the markup. With the default
    # auto-detection, a notebook kernel displays the figure and returns None,
    # which then cannot be written to disk.
    svg = displacy.render(document, style='dep', jupyter=False)

    # write_text opens, writes and closes the file; the previous
    # ``open(...).write(...)`` left the handle to be closed by the GC.
    Path(svg_path).write_text(svg, encoding="utf-8")

    # NOTE(review): ``svg2rlg`` and ``renderPM`` are not imported in the visible
    # import cell; presumably svglib.svglib.svg2rlg and
    # reportlab.graphics.renderPM reach this namespace some other way
    # (e.g. the star import) -- confirm, otherwise this raises NameError.
    drawing = svg2rlg(str(svg_path))
    renderPM.drawToFile(drawing, str(png_path), fmt="PNG")

In [4]:
def display_pos_tag_tree(document):
    """Print POS information and draw the dependency tree of ``document``.

    Three things are shown:
      1. one printed line per token: ``text => coarse POS => fine-grained tag``;
      2. a networkx drawing of the head/child edges of the parse;
      3. the displaCy dependency visualisation rendered inline in the notebook.

    Args:
        document: parsed spaCy document (tokens expose ``text``, ``pos_``,
            ``tag_``, ``lower_``, ``i`` and ``children``).

    Returns:
        None. Output is produced via printing and plotting only.
    """
    for token in document:
        print(token.text, '=>', token.pos_, '=>', token.tag_)

    # Load spacy's dependency tree into a networkx graph.
    # Nodes are keyed by token index rather than by lower-cased text so that
    # a word occurring twice in the sentence (e.g. "to ... to") stays two
    # separate nodes; the lower-cased text is kept only as the drawn label.
    graph = nx.Graph()
    labels = {}
    for token in document:
        for child in token.children:
            graph.add_edge(token.i, child.i)
            labels[token.i] = token.lower_
            labels[child.i] = child.lower_
    nx.draw(graph, labels=labels, with_labels=True)
    displacy.render(document, style='dep', jupyter=True)

In [5]:
#after negation cue take remaining part of the sentence
def get_neg_scope_rule1(cues_list, document):
    """Rule 1: the scope of a cue is every token after it in the document.

    Args:
        cues_list: negation cue strings to look for.
        document: indexable token sequence; each token exposes ``.text``.

    Returns:
        tuple ``(neg_scope_list, neg_scope_str)`` where
          * ``neg_scope_list`` holds, per located cue, the list of tokens that
            follow the cue up to the end of the document;
          * ``neg_scope_str`` is the concatenation, per located cue, of
            ``<cueScope>`` + the token texts (joined without separator) +
            ``</cueScope>``.
        Both are ordered like the result of ``get_indices_neg_cues``.
    """
    neg_scope_list = []
    pieces = []
    for cue_idx in get_indices_neg_cues(document, cues_list):
        # get_indices_neg_cues silently skips cues it cannot find, so position
        # m of its result need not correspond to cues_list[m]. The matched cue
        # is by construction the lower-cased text of the token at cue_idx, so
        # take the label from there to keep tag and scope aligned.
        cue_label = document[cue_idx].text.lower()

        scope_tokens = [document[k] for k in range(cue_idx + 1, len(document))]
        neg_scope_list.append(scope_tokens)

        pieces.append("<" + cue_label + "Scope>")
        pieces.extend(tok.text for tok in scope_tokens)
        pieces.append("</" + cue_label + "Scope>")
    return neg_scope_list, "".join(pieces)

###### Rule2: after negation cue take 5 words of remaining sentence

In [6]:
#after negation cue take 5 words of remaining sentence
def get_neg_scope_rule2(cues_list, document, max_words=5):
    """Rule 2: the scope of a cue is the next ``max_words`` tokens after it.

    Tokens whose text is only an apostrophe are skipped: they are neither
    counted towards the window nor included in the scope.

    Args:
        cues_list: negation cue strings to look for.
        document: indexable token sequence; each token exposes ``.text``.
        max_words: size of the window after the cue (default 5, the value
            that was previously hard-coded).

    Returns:
        tuple ``(neg_scope_list, neg_scope_str)`` where
          * ``neg_scope_list`` holds, per located cue, the tokens in its window;
          * ``neg_scope_str`` is the concatenation, per located cue, of
            ``<cueScope>`` + the token texts (joined without separator) +
            ``</cueScope>``.
        Both are ordered like the result of ``get_indices_neg_cues``.
    """
    apostrophe_only = re.compile("^[']$")
    neg_scope_list = []
    pieces = []
    for cue_idx in get_indices_neg_cues(document, cues_list):
        # get_indices_neg_cues drops cues it cannot find, so its m-th result
        # need not belong to cues_list[m]; the matched cue equals the
        # lower-cased text of the token at cue_idx, so label from the token.
        cue_label = document[cue_idx].text.lower()

        window = []
        for k in range(cue_idx + 1, len(document)):
            # Stop as soon as the window is full instead of walking the rest
            # of the document without collecting anything.
            if len(window) >= max_words:
                break
            token = document[k]
            if apostrophe_only.match(token.text):
                continue
            window.append(token)
        neg_scope_list.append(window)

        pieces.append("<" + cue_label + "Scope>")
        pieces.extend(tok.text for tok in window)
        pieces.append("</" + cue_label + "Scope>")
    return neg_scope_list, "".join(pieces)

###### Rule3_a: finding sibling of neg cue

In [7]:
#finding sibling of neg cue
#print(neg_cues)
#rights: right child of the token
def get_neg_scope_rule3_a(document, cues_list):
    """Rule 3a: scope = the cue's head plus the subtrees of the head's right children.

    For every cue, every token of the document that matches it contributes
    its head followed by all tokens of the subtree of each right child of
    that head. Tokens whose text starts with a digit or with one of
    ``, . -`` are never considered as cue candidates.

    Args:
        document: iterable of parsed tokens exposing ``text``, ``head``,
            and on tokens ``rights`` / ``subtree``.
        cues_list: negation cue strings. A token matches a cue when its text
            equals the cue exactly or after lower-casing, in line with the
            lower-cased comparison in ``get_indices_neg_cues``.

    Returns:
        list with one entry per cue (same order as ``cues_list``); each entry
        is the list of scope tokens collected for that cue (empty if the cue
        does not occur).
    """
    not_a_cue = re.compile('[0-9]|[,.-]')
    neg_scope_rule3_a = []
    for cue in cues_list:
        scope = []
        for token in document:
            if not_a_cue.match(token.text):
                continue
            # Exact match kept for backward compatibility; the lower-cased
            # comparison additionally catches capitalised cues ("Not", "No").
            if token.text == cue or token.text.lower() == cue:
                neg_head = token.head
                scope.append(neg_head)
                # rights: the children of the head that stand to its right
                for sibling in neg_head.rights:
                    scope.extend(sibling.subtree)
        neg_scope_rule3_a.append(scope)
    return neg_scope_rule3_a

###### Rule3_b: finding head of neg cue and then right children of neg cue

In [8]:
#finding head of neg cue and then right children of neg cue
#print(neg_cues)
#rights: right child of the token
def get_neg_scope_rule3_b(document, cues_list):
    """Rule 3b: scope = the cue's head plus the subtrees of the cue's own right children.

    For every cue, every token of the document that matches it contributes
    its head followed by all tokens of the subtree of each right child of
    the cue token itself. Tokens whose text starts with a digit or with one
    of ``, . -`` are never considered as cue candidates.

    Args:
        document: iterable of parsed tokens exposing ``text``, ``head``,
            ``rights`` and ``subtree``.
        cues_list: negation cue strings. A token matches a cue when its text
            equals the cue exactly or after lower-casing, in line with the
            lower-cased comparison in ``get_indices_neg_cues``.

    Returns:
        list with one entry per cue (same order as ``cues_list``); each entry
        is the list of scope tokens collected for that cue (empty if the cue
        does not occur).
    """
    not_a_cue = re.compile('[0-9]|[,.-]')
    neg_scope_rule3_b = []
    for cue in cues_list:
        scope = []
        for token in document:
            if not_a_cue.match(token.text):
                continue
            # Exact match kept for backward compatibility; the lower-cased
            # comparison additionally catches capitalised cues ("Not", "No").
            if token.text == cue or token.text.lower() == cue:
                scope.append(token.head)
                # rights: the children of the cue that stand to its right
                for child in token.rights:
                    scope.extend(child.subtree)
        neg_scope_rule3_b.append(scope)
    return neg_scope_rule3_b

###### Rule3_final: Negation cue's head, siblings and children

In [9]:
def get_neg_scope_rule3(document, cues_list):
    """Rule 3: scope = the cue's head, the cue's right children and the head's right children.

    For each cue, the first not-yet-used token matching it is taken. Its
    scope is built, in this order, from:
      1. the head of the cue token;
      2. every token in the subtree of each right child of the cue;
      3. every token in the subtree of each right child of the head, except
         children whose text equals the cue token's text.
    Subtree tokens whose text starts with ``,``, ``.``, ``-`` or whitespace
    are left out. A token position is used for at most one cue, so a cue
    listed twice picks up two different occurrences.

    Args:
        document: iterable of parsed tokens exposing ``text``, ``i``,
            ``head``, ``rights`` and ``subtree``.
        cues_list: negation cue strings. A token matches a cue when its text
            equals the cue exactly or after lower-casing, in line with the
            lower-cased comparison in ``get_indices_neg_cues``.

    Returns:
        list with one entry per cue (same order as ``cues_list``); each entry
        is the list of scope tokens for that cue (empty if no free occurrence
        of the cue exists).
    """
    # Raw string: '\s' inside a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    noise = re.compile(r'[,.-]|[\s]')

    neg_scope_rule3 = []
    used_positions = set()
    for cue in cues_list:
        scope = []
        neg_scope_rule3.append(scope)
        for token in document:
            # Exact match kept for backward compatibility; the lower-cased
            # comparison additionally catches capitalised cues ("Not", "No").
            if token.text != cue and token.text.lower() != cue:
                continue
            if token.i in used_positions:
                continue
            used_positions.add(token.i)

            neg_head = token.head
            scope.append(neg_head)
            for child in token.rights:
                scope.extend(
                    ds for ds in child.subtree if not noise.match(ds.text)
                )
            for sibling in neg_head.rights:
                # Skip the cue itself when it hangs to the right of its head.
                if sibling.text != token.text:
                    scope.extend(
                        ds for ds in sibling.subtree if not noise.match(ds.text)
                    )
            # Only the first free occurrence is used for this cue.
            break
    return neg_scope_rule3

`consider until the next verb`
`consider adjectives adverbs after neg`

`find shortest path between neg cue and next verb and that path will be the scope`

`Rule 2: uses the parse tree. It takes the negation cue's subsequent siblings and their children. Implementation: take the head of the negation cue token, find the cue's siblings through that head, and then take the subtree of every child on the right side of the head. A subtree gives the complete tree rooted at a token.`


###### Along with the scope, the parts of speech and the named entities of the scope are retrieved in this method

In [10]:
def get_neg_scope_pos_ne_rule3_new(document, cues_list):
    """Compute negation scope, content-word POS tags and named entities.

    For each cue, the first not-yet-used token whose lower-cased text equals
    the cue is taken. Its scope is the subtree of the cue's head, without the
    cue token itself and without tokens whose text starts with one of
    ``- ! ? : ; , . ( )`` or whitespace.

    Args:
        document: parsed document; iterable of tokens exposing ``text``,
            ``i``, ``pos_``, ``head`` and ``subtree``, and providing ``ents``
            whose items expose ``label_``.
        cues_list: negation cue strings, compared against lower-cased token text.

    Returns:
        tuple of five items:
          * neg_scope_rule3: per cue, the scope words joined by single spaces
            (a string) -- or an empty list when the cue was not found;
          * pos_scope: per cue, ``"word->POS"`` strings for scope tokens
            tagged NOUN, ADJ or VERB;
          * named_ents: ``"entity (LABEL)"`` strings for every entity of the
            whole document (not restricted to the scope);
          * scope_idx_lt: per cue, the token indices of the scope tokens;
          * token_idx_lt: token indices of the cue tokens that were matched.
    """
    # Raw string: '\s' inside a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    noise = re.compile(r'[-!?:;,.()]|[\s]')
    content_pos = ('NOUN', 'ADJ', 'VERB')

    named_ents = [str(e) + " (" + e.label_ + ")" for e in document.ents]

    neg_scope_rule3 = []
    pos_scope = []
    # index of tokens which are in scope
    scope_idx_lt = []
    token_idx_lt = []

    for cue in cues_list:
        neg_scope_rule3.append([])
        scope_idx_lt.append([])
        pos_scope.append([])
        scope_indices = scope_idx_lt[-1]
        scope_pos = pos_scope[-1]

        for token in document:
            if token.text.lower() != cue or token.i in token_idx_lt:
                continue
            token_idx_lt.append(token.i)

            scope_words = []
            for ds in token.head.subtree:
                if ds.i in scope_indices or ds.i == token.i:
                    continue
                if noise.match(ds.text):
                    continue
                scope_words.append(ds.text)
                scope_indices.append(ds.i)
                # keep the POS only for nouns, adjectives and verbs
                if ds.pos_ in content_pos:
                    scope_pos.append(ds.text + "->" + ds.pos_)

            # A found cue yields a string; an unmatched cue keeps the empty
            # list appended above (callers may rely on that distinction).
            neg_scope_rule3[-1] = " ".join(scope_words)
            # Only the first free occurrence is used for this cue.
            break

    return neg_scope_rule3, pos_scope, named_ents, scope_idx_lt, token_idx_lt

###### Along with the scope, the parts of speech and the named entities are retrieved in this method (here the negation cue itself is kept in the scope)

In [11]:
def get_neg_scope_with_neg(document, cues_list):
    """Compute negation scope (cue included), content-word POS tags and named entities.

    For each cue, the first not-yet-used token whose lower-cased text equals
    the cue is taken. Its scope is the whole subtree of the cue's head --
    including the cue token -- without tokens whose text starts with one of
    ``- ! ? : ; , . ( )`` or whitespace.

    Args:
        document: parsed document; iterable of tokens exposing ``text``,
            ``i``, ``pos_``, ``head`` and ``subtree``, and providing ``ents``
            whose items expose ``label_``.
        cues_list: negation cue strings, compared against lower-cased token text.

    Returns:
        tuple of three items:
          * neg_scope_rule3: per cue, the scope words joined by single spaces
            (a string) -- or an empty list when the cue was not found;
          * pos_scope: per cue, ``"word->POS"`` strings for scope tokens
            tagged NOUN, ADJ or VERB;
          * named_ents: ``"entity (LABEL)"`` strings for every entity of the
            whole document (not restricted to the scope).
    """
    # Raw string: '\s' inside a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    noise = re.compile(r'[-!?:;,.()]|[\s]')
    content_pos = ('NOUN', 'ADJ', 'VERB')

    named_ents = [str(e) + " (" + e.label_ + ")" for e in document.ents]

    neg_scope_rule3 = []
    pos_scope = []
    token_idx_lt = []

    for cue in cues_list:
        neg_scope_rule3.append([])
        pos_scope.append([])
        scope_pos = pos_scope[-1]

        for token in document:
            if token.text.lower() != cue or token.i in token_idx_lt:
                continue
            token_idx_lt.append(token.i)

            scope_words = []
            # Every subtree token is kept. The earlier "text not already in
            # scope" filter dropped legitimately repeated words (e.g. the
            # second "to" in "to go to ..."), truncating scope and POS lists.
            for ds in token.head.subtree:
                if noise.match(ds.text):
                    continue
                scope_words.append(ds.text)
                # keep the POS only for nouns, adjectives and verbs
                if ds.pos_ in content_pos:
                    scope_pos.append(ds.text + "->" + ds.pos_)

            # A found cue yields a string; an unmatched cue keeps the empty
            # list appended above (callers may rely on that distinction).
            neg_scope_rule3[-1] = " ".join(scope_words)
            # Only the first free occurrence is used for this cue.
            break

    return neg_scope_rule3, pos_scope, named_ents

#### Code to test the above methods

In [12]:
# sentences = [
#              "I do not think the instructions provided were helpful",  
#              "Though I did not use a guest room here, my colleagues who did shared that...",
#              "The room was clean including details, I'm talking no dust,no hair in bathroom-shower or beds which I tend to find in other hotels.",
#              "King bed in too small space so that one side did not even have a lamp.",
#              "Our room this time was disappointing.",
#              "I didn't like him ",
#              'She does not like Steve Jobs and number 2.', 
#              'There is no negation', 
#              'Jim never likes to go to supermarket in the morning', 
#              'Sam is unlikely to give his notebook to you',
#              'Sam is unlikely to give his notebook to you because he does not like you', 
#              'The production has not grown, but we are hopeful for the future'
#             ]

In [13]:
# # #commenting for .py file


# for sentence in sentences:
#     print("Sentence is: ")
#     print(sentence)
#     sentence = decontract(sentence) 
#     print("\n")
#     neg_cue_list = identify_all_negs(negations_p1, prefix_negations, suffix_negations, sentence)

#     nlp = spacy.load("en_core_web_sm")
#     doc = nlp(sentence)
    
# # #     neg_scope_list, neg_scope_string = get_neg_scope_rule1(neg_cues,doc)
# # #     print("\nNegation Scope based on rule1: ")
# # #     print(neg_scope_list)
# # #     print(neg_scope_string)
    
#     print("\n")
#     display_pos_tag_tree(doc)
    
# # #     print("Negation Scope based on rule2: ")
# # #     print(get_neg_scope_rule2(neg_cues, doc))
# # #     print("\n")
    
#     print("Negation Scope based on rule3: ")
#     print(get_neg_scope_rule3(doc, neg_cues))
#     print("\n")
    
# #     print("Negation Scope based on rule3_new: ")
# #     print(get_neg_scope_rule3_new(doc, neg_cues))
# #     print("\n")
    
# # #     print("Negation Scope based on rule3_a: ")
# # #     print(get_neg_scope_rule3_a(doc, neg_cues))
# # #     print("\n")
    
# # #     print("Negation Scope based on rule3_b: ")
# # #     print(get_neg_scope_rule3_b(doc, neg_cues))
# # #     print("\n")

In [14]:
# sentence = "He hadn't done any homework, neither had he brought any of his books to class."
# sentence = decontract(sentence) 
# operating_negs, prefix_negs, suffix_negs, all_negs = identify_all_negs(sentence)

# print(all_negs)
# nlp = spacy.load("en_core_web_sm")
# doc = nlp(sentence)

# print("Negation Scope based on rule3_new: ")
# neg_scope, p, ne, scope_idx_list, token_idx_lt = get_neg_scope_pos_ne_rule3_new(doc, all_negs)

# print(neg_scope)
# print("\n")

# display_pos_tag_tree(doc)