In [1]:
from nltk import word_tokenize
from nltk.corpus import stopwords
from nltk.metrics import edit_distance
from nltk import pos_tag, word_tokenize, RegexpParser
from nltk.chunk import ne_chunk

from fuzzywuzzy import fuzz

import string
import re

from textblob import TextBlob

import spacy
from spacy import displacy
import scispacy

from negspacy.negation import Negex
from negspacy.termsets import termset

import networkx as nx



### Fuzzywuzzy string matching algorithm for identifying operating negations

In [2]:
#identify negations_p1 in the given text
def identify_negative_words(input_text_tokens, threshold=100):
    """Return the tokens that match a standalone negation word.

    Each token is scored against every entry of a fixed negation list with
    ``fuzz.ratio``; a token is kept when its score reaches ``threshold``.

    Args:
        input_text_tokens: sequence of string tokens. It is traversed once
            per negation word, and tokens are compared exactly as given, so
            lower-case them first for case-insensitive matching.
        threshold: minimum ``fuzz.ratio`` score (0-100) that counts as a
            match. The default of 100 accepts only a perfect score, which is
            what this function did before the parameter existed.

    Returns:
        A list of the matching tokens, grouped by negation word (in the order
        of the negation list) and, inside each group, in token order. A token
        is repeated once per occurrence; with a threshold below 100 it is
        also repeated once per negation word it matches.
    """
    negations = ['no', 'none', 'not', 'neither', 'nor', 'never', "nothing", "nobody", "nowhere"]
    negation_words = []
    for negation in negations:
        # Extend directly instead of building a score matrix and index lists
        # first; the grouping by negation word is preserved by the loop order.
        negation_words.extend(
            token
            for token in input_text_tokens
            if fuzz.ratio(negation, token) >= threshold
        )
    return negation_words

### Regular expressions to identify prefix, suffix type negations

In [3]:
#identify prefix_negations in the text
def identify_prefix_neg_words(input_text_tokens):
    """Return the tokens that look like prefix negations (e.g. ``un...``).

    A token qualifies when it consists of one of the known negating prefixes
    followed by at least one lower-case ASCII letter, and TextBlob assigns it
    a strictly negative sentiment polarity. The polarity check filters out
    words that merely start with the same letters as a prefix.

    Args:
        input_text_tokens: sequence of string tokens. It is traversed once
            per prefix. The pattern only accepts ``[a-z]`` after the prefix,
            so tokens with upper-case letters, digits or hyphens never match.

    Returns:
        A list of the qualifying tokens, grouped by prefix (in the order of
        the prefix list) and, inside each group, in token order. Repeated
        tokens are returned once per occurrence.
    """
    prefix_negs = ['un', 'im', 'in', 'il', 'ir', 'dis', 'non', 'anti', 'de', 'counter', 'mal']
    prefix_neg_words = []
    for prefix in prefix_negs:
        # The pattern depends only on the prefix, so compile it once per
        # prefix instead of rebuilding it for every token. Escaping keeps the
        # prefix literal should the list ever gain regex metacharacters.
        pattern = re.compile("^%s[a-z]+$" % re.escape(prefix))
        for token in input_text_tokens:
            # Build the TextBlob only for tokens that carry the prefix; the
            # sentiment lookup is irrelevant for everything else.
            if pattern.search(token) and TextBlob(token).sentiment.polarity < 0:
                prefix_neg_words.append(token)
    return prefix_neg_words

In [4]:
#identify suffix_negations in the text
def identify_suffix_neg_words(input_text_tokens):
    """Return the tokens that look like suffix negations (e.g. ``...less``).

    A token qualifies when it consists of at least one lower-case ASCII
    letter followed by a known negating suffix, and TextBlob assigns it a
    sentiment polarity that is zero or negative. Only a positive polarity
    rejects a candidate.

    Args:
        input_text_tokens: sequence of string tokens. It is traversed once
            per suffix. The pattern only accepts ``[a-z]`` before the suffix,
            so the bare suffix itself and tokens with upper-case letters,
            digits or hyphens never match.

    Returns:
        A list of the qualifying tokens, grouped by suffix (in the order of
        the suffix list) and, inside each group, in token order. Repeated
        tokens are returned once per occurrence.
    """
    suffix_negs = ['less']
    suffix_neg_words = []
    for suffix in suffix_negs:
        # Escaping keeps the suffix literal should the list ever gain regex
        # metacharacters.
        pattern = re.compile("^[a-z]+%s$" % re.escape(suffix))
        for token in input_text_tokens:
            # Build the TextBlob only for tokens that carry the suffix; the
            # sentiment lookup is irrelevant for everything else.
            if pattern.search(token) and TextBlob(token).sentiment.polarity <= 0:
                suffix_neg_words.append(token)
    return suffix_neg_words

#### Convert "n't" to "not"

In [5]:
def decontract(phrase):
    """Expand ``n't`` contractions so the negation appears as the word "not".

    The irregular forms are handled first: "can't"/"Can't" become "can not"
    and "won't"/"Won't" become "will not" (the replacement is always
    lower-case). Every remaining "n't" is then replaced by " not", e.g.
    "isn't" -> "is not". Both the ASCII apostrophe and the typographic
    apostrophe (U+2019) are recognised.

    Args:
        phrase: the text to rewrite.

    Returns:
        The text with the contractions expanded.
    """
    # [cC] / [wW]: a "|" inside a character class is a literal pipe, not an
    # alternation, so it must not be part of the class.
    phrase = re.sub(r"[cC]an['\u2019]t", "can not", phrase)
    phrase = re.sub(r"[wW]on['\u2019]t", "will not", phrase)

    # Generic rule, applied last so it cannot pre-empt the irregular forms.
    phrase = re.sub(r"n['\u2019]t", " not", phrase)
    return phrase

In [6]:
# No longer used: decontract() now rewrites "n't" as "not", so this separate "n't" cue check is not needed.
def identify_punct_neg_cue(input_text_tokens):
    """Collect the tokens that are the contraction cue ``n't``.

    Args:
        input_text_tokens: sequence of string tokens.

    Returns:
        A list with one ``n't`` entry per matching token, in token order.
    """
    cue = re.compile("^n't$")
    return [token for token in input_text_tokens if cue.search(token)]

### Consolidating the three methods that identify operating, prefix and suffix type negations into one method

In [7]:
def identify_all_negs(text):
    """Run the three negation detectors on a piece of text.

    The text is tokenized with ``word_tokenize`` and every token is
    lower-cased before being handed to the detectors.

    Args:
        text: the raw text to analyse.

    Returns:
        A 4-tuple ``(negation_words, prefix_negation_words,
        suffix_negation_words, all_negs)`` where the first three are the
        lists returned by ``identify_negative_words``,
        ``identify_prefix_neg_words`` and ``identify_suffix_neg_words``, and
        ``all_negs`` is their concatenation in that order.
    """
    lowered_tokens = [token.lower() for token in word_tokenize(text)]

    standalone = identify_negative_words(lowered_tokens)
    prefixed = identify_prefix_neg_words(lowered_tokens)
    suffixed = identify_suffix_neg_words(lowered_tokens)
    # identify_punct_neg_cue is intentionally not called here; presumably
    # callers expand "n't" with decontract() beforehand -- confirm at call sites.

    combined = standalone + prefixed + suffixed
    return standalone, prefixed, suffixed, combined

#### Code to test above methods

In [8]:
# text = "To summarize... the food was incredible, nay, transcendant... but nothing brings me joy quite like the memory of the pneumatic condiment dispenser."
# text = decontract(text) 
# tokens_text = word_tokenize(text)
# lowercase_tokens_text = [t.lower() for t in tokens_text]
# identify_negative_words(lowercase_tokens_text)

In [9]:
# print("Enter the text: ")
# input_text = input()
# input_text = decontract(input_text) 
# print("\n")
# a,b,c,d = identify_all_negs(input_text)