# In [12]:
import re
def read_negations(file_path):
    """Load the cue lexicon from ``file_path`` into a dict.

    Each non-blank line must hold exactly two fields separated by a double
    tab: the cue text and its tag. Surrounding whitespace on the line is
    stripped before splitting. If a cue appears more than once, the last
    occurrence wins.

    Args:
        file_path: Path of the lexicon file to read.

    Returns:
        A dict mapping each cue string to its tag string.

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            fields. The message names the file and the line number.
        OSError: If the file cannot be opened.
    """
    negations = {}
    # The platform default text encoding is used, as before.
    with open(file_path, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            entry = line.strip()
            if not entry:
                # Blank lines (e.g. trailing newlines) carry no entry; they
                # used to abort the whole load with an unpacking error.
                continue
            fields = entry.split('\t\t')
            if len(fields) != 2:
                raise ValueError(
                    "{}: line {}: expected 2 fields separated by a double "
                    "tab, got {!r}".format(file_path, line_number, entry)
                )
            negation, tag = fields
            negations[negation] = tag
    return negations

def fordward_scope(tagged_sentence, start_index):
    """Find where a scope that starts at ``start_index`` ends, scanning right.

    The scan looks for the next bracketed annotation tag (``[NEG]``,
    ``[UNC]``, ``[NSCO]`` or ``[USCO]``) at or after ``start_index``.
    Bracketed text that is not one of those tags is treated as ordinary
    sentence text and skipped.

    Args:
        tagged_sentence: Sentence text, possibly already containing tags.
        start_index: Index at which the scan begins.

    Returns:
        The index just before the ``[`` of the next annotation tag, or the
        index of the last character when no further tag exists.

    Raises:
        IndexError: If ``start_index`` is not a valid non-negative index
            into ``tagged_sentence`` (this includes an empty sentence).
    """
    if not 0 <= start_index < len(tagged_sentence):
        raise IndexError("start_index out of range")

    last_index = len(tagged_sentence) - 1
    search_from = start_index
    # Iterative scan: one pass per "[" instead of one recursive call, so a
    # sentence with many plain brackets cannot exhaust the recursion limit.
    while True:
        open_index = tagged_sentence.find("[", search_from)
        if open_index == -1:
            return last_index
        close_index = tagged_sentence.find("]", open_index + 1)
        if close_index == -1:
            # An unterminated "[" can never start a tag, and neither can any
            # later "[": the rest is plain text. (This used to read past the
            # end of the string and raise IndexError.)
            return last_index
        candidate = tagged_sentence[open_index:close_index + 1]
        if re.match(r"\[(NEG|UNC|NSCO|USCO)\]", candidate):
            return open_index - 1
        # Not a tag: resume right after this "[" so a tag nested inside the
        # bracketed text is still found.
        search_from = open_index + 1

def backward_scope(tagged_sentence, end_index):
    """Find where a scope that ends at ``end_index`` begins, scanning left.

    The scan looks for the closest ``]`` at or before ``end_index`` whose
    bracketed text (from the nearest ``[`` to its left) begins with one of
    the annotation tags ``[NEG]``, ``[UNC]``, ``[NSCO]`` or ``[USCO]``.
    Other bracketed text is treated as ordinary sentence text and skipped.

    Args:
        tagged_sentence: Sentence text, possibly already containing tags.
        end_index: Index at which the backward scan begins.

    Returns:
        The index just after the ``]`` of the closest preceding annotation
        tag, or ``0`` when there is none.

    Raises:
        IndexError: If ``end_index`` is not a valid non-negative index into
            ``tagged_sentence`` (this includes an empty sentence).
    """
    if not 0 <= end_index < len(tagged_sentence):
        raise IndexError("end_index out of range")

    search_upto = end_index
    # Iterative scan, one pass per "]" instead of one recursive call.
    while True:
        close_index = tagged_sentence.rfind("]", 0, search_upto + 1)
        if close_index == -1:
            return 0
        open_index = tagged_sentence.rfind("[", 0, close_index)
        if open_index == -1:
            # A "]" with no "[" before it cannot close a tag, and neither can
            # any earlier "]". The previous character-by-character walk let
            # the index go negative here, wrapping around to the END of the
            # string and either raising IndexError or mistaking a later tag
            # for a preceding one.
            return 0
        candidate = tagged_sentence[open_index:close_index + 1]
        if re.match(r"\[(NEG|UNC|NSCO|USCO)\]", candidate):
            return close_index + 1
        # Not a tag: resume just left of this "]".
        search_upto = close_index - 1

def tag_negations(sentence, negations):
    """Wrap cue words in cue tags and mark the text each cue scopes over.

    Pass 1 wraps every whole-word occurrence of each lexicon cue as
    ``[UNC]cue[UNC]`` when the lexicon tag's second-to-last character is
    ``P`` and as ``[NEG]cue[NEG]`` otherwise.

    Pass 2 adds a pair of scope tags per tagged occurrence, according to the
    lexicon tag:

    * ``[PREN]`` / ``[PREP]``: scope runs forward from the cue and is marked
      with ``[NSCO]`` / ``[USCO]``.
    * ``[POST]`` / ``[POSP]``: scope runs backward from the cue and is marked
      with ``[NSCO]`` / ``[USCO]``.

    Cues whose lexicon tag is none of those four are still cue-tagged but
    get no scope. An occurrence is also left without scope tags when there
    is no room for one (forward cue at the very end of the sentence, or a
    computed scope whose end lies before its start); previously those cases
    raised IndexError or duplicated characters in the output.

    Args:
        sentence: The raw sentence text.
        negations: Mapping of cue string to lexicon tag.

    Returns:
        The sentence with cue and scope tags inserted.
    """
    # Lexicon tag -> (scope tag, True when the scope follows the cue).
    scope_rules = {
        '[PREN]': ('[NSCO]', True),
        '[PREP]': ('[USCO]', True),
        '[POST]': ('[NSCO]', False),
        '[POSP]': ('[USCO]', False),
    }

    def cue_marker(tag):
        # Slicing instead of tag[-2] so a one-character tag cannot raise.
        return "[UNC]" if tag[-2:-1] == "P" else "[NEG]"

    tagged_sentence = sentence
    negations_of_the_sentence = {}

    # Pass 1: tag the cue words themselves.
    for negation, tag in negations.items():
        pattern = r'\b' + re.escape(negation) + r'\b'
        if re.search(pattern, tagged_sentence):
            marker = cue_marker(tag)
            tagged_negation = marker + negation + marker
            # A callable replacement inserts the text literally; a string
            # replacement would be parsed as a template (backslash escapes).
            tagged_sentence = re.sub(
                pattern, lambda _match: tagged_negation, tagged_sentence
            )
            negations_of_the_sentence[negation] = tag

    # Pass 2: mark the scope of every tagged cue.
    for negation, tag in negations_of_the_sentence.items():
        rule = scope_rules.get(tag)
        if rule is None:
            # Unrecognised lexicon tag: no scope direction is defined for it.
            continue
        scope_tag, scope_follows_cue = rule
        marker_length = len(cue_marker(tag))
        pattern = r'\b' + re.escape(negation) + r'\b'

        # finditer reports positions in the sentence as it was when the
        # iterator was created; `shift` is how many characters this loop has
        # inserted since, all of them to the left of the current match.
        shift = 0
        for match in re.finditer(pattern, tagged_sentence):
            if scope_follows_cue:
                # First character after the cue's closing tag.
                start_index = (
                    match.start() + len(negation) + marker_length + shift
                )
                if start_index >= len(tagged_sentence):
                    # The cue ends the sentence: nothing to scope over.
                    continue
                end_index = fordward_scope(tagged_sentence, start_index)
            else:
                # The "[" that opens the cue's tag.
                end_index = match.start() - marker_length + shift
                start_index = backward_scope(tagged_sentence, end_index)

            if end_index < start_index + 1:
                # Inverted span (e.g. backward cue at the sentence start, or
                # another tag right after a forward cue). Splicing it would
                # repeat characters of the sentence, so leave it untagged.
                continue

            # The character at start_index stays outside the scope, and so
            # does everything from end_index on.
            # NOTE(review): when backward_scope returns 0 (no earlier tag)
            # this leaves the sentence's first character outside the scope;
            # kept as-is to preserve existing output - confirm intent.
            tagged_sentence = (
                tagged_sentence[:start_index + 1]
                + scope_tag
                + tagged_sentence[start_index + 1:end_index]
                + scope_tag
                + tagged_sentence[end_index:]
            )
            shift += 2 * len(scope_tag)
    return tagged_sentence

def process_text(input_file, output_file, negations):
    """Tag every line of ``input_file`` and write the results to ``output_file``.

    Each input line is stripped of surrounding whitespace, passed through
    ``tag_negations`` with the given lexicon, and written out followed by a
    newline. The output file is created or overwritten.

    Args:
        input_file: Path of the text file to read, one sentence per line.
        output_file: Path of the file to write the tagged sentences to.
        negations: Cue lexicon forwarded to ``tag_negations``.
    """
    with open(input_file, 'r') as source, open(output_file, 'w') as sink:
        # The generator is consumed lazily, so lines are still read, tagged
        # and written one at a time.
        sink.writelines(
            tag_negations(raw_line.strip(), negations) + '\n'
            for raw_line in source
        )

# Example usage. These statements run as soon as this code is executed; the
# relative paths are resolved against the current working directory.

# Cue lexicon read by read_negations (cue and tag separated by a double tab).
negations_file = '../data/negation_speculation_word.txt'
# Input text; process_text tags it one line at a time.
text_file = '../data/sentences.txt'
# Destination file for the tagged lines (created or overwritten).
# NOTE(review): "bilnd" looks like a typo for "blind" - confirm before
# renaming, since other tooling may already expect this exact filename.
output_file = '../data/output_bilnd_negex.txt'

# Load the lexicon once, then tag every line of text_file into output_file.
negations = read_negations(negations_file)
process_text(text_file, output_file, negations)