# In [15]:
import re
import json, os

def read_negations(file_path):
    """Load the cue lexicon from a text file.

    Each non-blank line must hold a cue and its tag separated by two tab
    characters, e.g. ``no<TAB><TAB>[PREN]``. Leading and trailing whitespace
    on a line is ignored.

    Args:
        file_path: Path of the lexicon file.

    Returns:
        dict mapping each cue string to its tag string. When a cue appears
        more than once, the last line wins.

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            fields on the double-tab separator.
    """
    negations = {}
    with open(file_path, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            entry = line.strip()
            # Blank lines (typically a trailing one at the end of the file)
            # carry no entry; skipping them avoids an unpacking error.
            if not entry:
                continue
            fields = entry.split('\t\t')
            if len(fields) != 2:
                raise ValueError(
                    "{}:{}: expected 'cue<TAB><TAB>tag', got {!r}".format(
                        file_path, line_number, entry
                    )
                )
            negation, tag = fields
            negations[negation] = tag
    return negations

def new_tag(start, end, labels, id):
    """Build one annotation entry for a labelled character span.

    Args:
        start: Start offset of the span.
        end: End offset of the span.
        labels: List of label names attached to the span.
        id: Value appended to the ``'ent'`` prefix to form the entry id.

    Returns:
        dict with the keys ``value``, ``id``, ``from_name``, ``to_name`` and
        ``type``. The layout looks like a Label Studio "labels" result
        item -- TODO confirm against the consumer of the output file.
    """
    span = {'start': start, 'end': end, 'labels': labels}
    entry_id = 'ent' + str(id)
    return {
        'value': span,
        'id': entry_id,
        'from_name': 'label',
        'to_name': 'text',
        'type': 'labels',
    }

def fordward_scope(tagged_text, start_index):
    """Find where a forward scope starting at ``start_index`` ends.

    The scan moves right from ``start_index`` and stops at the first "."
    it meets, or at the last character of ``tagged_text`` when no period
    follows.

    Args:
        tagged_text: Text being scanned.
        start_index: Offset at which the scan starts. It may equal
            ``len(tagged_text)`` (a cue that ends the text); the scan then
            stops immediately instead of raising ``IndexError``.

    Returns:
        The offset where the scan stopped, minus one. When the scan stops
        immediately (period at ``start_index``, or ``start_index`` past the
        last character) this is ``start_index - 1``.
    """
    last_index = len(tagged_text) - 1
    end_index = start_index

    # The bound is tested before the character is read, so an out-of-range
    # start offset never indexes past the end of the text.
    while end_index < last_index and tagged_text[end_index] != ".":
        end_index += 1

    # NOTE(review): the "- 1" is kept as-is so existing offsets do not shift.
    # It drops the character just before the period (and the last two
    # characters when no period is found) if the end offset is treated as
    # exclusive -- confirm the intended convention.
    return end_index - 1

def backward_scope(tagged_text, end_index):
    """Find where a backward scope ending at ``end_index`` starts.

    The scan moves left from ``end_index`` until it meets a "." or reaches
    the beginning of ``tagged_text``.

    Args:
        tagged_text: Text being scanned.
        end_index: Offset at which the scan starts (the scope's end).

    Returns:
        The offset right after the nearest preceding period, or ``0`` when
        no period precedes ``end_index``. If the character at ``end_index``
        is itself a period, ``end_index`` is returned unchanged.
    """
    start_index = end_index
    while start_index != 0 and tagged_text[start_index] != ".":
        start_index -= 1
        if tagged_text[start_index] == ".":
            # The period closes the previous sentence, so the scope starts
            # one character after it. (The original computed
            # ``start_index + 1`` here but discarded the result.)
            return start_index + 1

    return start_index

def tag_negations(text, negations):
    """Tag every cue occurrence in ``text`` together with its scope.

    For each cue in ``negations`` (in dict order) that occurs in ``text`` as
    a whole word, the result first receives one cue entry per occurrence and
    then one scope entry per occurrence. Entry ids are numbered
    consecutively across all cues.

    Cue label: ``'UNC'`` when the second-to-last character of the cue's tag
    is ``'P'`` (``[PREP]``, ``[POSP]``), otherwise ``'NEG'``.

    Scope, by tag:
        ``[PREN]`` -> ``[NSCO]``, from the cue's end forward
        ``[PREP]`` -> ``[USCO]``, from the cue's end forward
        ``[POST]`` -> ``[NSCO]``, from the cue's start backward
        ``[POSP]`` -> ``[USCO]``, from the cue's start backward
    Forward and backward limits come from ``fordward_scope`` and
    ``backward_scope``. A cue whose tag is none of these four still gets
    its cue entries but no scope entry.

    Args:
        text: Text to annotate.
        negations: dict mapping cue strings to tag strings.

    Returns:
        list of entries built by ``new_tag``; empty when no cue occurs.
    """
    # tag -> (scope label, whether the scope extends forward from the cue)
    scope_rules = {
        '[PREN]': ('[NSCO]', True),
        '[PREP]': ('[USCO]', True),
        '[POST]': ('[NSCO]', False),
        '[POSP]': ('[USCO]', False),
    }

    result = []
    next_id = 0
    for negation, tag in negations.items():
        pattern = r'\b' + re.escape(negation) + r'\b'
        # Materialise the matches once; they feed both the cue pass and the
        # scope pass below.
        matches = list(re.finditer(pattern, text))
        if not matches:
            continue

        # Slicing instead of indexing keeps a tag shorter than two
        # characters from raising IndexError.
        # NOTE(review): cue labels carry no brackets while scope labels do;
        # both are kept exactly as they were -- confirm this is intended.
        cue_label = 'UNC' if tag[-2:-1] == 'P' else 'NEG'
        for match in matches:
            result.append(new_tag(match.start(), match.end(), [cue_label], next_id))
            next_id += 1

        rule = scope_rules.get(tag)
        if rule is None:
            # Unknown tag: no scope direction is defined for it.
            continue
        scope_tag, extends_forward = rule
        for match in matches:
            if extends_forward:
                start_index = match.end()
                end_index = fordward_scope(text, start_index)
            else:
                end_index = match.start()
                start_index = backward_scope(text, end_index)
            result.append(new_tag(start_index, end_index, [scope_tag], next_id))
            next_id += 1

    # Returned after ALL cues are processed; returning from inside the loop
    # kept only the first cue and gave None for texts without any cue.
    return result

def process_text(data, negations, output_path="../data/output_blind_negex.json"):
    """Annotate every document in ``data`` and dump the result as JSON.

    For each item, the text under ``item["data"]["text"]`` is tagged with
    ``tag_negations`` and the resulting entries replace whatever was stored
    in ``item["predictions"][0]["result"]``.

    Note: no copy is made. The items of ``data`` are updated in place, so
    the caller's object holds the new predictions after the call.

    Args:
        data: List of items, each with ``["data"]["text"]`` and a non-empty
            ``["predictions"]`` list whose first element is a dict.
        negations: dict mapping cue strings to tag strings.
        output_path: Where the JSON is written. Defaults to the location
            that used to be hard-coded.
    """
    for item in data:
        annotations = tag_negations(item["data"]["text"], negations)
        # Treat a missing (None) result as "no annotations" so a single
        # cue-free text cannot abort the whole run with a TypeError.
        item["predictions"][0]["result"] = list(annotations or [])

    with open(output_path, "w") as json_file:
        json.dump(data, json_file)
    



# Parent of the current working directory: abspath("") resolves to the cwd.
ROOT_DIR = os.path.dirname(os.path.abspath(""))

# Resolved relative to the current working directory when opened.
negations_file = '../data/negation_speculation_word.txt'

# Read the input inside a context manager so the file handle is closed as
# soon as the JSON is parsed instead of being left to the garbage collector.
with open(os.path.join(ROOT_DIR, "data", "training_data.json")) as training_file:
    data = json.load(training_file)

negations = read_negations(negations_file)
process_text(data, negations)