# Validate the annotated datasets

This notebook provides code for validating the evaluation set .txt files and retrieving information about the anthropomorphic components.

1. Make sure that no sentence was annotated with conflicting annotations
2. Make sure that there are no duplicate sentences in a set
3. Make sure that the .txt files used to create the evaluation sets are well-formed - i.e., the IDs contain the database prefix (used to locate them in the dataframe) and each row contains exactly seven tab-separated values.
4. Check that the annotations are correct - e.g. the positive set contains only ['p1','p2','p3'] scores, the negative set contains only ['n1','n2','n3'] scores, and the inconclusive set has only 'inc'.
5. Retrieving the anthropomorphic components
6. Retrieving the AI entity lemmas (i.e. without descriptors and modifiers)

In [144]:
import re
        
def get_sentences_dict(cat,score,data_dir="../preprocessed_data/evaluation_sentences"):
    """Load one evaluation-set file and collect its duplicate rows.

    Reads ``{data_dir}/{cat}_{score}.txt``. Every non-blank line is split on
    tabs; the first value is the sentence ID and the remaining values are kept
    as the sentence info. A row that does not have exactly seven values, or an
    ID whose first six characters do not match ``[1-7]_(arx|acl)_``, is
    reported with ``print`` but still processed.

    Args:
        cat: category part of the file name (e.g. "verb_subjects").
        score: score part of the file name (e.g. "positive").
        data_dir: directory that contains the evaluation-set files.

    Returns:
        A tuple ``(sentences_dict, duplicate_ids, duplicate_sentence_pairs)``:
        ``sentences_dict`` maps each ID to the list of values that follow it
        (first occurrence only); ``duplicate_ids`` lists IDs that occur again
        after their first row; ``duplicate_sentence_pairs`` lists
        ``(first_id, later_id)`` tuples for rows whose info is identical to an
        already stored row with a different ID.

    Raises:
        OSError: if the file cannot be opened.
    """
    sentences_dict = {}
    duplicate_ids = []
    duplicate_sentence_pairs = []
    # maps the content of every stored row to the ID it was first seen with,
    # so repeated content is found without rescanning the whole dict
    first_id_by_info = {}

    # the context manager closes the file even if processing a row fails
    with open(f"{data_dir}/{cat}_{score}.txt","r") as sentences:
        for raw_line in sentences:
            stripped = raw_line.strip()
            if not stripped:
                # blank lines (e.g. a trailing newline) are not rows
                continue
            line = stripped.split("\t")
            sent_id = line[0]
            sent_info = line[1:]

            # wellformedness checks
            if len(line) != 7:
                print(f"The row with the ID {sent_id} in {cat}_{score}.txt is not well-formed.")
            id_prefix = sent_id[:6]
            if not re.match(r"^[1-7]{1}_(arx|acl)_", id_prefix):
                print(f"The ID {sent_id} in {cat}_{score} is not well-formed.")

            if sent_id in sentences_dict: # the sentence appears twice with the same ID
                duplicate_ids.append(sent_id)
                continue

            info_key = tuple(sent_info)
            if info_key in first_id_by_info: # the sentence appears twice with different IDs
                duplicate_sentence_pairs.append((first_id_by_info[info_key],sent_id))
            else:
                sentences_dict[sent_id] = sent_info
                first_id_by_info[info_key] = sent_id

    return sentences_dict,duplicate_ids,duplicate_sentence_pairs

In [159]:
def check_num_and_duplicates(cat,score,num):
    """Check the size of one evaluation set and report duplicate rows.

    Prints a message when the number of distinct sentences in
    ``{cat}_{score}.txt`` is above or below ``num``, and when the file
    contains repeated IDs or repeated sentences under different IDs.

    Args:
        cat: category part of the file name.
        score: score part of the file name.
        num: expected number of sentences in the file.

    Returns:
        "No duplicate utterances." when no duplicates were found, otherwise a
        message asking to resolve the duplicates in the ``score`` set.
    """
    response = "No duplicate utterances."
    print(f"Checking for duplicate entries in {cat}_{score}.txt...")

    # Parse the file once: every call re-reads it and repeats any
    # well-formedness warnings it prints.
    # sentences_dict: dict of ids and sentence info
    # duplicate_ids: list of duplicate sentences with identical ids
    # duplicate_sentence_pairs: list of duplicate sentences with different ids
    sentences_dict,duplicate_ids,duplicate_sentence_pairs = get_sentences_dict(cat,score)

    num_sentences = len(sentences_dict)
    if num_sentences > num:
        print(f"There are more than {num} sentences in {cat}_{score}.txt.")
    elif num_sentences < num:
        print(f"There are less than {num} sentences in {cat}_{score}.txt.")

    if duplicate_ids:
        response = f"Resolve duplicates in the {score} set!!!"
        print("The sentences with the following ids appear twice: ",duplicate_ids,
             f" in {cat}_{score}.txt")

    if duplicate_sentence_pairs:
        response = f"Resolve duplicates in the {score} set!!!"
        print("The following ID pairs refer to the same sentence: ",duplicate_sentence_pairs,
             f" in {cat}_{score}.txt")

    return response

def check_annotations(cat,score):
    """Validate and count the annotation labels of one evaluation set.

    The allowed labels are ``['p']`` for the "noun_phrases" and "possessives"
    categories; for every other category they depend on ``score``:
    ``['p1','p2','p3']`` for "positive", ``['n1','n2','n3']`` for "negative"
    and ``['inc']`` for "inconclusive". The label is read from position 5 of
    each sentence's info list. Labels outside the allowed list are reported
    with ``print`` and still counted.

    Args:
        cat: category part of the file name.
        score: score part of the file name.

    Returns:
        A dict mapping every label found in the file to its number of
        occurrences.

    Raises:
        ValueError: if ``score`` is not one of the three known scores and the
            category does not fix the allowed labels on its own.
    """
    if cat == "noun_phrases" or cat == 'possessives':
        annotations = ['p']
    elif score == 'positive':
        annotations = ['p1','p2','p3']
    elif score == 'negative':
        annotations = ['n1','n2','n3']
    elif score == 'inconclusive':
        annotations = ['inc']
    else:
        # fail with a clear message instead of an UnboundLocalError later on
        raise ValueError(
            f"Unknown score {score!r}: expected 'positive', 'negative' or 'inconclusive'.")

    print(f"Checking annotations in {cat}_{score}.txt:")

    annotations_dict = {}
    sentences_dict = get_sentences_dict(cat,score)[0]

    for sent_id,sent_info in sentences_dict.items():
        if len(sent_info) < 6:
            # malformed rows are only warned about when the file is parsed, so
            # they can reach this point without an annotation column
            print(f"Cannot check the annotation of the sentence with the ID {sent_id}: "
                  "the row has too few values.")
            continue
        annotation = sent_info[5]
        if annotation not in annotations:
            print(f"Fix incorrect annotation {annotation} in the sentence with the ID {sent_id}")
        annotations_dict[annotation] = annotations_dict.get(annotation,0) + 1

    return annotations_dict

def pairwise_conflict_check(cat,score1,score2):
    """Report sentences of one category that occur in two differently scored sets.

    A conflict is a sentence of the ``score1`` set whose ID also occurs in the
    ``score2`` set, or whose info is identical to that of a ``score2`` sentence
    with a different ID. Every conflict is reported with ``print``.

    Args:
        cat: category part of both file names.
        score1: score of the set whose sentences are looked up.
        score2: score of the set they are compared against.

    Returns:
        True if at least one conflict was found, otherwise False.
    """
    print(f"Comparing {score1} cases and {score2} cases for the {cat} set...")

    conflicting_annotation = False

    dict1 = get_sentences_dict(cat,score1)[0]
    dict2 = get_sentences_dict(cat,score2)[0]

    for id1,sent in dict1.items():
        if id1 in dict2:
            conflicting_annotation = True
            print(f"The {score1} sentence with the ID ",id1,f" appears in the {score2} set with the same ID")
            continue

        # first ID in the other set that carries exactly the same sentence info
        id2 = next((key for key,other_sent in dict2.items() if other_sent == sent),None)
        if id2 is not None:
            conflicting_annotation = True
            print(f"The {score1} sentence with the ID  ",id1,
                  f" appears in the {score2} set with the ID ",id2)

    return conflicting_annotation

def check_conflicting_annotations(cat,case,other_cases):
    """Compare one evaluation set against the other sets of the same category.

    Runs ``pairwise_conflict_check`` for ``case`` against every entry of
    ``other_cases`` and prints a warning for each pair that conflicts.

    Args:
        cat: category part of the file names.
        case: score of the set being checked.
        other_cases: iterable with the scores of the sets to compare against.

    Returns:
        "No conflicting annotations." if no pair conflicts, otherwise
        "Resolve conflicts before proceeding."
    """
    response = "No conflicting annotations."

    for other_case in other_cases:
        if pairwise_conflict_check(cat,case,other_case):
            # the result must be stored in the returned variable, otherwise
            # the caller is always told that there are no conflicts
            response = "Resolve conflicts before proceeding."
            print(f"Conflicting annotations in the {case} and {other_case} sets!!!")

    return response

In [304]:
def get_ai_phrases(cat,score,idx):
    """Count the values found at position ``idx`` of every sentence's info.

    Args:
        cat: category part of the file name.
        score: score part of the file name.
        idx: position in the sentence info list whose value is counted.

    Returns:
        A dict mapping each distinct value to its number of occurrences, in
        order of first appearance.
    """
    phrase_counts = {}

    for row in get_sentences_dict(cat,score)[0].values():
        phrase = row[idx]
        phrase_counts[phrase] = phrase_counts.get(phrase,0) + 1

    return phrase_counts

def get_entities_or_components(cat,score,idx):
    """Count the comma-separated elements found at position ``idx``.

    The value at position ``idx`` of every sentence's info is split on ","
    and each resulting element is counted exactly as it appears (no
    whitespace is removed).

    Args:
        cat: category part of the file name.
        score: score part of the file name.
        idx: position in the sentence info list whose value is split.

    Returns:
        A dict mapping each distinct element to its number of occurrences, in
        order of first appearance.
    """
    element_counts = {}
    rows = get_sentences_dict(cat,score)[0]

    for row in rows.values():
        for element in row[idx].split(","):
            element_counts[element] = element_counts.get(element,0) + 1

    return element_counts

def get_phrase_mask_entity_triplets(cat,score):
    """Collect the values at positions 1, 2 and 3 of every sentence's info.

    Args:
        cat: category part of the file name.
        score: score part of the file name.

    Returns:
        A list with one 3-tuple per sentence, in file order. The sentence IDs
        are not part of the result.
    """
    rows = get_sentences_dict(cat,score)[0]

    return [(row[1],row[2],row[3]) for row in rows.values()]

#### Check sentences for each category

The categories are:
1. verb_subjects - sentences in which the AI entity is the subject of an anthropomorphic verb (nsubj)
2. verb_objects - sentences in which the AI entity is the object of an anthropomorphic verb (pobj,dobj)
4. adjective_phrases - sentences in which the AI entity is part of an anthropomorphic adjectival phrase
5. noun_phrases - sentences in which the AI entity is part of an anthropomorphic noun phrase
6. possessives - sentences in which the AI entity is immediately followed by a possessive marker
7. comparisons - sentences in which the AI entity is being compared to humans explicitly

In [318]:
# expected number of sentences per evaluation set of the chosen category
cases_and_nums = {"positive":60,"negative":60,"inconclusive":30}
#cases_and_nums = {"inconclusive":50}
category_is = "verb_subjects" # the category whose files are validated

# toggles for the optional reports printed after the checks
check_AI_phrases_and_masks = True
check_entities_or_anthro = False
check_AI_triplets = False

for case in cases_and_nums:

    # check that the number of utterances matches the expectation, and that the file contains no duplicate sentences
    check1 = check_num_and_duplicates(category_is,case,cases_and_nums[case])
    print(check1,'\n')

    # check that the same sentence does not appear in two sets of the same category
    # not applicable for noun_phrases, possessives (always positive) and comparisons (always inconclusive)
    other_cases = [other_case for other_case in cases_and_nums if other_case != case]
    if other_cases:
        check2 = check_conflicting_annotations(category_is,case,other_cases)
        print(check2, '\n')

    # check that the annotations in a given file are correct (i.e. no negative annotations in the positive set)
    check3 = check_annotations(category_is,case)
    print(check3,'\n')

    # retrieve the AI phrases and their count
    # 1: the full AI phrase
    # 2: the masked component
    if check_AI_phrases_and_masks:
        idx = 2 # options are 1 and 2
        components = get_ai_phrases(category_is,case,idx)
        sorted_list = sorted(components.items())
        for phrase,count in sorted_list:
            print(count,": ",phrase)

    # retrieve all of the potentially (non-)anthropomorphic components / AI entities and their count
    # 3: AI entities
    # 4: anthropomorphic components
    if check_entities_or_anthro:
        idx = 4 # options are 3 and 4
        anthro_components = get_entities_or_components(category_is,case,idx)
        sorted_anthro_list = sorted(anthro_components.items())
        for component,count in sorted_anthro_list:
            print(count,": ",component)

    # print the (AI phrase, mask, entity) triplets of every sentence
    if check_AI_triplets:
        phrase_mask_entity_triplets = get_phrase_mask_entity_triplets(category_is,case)
        for item in phrase_mask_entity_triplets:
            print(item)

Checking for duplicate entries in verb_subjects_positive.txt...
No duplicate utterances. 

Comparing positive cases and negative cases for the verb_subjects set...
Comparing positive cases and inconclusive cases for the verb_subjects set...
No conflicting annotations. 

Checking annotations in verb_subjects_positive.txt:
{'p3': 18, 'p2': 40, 'p1': 2} 

2 :  AI
2 :  AI agent
2 :  AI agents
1 :  AI system
2 :  AI systems
4 :  ChatGPT
1 :  GPT-3
1 :  GPT-4
5 :  LLM
1 :  LLM-based expert
15 :  LLMs
2 :  LM
7 :  LMs
1 :  conversational AI development platform
1 :  instruction-tuned model
1 :  language model
1 :  large generative language model
8 :  model
2 :  system
1 :  trained model
Checking for duplicate entries in verb_subjects_negative.txt...
No duplicate utterances. 

Comparing negative cases and positive cases for the verb_subjects set...
Comparing negative cases and inconclusive cases for the verb_subjects set...
No conflicting annotations. 

Checking annotations in verb_subjects_ne