# Create final evaluation sets

In [18]:
# imports

import csv
import re
import pandas as pd

In [19]:
# Base names of the prediction files to convert, one per
# "<component>_<expectation label>" combination.
files = [
    "adjective_phrases_inconclusive",
    "adjective_phrases_negative",
    "adjective_phrases_positive",
    "comparisons_inconclusive",
    "noun_phrases_positive",
    "possessives_positive",
    "verb_objects_inconclusive",
    "verb_objects_negative",
    "verb_objects_positive",
    "verb_subjects_inconclusive",
    "verb_subjects_negative",
    "verb_subjects_positive",
]

In [20]:
def normalized(string):
    """
    Return `string` without leading/trailing whitespace and with every
    internal run of whitespace collapsed to a single space.
    """
    trimmed = string.strip()
    collapsed = re.sub(r'\s+', ' ', trimmed)
    return collapsed

def get_final_prediction(score,model):
    """
    Convert an anthroscore/AtypicalAnimacy score to a single label in {'0','1','2'}.

    :param score (str or float): raw model score; it is converted with float()
    :param model (str): 'anthroscore' or 'AtypicalAnimacy'

    :return (str): the label as a string
        - 'anthroscore': '1' if score > 1.0, '0' if score < -1.0, otherwise '2'
          (scores of exactly 1.0 or -1.0 fall in the '2' band)
        - 'AtypicalAnimacy': '1' if score > 0.3, otherwise '0' (never '2')

    :raises ValueError: if `score` cannot be converted to float, or if `model`
        is not one of the two supported names
    """
    AtypicalAnimacy_threshold = 0.3 # this was calculated during the experiment.
    score = float(score)

    if model == 'anthroscore':
        if score > 1.0:
            return '1'
        if score < -1.0:
            return '0'
        return '2'

    if model == 'AtypicalAnimacy':
        return '1' if score > AtypicalAnimacy_threshold else '0'

    # Previously an unknown model fell through both branches and crashed with
    # an UnboundLocalError on `pred`; fail with an explicit message instead.
    raise ValueError(
        f"Unknown model {model!r}: expected 'anthroscore' or 'AtypicalAnimacy'"
    )

def create_final_eval_file(filename,experiment,model,all_indices_dict):
    """
    Read one prediction csv file and write it to a file with uniform structure to facilitate evaluation.

    Input:  ../{experiment}/{model}/predictions/csv/{filename}.csv  (first line is skipped as header)
    Output: ../final_sets/{filename}_{experiment}_{model}_predictions.csv with the columns
            id, sentence, masked_sentence, AI_phrase, mask, component, expectation, model_score, prediction

    :param filename (str): name of the file (without the .csv extension)
    :param experiment (str): specify the experiment - used in input and output paths, and for obtaining correct indices
    :param model (str): specify the model - used to obtain final prediction {0,1,2} based on the anthro/AtypicalAnimacy score
    :param all_indices_dict (dict): pre-defined dictionary containing "{experiment}_{model}" string as key and index dict as value

    :raises KeyError: if "{experiment}_{model}" is not a key of all_indices_dict
    :raises FileNotFoundError: if the input file (or the output directory) does not exist
    :raises IndexError: if a non-empty row has fewer columns than the index dict requires
    :raises ValueError: if the expectation or the model score of a row is not numeric
    """
    eval_set = f"{experiment}_{model}"
    # Looked up once (it does not change per row); also fails before any file is touched.
    indices = all_indices_dict[eval_set]

    in_path = f"../{experiment}/{model}/predictions/csv/{filename}.csv"
    out_path = f"../final_sets/{filename}_{experiment}_{model}_predictions.csv"
    new_header = ['id','sentence','masked_sentence','AI_phrase','mask','component','expectation','model_score','prediction']

    # The input is opened first so a missing input does not leave an empty output file behind.
    # Both files are managed by `with` so they are always closed, and both use newline=''
    # as required by the csv module (correct line endings, quoted embedded newlines).
    with open(in_path,"r",newline="") as infile, open(out_path,"w",newline="") as outfile:
        reader = csv.reader(infile)
        writer = csv.writer(outfile)

        next(reader, None)  # skip the header of the input file
        writer.writerow(new_header)

        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing empty line)
                continue

            sentence_id = normalized(row[indices['id']])
            sentence = normalized(row[indices['sent']])
            masked_sent = normalized(row[indices['masked_sent']])
            AI_phrase = normalized(row[indices['phrase']])
            mask = normalized(row[indices['mask']])
            component = normalized(row[indices['comp']])
            # should be numerical value {0,1,2}; may be stored as e.g. "1.0"
            expectation = int(float(normalized(row[indices['exp']])))
            # raw model score, kept as text in the 'model_score' column
            prediction = normalized(row[indices['pred']])

            final_pred = get_final_prediction(prediction,model)

            writer.writerow([sentence_id,sentence,masked_sent,AI_phrase,mask,component,expectation,prediction,final_pred])

    print(f"Created {filename}_{experiment}_{model}_predictions.csv in ../final_sets/")

# For every "<experiment>_<model>" combination: the column position in the
# input csv rows of each field that create_final_eval_file reads.
all_indices_dict = {
    'experiment_1_anthroscore': {
        'id': 0, 'sent': 1, 'masked_sent': 2, 'phrase': 3,
        'mask': 4, 'comp': 6, 'exp': 7, 'pred': 8,
    },
    'experiment_1_AtypicalAnimacy': {
        'id': 0, 'sent': 2, 'masked_sent': 3, 'phrase': 5,
        'mask': 6, 'comp': 8, 'exp': 10, 'pred': 15,
    },
    'experiment_2_anthroscore': {
        'id': 0, 'sent': 1, 'masked_sent': 2, 'phrase': 3,
        'mask': 4, 'comp': 6, 'exp': 7, 'pred': 8,
    },
    'experiment_2_AtypicalAnimacy': {
        'id': 0, 'sent': 2, 'masked_sent': 3, 'phrase': 5,
        'mask': 6, 'comp': 8, 'exp': 9, 'pred': 10,
    },
}

#for file in files:
    #create_final_eval_file(file,'experiment_2','anthroscore',all_indices_dict)