# Create final evaluation sets and evaluate the models

In [1]:
import csv
import re
import pandas as pd


def normalized(string):
    """
    Return `string` with leading/trailing whitespace removed and every
    internal run of whitespace collapsed to a single space.
    """
    trimmed = string.strip()
    collapsed = re.sub(r'\s+', ' ', trimmed)
    return collapsed

def get_final_prediction(score,model):
    """
     Convert a raw anthroscore/AtypicalAnimacy score to a class label.

     score: the model score; anything accepted by float() (number or numeric string).
     model: 'anthroscore' or 'AtypicalAnimacy'.

     Returns the label as a string:
       - anthroscore:      '1' if score > 1.0, '0' if score < -1.0, otherwise '2'
       - AtypicalAnimacy:  '1' if score > 0.3, otherwise '0'

     Raises ValueError if `score` cannot be converted to float or if `model`
     is not one of the two supported names.
    """
    AtypicalAnimacy_threshold = 0.3 # this was calculated during the experiment.
    score = float(score)

    if model == 'anthroscore':
        if score > 1.0:
            return '1'
        if score < -1.0:
            return '0'
        # Scores inside [-1.0, 1.0] fall in neither class and get the third label.
        return '2'

    if model == 'AtypicalAnimacy':
        return '1' if score > AtypicalAnimacy_threshold else '0'

    # Previously an unknown model fell through to `return pred` with `pred`
    # unbound (UnboundLocalError); fail with an explicit message instead.
    raise ValueError(f"unknown model {model!r}; expected 'anthroscore' or 'AtypicalAnimacy'")

def create_final_eval_file(filename,experiment,model,all_indices_dict):
    """
     Write the final evaluation CSV for one prediction file.

     Reads  ../experiment_2/{model}/predictions/csv/{filename}.csv  and writes
     ../{experiment}/{model}/predictions/csv/{filename}_{experiment}_{model}_predictions.csv
     containing the selected columns (whitespace-normalized) plus the final
     class label derived from the model score.

     filename:         stem of the input CSV (without '.csv').
     experiment:       experiment name; used in the output path and to pick the column layout.
     model:            'anthroscore' or 'AtypicalAnimacy'.
     all_indices_dict: maps '{experiment}_{model}' to a dict giving the column
                       index of 'id', 'sent', 'masked_sent', 'phrase', 'mask',
                       'comp', 'exp' and 'pred' in the input rows.

     Raises KeyError if '{experiment}_{model}' is not in all_indices_dict and
     IndexError if a non-empty input row is shorter than the layout requires.
    """
    # The column layout depends only on (experiment, model), so look it up once.
    indices = all_indices_dict[f"{experiment}_{model}"]
    # Order matches the first eight columns of the output header below.
    column_keys = ('id','sent','masked_sent','phrase','mask','comp','exp','pred')

    new_header = ['id','sentence','masked_sentence','AI_phrase','mask','component','expectation','model_score','prediction']

    # NOTE(review): the input directory is hard-coded to experiment_2 while the
    # output directory and the column layout follow `experiment` — confirm this
    # is intended when experiment != 'experiment_2'.
    in_path = f"../experiment_2/{model}/predictions/csv/{filename}.csv"
    out_path = f"../{experiment}/{model}/predictions/csv/{filename}_{experiment}_{model}_predictions.csv"

    # newline="" is what the csv module expects for both reading and writing;
    # both files are closed by the with-statement even if a row fails.
    with open(in_path,"r",newline="") as infile, open(out_path,"w",newline="") as outfile:
        reader = csv.reader(infile)
        writer = csv.writer(outfile)
        writer.writerow(new_header)

        next(reader, None)  # skip the input header row

        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; there is nothing to copy.
                continue

            # Pull each cell out of the row by its column index (the index
            # itself is an int and must not be passed to normalized()).
            values = [normalized(row[indices[key]]) for key in column_keys]

            # The final label is derived from the model's score (the 'pred'
            # column), not from the expected label.
            model_score = values[-1]
            final_pred = get_final_prediction(model_score,model)

            writer.writerow(values + [final_pred])

# Column index of each field in the input prediction CSVs, keyed by
# '{experiment}_{model}'. The layout differs per experiment and per model.
all_indices_dict = {'experiment_1_anthroscore':{'id':0,'sent':1,'masked_sent':2,'phrase':3,'mask':4,'comp':6,'exp':7,'pred':8},
              'experiment_1_AtypicalAnimacy':{'id':0,'sent':2,'masked_sent':3,'phrase':5,'mask':6,'comp':8,'exp':10,'pred':15},
              'experiment_2_anthroscore':{'id':0,'sent':1,'masked_sent':2,'phrase':3,'mask':4,'comp':6,'exp':7,'pred':8},
              'experiment_2_AtypicalAnimacy':{'id':0,'sent':2,'masked_sent':3,'phrase':5,'mask':6,'comp':8,'exp':9,'pred':10}
             }

# Stems of the prediction files, each of the form "<category>_<label>".
# Only the (category, label) combinations listed here are included.
_labels_by_category = {
    "adjective_phrases": ("inconclusive", "negative", "positive"),
    "comparisons": ("inconclusive",),
    "noun_phrases": ("positive",),
    "possessives": ("positive",),
    "verb_objects": ("inconclusive", "negative", "positive"),
    "verb_subjects": ("inconclusive", "negative", "positive"),
}

files = [
    f"{category}_{label}"
    for category, labels in _labels_by_category.items()
    for label in labels
]

# Driver loop: for each name in `files_retest`, call get_scores_dict(name) and
# pass the name together with the returned value to concat_info.
# NOTE(review): `files_retest`, `get_scores_dict` and `concat_info` are not
# defined in the visible part of this file — presumably they come from another
# notebook cell; confirm. Also check whether this loop was meant to iterate over
# `files` and call `create_final_eval_file`, which is defined above but never
# invoked here.
for file in files_retest:
    file_dict = get_scores_dict(file)
    concat_info(file,file_dict)