In [37]:
#Text to Numeric Maps:
# Accuracy 
# I think the tweet is "COVID-19 related Asian hate" -> 1
# I think the tweet may not mention COVID or Asian, but it is hateful -> 4
# I think the tweet is NOT hateful -> 2

# Bias (stereotype activation)
# Strongly Disagree -> 1
# Disagree -> 2
# Neutral -> 3
# Agree -> 4
# Strongly Agree -> 5

# Discomfort
# Definitely not feeling so -> 1
# Probably not feeling so -> 2
# Not sure -> 3
# Probably feeling so -> 4
# Definitely feeling so -> 5

# Perceived workload
# 1 - Not at all -> 1
# 7 - Completely -> 7



In [38]:
# Configure IPython to display the value of every top-level expression
# statement in a cell, rather than only the last one (the default).
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

import numpy as np
import csv
import pandas as pd

In [39]:
# Read the raw Qualtrics export and derive five per-participant metrics
# (accuracy, stereotype_activation, mental_discomfort, perceived_workload,
# label_time). Results are left in `fieldnames` (header incl. the new columns)
# and `modified_rows` (one dict per participant) for the cell that writes them out.
file_name = 'EMNLP_Human_Study_Data.csv'

# Names of the derived columns appended to the CSV header.
metric_columns = ['accuracy', 'stereotype_activation', 'mental_discomfort', 'perceived_workload', 'label_time']

# Ground-truth labels for the 12 tweets, in Predict1..Predict12 order.
ground_truths = [0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1] #1 is hate, 0 is non-hate

# Qualtrics answer code -> binary hate label: codes 1 and 4 both count as
# hateful, code 2 as not hateful (see the text-to-numeric map at the top).
Qualtrics_numeric_to_hate_map = {1:1, 4:1, 2:0}


def _int_answers(row, prefix, count):
    """Return the answers row[prefix + '1'] .. row[prefix + str(count)] as ints.

    Raises KeyError if a column is missing and ValueError if a cell is not an
    integer string (e.g. a blank answer).
    """
    return [int(row[prefix + str(item_idx)]) for item_idx in range(1, count + 1)]


# newline='' is what the csv module documentation asks for when passing a file
# object: it lets the csv reader do its own newline handling so that newlines
# embedded inside quoted fields (possibly present in the free-text
# "*-rationale" columns -- TODO confirm) are not altered.
with open(file_name, 'r', newline='') as file:
    reader = csv.DictReader(file)
    # NOTE: this is the reader's own header list, extended in place with the
    # derived column names; it is reused later as the output header.
    fieldnames = reader.fieldnames
    fieldnames.extend(metric_columns)
    print('fieldnames:', fieldnames)
    modified_rows = []

    for row_index, row in enumerate(reader, start=1):
        print('row_index:', row_index)

        #Calculate accuracy
        # Fraction of the 12 tweets whose mapped answer matches the ground truth.
        mapped_predicts = [
            Qualtrics_numeric_to_hate_map[int(row['Predict' + str(predict_idx+1)])]
            for predict_idx in range(len(ground_truths))
        ]
        accuracy = 1 - np.mean([abs(predict - truth) for (predict, truth) in zip(mapped_predicts, ground_truths)])
        print('accuracy:', accuracy)
        row['accuracy'] = accuracy

        #Calculate stereotype activation
        #Approach: for each of the three implicitly biased statements at the start and the paraphrased statement at the end,
        #map negative changes in agreement score to a 0 change, and keep positive score changes. Average across 3 statements
        bias_change_eat = max(0, int(row['Bias-end_2']) - int(row['Bias-start_1']))
        bias_change_spy = max(0, int(row['Bias-end_4']) - int(row['Bias-start_2']))
        bias_change_apologize = max(0, int(row['Bias-end_1']) - int(row['Bias-start_3']))
        stereotype_activation = np.mean([bias_change_eat, bias_change_spy, bias_change_apologize])
        row['stereotype_activation'] = stereotype_activation

        #Calculate mental discomfort
        # Change in the summed 6-item discomfort score from start to end of the study.
        discomfort_start = sum(_int_answers(row, 'Disc-start_', 6))
        discomfort_end = sum(_int_answers(row, 'Disc-end_', 6))
        mental_discomfort = discomfort_end - discomfort_start
        print('mental_discomfort:', mental_discomfort)
        row['mental_discomfort'] = mental_discomfort

        #Calculate perceived workload
        # Mean of the 6 perceived-workload items.
        perceived_workload = np.mean(_int_answers(row, 'Percv-work_', 6))
        print('perceived_workload:', perceived_workload)
        row['perceived_workload'] = perceived_workload

        #Calculate label time [Unit: seconds]
        # Explicit accumulation (rather than sum()) keeps the floating-point
        # result independent of the interpreter's sum() implementation.
        label_time = 0
        for predict_idx in range(len(ground_truths)):
            page_timer = row['Timer' + str(predict_idx+1) + '_Page Submit']
            label_time += float(page_timer)
        print('label_time:', label_time)
        row['label_time'] = label_time

        # Store the row only once every derived metric has been filled in.
        modified_rows.append(row)

fieldnames: ['Quality_flag', 'Bias-start_1', 'Bias-start_2', 'Bias-start_3', 'Bias-start-rationale', 'Disc-start_1', 'Disc-start_2', 'Disc-start_3', 'Disc-start_4', 'Disc-start_5', 'Disc-start_6', 'Disc-start-rationale', 'IndFair-1', 'IndFair-2', 'IndFair-3', 'IndFair-4', 'IndFair-5', 'IndFair-6', 'IndFair-7', 'IndFair-8', 'IndFair-9', 'Disc-end_1', 'Disc-end_2', 'Disc-end_3', 'Disc-end_4', 'Disc-end_5', 'Disc-end_6', 'Disc-end-rationale', 'Bias-end_1', 'Bias-end_2', 'Bias-end_3', 'Bias-end_4', 'Bias-end-rationale', 'Percv-work_1', 'Percv-work_2', 'Percv-work_3', 'Percv-work_4', 'Percv-work_5', 'Percv-work_6', 'Percv-work-rationale', 'PROLIFIC_PID', 'Asian_race', 'Explanation_style', 'Prolific_Participant_id', 'Prolific_Total_approvals', 'Prolific_Fluent_languages', 'Prolific_Age', 'Prolific_Sex', 'Prolific_Ethnicity_simplified', 'Prolific_Country_of_birth', 'Prolific_Country_of_residence', 'Prolific_Nationality', 'Prolific_Language', 'Prolific_Student_status', 'Prolific_Employment_sta

In [40]:
# Write the augmented participant rows (original columns plus the derived
# metric columns listed in `fieldnames`) to a new CSV file.
output_file_name = 'EMNLP_Human_Study_Data_with_all_Metrics.csv'

# newline='' leaves line-ending handling to the csv writer.
with open(output_file_name, mode='w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()
    # One output line per participant, columns in header order.
    for participant_row in modified_rows:
        writer.writerow(participant_row)

1412