What follows is code that removes all workers who were inconsistent on a task from the data set; their rows are moved to a separate file rather than deleted outright.

In [2]:
import os
import numpy as np
import pandas as pd
import itertools
import csv

import value_aggregation as pm
import model_experiment
from shared_analysis import *

In [3]:
# Root directories for the scenario definitions and the collected results.
SCENARIOS_DIR = "data/scenarios"
MTURK_RESULTS_DIR = "data/results/mturk"
LLM_RESULTS_DIR = "data/results/llm"

# Identifier of the run being cleaned. It is used both as the stem of the
# scenario file and as the name of the MTurk results sub-directory.
num_scenarios = 4
scenario = (
    "maximize=True_num-agents=3_belief-steps=10_belief-range=[0.1,0.9]"
    "_action-steps=101_action-range=1,101_action-function-log=False"
    "_prevent-ties=True_agg-functions=['nash','fehr','util']"
    "_disagrees-only=False_num-scenarios=4_sample-size=68.0"
)

# Paths derived from the run identifier.
scenario_file = os.path.join(SCENARIOS_DIR, f"{scenario}.csv")
run_dir = os.path.join(MTURK_RESULTS_DIR, scenario)
fail_dir = os.path.join(run_dir, "failed_checks")

# fail_dir has to exist before anything is written into it; uncomment to
# create it the first time this notebook is run.
# os.mkdir(fail_dir)

In [14]:
## Clean-up loop: for every condition file in run_dir, move the workers who
## submitted more than once or failed an attention check into fail_dir, and
## blank their submission columns in the original condition file.

# The fail directory must exist before anything is written into it.
os.makedirs(fail_dir, exist_ok=True)

# Columns that are blanked for removed workers. The list does not depend on
# the condition, so it is built once; the per-question columns follow
# num_scenarios instead of a hard-coded count.
temp_cols = ['Answer.q_question-{0}', 'Answer.q_question-{0}_attn', 'Answer.q_question-{0}_attn_answer']
cols = ['HITId', 'HITTypeId', 'Title', 'Description', 'Keywords', 'Reward',
        'CreationTime', 'MaxAssignments', 'RequesterAnnotation',
        'AssignmentDurationInSeconds', 'AutoApprovalDelayInSeconds',
        'Expiration', 'AssignmentId', 'WorkerId', 'AssignmentStatus',
        'AcceptTime', 'SubmitTime', 'AutoApprovalTime', 'ApprovalTime',
        'RejectionTime', 'RequesterFeedback', 'Approve', 'Reject', 'IntegerId']
cols.extend(col.format(i) for col in temp_cols for i in range(1, num_scenarios + 1))

for condition in os.listdir(run_dir):
    filename = os.path.join(run_dir, condition)
    # Skip OS metadata, files marked as duplicates, and sub-directories
    # (fail_dir itself lives inside run_dir).
    if condition == ".DS_Store" or "duplicate" in condition or os.path.isdir(filename):
        continue

    raw_df = pd.read_csv(filename)

    # Rows blanked by an earlier run have no WorkerId. pandas treats missing
    # ids as duplicates of each other, so without this split a re-run would
    # classify those blank rows as failures and overwrite the fail file with
    # them. They are set aside and written back unchanged.
    was_cleared = raw_df['WorkerId'].isna()
    already_cleared = raw_df[was_cleared]
    df = raw_df[~was_cleared]
    num_workers = len(df)
    if num_workers == 0:
        print(f"{condition}: no submitted rows left to check")
        continue

    # duplicated(keep='first') is True for every repeat of a WorkerId after
    # its first occurrence.
    is_duplicate = df['WorkerId'].duplicated(keep='first')
    non_duplicates = df[~is_duplicate]
    duplicates = df[is_duplicate]

    print(f"{condition}: {len(duplicates)}/{len(df)} duplicate workers")

    # mturk_explode_df comes from shared_analysis; presumably it yields one
    # row per answered question with the 'attention-answer' and
    # 'attention-response' columns compared below -- verify against the helper.
    ex_df = mturk_explode_df(non_duplicates, num_scenarios)
    this_failed_checks = ex_df[ex_df['attention-answer'] != ex_df['attention-response']]

    # A worker is removed as soon as any one of their rows fails the check.
    workers_failed = this_failed_checks['WorkerId'].unique()

    print(f"{condition}: {len(workers_failed)}/{len(workers_failed) / num_workers:.2f} failed checks out of {num_workers}")

    has_failed = non_duplicates['WorkerId'].isin(workers_failed)
    failed = pd.concat([non_duplicates[has_failed], duplicates])
    not_failed = non_duplicates[~has_failed]

    # Keep a row for every removed worker, with the submission columns
    # emptied; the remaining columns are left as they were.
    failed_cleared = failed.copy()
    failed_cleared.loc[:, cols] = np.nan
    not_failed_and_cleared = pd.concat([failed_cleared, already_cleared, not_failed])

    fail_filename = os.path.join(fail_dir, condition)
    if os.path.exists(fail_filename):
        # Keep the workers moved out by earlier runs instead of replacing them.
        failed = pd.concat([pd.read_csv(fail_filename), failed])

    failed.to_csv(fail_filename, index=False, quoting=csv.QUOTE_ALL)
    not_failed_and_cleared.to_csv(filename, index=False, quoting=csv.QUOTE_ALL)

chart_type=area_maximize=True.csv: 0/102 duplicate workers
chart_type=area_maximize=True.csv: 0/0.00 failed checks out of 102
chart_type=both_maximize=True.csv: 0/102 duplicate workers
chart_type=both_maximize=True.csv: 0/0.00 failed checks out of 102
chart_type=none_maximize=True.csv: 23/102 duplicate workers
chart_type=none_maximize=True.csv: 48/0.47 failed checks out of 102
chart_type=volume_maximize=True.csv: 0/102 duplicate workers
chart_type=volume_maximize=True.csv: 0/0.00 failed checks out of 102


In [32]:
## create a new file to submit to mturk

In [None]:
## cruft below used to temporarily generate and check the results for the none trial

In [24]:
## Single-condition version of the clean-up, used to re-check the "none"
## trial. Nothing is written to disk in this cell; it only computes
## `failed`, `not_failed_and_cleared`, `filename` and `fail_filename`.

condition = 'chart_type=none_maximize=True'
filename = os.path.join(MTURK_RESULTS_DIR, scenario, f'{condition}.csv')

raw_df = pd.read_csv(filename)

# Rows blanked by an earlier run have no WorkerId. pandas treats missing ids
# as duplicates of each other, so they are set aside here and added back to
# the output unchanged instead of being classified as duplicates/failures.
was_cleared = raw_df['WorkerId'].isna()
already_cleared = raw_df[was_cleared]
df = raw_df[~was_cleared]
num_workers = len(df)
if num_workers == 0:
    raise ValueError(f"{filename} has no submitted rows left to check")

# duplicated(keep='first') is True for every repeat of a WorkerId after its
# first occurrence.
is_duplicate = df['WorkerId'].duplicated(keep='first')
non_duplicates = df[~is_duplicate]
duplicates = df[is_duplicate]

print(f"{condition}: {len(duplicates)}/{len(df)} duplicate workers")

# mturk_explode_df comes from shared_analysis; presumably it yields one row
# per answered question with the two attention columns compared below --
# verify against the helper.
ex_df = mturk_explode_df(non_duplicates, num_scenarios)
this_failed_checks = ex_df[ex_df['attention-answer'] != ex_df['attention-response']]

# A worker is removed as soon as any one of their rows fails the check.
workers_failed = this_failed_checks['WorkerId'].unique()

print(f"{condition}: {len(workers_failed)}/{len(workers_failed) / num_workers:.2f} failed checks out of {num_workers}")

has_failed = non_duplicates['WorkerId'].isin(workers_failed)
failed = pd.concat([non_duplicates[has_failed], duplicates])
not_failed = non_duplicates[~has_failed]

# Columns blanked for removed workers; the per-question columns follow
# num_scenarios instead of a hard-coded count.
temp_cols = ['Answer.q_question-{0}', 'Answer.q_question-{0}_attn', 'Answer.q_question-{0}_attn_answer']
cols = ['HITId', 'HITTypeId', 'Title', 'Description', 'Keywords', 'Reward',
        'CreationTime', 'MaxAssignments', 'RequesterAnnotation',
        'AssignmentDurationInSeconds', 'AutoApprovalDelayInSeconds',
        'Expiration', 'AssignmentId', 'WorkerId', 'AssignmentStatus',
        'AcceptTime', 'SubmitTime', 'AutoApprovalTime', 'ApprovalTime',
        'RejectionTime', 'RequesterFeedback', 'Approve', 'Reject', 'IntegerId']
cols.extend(col.format(i) for col in temp_cols for i in range(1, num_scenarios + 1))

failed_cleared = failed.copy()
failed_cleared.loc[:, cols] = np.nan
not_failed_and_cleared = pd.concat([failed_cleared, already_cleared, not_failed])

# No ".csv" suffix here: `condition` has no extension in this cell, and the
# cell that writes the file appends it.
fail_filename = os.path.join(fail_dir, condition)


chart_type=none_maximize=True: 0/102 duplicate workers
chart_type=none_maximize=True: 0/0.00 failed checks out of 102


In [25]:
# Persist the result of the single-condition clean-up: removed workers go to
# fail_dir, and the condition file is overwritten with the cleaned data.
# fail_dir is created on demand so the first write cannot fail on a missing
# directory.
os.makedirs(fail_dir, exist_ok=True)
failed.to_csv(fail_filename + ".csv", index=False, quoting=csv.QUOTE_ALL)
not_failed_and_cleared.to_csv(filename, index=False, quoting=csv.QUOTE_ALL)

In [26]:
# Notebook display of the condition file path set by the single-condition cell.
filename

"data/results/mturk/maximize=True_num-agents=3_belief-steps=10_belief-range=[0.1,0.9]_action-steps=101_action-range=1,101_action-function-log=False_prevent-ties=True_agg-functions=['nash','fehr','util']_disagrees-only=False_num-scenarios=4_sample-size=68.0/chart_type=none_maximize=True.csv"

In [27]:
## Build a partial scenario file from the rows of the "none" condition that
## have no HITId (the rows blanked by the clean-up), presumably so those
## scenarios can be submitted to MTurk again -- confirm intended use.
condition = 'chart_type=none_maximize=True'
filename = os.path.join(MTURK_RESULTS_DIR, scenario, f'{condition}.csv')
df = pd.read_csv(filename)

# Keep only the columns that start with "Input." and strip that prefix.
# A prefix test (rather than a substring test) guarantees the slice below
# removes exactly the prefix and never mangles an unrelated column name.
input_prefix = 'Input.'
columns = [col for col in df.columns if col.startswith(input_prefix)]
renamed = {col: col[len(input_prefix):] for col in columns}

d = df.loc[df['HITId'].isnull(), columns].rename(columns=renamed)
d.to_csv(os.path.join(SCENARIOS_DIR, f'{condition}_partial.csv'), index=False)