In [3]:
import pandas as pd
import glob

# MTurk mapping

This notebook links the experiment-specific user IDs to the MTurk worker IDs and to the answer codes. This makes it possible to verify which workers completed the experiment and submitted a valid answer code.

## Load data

In [4]:
# Load the gzip-compressed pickle that the rest of the notebook works on.
# NOTE: the name `all` shadows Python's builtin of the same name; it is kept
# because every later cell refers to the frame by this name.
all = pd.read_pickle(
    "outputs/201907111000_duration.pkl",
    compression="gzip",
)

### Users who have seen the final screen

In [5]:
# A user counts as finished when at least one of their rows has a chart
# string containing the final-screen message marker.
users_who_have_finished = all.loc[
    all.chart.str.contains("@message: final:"), "user"
].unique()

# Distinct users overall, then distinct users who reached the final screen.
print(len(all.user.unique()))
print(len(users_who_have_finished))

173
53


### Are their codes valid?

In [6]:
# Reference list of codes, read from a header-less file into one column.
valid_codes = pd.read_csv("../numbers.txt", names=["number"])

# One row per final-screen message. The code is the piece of the chart
# string after its last ": ", converted to a number.
users_codes = (
    all.query("chart.str.contains('@message: final:')")
    .assign(code=lambda rows: pd.to_numeric(rows.chart.str.split(": ").str[-1]))
    .filter(["user", "code"])
)

# Share of finished users whose code appears in the reference list.
(
    users_codes.loc[users_codes.code.isin(valid_codes.number), "user"]
    .unique()
    .shape[0]
    / users_who_have_finished.shape[0]
)

1.0

### Connect to MTurk ID

In [7]:
# Collect every CSV under inputs/MTurk. glob returns paths in a
# filesystem-dependent order, so sort them to keep the row order of the
# concatenated frame (and of everything derived from it) reproducible.
mturk_files = sorted(glob.glob('inputs/MTurk/*.csv'))
if not mturk_files:
    # pd.concat([]) would otherwise fail with "No objects to concatenate",
    # which does not say what is actually missing.
    raise FileNotFoundError("No MTurk result files match 'inputs/MTurk/*.csv'")

users_workers = (
    pd.concat([pd.read_csv(f) for f in mturk_files], ignore_index=True)
    .filter(["WorkerId", "Answer.surveycode"])
    .rename({"Answer.surveycode": "code"}, axis=1)
    # Left join keeps every MTurk row; `user` stays NaN when the submitted
    # code matches no code found in the experiment data.
    .merge(users_codes, on="code", how="left")
)

# Number of rows whose submitted code could be matched to an experiment user.
users_workers[users_workers.user.notna()].shape[0]

47

### Complete the user-MTurk mapping with the provided IDs

In [8]:
# Rows whose chart string mentions 'workerId' supply a second source of
# worker IDs: their `message` value is used as the WorkerId.
users_workers_complete = (
    all.loc[lambda rows: rows.chart.str.contains("workerId")]
    .filter(["user", "message"])
    .rename(columns={"message": "WorkerId"})
    # No join keys are given, so pandas joins on every column the two
    # frames have in common.
    .merge(users_workers, how="outer")
    # Keep only rows that are tied to an experiment user.
    .loc[lambda merged: merged.user.notna()]
    # Drop the 'Jonas' entry (presumably a test run, not a real worker -- TODO confirm).
    .loc[lambda merged: merged.WorkerId != "Jonas"]
    .drop_duplicates()
)

In [9]:
# Shape of the mapping restricted to users who reached the final screen.
users_workers_complete[
    users_workers_complete.user.isin(users_who_have_finished)
].shape

(53, 3)

In [10]:
# Finished users per condition: take each finished user's first recorded
# condition, then count users within each condition.
(
    all.loc[all.user.isin(users_who_have_finished)]
    .groupby(["user"])["condition"]
    .first()
    .reset_index()
    .groupby("condition")["user"]
    .count()
)

condition
pilot3c1    13
pilot3c2    14
pilot3c3    14
pilot3c4    12
Name: user, dtype: int64

### Export

In [128]:
# Persist the user / worker / code mapping as a gzip-compressed pickle.
users_workers_complete.to_pickle(
    "outputs/201907120900_users_workers_codes.pkl",
    compression="gzip",
)

# Persist the IDs of users who reached the final screen, wrapped in a Series
# so that pandas can pickle them.
pd.Series(users_who_have_finished).to_pickle(
    "outputs/201907120900_users_who_have_finished.pkl",
    compression="gzip",
)