In [None]:
import os
import json
import math
import pandas as pd
import seaborn as sns
import redis
from itertools import chain
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from random import shuffle
from tqdm import tqdm
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Emit inline figures as SVG; the trailing comment keeps 'retina' as the alternative format.
%config InlineBackend.figure_formats = ['svg']#['retina']
# Use seaborn's "whitegrid" style for all plots.
sns.set_style("whitegrid")
# Do not truncate columns when a DataFrame is displayed.
pd.options.display.max_columns = None

## Load experiment video data from JSON

In [None]:
# Read the per-video metadata of the experiment from its JSON file.
experiment_name = 'example_experiment'
video_data_path = f"experiment/public/experiment_data/video_data_{experiment_name}.json"
with open(video_data_path) as video_file:
    video_records = json.load(video_file)

# A record is flagged as objectnet when its image name does not start with 'I'.
for record in video_records:
    record["objectnet"] = record['image'][0] != 'I'

# One row per record; the row label is the record's position in the file.
video_data = pd.DataFrame(video_records)

In [None]:
# Map every video to the row label of its first occurrence in video_data.
# drop_duplicates does this in a single pass instead of re-filtering the
# whole frame once per unique video.
first_occurrences = video_data.drop_duplicates(subset='video', keep='first')
video_to_num = dict(zip(first_occurrences['video'], first_occurrences.index))
    

## Load test subject responses from Redis

In [None]:
# Deployment settings come from environment variables. A variable that is not
# set yields None; a variable that is set yields its string value.
environment = os.environ
EXPERIMENT_HOST = os.getenv("EXPERIMENT_HOST")
EXPERIMENT_PORT = os.getenv("EXPERIMENT_PORT")
REDIS_HOST = os.getenv("REDIS_HOST")
REDIS_PORT = os.getenv("REDIS_PORT")
REDIS_DB = os.getenv("REDIS_DB")
REDIS_PASSWORD = os.getenv("REDIS_PASSWORD")
NUM_IMAGES_PER_TASK = os.getenv("NUM_IMAGES_PER_TASK")
NUM_VIDEO_LISTS_PER_LINK = os.getenv("NUM_VIDEO_LISTS_PER_LINK")

In [None]:
# Pull every recorded response of this experiment out of Redis and assemble
# them, joined with the video metadata, into the `response_data` DataFrame.
load_from_db = True
all_workers = set()
if load_from_db:
    # `encoding` is the supported spelling of the deprecated `charset` keyword.
    r = redis.Redis(host=REDIS_HOST,
                    port=REDIS_PORT,
                    password=REDIS_PASSWORD,
                    encoding="utf-8",
                    decode_responses=True,
                    db=REDIS_DB)
    link_ids = r.smembers(experiment_name)
    video_responses = {}
    responses = {}
    worker_ids = {link: [] for link in link_ids}
    worker_to_link = {}

    # Response sets are stored under "<link_id>:worker_id:<worker_id>:responses".
    # Parse the key on its markers rather than on fixed character offsets so
    # that link ids of any length are handled.
    responses_suffix = ':responses'
    worker_marker = ':worker_id:'
    for key in r.keys('*' + responses_suffix):
        key_link, marker, key_worker = key[:-len(responses_suffix)].partition(worker_marker)
        if not marker:
            continue  # some other key that merely ends in ":responses"
        all_workers.add(key_worker)
        if key_link not in link_ids:
            continue  # the worker responded to a link of a different experiment
        worker_ids[key_link].append(key_worker)
        worker_to_link.setdefault(key_worker, []).append(key_link)

    # Record, per worker, the links of this experiment they have responded to.
    for worker, links in worker_to_link.items():
        r.sadd(worker + ':links', *links)

    all_assignments = set()
    for link_id in link_ids:
        for worker_id in worker_ids[link_id]:
            key_prefix = link_id + ":worker_id:" + worker_id
            trial_ids = list(r.smembers(key_prefix + ":responses"))
            if not trial_ids:
                continue  # nothing recorded for this worker on this link
            trials = [r.hgetall(key_prefix + ":response:" + trial) for trial in trial_ids]
            # The assignment id is read from the first trial of the set.
            all_assignments.add(trials[0].get('assignment_id'))
            responses.setdefault(worker_id, []).extend(trials)

    response_data = pd.DataFrame(list(chain.from_iterable(responses.values())))
    # NOTE(review): video_to_num is keyed by the values of video_data['video'];
    # confirm that the stored 'video_num' field holds those same values. If it
    # does not, map() produces NaN here and the 'video' column may have been
    # the intended input.
    response_data['video_num'] = response_data['video_num'].map(video_to_num)
    # presumably milliseconds -> seconds; confirm against the front end
    response_data['response_time'] = response_data['response_time'].astype(int)/1000
    del response_data['video']
    # video_num now holds video_data row labels, so the join attaches each
    # response to the metadata row of its video.
    response_data = response_data.set_index('video_num').join(video_data)
    
    
# Persist the ids of every worker seen in the database and report how many there are.
worker_ids_path = f'worker_ids_{experiment_name}.json'
serialized_workers = json.dumps(list(all_workers))
with open(worker_ids_path, 'w') as worker_file:
    worker_file.write(serialized_workers)
print(len(all_workers))



In [None]:
# Drop incomplete rows first: casting trial_num to int fails on missing
# values, so the cast must only ever see complete rows. Then order the
# responses by trial number.
response_data = (
    response_data
    .dropna(how='any')
    .assign(trial_num=lambda complete: complete['trial_num'].astype(int))
    .sort_values("trial_num")
)
        
        
# convert frame counts to ms
def get_adjusted_duration(timing, frame_rate=60):
    """Convert a duration given as a frame count into milliseconds.

    Parameters
    ----------
    timing : number
        Duration expressed as a number of frames.
    frame_rate : number, optional
        Frames per second used for the conversion. Defaults to 60, the
        value this notebook previously hard-coded.

    Returns
    -------
    The value of ``timing / frame_rate * 1000`` rounded with the built-in
    ``round`` (nearest integer).
    """
    return round(timing / frame_rate * 1000)
# Apply the frame-count -> ms conversion to every response.
# NOTE: re-running this cell converts the already converted values again.
response_data['image_duration'] = list(map(get_adjusted_duration, response_data['image_duration']))

# mark correctness of responses: 1 when the response equals the label, otherwise 0
response_data = response_data.fillna(0)
answered_correctly = response_data['response'] == response_data['label']
response_data['is_correct'] = answered_correctly.astype(int).tolist()

response_data

In [None]:
"""
Finds list of workers who have less than 60% accuracy at 10s image duration. You should strongly consider ignoring
these results and reposting the assignments for replacement by other workers. We do not necessarily recommend rejecting 
workers assignments on MTurk because that can get complicated to do fairly, but we have set up the backend such that you
can block these workers from doing more tasks and mark their assignments for replacement.
"""
MIN_ACCURACY = 0.6          # workers whose mean accuracy is below this are blocked
CHECK_DURATION_MS = 10_000  # accuracy is only checked at the 10s image duration

# Select `is_correct` before aggregating: averaging the whole frame would also
# try to average its non-numeric columns, which recent pandas versions reject.
worker_accuracies = response_data.groupby(['worker_id', 'image_duration'])['is_correct'].mean()
low_accuracies = worker_accuracies[worker_accuracies < MIN_ACCURACY]
# The index entries are (worker_id, image_duration) pairs.
workers_to_block = list({worker for worker, duration in low_accuracies.index
                         if duration == CHECK_DURATION_MS})
print('Workers with low 10s accuracy', len(workers_to_block))

# adds workers to blocked list
for worker in workers_to_block:
    r.sadd('blocked_worker_ids', worker)


# marks all assignments completed by blocked workers for replacement by new workers
blocked_rows = response_data['worker_id'].isin(workers_to_block)
assignments_to_replace = response_data[blocked_rows]['assignment_id'].unique()
for assignment in assignments_to_replace:
    r.sadd('assignments_to_replace', assignment)


# Keep only the responses of workers that were not blocked.
response_data = response_data[~blocked_rows]
response_data.to_csv(f'{experiment_name}_results.csv') # save experiment to csv

In [None]:
"""
Keep track of how many unique workers have completed tasks for each link. 
This should give you a good idea how close you are to getting the whole
complement of responses and which links you need to repost with how
many new assignments.
"""
# Number of distinct workers per link; only the worker_id column is counted.
workers_by_link = response_data.groupby('link_id')['worker_id']
link_counts = workers_by_link.nunique()
link_counts

In [None]:
"""
To get more explicit information about which results are missing, take a 
look at how many times each image has been seen at each timing. Images
with fewer responses at a given timing than they should have indicate
assignments that are yet to be completed or need replacement.
"""
# TODO: set this to the number of responses every image should receive at
# each image duration in your experiment design.
num_workers_per_image_per_duration = None
if num_workers_per_image_per_duration is None:
    raise ValueError(
        "Set num_workers_per_image_per_duration to the number of responses each "
        "image should receive at each image duration before running this cell."
    )

# Number of recorded responses for every (image_duration, image) pair.
# NOTE: pairs with no response at all do not appear in this count, so images
# that have never been seen at some duration are not reported below.
exp_image_count = response_data.groupby(["image_duration", "image"])["trial_num"].count()
under_target = exp_image_count[exp_image_count < num_workers_per_image_per_duration]
# Distinct images that are short of responses at one or more durations,
# listed in a deterministic order.
need_more_responses = under_target.index.get_level_values("image").unique().tolist()
need_more_responses