# Table of Contents
* [**Submitting HITs**](#**Submitting-HITs**)
	* [Building URLs for images on s3](#Building-URLs-for-images-on-s3)
	* [submitting HITs in groups](#submitting-HITs-in-groups)
		* [creates HITs, careful with this one](#creates-HITs,-careful-with-this-one)
* [**Reviewing HITs**](#**Reviewing-HITs**)
	* &nbsp;
		* [bad images](#bad-images)
		* [Retrieving and processing latest HITs](#Retrieving-and-processing-latest-HITs)
		* [consensus](#consensus)
	* [**sending to review tool**](#**sending-to-review-tool**)
		* [randomly sampling HITs](#randomly-sampling-HITs)
		* [sampling no consensus](#sampling-no-consensus)
		* [looking at individual workers](#looking-at-individual-workers)
		* [store cells](#store-cells)
* [Merging latest round of HITs into combined dataset](#Merging-latest-round-of-HITs-into-combined-dataset)
	* [Load prior complete dataset if not in memory](#Load-prior-complete-dataset-if-not-in-memory)
	* [Updating full dataset](#Updating-full-dataset)
	* [Working with full dataset](#Working-with-full-dataset)
* [Worker Analysis](#Worker-Analysis)
	* [Basic worker stats](#Basic-worker-stats)
		* [HIT duration for pricing](#HIT-duration-for-pricing)
		* [HIT pricing check](#HIT-pricing-check)
	* [Identifying high and low consensus workers](#Identifying-high-and-low-consensus-workers)
		* [high](#high)
		* [low](#low)
	* [Email contact with workers](#Email-contact-with-workers)
	* [Messaging workers](#Messaging-workers)
* [**HIT end-of-life**](#**HIT-end-of-life**)
	* [Pickle latest results](#Pickle-latest-results)
	* [Pickle combined dataset](#Pickle-combined-dataset)
	* [Accepting and deleting HITs... careful with these](#Accepting-and-deleting-HITs...-careful-with-these)
		* [accepting HITS](#accepting-HITS)
		* [rejecting HITS, banning bad workers](#rejecting-HITS,-banning-bad-workers)
		* [deleting all HITS](#deleting-all-HITS)
* [End](#End)


In [2]:
%%capture
from __future__ import division
import numpy as np
import pandas as pd
import scipy.stats as st
import itertools
import math
from collections import Counter, defaultdict
%load_ext autoreload
%autoreload 2

# The lines below set the look and feel of mpl-generated plots.
import matplotlib as mpl
mpl.use("Agg")
import matplotlib.pylab as plt
# #%matplotlib notebook
%matplotlib inline
%load_ext base16_mplrc
%base16_mplrc light default
plt.rcParams['figure.figsize'] = (16.0, 10.0)

import re
import pickle
import boto
import json
import os

from copy import deepcopy
import boto.mturk.connection as tc
import boto.mturk.question as tq
from boto.mturk.qualification import PercentAssignmentsApprovedRequirement, Qualifications, Requirement

# These lines import my aws access keys
from keysTkingdom import mturk_ai2
from keysTkingdom import aws_tokes


import amt_utils.process_hits as amt_util
import amt_utils.turk_email_utils as turkmail_util
import amt_utils.shining_rel_build as rbuild
import glob
import copy

# **Submitting HITs**

## Building URLs for images on s3

In [330]:
# Read the headerless list of image names queued for annotation and expose it
# both as a one-column frame and as a plain Python list.
images_to_annotate = pd.read_csv('./list_to_annotate.txt', header=None)
images_to_annotate.columns = ['image_name']
queued_name_column = images_to_annotate['image_name']
images_to_annotate_list = queued_name_column.tolist()
# len(images_to_annotate_list)

In [331]:
# Same layout as the list above, read from './to_annotate_minus_other.txt'.
images_to_annotate_new = pd.read_csv('./to_annotate_minus_other.txt', header=None)
images_to_annotate_new.columns = ['image_name']
remaining_name_column = images_to_annotate_new['image_name']
images_to_annotate_new_list = remaining_name_column.tolist()
# len(images_to_annotate_new_list)

In [332]:
def _load_image_names(list_path):
    """Read a headerless one-column text file and return its values as a list."""
    names_df = pd.read_csv(list_path, header=None)
    names_df.columns = ['image_name']
    return names_df['image_name'].tolist()


# Manually triaged image lists: bad, easy and difficult.
bad_images = _load_image_names('./bad.txt')
easy_images = _load_image_names('./easy.txt')
difficult_images = _load_image_names('./difficult.txt')

# Sizes of each triage bucket.
lbi, lei, ldi = len(bad_images), len(easy_images), len(difficult_images)
# print ldi + lei + lbi

# Every image that has been through triage, in bad/easy/difficult order.
images_we_triaged = bad_images + easy_images + difficult_images

In [387]:
# sorted(set(images_to_annotate_new_list).difference(set(images_we_triaged)), key=lambda x:int(x.split('.')[0]))
# sorted(set(images_to_annotate_list).difference(set(images_to_annotate_new_list)), key=lambda x:int(x.split('.')[0]))

In [335]:
# Load the image-dimension lookup used when building HIT URLs.
# Pickle data is binary, so the file must be opened in 'rb' mode; text mode can
# corrupt the stream on some platforms and fails outright on Python 3.
# NOTE(review): pickle.load can execute arbitrary code; only load files you created.
with open('dim_lookup.pkl', 'rb') as f:
    img_dim_lookup = pickle.load(f)

In [334]:
app_url_base = 'https://s3-us-west-2.amazonaws.com/ai2-vision-turk-data/shining-3-watercycle-test/build/index.html?url='
completed_images = pd.read_csv('completed_images.csv', header=None).values

In [336]:
easy_image_urls = amt_util.make_diagram_hit_urls(easy_images, img_dim_lookup, app_url_base, completed_images) 
difficult_image_urls = amt_util.make_diagram_hit_urls(difficult_images, img_dim_lookup, app_url_base, completed_images) 

In [419]:
print len(easy_image_urls)
print len(difficult_image_urls)
print len(easy_image_urls) + len(difficult_image_urls)

94
178
272


In [418]:
pages_to_use_sub = amt_util.make_diagram_hit_urls(images_to_annotate_new_list[15:16], img_dim_lookup, app_url_base, []) 

## submitting HITs in groups

**DON'T FORGET to change submission POST request in the client when changing host**

In [3]:
## Switch between sandbox and the real world here ##
## DON'T FORGET to change submission POST request in the client ##

sandbox_host = 'mechanicalturk.sandbox.amazonaws.com' 
mturk = tc.MTurkConnection(
    aws_access_key_id = aws_tokes.access_key,
    aws_secret_access_key = aws_tokes.access_secret_key,
    host = sandbox_host,
    debug = 1 # debug = 2 prints out all requests.
)
current_account_balance = mturk.get_account_balance()[0]
if current_account_balance.amount == 10000:
    print "Working in the SANDBOX with"
else:
    print "Working in the REAL WORLD with"
print current_account_balance # a reminder of sandbox

Working in the SANDBOX with
$10,000.00


In [417]:
# Settings shared by every HIT created in this round.
# NOTE(review): duration and lifetime are presumably in seconds - confirm against the boto API.
static_params = dict(
    title="Annotate shining 3 water cycles",
    description="you know what to do",
    keywords=['findme'],
    frame_height=800,
    amount=1.0,
    duration=3600 * 12,
    lifetime=3600 * 24 * 3,
    max_assignments=1,   # change to 3 when running for real
)

# Each variant is a deep copy, so nested values such as the keywords list are
# not shared with the base settings; only the title differs.
static_params_easy = copy.deepcopy(static_params)
static_params_easy['title'] = "Annotate easy shining 3 water cycles"

static_params_difficult = copy.deepcopy(static_params)
static_params_difficult['title'] = "Annotate difficult shining 3 water cycles"

### creates HITs, careful with this one

**uncomment cell below only when ready to submit**

In [420]:
# amt_util.create_hits_from_pages(mturk, pages_to_use_sub, static_params_difficult)
# amt_util.create_hits_from_pages(mturk, easy_image_urls, static_params_easy)
# amt_util.create_hits_from_pages(mturk, difficult_image_urls, static_params_difficult)

# **Reviewing HITs**

### bad images

68.png

67 dupe


look into why image 76 doesn't work

review tool by category


default to blobs for linkages

### Retrieving and processing latest HITs

In [32]:
r_hits_current_batch = amt_util.get_completed_hits(mturk)
assignment_results_current_batch = amt_util.get_assignments(mturk, r_hits_current_batch, 'Submitted')

In [33]:
raw_hit_results_current_batch = amt_util.process_raw_hits(assignment_results_current_batch)

In [34]:
amt_util.get_assignment_statuses(assignment_results_current_batch )

Submitted    92
dtype: int64

The commands above interact with Mechanical Turk and can take a while.

In [35]:
results_df_shining_batch = amt_util.make_shining_results_df(raw_hit_results_current_batch)

In [36]:
results_df_shining_batch.head()

Unnamed: 0,page,category,hit_id,assignment_id,id,worker_id,group_n
0,4935.png,[unlabeled],3L2OEKSTW9BLL4JK737OPMM7XITY8E,3N2BF7Y2VQVH7ITTH6ILXONG5GSHM5,B10,ATRVCA186WV1B,"[[0, 0]]"
1,4935.png,[unlabeled],3L2OEKSTW9BLL4JK737OPMM7XITY8E,3N2BF7Y2VQVH7ITTH6ILXONG5GSHM5,B11,ATRVCA186WV1B,"[[0, 0]]"
2,4935.png,"[unlabeled, arrowDescriptor, InterObjectLinkage]",3L2OEKSTW9BLL4JK737OPMM7XITY8E,3N2BF7Y2VQVH7ITTH6ILXONG5GSHM5,A1,ATRVCA186WV1B,"[[0, 0], [5, 2], [9, 2]]"
3,4935.png,"[unlabeled, arrowDescriptor, InterObjectLinkage]",3L2OEKSTW9BLL4JK737OPMM7XITY8E,3N2BF7Y2VQVH7ITTH6ILXONG5GSHM5,A0,ATRVCA186WV1B,"[[0, 0], [4, 2], [8, 2]]"
4,4935.png,"[unlabeled, arrowDescriptor, InterObjectLinkage]",3L2OEKSTW9BLL4JK737OPMM7XITY8E,3N2BF7Y2VQVH7ITTH6ILXONG5GSHM5,A3,ATRVCA186WV1B,"[[0, 0], [7, 2], [11, 2]]"


In [9]:
pd.unique(results_df_shining_batch['page'])

array([u'4102.png', u'4935.png', u'5032.png', u'1276.png', u'4126.png',
       u'80.png', u'5049.png', u'5027.png', u'4104.png', u'5003.png',
       u'4092.png', u'1277.png', u'5054.png', u'4988.png', u'4109.png',
       u'5057.png', u'5005.png', u'5002.png', u'4132.png', u'4110.png',
       u'4099.png', u'788.png'], dtype=object)

In [37]:
pd.unique(results_df_shining_batch['worker_id'])

array([u'ATRVCA186WV1B', u'A5SYKRALYXN5Q'], dtype=object)

In [479]:
results_df_shining_batch[results_df_shining_batch['page'] == '1276.png']

Unnamed: 0,page,category,hit_id,assignment_id,id,worker_id,group_n
59,1276.png,"[unlabeled, InterObjectLinkage]",304QEQWKZPLR91AF6CLBUGIN0AJ0OJ,3OS46CRSLF0K0G3LZFS8NTH3NVO6VA,A1,ATRVCA186WV1B,"[[0, 0], [5, 2]]"
60,1276.png,"[unlabeled, InterObjectLinkage]",304QEQWKZPLR91AF6CLBUGIN0AJ0OJ,3OS46CRSLF0K0G3LZFS8NTH3NVO6VA,A0,ATRVCA186WV1B,"[[0, 0], [2, 2]]"
61,1276.png,"[unlabeled, InterObjectLinkage]",304QEQWKZPLR91AF6CLBUGIN0AJ0OJ,3OS46CRSLF0K0G3LZFS8NTH3NVO6VA,A3,ATRVCA186WV1B,"[[0, 0], [4, 2]]"
62,1276.png,"[unlabeled, imageTitle]",304QEQWKZPLR91AF6CLBUGIN0AJ0OJ,3OS46CRSLF0K0G3LZFS8NTH3NVO6VA,T0,ATRVCA186WV1B,"[[0, 0], [1, 1]]"
63,1276.png,"[unlabeled, InterObjectLinkage, InterObjectLin...",304QEQWKZPLR91AF6CLBUGIN0AJ0OJ,3OS46CRSLF0K0G3LZFS8NTH3NVO6VA,T1,ATRVCA186WV1B,"[[0, 0], [2, 1], [5, 3]]"
64,1276.png,"[unlabeled, InterObjectLinkage, InterObjectLin...",304QEQWKZPLR91AF6CLBUGIN0AJ0OJ,3OS46CRSLF0K0G3LZFS8NTH3NVO6VA,B4,ATRVCA186WV1B,"[[0, 0], [2, 3], [3, 1], [6, 1]]"
65,1276.png,[unlabeled],304QEQWKZPLR91AF6CLBUGIN0AJ0OJ,3OS46CRSLF0K0G3LZFS8NTH3NVO6VA,B5,ATRVCA186WV1B,"[[0, 0]]"
66,1276.png,[unlabeled],304QEQWKZPLR91AF6CLBUGIN0AJ0OJ,3OS46CRSLF0K0G3LZFS8NTH3NVO6VA,B6,ATRVCA186WV1B,"[[0, 0]]"
67,1276.png,"[unlabeled, InterObjectLinkage, InterObjectLin...",304QEQWKZPLR91AF6CLBUGIN0AJ0OJ,3OS46CRSLF0K0G3LZFS8NTH3NVO6VA,B7,ATRVCA186WV1B,"[[0, 0], [4, 3], [5, 1], [6, 3]]"
68,1276.png,[unlabeled],304QEQWKZPLR91AF6CLBUGIN0AJ0OJ,3OS46CRSLF0K0G3LZFS8NTH3NVO6VA,B0,ATRVCA186WV1B,"[[0, 0]]"


In [38]:
amt_util.count_pages_in_df(results_df_shining_batch)

92

In [40]:
anno_dir = './baseAnnotations/'
new_anno_dir = './newAnnotations/'

In [41]:
_ = results_df_shining_batch.groupby('page').apply(rbuild.build_and_write_relationships, anno_dir, new_anno_dir)

1265.png.json
16
1265.png.json
16
1265.png.json
16
1266.png.json
16
1269.png.json
6
1270.png.json
7
1272.png.json
10
1273.png.json
11
1275.png.json
18
1276.png.json
6
1277.png.json
9
1306.png.json
9
1308.png.json
19
1312.png.json
9
1313.png.json
4
1502.png.json
6
395.png.json
9
4089.png.json
12
4091.png.json
6
4092.png.json
6
4093.png.json
2
4094.png.json
6
4095.png.json
7
4096.png.json
9
4097.png.json
5
4099.png.json
4
4100.png.json
13
4101.png.json
5
4102.png.json
6
4103.png.json
12
4104.png.json
6
4108.png.json
16
4109.png.json
3
4110.png.json
7
4113.png.json
8
4114.png.json
10
4115.png.json
21
4126.png.json
9
4128.png.json
5
4129.png.json
5
4132.png.json
4
4135.png.json
9
4141.png.json
11
4142.png.json
9
4145.png.json
8
455.png.json
6
4908.png.json
10
4916.png.json
20
4922.png.json
10
4935.png.json
11
4941.png.json
16
4942.png.json
14
4964.png.json
4
4973.png.json
7
4976.png.json
11
4987.png.json
9
4988.png.json
21
4990.png.json
13
5002.png.json
13
5003.png.json
15
5004.png.json
21

Write to record of completed images

In [318]:
# pd.Series(pd.unique(results_df_shining_batch['page'])).to_csv('completed_images.csv', mode='a', index=False)

### consensus

In [237]:
# results_df_question_batch['group_memberships'] = results_df_question_batch['g_list'].apply(lambda x:[el[0] for el in x if el[0] > 0])

In [158]:
# consensus_results_df_current_batch = amt_util.make_consensus_df(results_df_question_batch, 'No Consensus')

In [319]:
question_cats = ['Multiple Choice',
                 'Fill-in-the-Blank',
                 'Short Answer',
                 'Discussion']

In [332]:
# Keep rows whose box id starts with 'Q' or whose category is one of question_cats.
starts_with_q = consensus_results_df_current_batch['box_id'].apply(lambda x: x[0] == 'Q')
in_question_category = consensus_results_df_current_batch['category'].isin(question_cats)
question_only_cons_df = consensus_results_df_current_batch[starts_with_q | in_question_category]

In [312]:
no_consensus_hits_cat = consensus_results_df_current_batch[consensus_results_df_current_batch['category'] == 'No Consensus']

flaw_rate = len(no_consensus_hits_cat) / len(consensus_results_df_current_batch)
print 'question boxes without category consensus comprise ' + '{0:0.2f}% '.format(flaw_rate * 100) + 'of the total'  

These numbers differ because-
1. The first counts non-con results from the first round (non-question results)

2. The second counts only boxes either previously marked as a question, or selected in this round

In [338]:
no_consensus_hits_cat = question_only_cons_df[question_only_cons_df['category'] == 'No Consensus']

flaw_rate = len(no_consensus_hits_cat) / len(question_only_cons_df)
print 'question boxes without category consensus comprise ' + '{0:0.2f}% '.format(flaw_rate * 100) + 'of the total'  

In [315]:
amt_util.write_results_df(consensus_results_df_current_batch, 'annotations-w-questions/', 'simpler-test-questions/')

In [317]:
# consensus_results_df_current_batch[consensus_results_df_current_batch['page'] == 'Spectrum_Science_Grade_8_8.jpeg']

## **sending to review tool**

### randomly sampling HITs 

In [375]:
pages_to_review = np.unique(consensus_results_df_current_batch['page'])

In [376]:
sampling_rate = 0.1
sample_size = int(len(pages_to_review) * sampling_rate)
sampled_pages_to_review = list(np.random.choice(pages_to_review, size= sample_size, replace=False))
print 'sampling ' + str(sample_size) + ' pages out of ' + str(len(pages_to_review))
to_review = ['start_seq'] + sampled_pages_to_review

In [377]:
anno_dir = 'simpler-test-questions/'
amt_util.review_results(to_review, anno_dir)
print 'posting to review tool, navigate to http://localhost:8080/ to see the sampled consensus results'

### sampling no consensus

In [378]:
pages_to_review =pd.unique(no_consensus_hits_cat['page'])

In [379]:
sampling_rate = 0.5
sample_size = int(len(pages_to_review) * sampling_rate)
sampled_pages_to_review = list(np.random.choice(pages_to_review, size= sample_size, replace=False))
print 'sampling ' + str(sample_size) + ' pages out of ' + str(len(pages_to_review))
to_review = ['start_seq'] + sampled_pages_to_review

In [380]:
anno_dir = 'simpler-test-questions/'
amt_util.review_results(to_review, anno_dir)
print 'posting to review tool, navigate to http://localhost:8080/ to see the sampled consensus results'

### looking at individual workers

In [276]:
ind_worker_dir = 'individual-worker-results/'

In [342]:
bad_and_prolific_to_review

In [384]:
# Pick the workers whose submissions should be inspected one by one.
# workers_to_review = turkers_contacted_me[:1]
workers_to_review = really_bad_workers_to_review
submitted_by_selected = results_df_question_batch['worker_id'].isin(workers_to_review)
subset_by_worker = results_df_question_batch[submitted_by_selected]

# Write those rows out, then queue every page they touch for the review tool.
amt_util.write_results_df(subset_by_worker, 'annotations-w-questions/', ind_worker_dir)
unique_worker_pages = pd.unique(subset_by_worker['page'])
pages_to_review = unique_worker_pages.tolist()
to_review = ['start_seq'] + pages_to_review

In [383]:
anno_dir = ind_worker_dir
amt_util.review_results(to_review, anno_dir)
print 'posting to review tool, navigate to http://localhost:8080/ to see the sampled consensus results'

### store cells

In [207]:
# Scratch cell: a collection of alternative snippets for choosing what goes to
# the review tool. Later assignments to `to_review` / `pages_to_review`
# overwrite earlier ones, so the statements are meant to be run selectively.
# NOTE(review): `consensus_results_df_complete` is read on the next line but is
# only assigned further down in this cell, so the cell fails on a fresh kernel.

# Pages that contain at least one box whose category is 'Multiple Choice'.
multi_choice_pages = consensus_results_df_complete[consensus_results_df_complete['category'] == 'Multiple Choice']
pages_to_review =pd.unique(multi_choice_pages['page'])
to_review = ['start_seq'] + pages_to_review.tolist()

# consensus_results_df_complete = amt_util.make_consensus_df(results_df_question_batch, 'No Consensus')
consensus_results_df_complete = combined_consensus_df

# amt_util.write_results_df(consensus_results_df_complete, 'annotations-w-questions/' ,'labeled-questions/')

# Rows for one specific page, written out on their own.
spdf = results_df_question_batch[results_df_question_batch['page'] == 'Spectrum_Science_Grade_3_80.jpeg']

amt_util.write_results_df(spdf, 'annotations-w-questions/', 'simpler-test-questions/')

## all pages in consensus_results_df_complete
pages_to_review =pd.unique(consensus_results_df_complete['page'])
to_review = ['start_seq'] + pages_to_review.tolist()

# all pages from the latest batch with a no-consensus box
# pages_to_review =pd.unique(no_consensus_hits['page'])

# Persist the review sequence without its leading 'start_seq' entry.
pd.Series(to_review[1:]).to_pickle('rev_seq.pkl')

single_page = ['Daily_Science_Grade_2_Evan_Moor_33.jpeg']

problem_pages = pd.read_pickle('problem_pages.pkl').tolist()

# Review only the stored problem pages ...
to_review = ['start_seq'] + problem_pages

# ... or, overriding the line above, only the single page.
to_review = ['start_seq'] + single_page

# Rows of the combined dataset submitted by the workers in bad_and_prolific_to_review.
suspect_subset = combined_results_df[combined_results_df['worker_id'].isin(bad_and_prolific_to_review)]
# suspect_subset = combined_results_df[combined_results_df['worker_id'].isin(['A3VE5OH94HYHET'])]
amt_util.write_results_df(suspect_subset, 'annotations-w-questions/' ,'labeled-questions2/')
pages_to_review = pd.unique(suspect_subset['page'])

In [150]:
## all pages from the latest batch
pages_to_review =pd.unique(results_df_question_batch['page'])
to_review = ['start_seq'] + pages_to_review.tolist()

In [None]:
%%capture 
# Rows of the latest batch submitted by the first five entries of suspect_workers,
# written out for the review tool; pages_to_review lists the pages they touch.
# NOTE(review): write_results_df is called with one argument here but with three
# elsewhere in this notebook - confirm the helper has defaults for the directories.
suspect_subset = results_df_current_batch[results_df_current_batch['worker_id'].isin(suspect_workers[:5])]
amt_util.write_results_df(suspect_subset)
pages_to_review = pd.unique(suspect_subset['page'])

# Merging latest round of HITs into combined dataset  

## Load prior complete dataset if not in memory

In [426]:
data_pickled_dir = './store_hit_results_metadata/group_latest_combined/'

In [445]:
combined_results_df = pd.read_pickle(data_pickled_dir + 'complete_df.pkl')

In [436]:
amt_util.count_pages_in_df(combined_results_df)

10

In [440]:
# pages_to_drop = ['78.png']
# excised_df = combined_results_df[~combined_results_df['page'].isin(pages_to_drop)]

## Updating full dataset

In [448]:
# combined_results_df1 = combined_results_df1.append(results_df_shining_batch)

## Working with full dataset

In [16]:
%%capture
no_consensus_hits = combined_consensus_df[combined_consensus_df['category'] == 'No Consensus']

In [964]:
flaw_rate = len(no_consensus_hits) / len(combined_consensus_df)
print 'text boxes without consensus are ' + '{0:0.2f}% '.format(flaw_rate * 100) + 'of the total'  

In [227]:
worker_quality_df[worker_quality_df['submitted'] > 50].sort_values('flaw_ratio', ascending= True).head(25)

In [205]:
bad_and_prolific_workers[:15]

After looking through the top few offenders, it's clear that 

1. the very worst intentionally submitted many blank pages

2. the rest didn't read the directions very closely

3. I'm comfortable rejecting the work of those with > 100 submissions

4. I'll ban the worst 15 from future HITs


In [49]:
amt_util.write_results_df(combined_consensus_df)

In [45]:
# all pages from the complete
pages_to_review =pd.unique(combined_consensus_df['page'])

In [165]:
# all pages from the complete dataset with a no-consensus box
pages_to_review =pd.unique(no_consensus_hits['page'])

In [208]:
%%capture 
# Rows of the combined dataset submitted by entries 10-14 of
# bad_and_prolific_to_review; pages_to_review lists the pages they touch.
# NOTE(review): the "low" section of this notebook builds bad_and_prolific_to_review
# from only the first five workers, in which case the [10:15] slice is empty.
suspect_subset = combined_results_df[combined_results_df['worker_id'].isin(bad_and_prolific_to_review[10:15])]
amt_util.write_results_df(suspect_subset)
pages_to_review = pd.unique(suspect_subset['page'])

In [50]:
sampling_rate = 0.2
sample_size = int(len(pages_to_review) * sampling_rate)
sampled_pages_to_review = list(np.random.choice(pages_to_review, size= sample_size, replace=False))
print 'sampling ' + str(sample_size) + ' pages out of ' + str(len(pages_to_review))
to_review = ['start_seq'] + sampled_pages_to_review

In [51]:
amt_util.review_results(to_review)
print 'posting to review tool, navigate to http://localhost:8080/ to see the sampled consensus results'

# Worker Analysis

## Basic worker stats

In [10]:
print 'number of unique workers:', pd.unique(results_df_shining_batch['worker_id']).shape[0]

number of unique workers: 2


In [24]:
j_id , my_id = tuple(pd.unique(results_df_shining_batch['worker_id']))

In [25]:
pd.unique(results_df_shining_batch[results_df_shining_batch['worker_id'] == my_id]['page'])

array([u'4935.png', u'1276.png', u'5003.png', u'5054.png', u'5002.png',
       u'4132.png'], dtype=object)

### HIT duration for pricing

In [213]:
# Time each assignment took, in whole seconds, measured from AcceptTime to SubmitTime.
# The timestamps are parsed with pandas (already imported) because no date parser
# named `dt_parse` is imported anywhere in this notebook, so the cell used to
# fail with a NameError on a fresh kernel.
# NOTE(review): assumes AcceptTime/SubmitTime are timestamp strings that
# pd.to_datetime can parse - confirm against the boto Assignment object.
task_duration_seconds = []
for hit_id, assignments in assignment_results_current_batch.items():
    for assignment in assignments:
        hit_duration = pd.to_datetime(assignment.SubmitTime) - pd.to_datetime(assignment.AcceptTime)
        # total_seconds() keeps any whole days, which `.seconds` silently drops.
        task_duration_seconds.append(int(hit_duration.total_seconds()))

In [214]:
task_duration_series = pd.Series(task_duration_seconds)

In [94]:
_ = task_duration_series.hist(bins=30, log=True)

In [220]:
# Keep only durations under 150 seconds; longer ones are dropped, not clipped.
winsorized_durations = [seconds for seconds in task_duration_seconds if seconds < 150]
w_duration_series = pd.Series(winsorized_durations)

In [221]:
# Histogram of the filtered task durations, with the title, axis labels and
# tick labels sized up.
_ = pd.Series(w_duration_series).hist(bins=60)

text_color = b16_colors.b
axis_label_style = dict(fontsize=30, labelpad=10, color=text_color)
plt.title('Worker task duration', fontsize=50, verticalalignment='bottom', color=text_color)
plt.ylabel("Number of Workers", **axis_label_style)
plt.xlabel("Seconds Spent on HIT", **axis_label_style)
for axis_name in ('x', 'y'):
    plt.tick_params(axis=axis_name, which='major', labelsize=20)

In [224]:
dur_mode = w_duration_series.mode().values[0]
dur_median = w_duration_series.median()
print 'duration mode= ' + str(dur_mode)
print 'duration median= ' + str(dur_median)

### HIT pricing check

We'd like the effective hourly rate to be between 10 and 12 dollars. This is considered equitable in the world of Mechanical Turk.

In [241]:
hits_per_hour_high = 3600 / dur_mode
hits_per_hour_low = 3600 / dur_median
print 'effective hourly rate = $', hits_per_hour_low * static_params['amount'], 'to' , hits_per_hour_high * static_params['amount']

## Identifying high and low consensus workers

### high

In [244]:
# Rows where a worker's own category differs from the consensus category.
disagrees_with_consensus = (
    consensus_with_workerid_df['category'] != consensus_with_workerid_df['consensus_category']
)
worker_conflicts = consensus_with_workerid_df[disagrees_with_consensus]

# Per-worker row counts: everything submitted, and the disagreeing rows only.
all_worker_counts = results_df_question_batch['worker_id'].value_counts()
bad_worker_counts = worker_conflicts['worker_id'].value_counts()

# One row per worker with both counts and their ratio.
# NOTE(review): workers with no disagreements are absent from bad_worker_counts,
# so their 'incorrect' and 'flaw_ratio' are presumably NaN rather than 0 - verify.
worker_quality_df = pd.DataFrame([all_worker_counts, bad_worker_counts]).T
worker_quality_df.columns = ['submitted', 'incorrect']
worker_quality_df['flaw_ratio'] = worker_quality_df['incorrect'] / worker_quality_df['submitted']

ranked_by_flaw_ratio = worker_quality_df.sort_values('flaw_ratio', ascending=True)
good_workers = ranked_by_flaw_ratio.index.tolist()

ranked_by_flaw_ratio.head(10)

In [263]:
good_and_prolific_workers = worker_quality_df.sort_values('flaw_ratio', ascending= True).head(10).index.tolist()

I became concerned that I had missed the perfect performers but, as I suspected, perfect workers only did 1-2 HITs at most.

In [66]:
# aw_set = set(all_worker_counts.index)
# bw_set = set(bad_worker_counts.index)
# flawless_workers = list(aw_set.difference(bw_set))
# all_worker_counts[all_worker_counts.index.isin(best_workers)]

### low

In [292]:
bad_and_prolific_workers = worker_quality_df.sort_values('flaw_ratio', ascending= False).head(25).sort_values('incorrect', ascending= False)

In [362]:
# Worker ids to send to the review tool: the first five rows of each ranking.
bad_and_prolific_to_review = list(bad_and_prolific_workers[:5].index)

# `really_bad_workers` was only defined in a later cell, so this cell raised a
# NameError when the notebook was run top to bottom. Derive it here with the
# same flaw_ratio > 0.5 filter that the later cell applies.
really_bad_workers = bad_and_prolific_workers[bad_and_prolific_workers['flaw_ratio'] > 0.5]
really_bad_workers_to_review = list(really_bad_workers[:5].index)

In [294]:
bad_and_prolific_workers.head(15)

In [381]:
really_bad_workers = bad_and_prolific_workers[bad_and_prolific_workers['flaw_ratio'] > 0.5]
really_bad_workers

bad worker decisions-

1- didn't read directions

2- many blank pages

3- many blank pages

4- incomplete

the really bad workers are largely incomplete

## Email contact with workers

In [253]:
turkers_contacted_me = turkmail_util.get_latest_worker_communication()

In [257]:
turkers_contacted_me = [w_id[0] for w_id in turkers_contacted_me.values()]

In [258]:
turkers_contacted_me

In [269]:
good_set = set(good_and_prolific_workers)
bad_set = set(bad_and_prolific_to_review)
contact_set = set(turkers_contacted_me)

In [271]:
print good_set.intersection(contact_set)
print bad_set.intersection(contact_set)

## Messaging workers

In [889]:
subject = "More science book annotation HITs are available"
message = """
Hello, 

If you're receiving this message you were among the top performers on the first group HITs I submitted.
I've submitted another group of HITs, with more to follow in the next few days. 
I've added some additional instructions, and have removed the default instruction page (they can still be accessed by Read
Instruction Button).

Happy to get any feedback you might have for the new HITs.
"""

In [693]:
# _ = mturk.notify_workers(good_workers[:20], subject, message)

# **HIT end-of-life**

## Pickle latest results

In [42]:
#reset as needed
gn = 2

In [43]:
next_group = gn + 1
group_n = '_' + str(gn) + '/'

temp_store_dir = './store_hit_results_metadata/group'
try:
    os.mkdir(temp_store_dir + group_n)
except:
    OSError
    
result_file_name = 'hit_info.pkl'
assignment_file_name = 'assignment_info.pkl'
raw_results_file_name = 'raw_res_df.pkl'
complete_results_file = 'complete_df.pkl'
consensus_results_file = 'consensus_df.pkl'

amt_util.pickle_this(r_hits_current_batch, temp_store_dir + group_n + result_file_name)
amt_util.pickle_this(assignment_results_current_batch, temp_store_dir + group_n + assignment_file_name)
# amt_util.pickle_this(results_df_question_batch, temp_store_dir + group_n + raw_results_file_name)
results_df_shining_batch.to_pickle(temp_store_dir + group_n + raw_results_file_name)
print 'saved HIT batch number ' + str(gn)
print 'now onto batch ' +str(next_group) 
gn = next_group

saved HIT batch number 2
now onto batch 3


## Pickle combined dataset

In [443]:
# Store the combined results frame under
# ./store_hit_results_metadata/group_latest_combined/complete_df.pkl.
temp_store_dir = './store_hit_results_metadata/group'
group_n = '_latest_combined/'
combined_store_dir = temp_store_dir + group_n
try:
    os.mkdir(combined_store_dir)
except OSError:
    # The old `except:` followed by a bare `OSError` expression swallowed every
    # error. Only "directory already exists" is expected, so re-raise the rest.
    if not os.path.isdir(combined_store_dir):
        raise

complete_results_file = 'complete_df.pkl'
combined_results_df.to_pickle(combined_store_dir + complete_results_file)

## Accepting and deleting HITs... careful with these

Uncomment only when ready to accept or delete hits

### accepting HITS

### rejecting HITS, banning bad workers

In [122]:
def reject_bad_assignments(rejected_assignment_ids, rejected_worker_ids):
    """Reject each given assignment, then notify the given workers once.

    Uses the module-level ``mturk`` connection.

    Args:
        rejected_assignment_ids: iterable of assignment ids; each is passed to
            ``mturk.reject_assignment``.
        rejected_worker_ids: worker ids passed to ``mturk.notify_workers``
            together with the fixed rejection subject and message.

    Returns:
        None. No notification is sent when there were no assignments to reject.
    """
    rejection_message_subject = "One of your HITs was rejected"

    rejection_message = """
        Your HIT was rejected because it was either incomplete or largely incorrect.
        """

    rejected_count = 0
    for assignment in rejected_assignment_ids:
        mturk.reject_assignment(assignment)
        rejected_count += 1

    # The notification used to sit inside the loop, so every listed worker was
    # messaged once per rejected assignment. Send it a single time instead.
    if rejected_count:
        mturk.notify_workers(rejected_worker_ids, rejection_message_subject, rejection_message)

In [155]:
workers_to_ban = bad_and_prolific_workers[:2].index.tolist()

In [156]:
workers_to_ban

In [157]:
number_rejected_assignments, number_rejected_workers = amt_util.reject_assignments(mturk, workers_to_ban, combined_consensus_with_workerid_df)
print 'rejecting ' + str(number_rejected_assignments) + ' assignments' + ' from ' + str(number_rejected_workers) + ' workers'

In [183]:
assignment_results_after_rejects = amt_util.get_assignments(mturk, r_hits_after_rejects)

In [174]:
# Collect the status of every assignment retrieved after the rejections, and
# gather the ones still in 'Submitted' state as candidates for acceptance.
# Both lists are initialised here: with the initialisation commented out the
# cell raised a NameError on a fresh kernel and appended duplicates on re-runs.
assignments_to_accept = []
assignment_status = []
for hit_id, assignments in assignment_results_after_rejects.items():
    for assignment in assignments:
        assignment_status.append(assignment.AssignmentStatus)
        if assignment.AssignmentStatus == 'Submitted':
            assignments_to_accept.append(assignment)
status_series = pd.Series(assignment_status)

In [182]:
r_hits_after_rejects = amt_util.get_completed_hits(mturk)

### deleting all HITS

In [414]:
# amt_util.delete_all_hits(mturk)

# End

In [47]:
images_to_test = ['1262.png', '1311.png', '1315.png', '1503.png', '193.png', '2822.png', '4106.png', '4138.png', '4930.png', '4947.png', '4949.png', '61.png','77.png']

In [None]:
# Load the stored list of bad images.
# Pickle data is binary, so the file must be opened in 'rb' mode; text mode can
# corrupt the stream on some platforms and fails outright on Python 3.
# NOTE(review): pickle.load can execute arbitrary code; only load files you created.
with open('bad_images.pkl', 'rb') as f:
    bad_images = pickle.load(f)

bad_pages = [app_url_base + page + '&hd=' + str(img_dim_lookup[page][0]) +'&vd=' + str(img_dim_lookup[page][1]) for page in bad_images]
# pages_to_use = [app_url_base + page + '&hd=' + str(img_dim_lookup[page][0]) +'&vd=' + str(img_dim_lookup[page][1]) for page in bad_images]