# Table of Contents
* [Setup - Stage 3](#Setup---Stage-3)
	* [estimate cost](#estimate-cost)
	* [load dataset](#load-dataset)
* [Submitting HITs](#Submitting-HITs)
* [Retrieve results](#Retrieve-results)
	* [check results](#check-results)
* [Interact with workers](#Interact-with-workers)
	* [reject assignments and ban workers](#reject-assignments-and-ban-workers)
	* [pay bonuses](#pay-bonuses)
* [Accepting and deleting HITs](#Accepting-and-deleting-HITs)


In [3]:
%%capture
from __future__ import division
import numpy as np
import pandas as pd
import scipy.stats as st
import itertools
import math
from collections import Counter, defaultdict
%load_ext autoreload
%autoreload 2


import matplotlib as mpl
mpl.use("Agg")
import matplotlib.pylab as plt
%matplotlib inline
%load_ext base16_mplrc
%base16_mplrc dark solarized
plt.rcParams['grid.linewidth'] = 0
plt.rcParams['figure.figsize'] = (16.0, 10.0)


import re
import pickle
import boto
from copy import deepcopy
import json
import os
import jinja2
import random
from tqdm import tqdm

from IPython.core.display import HTML

from boto.mturk.qualification import PercentAssignmentsApprovedRequirement, Qualifications, Requirement

from keysTkingdom import mturk_ai2
from keysTkingdom import aws_tokes
from keysTkingdom import mturk_aristo


from amt_utils.mturk import MTurk
from amt_utils.mturk import pickle_this, unpickle_this

# Setup - Stage 3

## estimate cost

In [None]:
cost_per_hit = 0.04
n_turkers_per_hit = 3
n_hits_per_video = 1

simpler_cost_per_hit = 0.02

n_videos_low = 200
n_videos_high = 52313
# n_videos_high = 20000

low = cost_per_hit * n_turkers_per_hit * n_videos_low * n_hits_per_video
high = cost_per_hit * n_turkers_per_hit * n_videos_high * n_hits_per_video
subtask = simpler_cost_per_hit* n_turkers_per_hit * n_videos_high * n_hits_per_video * 1.38

stage_2 = 0.02 * n_turkers_per_hit * n_videos_high + 0.01 * n_turkers_per_hit * n_videos_high

stage_3 = 0.04 * n_turkers_per_hit * n_videos_high * 0.10 +  simpler_cost_per_hit * n_turkers_per_hit * n_videos_high

stage_4 = 0.05 * 1 * n_videos_high

stage_1 = high + subtask

print(str(low) + ' - $' + str(high))
print(subtask)
print(high + subtask)
print(stage_1, stage_2, stage_3, stage_4)
print(stage_1 + stage_2 + stage_3 + stage_4)

## load dataset

In [53]:
s3_base_path = 'https://s3-us-west-2.amazonaws.com/ai2-vision-animation-gan/annotation_data/subtask_frames/'

s3_base_path_stills = 'https://s3-us-west-2.amazonaws.com/ai2-vision-animation-gan/annotation_data/still_frames/'

s3_base_path_gifs = 'https://s3-us-west-2.amazonaws.com/ai2-vision-animation-gan/annotation_data/scene_gifs/'

s3_base_paths = {
    'subtask': s3_base_path,
    'stills': s3_base_path_stills,
    'gifs': s3_base_path_gifs
}

data_file_path_defunct = '../../build_dataset/stage_1_single_char.pkl'
data_file_path = '../../build_dataset/stage_1_single_char_updated.pkl'

ds_stage_1 = unpickle_this(data_file_path)
# ds_stage_1_selected = unpickle_this('.pkl')
all_stills_to_annotate = []

# for clip in ds_skeleton:
#     for still in clip['keyFrames']:
#         all_stills_to_annotate.append(still)
# len(all_stills_to_annotate)

In [77]:
beta_scene_descriptions = unpickle_this('stage_3_beta_gold_settings.pkl')

In [54]:
prod1_scene_descriptions = unpickle_this('stage3_prod1_settings.pkl')

In [315]:
dataset_v0p1 = unpickle_this('dataset_v0p1.pkl')

In [382]:
remaining_videos = dataset_v0p1[2000:]

In [383]:
videos_to_pass = unpickle_this('../stage_1/vids_to_pass_9_8.pkl')

In [384]:
remaining_videos = [vid for vid in remaining_videos if vid['globalID'] in videos_to_pass]

In [385]:
len(remaining_videos)

6899

In [316]:
# test_3_sample = random.sample(dataset_v0p1, 100)

In [5]:
# all_prod_aids = [aid[:-7] for aid in all_stills_to_annotate]

In [473]:
# with open('complete_clips.txt', 'r') as f:
#     clips_completed_raw = f.read()

# clips_completed = set(clips_completed_raw.split())

In [None]:
%%capture
from __future__ import division
import numpy as np
import pandas as pd
import scipy.stats as st
import itertools
import math
from collections import Counter, defaultdict
%load_ext autoreload
%autoreload 2

In [317]:
# multi_char_vids = unpickle_this('stage_1_multi_char_sample.pkl')

In [80]:
len(multi_char_vids)

4977

In [81]:
len(prod1_scene_descriptions)

5000

In [82]:
stage3a_done_multi = []
for anno in multi_char_vids:
    vid = anno['globalID']
    if vid in prod1_scene_descriptions:
        anno['setting']['location'] = prod1_scene_descriptions[vid]
        stage3a_done_multi.append(anno)

In [83]:
len(stage3a_done_multi)

2190

In [84]:
# stage_3b_beta2 = random.sample(stage3a_done_multi, 100)

# Submitting HITs

In [24]:
from amt_utils.flintstones import prepare_stage_3_hit
from amt_utils.flintstones import prepare_stage_3b_hit

from amt_utils.flintstones import generate_simpler_task_page
from amt_utils.flintstones import write_task_page

from amt_utils.mturk import expected_cost

In [392]:
turk_account = mturk_ai2
rw_host='mechanicalturk.amazonaws.com'
amt_con = MTurk(turk_account.access_key, turk_account.access_secret_key, host=rw_host)
# amt_con = MTurk(turk_account.access_key, turk_account.access_secret_key)
amt_con.get_account_balance()

$2,832.27

In [328]:
static_params = {
    'title': "Describe what happens in a short animation clip",
    'description': "Write 2-3 sentences describing what happens in a scene.",
    'keywords': ['animation', 'image', 'image annotation'],
    'frame_height': 1000,
    'amount': 0.10,
    'duration': 3600 * 1,
    'lifetime': 3600 * 24 * 2,
    'max_assignments': 1,
#     'locales': ['US'],
    'locales': ['US', 'CA', 'AU', 'NZ', 'GB'],
#     'locales': ['IN]
}

In [399]:
build_hit_group = []
for vid_ann in tqdm(remaining_videos[6000:]):
    build_hit_group.append(prepare_stage_3b_hit(s3_base_paths, vid_ann, static_params))
write_task_page(build_hit_group[0]['html'])

100%|██████████| 899/899 [00:06<00:00, 137.87it/s]


In [400]:
expected_cost(build_hit_group, static_params, amt_con)

89.9

**Procedure**

1. assure interface template is working

2. build and load ds skeleton for the sample to be submitted

3. make sure to sync images to s3

4. double check static params and host

5. build and submit

In [402]:
# hit_group = [amt_con.create_html_hit(single_hit) for single_hit in tqdm(build_hit_group)]

# Retrieve results

In [10]:
from amt_utils.flintstones import get_assignments
from amt_utils.flintstones import filter_hits_by_date
from amt_utils.flintstones import filter_hits_by_completion
from amt_utils.flintstones import filter_hits_by_status
from amt_utils.flintstones import get_completed_hits
from amt_utils.flintstones import get_assignments
from amt_utils.mturk import pickle_this

In [11]:
import datetime
import dateutil.parser as dt_parse

In [345]:
%%time
all_hits = amt_con.get_reviewable_hits(detailed=False)

CPU times: user 467 ms, sys: 212 ms, total: 679 ms
Wall time: 18.1 s


In [410]:
%%time
all_hits = amt_con.get_all_hits()

CPU times: user 4.73 s, sys: 917 ms, total: 5.65 s
Wall time: 2min 24s


In [411]:
len(all_hits)

20081

In [412]:
pickle_this(all_hits, 'all_hits_9_13.pkl')

In [4]:
full_hits = unpickle_this('./stage_3_full_hits_8_30.pkl')

In [6]:
flattened_hits = [item for sublist in full_hits for item in sublist]

In [301]:
%%time
all_hits = amt_con.get_all_hits()

CPU times: user 2.58 s, sys: 322 ms, total: 2.9 s
Wall time: 1min 15s


In [413]:
start_date = (2017, 9, 8)
end_date = (2017, 9,13)
most_recent_hits = filter_hits_by_date(tqdm(all_hits), start_date, end_date)
# recent_hits = filter_hits_by_status(recent_hits)

100%|██████████| 20081/20081 [00:02<00:00, 7891.55it/s]


In [414]:
len(most_recent_hits)

6899

In [362]:
test_hits = set([hit.HITId for hit in recent_hits])

In [415]:
pickle_this(most_recent_hits, 'recent_hits_9_13.pkl')

In [363]:
mrh = [hit for hit in most_recent_hits if hit.HITId not in test_hits]

In [364]:
len(most_recent_hits)

2000

In [306]:
len(recent_hits)

100

In [230]:
th1 = recent_hits[0]

In [236]:
task_b_hits = [hit for hit in recent_hits if hit.Title == static_params['title'] ]

In [237]:
len(task_b_hits)

100

In [39]:
creation_times = pd.Series([dt_parse.parse(hit.CreationTime) for hit in recent_hits])

In [199]:
# _ = creation_times.hist(bins = 10)

In [365]:
results = get_assignments(amt_con.connection, mrh)

In [366]:
len(results)

2000

In [353]:
# pickle_this(results, 'stage_3b_beta_1.pkl')

In [367]:
pickle_this(results, 'stage_3b_prod1_results.pkl')

In [312]:
# all_results = get_assignments(amt_con.connection, all_hits)

In [311]:
len(all_results)

11182

## check results

In [171]:
main_characters =  {"fred",
             "barney",
              "wilma",
              "betty",
              "pebbles",
              "bamm bamm",
              "dino",
              "mr slate",
              "baby puss",
              "hoppy",
              "empty frame",
              "no characters"}

def create_result(assmt):
    result = json.loads(assmt.answers[0][0].fields[0])
    result['h_id'] = assmt.HITId
    result['worker_id'] = assmt.WorkerId
    return result


def all_main(char_set, main_chars=main_characters):
    return not bool(char_set.difference(main_chars))

In [172]:
tar1 = ar.answers[0][0]

tar1.fields

NameError: name 'ar' is not defined

In [173]:
assignments =[]
for assignment_triple in list(results.values()):
    assignments.extend(assignment_triple)

assignment_results = [create_result(ar) for ar in assignments]
# characters_present = [{'h_id': anno['h_id'], 'w_id': anno['worker_id'], 'still_id': anno['stillID'], 'characters': set([ch['label'] for ch in json.loads(anno['characterBoxes'])])} for anno in assignment_results]

TypeError: list indices must be integers, not str

In [179]:
results

defaultdict(list,
            {u'33W1NHWFYILWZH7JFBLAD3PUK9RZTI': [],
             u'37MQ8Z1JQFW2AS310AZVY972E2B2Y3': [],
             u'3CESM1J3EJ3M3OR28BTA3P3YJZD6WP': [],
             u'3D7VY91L66XW4OEJ004V57NZZYABMO': [],
             u'3KA7IJSNW752PRBWSGWNMBHU43LBPK': [],
             u'3NCN4N1H1HH2M6ZJC1JVIMCZ4OKBNO': [],
             u'3S4TINXCC1NK4RZVDGJ8XBC4UYPBO3': [],
             u'3S8A4GJRD439OZRXI1OEHNKKCQB6VF': [],
             u'3Y7LTZE0YUMBZUM8FGBKIZBW3NYZU9': [],
             u'3YLPJ8OXX9DND1SEZSWH8AXRWAD4XV': [<boto.mturk.connection.Assignment at 0x109c83690>]})

In [184]:
ta = results['3YLPJ8OXX9DND1SEZSWH8AXRWAD4XV'][0]

In [185]:
ta

<boto.mturk.connection.Assignment at 0x109c83690>

In [186]:
ta.answers[0][0].fields

[u'[{"image_url":"https://s3-us-west-2.amazonaws.com/ai2-vision-animation-gan/annotation_data/scene_gifs/s_05_e_16_shot_021591_021665.gif","description":"blue birds"}]']

In [514]:
characters_present

[{'characters': {u'judge'},
  'h_id': u'31S7M7DAGHQCUSQF0AU15OEW4LOLTV',
  'still_id': u's_06_e_04_shot_014628_014702_40.png',
  'w_id': u'AO235WMFCB1D9'},
 {'characters': {u'hoppy'},
  'h_id': u'3FI30CQHVLJUFLTRDKXEGBQD8LQ6BD',
  'still_id': u's_05_e_22_shot_037596_037670_40.png',
  'w_id': u'AO235WMFCB1D9'},
 {'characters': {u'wilma'},
  'h_id': u'35XW21VSVHEBFSD0BAPBVJIA22ALS6',
  'still_id': u's_02_e_29_shot_005091_005165_40.png',
  'w_id': u'AO235WMFCB1D9'},
 {'characters': {u'fred', u'wilma'},
  'h_id': u'3W3RSPVVGTRRU5540FKBAK0YNZVLUM',
  'still_id': u's_04_e_16_shot_018813_018887_40.png',
  'w_id': u'AO235WMFCB1D9'},
 {'characters': {u'barney', u'door', u'fred'},
  'h_id': u'3HUR21WDDVP3B86YOZKOOZ1YQY6YXD',
  'still_id': u's_01_e_24_shot_033229_033303_40.png',
  'w_id': u'AO235WMFCB1D9'},
 {'characters': {u'hat'},
  'h_id': u'3SMIWMMK62575ZH34TRDJVHEL8PUWI',
  'still_id': u's_02_e_08_shot_019539_019613_40.png',
  'w_id': u'AO235WMFCB1D9'},
 {'characters': {u'barney', u'roof'},


# Interact with workers

## reject assignments and ban workers

In [37]:
def ban_bad_workers(mturk_connection, worker_ids):
    for worker in worker_ids:
        reason_for_block = """
        Your HITs contained many frames with characters, but they were marked 'empty frame'
        """
        print('blocking ' + str(worker))
        mturk_connection.block_worker(worker, reason_for_block)

In [38]:
workers_to_ban = unpickle_this('workers_to_ban_prod_1_4.pkl')

In [39]:
workers_to_ban

{u'A13ISH3PAGHO0A',
 u'A15DOG6AXQ7LGY',
 u'A1CF1W8CP0DHB0',
 u'A1IOMFFEKCWOIT',
 u'A1QP6651C4Q8NS',
 u'A1RULXRXR2XF6M',
 u'A273GB71FE8RIV',
 u'A2CXEAMWU2SFV3',
 u'A2L97S8Z5FZK5N',
 u'A2NJTYYY37DNO',
 u'A2PU4YNWITAQVL',
 u'A2X7BE2FSPUKCK',
 u'A3R6KO03P9QLNO',
 u'A3UPV16IECELHV',
 u'A9ALF1SODWQE5'}

In [40]:
ban_bad_workers(amt_con.connection, list(workers_to_ban))

blocking A2PU4YNWITAQVL
blocking A273GB71FE8RIV
blocking A3R6KO03P9QLNO
blocking A2X7BE2FSPUKCK
blocking A1QP6651C4Q8NS
blocking A2CXEAMWU2SFV3
blocking A2NJTYYY37DNO
blocking A1CF1W8CP0DHB0
blocking A3UPV16IECELHV
blocking A2L97S8Z5FZK5N
blocking A1IOMFFEKCWOIT
blocking A15DOG6AXQ7LGY
blocking A9ALF1SODWQE5
blocking A1RULXRXR2XF6M
blocking A13ISH3PAGHO0A


In [46]:
to_reject = unpickle_this('assignments_to_reject_prod_1_4.pkl')

In [48]:
len(to_reject) * 0.04

378.16

In [43]:
def reject_assignments(mturk_connection, assignments_to_reject):
    feedback_message = """
    Your HITs contained many frames with characters, but they were marked 'empty frame'
    """
    reject_count = len(assignments_to_reject)
    for assignment_id in tqdm(assignments_to_reject):
        try:
            mturk_connection.reject_assignment(assignment_id, feedback_message)
        except boto.mturk.connection.MTurkRequestError:
            print('assignment ' + str(assignment_id) + ' already accepted or rejected')

    return reject_count

In [45]:
# reject_assignments(amt_con.connection, list(to_reject))

## pay bonuses

In [None]:
subject = "More  HITs are available"
message = """
Hello, 

"""

In [None]:
# _ = mturk.notify_workers(good_workers[:20], subject, message)

In [455]:
bonuses = unpickle_this('bonuses_to_pay_3.pkl')

In [458]:
def pay_bonuses(bonuses_to_pay):
    total_payed = 0
    bonus_reason_template = 'For Flinstones character labels written.'
    for aid, vals in tqdm(bonuses_to_pay.items()):
        n_chars = vals['count']
        wid = vals['worker_id']
        bonus_ammount = boto.mturk.price.Price(0.01 * n_chars)
        total_payed += bonus_ammount.amount
        bonus_reason = bonus_reason_template
        amt_con.connection.grant_bonus(wid, aid, bonus_ammount, bonus_reason)
    return total_payed

In [460]:
# pay_bonuses(bonuses)

# Accepting and deleting HITs

Uncomment only when ready to accept or delete hits

reject assignments carefully

In [416]:
latest_results = unpickle_this('all_results_9_13.pkl')

In [417]:
len(latest_results)

20081

In [418]:
latest_assignments =[]
for assignment_triple in list(latest_results):
    latest_assignments.extend(assignment_triple)

len(latest_assignments)

41760

In [354]:
ta1 = latest_assignments[0]

In [419]:
assignments_statuses = [asgmt.AssignmentStatus for asgmt in latest_assignments]

In [420]:
pd.Series(assignments_statuses).value_counts()

Approved     34917
Submitted     6843
dtype: int64

In [350]:
# to_reject = unpickle_this('./assignments_to_reject_prod_1_4.pkl')
# len(to_reject)

In [421]:
assignments_to_accept = [asgmt for asgmt in latest_assignments if asgmt.AssignmentStatus == 'Submitted']

In [422]:
len(assignments_to_accept)

6843

In [423]:
e_count = 0
for assignment in tqdm(assignments_to_accept):
    try:
        amt_con.approve_assignment(assignment)
    except boto.mturk.connection.MTurkRequestError as e:
        e_count += 1

100%|██████████| 6843/6843 [19:57<00:00,  4.87it/s] 


In [373]:
e_count

0

In [None]:
# _ = [amt_con.disable_hit(hit) for hit in hits_today]

In [None]:
# number_rejected_assignments, number_rejected_workers = amt_util.reject_assignments(mturk, workers_to_ban, combined_consensus_with_workerid_df)
# print 'rejecting ' + str(number_rejected_assignments) + ' assignments' + ' from ' + str(number_rejected_workers) + ' workers'

In [282]:
amt_con.get_account_balance()

$10,000.00

In [283]:
amt_con.delete_all_hits()