# Table of Contents
* [Setup](#Setup)
	* [estimate cost](#estimate-cost)
	* [load dataset](#load-dataset)
* [Submitting HITs](#Submitting-HITs)
* [Retrieve results](#Retrieve-results)
* [Interact with workers](#Interact-with-workers)
* [Accepting and deleting HITs... careful with these](#Accepting-and-deleting-HITs...-careful-with-these)


In [1]:
%%capture
%load_ext autoreload
%autoreload 2

import pickle
import boto3
import json
import os
from copy import deepcopy
from tqdm import tqdm

from IPython.core.display import HTML

from keysTkingdom import mturk_ai2

from mturk.mturk import MTurk

In [612]:
import os
import random
import datetime

In [10]:
def unpickle_this(file_name):
    """Load and return the object pickled in ``file_name``.

    The file is opened in binary mode and closed again before returning.
    NOTE: ``pickle.load`` can execute arbitrary code, so only use this on
    files from a trusted source.
    """
    with open(file_name, mode='rb') as pickle_file:
        return pickle.load(pickle_file)

# data

In [5]:
# Load the pickled dataset object; the handle is closed as soon as loading ends.
with open('./data/complete_dataset_3_3.pkl', mode='rb') as dataset_file:
    complete_ds = pickle.load(dataset_file)

In [6]:
# Base URL of the S3 location (us-west-2) under annotation_data/still_frames/.
s3_base_path = 'https://s3-us-west-2.amazonaws.com/ai2-vision-animation-gan/annotation_data/still_frames/'

# Submitting HITs - metric collection

In [40]:
# Build the project's MTurk wrapper from the access key / secret key held on mturk_ai2.
mt_client = MTurk(mturk_ai2.access_key, mturk_ai2.access_secret_key)

Account balance is: $10000.00


In [148]:
# Fixed HIT settings passed to mt_client.create_html_hit for every HIT.
static_params = dict(
    Title="sentence word marking",
    Description="task description",
    Keywords='task, key, words',
    frame_height=1000,
    Reward='0.01',  # kept as a string, exactly as the original f'{0.01}' produced
    AssignmentDurationInSeconds=60 * 60,  # one hour
    LifetimeInSeconds=2 * 24 * 60 * 60,  # two days
    MaxAssignments=1,
)

# Which HTML template file to use and the directory it lives in.
template_params = dict(
    template_file='task_template_v2.html',
    template_dir='hit_templates',
)

In [82]:
# Split the corrected descriptions by whitespace-token count: strictly fewer
# than the threshold is "short", everything else is "very long".
sent_len_threshold = 30
current_batch_short_sent = {}
current_batch_very_long_sent = {}
for vid, desc in working_corrected.items():
    if len(desc.split()) < sent_len_threshold:
        current_batch_short_sent[vid] = desc
    else:
        current_batch_very_long_sent[vid] = desc

# One template-argument dict per short description; keep the first as a sample.
sents = [
    dict(image_id=vid, formatted_description=sent)
    for vid, sent in current_batch_short_sent.items()
]
sent = sents[0]

In [888]:
# Question shown above each button group, keyed by button category.
cat_headers = dict(
    entities='Are any of the following people/places/things missing from the video?',
    verbs='Do all of the actions highlighted in yellow take place in the video?',
)

In [887]:
# Verbs that are never offered as active (highlighted) verb buttons.
verb_blacklist = {'is', 'are', 'was'}

In [1106]:
def gen_template_args_from_datapoint_v1(video):
    """Build the v1 template arguments for one video.

    The description is terminated with a period if it lacks one, apostrophes
    are removed, and the text is split on '.' into sentences, each of which is
    split on whitespace into words.

    Args:
        video: object exposing ``description()`` (a str) and ``gid()``.

    Returns:
        dict with ``'image_id'`` (``video.gid()``) and
        ``'formatted_description'`` (a list of word lists, one per sentence).
        An empty description yields ``[[]]``.
    """
    description = video.description()
    # endswith() also works on an empty string, where indexing [-1] raised
    # IndexError.
    if not description.endswith('.'):
        description += '.'
    description = description.replace('\'', '')
    # The text always ends with '.', so the final split element is the empty
    # remainder after it and is dropped.
    sentences = description.split('.')[:-1]
    formatted_description = [sentence.split() for sentence in sentences]
    return {'image_id': video.gid(), 'formatted_description': formatted_description}

def gen_template_args_from_datapoint(video):
    """Build the button-markup template arguments for one video.

    Two button groups are rendered with ``form_button_arr_js``:

    * entities: one single-button row per character and object
      ``entityLabel``, plus a final row for ``video.setting()``; every
      entity button is active.
    * verbs: one row per sentence of the description, with a button per
      word; only words tagged VBG/VBZ/VBP in ``data()['parse']['pos_tags']``
      and not in ``verb_blacklist`` are active.

    Args:
        video: object exposing ``description()``, ``gid()``, ``setting()``
            and ``data()`` (a mapping with 'characters', 'objects' and
            'parse' entries).

    Returns:
        dict with ``'image_id'``, ``'verb_button_js'`` and ``'ent_button_js'``.
    """
    description = video.description()
    # endswith() also works on an empty string, where indexing [-1] raised
    # IndexError.
    if not description.endswith('.'):
        description += '.'
    description = description.replace('\'', '')
    # The text always ends with '.', so the last split element is empty.
    formatted_description = [sent.split() for sent in description.split('.')[:-1]]

    data = video.data()
    vid_ents = [[c.data()['entityLabel']] for c in data['characters'] + data['objects']]
    vid_ents.append([video.setting()])

    verb_tags = {'VBG', 'VBZ', 'VBP'}
    # Each pos_tags entry is indexed as (word, tag).
    vid_verbs = [
        tagged[0]
        for tagged in data['parse']['pos_tags']
        if tagged[1] in verb_tags and tagged[0] not in verb_blacklist
    ]

    ent_button_js = form_button_arr_js(vid_ents, [v[0] for v in vid_ents], ('entities', cat_headers['entities']))
    verb_button_js = form_button_arr_js(formatted_description, vid_verbs, ('verbs', cat_headers['verbs']))

    return {'image_id': video.gid(), 'verb_button_js': verb_button_js, 'ent_button_js': ent_button_js}

def form_button_arr_js(buttons, active, category=None):
    """Render rows of HTML ``<input type='button'>`` elements as one string.

    Args:
        buttons: sequence of rows, each a sequence of button label strings.
        active: container of labels that stay clickable and get a yellow
            background; all other labels are rendered disabled.
        category: ``(key, header_text)`` pair. ``header_text`` becomes the
            ``<h4>`` heading and ``key`` prefixes the id of the trailing
            'None Missing' button. Required whenever ``buttons`` is non-empty.

    Returns:
        The concatenated markup, or ``''`` when ``buttons`` is empty.
        Labels are inserted into the markup without escaping.
    """
    if not buttons:
        return ''

    pieces = [f"<br><h4>{category[1]}</h4>"]
    for row_idx, row in enumerate(buttons):
        pieces.append('<br>\n')
        for col_idx, label in enumerate(row):
            # Ids are the label with whitespace collapsed to '_', followed by
            # its row and column position.
            button_id = f"{'_'.join(label.split())}_{row_idx}_{col_idx}"
            if label in active:
                disabled_attr, colour = '', '#FFFF4C'
            else:
                disabled_attr, colour = 'disabled=true', ''
            pieces.append(
                f"<input type='button' onclick=\"record_value('{button_id}');\""
                f" value='{label}' id= '{button_id}' {disabled_attr}"
                f"  style=background-color:{colour} />"
            )

    none_id = category[0] + '_None'
    pieces.append(
        f"<br><input type='button' onclick=\"record_value('{none_id}');\""
        f" style=background-color:#ff0000 value='None Missing' id='{none_id}' />"
    )
    return ''.join(pieces)

In [1140]:
# Look up a single video by id; this is the datapoint used for the test HIT.
test_vid = complete_ds.get_video('s_06_e_13_shot_041125_041199')

In [1146]:
# Show the description of the video whose fields are copied onto demo_vid.
complete_ds.get_video('s_01_e_03_shot_023155_023229').description()

'Wilma and Betty are in the back yard. They are leaning against either side of a wall. Betty is talking to Wilma. Then Wilma answers.'

In [1141]:
# Deep copy, so rebinding entries on demo_vid's data does not alter test_vid.
demo_vid = deepcopy(test_vid)

In [1154]:
# for char in demo_vid.data()['characters']:
#     if char.data()['entityLabel'] == 'barney':
#         char.data()['entityLabel'] = 'wilma'
# demo_vid.data()['description'] = demo_vid.description().replace('Barney', 'Wilma')

# demo_vid.data()['setting'] = 'kitchen'
# Overwrite demo_vid's annotation fields with those of another video. The
# copied values are the source video's own objects, not copies of them.
_demo_source_id = 's_01_e_03_shot_023155_023229'
for _field in ('characters', 'objects', 'parse'):
    demo_vid.data()[_field] = complete_ds.get_video(_demo_source_id).data()[_field]

demo_vid.data()['setting'] = complete_ds.get_video(_demo_source_id).setting()
demo_vid.data()['description'] = complete_ds.get_video(_demo_source_id).description()

In [1158]:
# Preview the HIT template filled with demo_vid's button markup
# (presumably rendered locally without creating a HIT; confirm in mturk.mturk).
mt_client.preview_hit_interface(template_params, **gen_template_args_from_datapoint(demo_vid))

In [1153]:
# Create an HTML HIT for test_vid using the static and template parameters;
# the service response is kept in resp.
resp = mt_client.create_html_hit(static_params, template_params, **gen_template_args_from_datapoint(test_vid))

In [986]:
# Rebind test_vid to a uniformly random element of complete_ds.data.
test_vid = random.choice(complete_ds.data)

# Retrieve results

In [1181]:
# del_resp = [mt_client.client.delete_hit(HITId=v['HITId']) for v in all_hits]

In [1189]:
def get_all_hits():
    """Return every HIT listed for the account as a list of HIT dicts.

    ``list_hits`` returns at most 100 HITs per call, so a single call drops
    everything after the first page. This follows the ``NextToken``
    pagination token until the listing is exhausted.
    """
    hits = []
    request = {'MaxResults': 100}
    while True:
        response = mt_client.client.list_hits(**request)
        page = response['HITs']
        hits.extend(page)
        next_token = response.get('NextToken')
        # Stop on an empty page as well, so a token returned alongside the
        # last results cannot keep the loop going.
        if not next_token or not page:
            return hits
        request['NextToken'] = next_token

def expire_hits(hits, exp_date=datetime.datetime(2001, 1, 1)):
    """Set the expiration of every HIT in ``hits`` to ``exp_date``.

    Args:
        hits: iterable of dicts carrying a ``'HITId'`` key.
        exp_date: new expiration time; defaults to 2001-01-01, a date in
            the past.
    """
    for hit in hits:
        mt_client.client.update_expiration_for_hit(HITId=hit['HITId'], ExpireAt=exp_date)

def delete_hits(hits):
    """Call ``delete_hit`` for every HIT dict in ``hits`` (keyed by ``'HITId'``)."""
    for hit in hits:
        mt_client.client.delete_hit(HITId=hit['HITId'])

def force_delete_hits(hits):
    """Expire every HIT in ``hits`` (default past date), then delete them."""
    expire_hits(hits)
    delete_hits(hits)
    
def set_hits_reviewing(hits):
    """Call ``update_hit_review_status`` with ``Revert=False`` for each HIT in ``hits``."""
    for hit in hits:
        mt_client.client.update_hit_review_status(HITId=hit['HITId'], Revert=False)
    
def revert_hits_reviewable(hits):
    """Call ``update_hit_review_status`` with ``Revert=True`` for each HIT in ``hits``."""
    for hit in hits:
        mt_client.client.update_hit_review_status(HITId=hit['HITId'], Revert=True)

In [1172]:
def get_all_assignments(mt_client, all_hits=None):
    """Fetch the Submitted/Approved assignments for each HIT.

    Args:
        mt_client: client object exposing ``list_hits`` and
            ``list_assignments_for_hit`` directly (the notebook passes
            ``mt_client.client``, not the ``MTurk`` wrapper).
        all_hits: HIT dicts carrying ``'HITId'``. When omitted or empty, the
            first page of ``list_hits`` (at most 100 HITs) is used instead.

    Returns:
        A list with one ``list_assignments_for_hit`` response per HIT, in
        HIT order. Each response holds at most 10 assignments.
    """
    # ``None`` replaces the former mutable ``[]`` default; an explicitly
    # passed empty list still triggers the lookup, as before.
    if not all_hits:
        all_hits = mt_client.list_hits(MaxResults=100)['HITs']
    return [
        mt_client.list_assignments_for_hit(
            HITId=hit['HITId'],
            AssignmentStatuses=['Submitted', 'Approved'],
            MaxResults=10,
        )
        for hit in all_hits
    ]

def approve_assignments(assignments):
    """Approve every assignment that is still in the 'Submitted' state.

    Args:
        assignments: iterable of per-HIT responses, each with an
            ``'Assignments'`` list of dicts carrying ``'AssignmentStatus'``
            and ``'AssignmentId'``.

    Each assignment's status is printed. Submitted ones are approved with the
    feedback text 'good' and ``OverrideRejection=False``.
    """
    for hit_response in assignments:
        for entry in hit_response['Assignments']:
            status = entry['AssignmentStatus']
            print(status)
            if status != 'Submitted':
                continue
            assignment_id = entry['AssignmentId']
            print('Approving Assignment {}'.format(assignment_id))
            mt_client.client.approve_assignment(
                AssignmentId=assignment_id,
                RequesterFeedback='good',
                OverrideRejection=False,
            )

In [1173]:
# Collect assignments for the HITs in all_hits, passing the underlying client
# held by the wrapper. NOTE(review): all_hits is not assigned in any visible
# cell; confirm it exists before running.
assignments = get_all_assignments(mt_client.client, all_hits)

In [1174]:
# Approve every collected assignment that is still 'Submitted'.
approve_assignments(assignments)

In [634]:
# mt_client.client.list_hits(MaxResults=100)

In [None]:
# def delete_all_hits(mt_client):
#     all_hits = mt_client.client.list_hits(MaxResults=100)
#     expire_early = [mt_client.client.update_expiration_for_hit(HITId=v['HITId'], ExpireAt=datetime(2000, 1, 1)) for v in all_hits['HITs']]

## check results

In [506]:
# Labels treated as "main" by all_main(); includes the two placeholder
# labels "empty frame" and "no characters".
main_characters = {
    "fred",
    "barney",
    "wilma",
    "betty",
    "pebbles",
    "bamm bamm",
    "dino",
    "mr slate",
    "baby puss",
    "hoppy",
    "empty frame",
    "no characters",
}

def create_result(assmt):
    """Decode one assignment's JSON answer and tag it with HIT and worker ids.

    Args:
        assmt: assignment object exposing ``answers[0][0].fields[0]`` (a JSON
            object string), ``HITId`` and ``WorkerId``.

    Returns:
        The decoded answer dict with ``'h_id'`` and ``'worker_id'`` set.
    """
    raw_answer = assmt.answers[0][0].fields[0]
    return {
        **json.loads(raw_answer),
        'h_id': assmt.HITId,
        'worker_id': assmt.WorkerId,
    }


def all_main(char_set, main_chars=main_characters):
    """Return True when ``char_set`` has no label outside ``main_chars``.

    An empty ``char_set`` therefore yields True.
    """
    return char_set.issubset(main_chars)

In [513]:
# Flatten the per-HIT groups of assignments held in results into one list.
assignments = [
    assignment
    for assignment_group in results.values()
    for assignment in assignment_group
]

assignment_results = [create_result(ar) for ar in assignments]

# For each annotation, record who labelled which still and the set of
# character labels found in its JSON-encoded characterBoxes.
characters_present = [
    {
        'h_id': anno['h_id'],
        'w_id': anno['worker_id'],
        'still_id': anno['stillID'],
        'characters': {ch['label'] for ch in json.loads(anno['characterBoxes'])},
    }
    for anno in assignment_results
]