In [1]:
import mturk
import random
from datetime import datetime
import json
from pprint import pprint
import copy
import numpy as np
from collections import OrderedDict
import pandas as pd
import pymongo
import botocore
import uuid
import pdb
import seaborn as sns
import matplotlib.pyplot as plt
#import pingouin as pg
import krippendorff_alpha as ka

# Prep: load data files, connect to MongoDB and MTurk

In [2]:
# Base directories for the campaign's task data and task configuration files.
data_folder = '../data/campaign/'
config_folder = '../config/campaign/'

# Language-test questions; the HIT-creation cell samples from language_tests['en'].
with open('../data/language_tests.json', mode='r', encoding='utf-8') as tests_file:
    language_tests = json.loads(tests_file.read())

# MongoDB credentials; the "connection_string" entry is what the client is built from.
with open('../config/mongodb_credentials.json', mode='r') as credentials_file:
    mongodb_credentials = json.loads(credentials_file.read())

In [3]:
""" Connect to the MongoDB database and pick the collection that stores the HIT results.
Set create_hits_in_production to True to use the marketplace and to False to use the sandbox (testing the HITs).
Set is_pilot to True to keep the results in a separate '_pilots' collection. """
create_hits_in_production = True
is_pilot = False

db_client = pymongo.MongoClient(mongodb_credentials["connection_string"])
db = db_client['textual_entailment']

# 'hit_results', optionally followed by '_sandbox' and/or '_pilots'.
collection_name = ''.join([
    'hit_results',
    '' if create_hits_in_production else '_sandbox',
    '_pilots' if is_pilot else '',
])

hit_result_collection = db[collection_name]
hit_result_collection

Collection(Database(MongoClient(host=['cluster0-shard-00-00.hjstc.mongodb.net:27017', 'cluster0-shard-00-02.hjstc.mongodb.net:27017', 'cluster0-shard-00-01.hjstc.mongodb.net:27017'], document_class=dict, tz_aware=False, connect=True, retrywrites=True, w='majority', authsource='admin', replicaset='Cluster0-shard-0', ssl=True), 'textual_entailment'), 'hit_results')

In [4]:
from importlib import reload

# Re-import the local mturk helper module so that edits made to it since the
# kernel started are picked up, then build the client wrapper from the fresh module.
mturk = reload(mturk)
mt = mturk.MTurk()
# The production/sandbox flag is handed to the helper; presumably it selects the
# matching MTurk endpoint -- confirm in mturk.py.
mt.launch_client(create_hits_in_production)
# Show which result collection the rest of the notebook will read and write.
collection_name

47861.00


'hit_results'

In [5]:
""" Ban the spammers! Set to_ban to True to block every worker ID listed in the banlist file. """
to_ban = False
if to_ban:
    # NOTE(review): every other file in this notebook is opened relative to '../';
    # confirm that './config/banlist.json' is really where the banlist lives.
    with open('./config/banlist.json','r') as f:
        banlist = json.load(f)
    # Workers whose block request was rejected, so the operator can follow up on them.
    failed_blocks = []
    for w in banlist:
        try:
            response = mt.client.create_worker_block(
                WorkerId=w,
                Reason='You are copy and pasting text'
            )
            assert response['ResponseMetadata']['HTTPStatusCode'] == 200
        except botocore.exceptions.ClientError as e:
            # Keep going with the rest of the list, but say who could not be blocked and why
            # instead of silently dropping the failure.
            failed_blocks.append(w)
            print(f'Could not block worker {w}: {e}')
    if failed_blocks:
        print(f'{len(failed_blocks)} of {len(banlist)} workers were NOT blocked: {failed_blocks}')

# Exec: generate the tasks, estimate the cost, launch the HITs and dispose of finished ones

In [6]:
""" Create the tasks by populating the HTML templates using the config file """

task_types = ['single','multiple']
task_type = task_types[1]

# Task configuration for the chosen task type. Read explicitly as UTF-8, like the other
# text files in this notebook, so the result does not depend on the platform's default encoding.
with open(config_folder + 'task_config_{}.json'.format(task_type), 'r', encoding='utf-8') as f:
    task_content = json.load(f)

# Keyword arguments later passed to mt.create_hit(...) for every HIT.
TaskAttributes = task_content['task_attributes']

with open(task_content['html_layout'], 'r', encoding='utf-8') as f:
    html_layout = f.read()

# Fill in the placeholders that are the same for every HIT.
# str() keeps this working if the config stores the time threshold as a number.
html_layout = html_layout.replace('${time_thr}$', str(task_content['time_thr']))
if is_pilot:
    html_layout = html_layout.replace('${pilot_wording}$',
    '''
    <li>
        <span style="color:red">This is a pilot.</span> Helpful and constructive feedback, regardless of
        whether you finished the task, will be compensated. If there are technical errors that prevented
        you from finishing, let us know and we will take them into consideration.
    </li>
    '''
    )
else:
    html_layout = html_layout.replace('${pilot_wording}$','')

# The task sets themselves; each entry is later read through its '_id' and 'taskSet' keys.
with open(data_folder + task_content['tasks'], 'r', encoding='utf-8') as f:
    taskSets = json.load(f)

# If you're only testing, just pick one hit and run it once, with no qualification barriers
if not create_hits_in_production:
    # A default avoids a KeyError when the config defines no qualification requirements.
    TaskAttributes.pop('QualificationRequirements', None)
    TaskAttributes['MaxAssignments'] = 1
    random.seed(42)
    #taskSets = random.sample(taskSets,1)

taskSets_dict = {
    'html_layout' : html_layout,
    'taskSets' : taskSets,
    'TaskAttributes' : TaskAttributes,
    'task_content': task_content
}

print(f'Generated {len(taskSets)} tasks with the following configs:')

pprint(TaskAttributes,indent=1) #verify the properties before running the HITs

Generated 91 tasks with the following configs:
{'AssignmentDurationInSeconds': 10800,
 'Description': 'Help us by fact-verifying an affirmation. You should have '
                'reading proficiency in English.',
 'Keywords': 'English, Reading, Fact-verification',
 'LifetimeInSeconds': 604800,
 'MaxAssignments': 5,
 'QualificationRequirements': [{'ActionsGuarded': 'DiscoverPreviewAndAccept',
                                'Comparator': 'GreaterThanOrEqualTo',
                                'IntegerValues': [80],
                                'QualificationTypeId': '000000000000000000L0'},
                               {'ActionsGuarded': 'DiscoverPreviewAndAccept',
                                'Comparator': 'GreaterThan',
                                'IntegerValues': [1000],
                                'QualificationTypeId': '00000000000000000040'}],
 'Reward': '1.0',
 'Title': 'Verifying an affirmation with multiple evidence.'}


In [7]:
""" See how many assignments this batch will request, i.e. for every task set the target number of
assignments minus the assignments already completed according to the database.
Multiply the resulting number by the payment to see how much money this batch will consume. """

l=[]  # remaining assignments per task set
done_count=0
target_assignments = TaskAttributes['MaxAssignments']
for taskSet in taskSets:
    TaskAttributes_hit = copy.deepcopy(TaskAttributes)
    completed_assignments = sum(
        hit['hit']['NumberOfAssignmentsCompleted']
        for hit in hit_result_collection.find({
            'taskSet_id':taskSet['_id'],
            'type': task_content['type'],
        })
    )
    # Clamp at zero: a task set that collected more assignments than the target must neither
    # reduce the total (and the cost estimate) nor be missed by the "finished" count.
    TaskAttributes_hit['MaxAssignments'] = max(0, target_assignments - completed_assignments)
    l.append(TaskAttributes_hit['MaxAssignments'])
    if TaskAttributes_hit['MaxAssignments'] == 0:
        done_count += 1
print(f'Remaining HITs: {sum(l)}')
print(f'Tasks totally finished: {done_count}')
# The 1.2 factor is presumably the reward plus a 20% MTurk commission -- confirm against the
# current fee schedule before relying on this number.
print(f'Expected cost: ${sum(l)*float(TaskAttributes["Reward"])*1.2}')

Remaining HITs: 455
Tasks totally finished: 0
Expected cost: $546.0


In [8]:
""" Create the batch of HITs """

# Records of the HITs created by this run; every record carries the same fresh batch id.
results = []
batch_id = str(uuid.uuid4())

#start_from = 0

# HITTypeId of the most recently created HIT; stays '' if no HIT gets created.
hit_type_id = ''
target_assignments = TaskAttributes['MaxAssignments']
for idx, taskSet in enumerate(taskSets):
    print(f"{idx}: {taskSet['_id']}")
    #if idx<start_from:
    #    continue
    # Request only what is still missing: the target minus the completed assignments
    # recorded in the database for this task set and task type.
    TaskAttributes_hit = copy.deepcopy(TaskAttributes) # Adjust based on how many were already done in other batches
    TaskAttributes_hit['MaxAssignments'] = target_assignments -\
        sum([hit['hit']['NumberOfAssignmentsCompleted'] for hit in hit_result_collection.find({
            'taskSet_id':taskSet['_id'],
            'type': task_content['type']
        })])
    # Task sets that already reached (or exceeded) the target are skipped.
    if TaskAttributes_hit['MaxAssignments'] > 0:
        try:
            # Re-seed from system entropy so the question sample is not tied to the
            # fixed seed (42) that the task-generation cell sets in sandbox mode.
            random.seed(None)
            # Four English language-test questions, injected into the page as JSON.
            language_questions = random.sample(language_tests['en'],k=4)
            # NOTE(review): the task set is injected with str() while the questions use
            # json.dumps() -- confirm the template expects the str() form.
            response = mt.create_hit(
                html_layout.replace('${affirmation_evidence_pairs}$', str(taskSet['taskSet'])).\
                            replace('${attention_questions}$', json.dumps(language_questions)),
                **TaskAttributes_hit
            )

            hit_type_id = response['HIT']['HITTypeId']
            # Database record for the new HIT, keyed by the HIT id returned in the response.
            result = {
                '_id': response['HIT']['HITId'],
                'batch_id': batch_id,
                'type': task_content['type'],
                'taskSet': taskSet['taskSet'],
                'attention_test': language_questions,
                'taskSet_id':taskSet['_id'],
                'hit': response['HIT'],
                'timestamp': datetime.now()
            }
            results.append(result)
            hit_result_collection.insert_one(result)
        except botocore.exceptions.ClientError as e:
            print(e.__dict__)
            if e.response['Error']['Code'] == 'RequestError':
                # Not enough funds
                print("Funds ran out! The last hit above was not launched! Please recharge!")
                break
            elif e.response['Error']['Code'] == 'ThrottlingException':    
                # NOTE(review): the debugger stops here until the operator resumes; after that
                # the loop moves on, so this task set gets no HIT in this run.
                pdb.set_trace()              
                print("Turn off the database updater!")
                continue
            else:
                # Any other client error: stop in the debugger, then re-raise.
                pdb.set_trace()            
                raise
        except Exception as e:
            # Non-client errors (e.g. raised by the database insert): stop in the debugger, then re-raise.
            # NOTE(review): pdb.set_trace() blocks waiting for input, so this cell cannot run unattended.
            pdb.set_trace()            
            raise
        #except:
        #    pdb.set_trace()            
        #    raise

# For you to go to the HITs you just created and test them
# NOTE(review): these messages are printed even if the loop stopped early or created nothing;
# in that case hit_type_id is still '' and the preview link has an empty groupId.
print('Launched tasks')
if not create_hits_in_production:
    print('You can view the HITs here:')
    print(mt.mturk_environment['preview']+"?groupId={}".format(hit_type_id))
else:
    print('Launched! Good Luck!')
    
print('Batch ID is',batch_id)

0: 11ecadad-c969-4854-ba28-add296d88fa9
1: 65ed483e-948b-44c5-9dad-54548bf4eab3
2: d5609d87-482e-47c9-8516-bc5b8446f625
3: 7e5ae0cb-bb27-4bed-bfeb-6fb741c89815
4: c47ac57d-9437-44e9-94bf-492c2f64bf54
5: 601161d4-282c-4b8d-ad0e-c8ee64fc439c
6: e3a5b6cc-c8e2-44da-b84f-23e01dc2668e
7: 7f1b7353-545c-43fe-8ffd-7aa9068eb127
8: 74b6737e-0cb4-4224-b726-87067c2dbb0e
9: c77ca64d-2f1d-4881-a2f5-f28d5161bc3e
10: 245e799f-15ca-4f64-9c4b-be63755ba684
11: ed78e074-fa1b-4695-bb7c-287a9420e7f2
12: 0ef73ba7-7bf3-447d-95f3-882d2945861f
13: 3997de30-1e52-41b9-8890-2eef86557579
14: a158f0c2-04d9-4ff6-9f7a-fdf4eda22015
15: 5402a8f6-fffb-4bf9-abee-de9bab804217
16: 89c05aef-337e-4971-ba12-4f95d4fc3436
17: 1a674890-af55-4119-a172-c67edc147a94
18: 670624cc-b1dd-43db-ab19-e984550ec5fb
19: 0e7219e6-d7ae-45f7-9047-d0afc13128b4
20: 43195bcf-5a8e-4926-ad47-6e95d5c4aebc
21: de5878cd-0a2a-4b83-9b46-35e18d5e811b
22: d15531b7-de73-4c14-af13-5ad9647b78e6
23: 66e2501e-36d3-4fd7-83a4-52bfc6af9727
24: 5496a89d-6a2d-463c-b06

In [5]:
# REMOVING FINISHED TASKS FROM THE UPDATE QUEUE

""" With 'force' set to False, only HITs recorded with no pending and no available assignments are deleted
(so the update routine won't loop through tons of finished HITs).
With 'force' set to True (abort mission), every HIT recorded with no pending assignments is deleted; when the
plain delete fails, the HIT is expired first and the delete is retried. """
force = False
while True:
    # Candidates: HITs not yet recorded as Disposed that have no pending assignments ...
    query = {
        'hit.HITStatus': {'$ne': 'Disposed'},
        'hit.NumberOfAssignmentsPending': 0,
    }
    #query['type'] = 'relevance'
    if not force:
        # ... and, unless forcing, no assignments left for workers to pick up either.
        query['hit.NumberOfAssignmentsAvailable'] = 0
    hit_result_collection_list = list(hit_result_collection.find(query))

    # Forced runs end when MTurk lists no HITs at all; normal runs end when the
    # database has no candidates left.
    # NOTE(review): nothing in this cell updates 'hit.HITStatus' in the database, so
    # termination relies on something outside this cell changing those records.
    if force:
        finished = mt.client.list_hits()['NumResults'] == 0
    else:
        finished = len(hit_result_collection_list) == 0
    if finished:
        print('Finished')
        break

    for hit in hit_result_collection_list:
        try:
            mt.client.delete_hit(HITId = hit['_id'])
            print('Removed',hit['_id'])
        except Exception:
            if not force:
                # Best effort: leave this HIT for a later pass.
                continue
            try:
                # Move the expiry into the past, then retry the delete.
                mt.client.update_expiration_for_hit(HITId = hit['_id'], ExpireAt=datetime(2017, 1, 1))
                mt.client.delete_hit(HITId = hit['_id'])
                print('Removed',hit['_id'])
            except Exception:
                # Still not deletable; ignored, a later pass tries again.
                pass

Removed 3HXK2V1N5NR9FY2U0NUF7FGZX4EG2L
Removed 34F34TZU8ZBJ60SV8TFI4S8FPWVJ2S
Removed 3NRZ1LDP8ZI5YMY5SDTGOZ4YJ40PZ6
Removed 306996CF7ZW6ZWCN1X2N1LFXPL8B1I
Removed 31MCUE39CNYY0GYD4N8KQPLR3IEG3W
Removed 375VMB7D5MVAEZ65N0EZOAKJ98VDIJ
Removed 3VADEH0UIF98XACRXLZ8S67VWP0SP4
Removed 3SU800BH9949CZ5AM39X7CFTMI7UQD
Removed 3FO95NVK6FCT9NS6AD3XX17AX7OSRW
Removed 35O6H0UNMVSB5XBIQCHG2ZBR96MJ5T
Removed 3X7837UUBGA7O1BWVVRLU5PR51XJ69
Removed 3SBX2M1TLGZAV1BV7XIFU8HZN5JQ4M
Removed 3XWUWJ18UO214SCR3CT2HDCIP3DUUI
Removed 3421H3BMADTU6KDUT2UXEFGTV25J9K
Removed 36QZ6V159CPLPFKNE7Y22CUU46SSU0
Removed 30Y6N4AHZS8N7Y9ODN8822LAW4EDRN
Removed 3SSN80MU9F0FI01AE30NVX2OHUWXKG
Removed 3JHB4BPSGNLBS7H7OSE6BGSF9XXQ9I
Removed 3R0WOCG22PLFNQ15HH3D7D8U33IDUR
Removed 3UQ1LLR27DKE6TBZOTKAQP4559SLA3
Removed 3P4C70TRNUTFGKO2ATCKMBF8345GLA
Removed 3IYI9285XVCW4IKX5M4OG5H11N8JC1
Removed 371QPA24D506VI641JT81FLJVS8T1Q
Removed 3QQUBC640HQGQ6MDBGS3NPJSBP1XNL
Removed 322ZSN9Z6JWNNH3NO1CAZDSM7CIT43
Removed 3TL87MO8DP192KS7Z

Removed 3SA4EMRVKYE8W1SFOJGOI01C1XHP0E
Removed 3RWO3EJEMKL83QDXK2KOW5KM6PNP1H
Removed 3X55NP42FRS4DTRC76Y7KYZJ4A23PQ
Removed 3OWZNK3RZO1SKG3PKSJWBUHQ6CTU2Z
Removed 33J5JKFML9APD5SR7JDWK3PT96O3QH
Removed 38LRF35D6O8H5YZ8OSX1U4MICQTU3A
Removed 33CLA8O0NLNK5INK4T3YQ2OV8HURFC
Removed 341H3G5YG3Q2A5ULBSEQ9Y3QBXP0ZD
Removed 3ZVPAMTJXQFQJIJFFD9G8NO79WURGB
Removed 3HA5ODM5LD4L2U9NJX87YYWDID1VSS
Removed 39N6W9XWSGZZGJV1T81AGJH4CJ9YGK
Removed 3DGDV62G8RLFSBVT36K21OURYV1P2Y
Removed 3SR6AEG6X85DGFQCDEJS1JT5RJMYHJ
Removed 3XDJY5RK6VX2NUICO62K49EOQQPU4V
Removed 386659BNUOT5F99D3H5YH4JEYIC10P
Removed 3X55NP42FRS4DTRC76Y7KYZJ491P39
Removed 3M7OI89LW10KGN5QL3AZ71CWFIP6CL
Removed 3D5G8J4N6DGMHUM28XDX83SZKWFVTH
Removed 3P520RYKDKIKJDMC2BLU91K2QMKU50
Removed 3WRKFXQBPEJDCA1WJW1D1VUKRW1YIC
Removed 3W5PY7V3VSJ97LL21EMDDW9K4BQYJO
Removed 3UYRNV2KJWBQB5L538GB7Q2ZKHT8NR
Removed 3HO4MYYR250G2HMQ7UVZ17Y2MHVU6G
Removed 3BS6ERDLA6JULFB4LZ5AR7UE0I16DL
Removed 3AA88CN99SF4I5RKJZMPEB9XJ0QYKN
Removed 3HJ1EVZS3RVPP67RK

Removed 3UAU495MJL46RNSU7QHTI7KT7HPUO5
Removed 3O0M2G5VD9E432U9QV7T8DRAD6G49H
Removed 3INZSNUD932H3YD2J68MBHLGQJ99DZ
Removed 3V0TR1NRWDEI8L4EI1LT482UJOY4AW
Removed 3R5LWXWHS3KAGE5ROCF8X5AVAEGGXZ
Removed 39N6W9XWSGZZGJV1T81AGJH4CKAGY5
Removed 378G7J1SKOAX704A8CTYVJBCOVUWE2
Removed 3IZVJEBJ7DWXOGG0DHU7IZDHOZAZ6A
Removed 3IH9TRB0GEBGM4QT77U4N8JAKCT1IK
Removed 3R4QIDVOKSO3Q0IJNCZS9D4CHSQEE7
Removed 3R868ACW5VOZL956PU3TUQQ6IEEGZW
Removed 3IVEC1GSMSBCHFAZPPF4Z9YAXRI1JW
Removed 3RQVKZ7ZSNVQM5RT7KQL1236Y2W72C
Removed 311HQEI8SVSGSGU5WRZZODDDJAUZ79
Removed 3W0XM68Y0S77SZGH7AFG0OYNCGI1KV
Removed 3909MD9T32TVWEIFINJYRVPCTT5EFP
Removed 3V7ICJJA0DSNRV96OFLPZJM016J4BR
Removed 335VBRUREMCMV4R2F1J6HZ31QVA9ES
Removed 3L1EFR8WXWHE1IRYAC3CZHO12WP9FA
Removed 33NOQL7TARBKSUZO6WR52D98UU0Z8W
Removed 35JDMRECD7L1AEEAT7PG9GPOU85EGO
Removed 3P4ZBJFX3YFPDE463ND4D1WC0W9WFK
Removed 3BFNCI9LZN2F7NNCBK4QKC8Y4GW73N
Removed 3WYZV0QBGMPEPXXG8QNUWUJYU1OBXD
Removed 3LN3BXKGD37VRE01E7JMVMWO1B9WGJ
Removed 3EGKVCRQGZ43P2NQD

# MISC: account balance, bonuses and manual clean-up

In [36]:
# Ask MTurk for the account balance; the response carries it under 'AvailableBalance'.
mt.client.get_account_balance()

{'AvailableBalance': '10000.00',
 'ResponseMetadata': {'RequestId': 'cac83b9b-d08b-477e-a444-cb6347002975',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'cac83b9b-d08b-477e-a444-cb6347002975',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '31',
   'date': 'Wed, 06 Oct 2021 16:05:21 GMT'},
  'RetryAttempts': 0}}

In [8]:
#mt.client.send_bonus(
#    WorkerId='-',
#    BonusAmount='-',
#    AssignmentId='-',
#    Reason='feedback'
#)

{'ResponseMetadata': {'RequestId': 'a038b160-0721-4475-bbfd-ee73cf5fe3b9',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'a038b160-0721-4475-bbfd-ee73cf5fe3b9',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '2',
   'date': 'Fri, 13 Aug 2021 18:45:26 GMT'},
  'RetryAttempts': 0}}

In [38]:
# CLEANING LEFTOVERS

# List up to 100 HITs known to MTurk for this account; an empty 'HITs' list
# (NumResults 0) means there is nothing left to clean.
mt.client.list_hits(MaxResults=100)
# Manual recipe for a single leftover HIT (uncomment and fill in hitid):
# move its expiry into the past, fetch its answers with approve=True, then delete it.
#hitid = '3M93N4X8INZCBZ9T28UOVALJBK2SJO'
#mt.client.update_expiration_for_hit(HITId = hitid, ExpireAt=datetime(2018, 1, 1))
#mt.get_hit_answers(HITId = hitid, approve=True)
#mt.client.delete_hit(HITId = hitid)

{'NumResults': 0,
 'HITs': [],
 'ResponseMetadata': {'RequestId': '473b5051-fa30-48cd-8b87-4bd024bfebdf',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '473b5051-fa30-48cd-8b87-4bd024bfebdf',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '26',
   'date': 'Wed, 06 Oct 2021 16:07:04 GMT'},
  'RetryAttempts': 0}}