In [33]:
import boto3
import xmltodict
import json
import pandas as pd
import pyemoji
import html

In [2]:
# Flip to True to create HITs against production; otherwise the Sandbox is used.
create_hits_in_production = False

# Requester API endpoint and worker-facing preview URL for each environment.
# The production values are blanked out here; the original URLs are kept in
# the trailing comments.
environments = dict(
    production=dict(
        endpoint="",  # "https://mturk-requester.us-east-1.amazonaws.com"
        preview="",   # "https://www.mturk.com/mturk/preview"
    ),
    sandbox=dict(
        endpoint="https://mturk-requester-sandbox.us-east-1.amazonaws.com",
        preview="https://workersandbox.mturk.com/mturk/preview",
    ),
)

if create_hits_in_production:
    mturk_environment = environments["production"]
else:
    mturk_environment = environments["sandbox"]

# MTurk client bound to the endpoint of the selected environment.
client = boto3.client(
    service_name='mturk',
    region_name='us-east-1',
    endpoint_url=mturk_environment['endpoint'],
)

In [3]:
# Connection sanity check: print the balance available to this requester account.
# On Production this is the real MTurk balance; on the Sandbox it is $10,000.
balance_response = client.get_account_balance()
print(balance_response['AvailableBalance'])

10000.00


In [4]:
# `df` is the sample whose 'argumentative' column is dropped before HIT creation;
# `df_known` keeps its labels, which are later used as known answers.
sample_csv = 'sample_mturk.csv'
known_csv = '../full-sample-v4.csv'
df = pd.read_csv(sample_csv)
df_known = pd.read_csv(known_csv)

In [5]:
# Read the HTML form that is shown to workers. The `with` block guarantees the
# file handle is closed (the bare open(...).read() left it to the garbage collector).
with open('./argumentative.html', 'r') as layout_file:
    html_layout = layout_file.read()

# HTMLQuestion envelope: the form is embedded verbatim inside a CDATA section.
QUESTION_XML = """<HTMLQuestion xmlns="http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2011-11-11/HTMLQuestion.xsd">
        <HTMLContent><![CDATA[{}]]></HTMLContent>
        <FrameHeight>650</FrameHeight>
        </HTMLQuestion>"""

# Question template; the '${tweet}' placeholder is substituted per HIT later on.
question_xml = QUESTION_XML.format(html_layout)

In [62]:
# Attributes shared by every HIT; they are unpacked into client.create_hit().
TaskAttributes = dict(
    MaxAssignments=5,                       # Number of distinct Workers per tweet
    LifetimeInSeconds=120 * 60,             # How long the task stays available on the MTurk website (2 hours)
    AssignmentDurationInSeconds=10 * 60,    # How long Workers have to complete each item (10 minutes)
    Reward='0.03',                          # The reward offered to Workers for each response
    Title='Classify tweets',
    Keywords='classify, tweet',
    Description='Classify if tweets are argumentative',
)


In [63]:
# Reset the per-run state: one record per created HIT, and the HIT type id
# used to build the preview link (stays empty if no HIT is created).
results, hit_type_id = [], ''


def encode_tweet(tweet):
    """Turn raw tweet text into an ASCII-only HTML fragment.

    HTML special characters are escaped, line breaks become ``<br/>`` and every
    non-ASCII character (emoji, accents, ...) is replaced by a numeric
    character reference such as ``&#233;``.

    The bytes are decoded back with ``.decode('ascii')`` instead of slicing the
    ``repr`` of the bytes object: the repr doubles backslashes and spells
    control characters as literal ``\\t`` / ``\\r`` text, which would be shown
    to workers verbatim.

    Args:
        tweet: The tweet text as a ``str``.

    Returns:
        The escaped, ASCII-only ``str``.
    """
    # Normalise Windows / old-Mac line endings so each line break yields exactly one <br/>.
    text = tweet.replace('\r\n', '\n').replace('\r', '\n')
    escaped = html.escape(text).replace('\n', '<br/>')
    return escaped.encode('ascii', 'xmlcharrefreplace').decode('ascii')

# Combine both sets and shuffle them. Dropping 'argumentative' from `df` means
# its rows carry no label after the concat, so only `df_known` rows have a
# known answer.
# NOTE(review): sample(frac=1) is unseeded, so the selection below differs on every run.
df_set = pd.concat([df.drop(columns=['argumentative']), df_known]).sample(frac=1)


# Create one HIT per tweet for the first 50 shuffled rows.
for i, row in enumerate(df_set.head(50).to_dict('records')):
    print('\r', 'Creating hit for tweet number', i)
    response = client.create_hit(
        **TaskAttributes,
        # Optional second placeholder, currently disabled:
        # .replace('${topic}', encode_tweet(row['topic']))
        Question=question_xml.replace('${tweet}', encode_tweet(row['tweet'])),
    )
    hit_type_id = response['HIT']['HITTypeId']

    # Rows without a label hold NaN; store None instead so that json.dump
    # writes `null` rather than the non-standard token `NaN`. pd.isnull()
    # treats None and NaN alike, so null checks on this field keep working.
    known_answer = row['argumentative']
    results.append({
        'tweet': row['tweet'],
        'hit_id': response['HIT']['HITId'],
        'known_answer': None if pd.isnull(known_answer) else known_answer,
    })

print("You can view the HITs here:")
print(mturk_environment['preview'] + "?groupId={}".format(hit_type_id))




 Creating hit for tweet number 0
 Creating hit for tweet number 1
 Creating hit for tweet number 2
 Creating hit for tweet number 3
 Creating hit for tweet number 4
 Creating hit for tweet number 5
 Creating hit for tweet number 6
 Creating hit for tweet number 7
 Creating hit for tweet number 8
 Creating hit for tweet number 9
 Creating hit for tweet number 10
 Creating hit for tweet number 11
 Creating hit for tweet number 12
 Creating hit for tweet number 13
 Creating hit for tweet number 14
 Creating hit for tweet number 15
 Creating hit for tweet number 16
 Creating hit for tweet number 17
 Creating hit for tweet number 18
 Creating hit for tweet number 19
 Creating hit for tweet number 20
 Creating hit for tweet number 21
 Creating hit for tweet number 22
 Creating hit for tweet number 23
 Creating hit for tweet number 24
 Creating hit for tweet number 25
 Creating hit for tweet number 26
 Creating hit for tweet number 27
 Creating hit for tweet number 28
 Creating hit for tweet 

In [65]:
# Persist the HIT records so they can be recovered outside this kernel session.
with open('./active_hits.json', mode='w') as hits_file:
    hits_file.write(json.dumps(results))

In [55]:
# Every answer given so far, grouped by WorkerId.
workers = {}

for item in results:
    hit_id = item['hit_id']

    # Record the current status of the HIT.
    item['status'] = client.get_hit(HITId=hit_id)['HIT']['HITStatus']

    # Fetch the assignments Workers have submitted (including already approved ones).
    assignments = client.list_assignments_for_hit(
        HITId=hit_id,
        AssignmentStatuses=['Submitted', 'Approved'],
        MaxResults=10,
    )['Assignments']
    item['assignments_submitted_count'] = len(assignments)

    answers = []
    for assignment in assignments:
        worker_id = assignment['WorkerId']

        # The Worker's response arrives as XML; pull out the submitted value
        # and attach it, together with the worker id, to the parsed document.
        parsed_answer = xmltodict.parse(assignment['Answer'])
        answer = parsed_answer['QuestionFormAnswers']['Answer']['FreeText']
        parsed_answer['worker'] = worker_id
        parsed_answer['answer'] = answer
        answers.append(parsed_answer)

        # File this piece of work under its Worker. Nothing is approved or
        # rejected in this cell.
        workers.setdefault(worker_id, []).append({
            "answer": answer,
            "known_answer": item['known_answer'],
            "assignment_id": assignment['AssignmentId'],
        })

    # Attach the answers retrieved for this item.
    item['answers'] = answers

In [56]:
# Quality-control settings for the gold-answer check.
MIN_KNOWN_ANSWERS = 4   # a Worker is judged only after answering MORE than this many gold tweets
MIN_AGREEMENT = 0.65    # share of gold tweets that must be EXCEEDED for approval
# Gold label -> the answer text a Worker submits for that label.
LABEL_TO_ANSWER = {0: 'not argumentative', 1: 'argumentative'}
# Built from the threshold so the message can never disagree with the check.
REJECTION_FEEDBACK = (
    "You failed to achieve more than {:.0%} agreement with known annotations"
    .format(MIN_AGREEMENT)
)


def _expected_answer(known_answer):
    """Return the answer text for a gold label, or None if the label is not 0/1.

    The label may be numeric (0, 1.0, as produced by pandas) or a string
    ('0', '1'); both forms are accepted.
    """
    try:
        return LABEL_TO_ANSWER.get(float(known_answer))
    except (TypeError, ValueError):
        return None


# NOTE(review): `workers` can contain assignments that are already 'Approved'
# (they are listed together with 'Submitted' ones); approving/rejecting those
# again may be refused by the API - confirm before re-running this cell.
for worker in workers:
    work = workers[worker]
    num_known = 0
    num_correct = 0

    # Score the Worker on the tweets that have a known answer.
    for assign in work:
        if pd.isnull(assign['known_answer']):
            continue
        num_known = num_known + 1
        expected = _expected_answer(assign['known_answer'])
        if expected is not None and assign['answer'] == expected:
            num_correct = num_correct + 1

    # Too few gold answers: leave this Worker's assignments untouched.
    if num_known > MIN_KNOWN_ANSWERS:
        # The verdict is per Worker, so compute it once for all their assignments.
        approve = num_correct / num_known > MIN_AGREEMENT
        for assign in work:
            if approve:
                client.approve_assignment(
                    AssignmentId=assign['assignment_id'],
                    OverrideRejection=False
                )
            else:
                client.reject_assignment(
                    AssignmentId=assign['assignment_id'],
                    RequesterFeedback=REJECTION_FEEDBACK
                )
            
        

In [58]:
# Show the counters left over from the last Worker processed by the loop above.
(num_known, num_correct)

(2, 1)