In [71]:
import boto3
import xmltodict
import json
import pandas as pd
import pyemoji
import html

In [72]:
# Switch: False targets the MTurk sandbox, True targets the production marketplace.
create_hits_in_production = False

# Requester API endpoint and Worker-facing preview URL for each environment.
# The production entries are blank here; the real URLs are kept in the trailing comments.
environments = {
    "production": {
        "endpoint": "",  # "https://mturk-requester.us-east-1.amazonaws.com"
        "preview": "",   # "https://www.mturk.com/mturk/preview"
    },
    "sandbox": {
        "endpoint": "https://mturk-requester-sandbox.us-east-1.amazonaws.com",
        "preview": "https://workersandbox.mturk.com/mturk/preview",
    },
}

# Resolve the active environment once so later cells only read `mturk_environment`.
environment_name = "production" if create_hits_in_production else "sandbox"
mturk_environment = environments[environment_name]

# MTurk client bound to the endpoint of the selected environment.
client = boto3.client(
    'mturk',
    region_name='us-east-1',
    endpoint_url=mturk_environment['endpoint'],
)

In [73]:
# Connectivity check: on Production this is the real MTurk account balance,
# on the Sandbox it is always reported as $10,000.
account_balance = client.get_account_balance()
print(account_balance['AvailableBalance'])

10000.00


In [74]:
# Tweets to annotate; the 'tweet' and 'topic' columns are read when the HITs are created.
df = pd.read_csv(filepath_or_buffer='sample.csv')

In [78]:
# Read the HTML form that Workers will see. A context manager is used so the
# file handle is closed deterministically instead of being left to the GC.
with open('./claim.html', 'r') as layout_file:
    html_layout = layout_file.read()

# HTMLQuestion envelope required by MTurk; the form markup is embedded verbatim
# inside a CDATA section and rendered in a 650px-high frame.
QUESTION_XML = """<HTMLQuestion xmlns="http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2011-11-11/HTMLQuestion.xsd">
        <HTMLContent><![CDATA[{}]]></HTMLContent>
        <FrameHeight>650</FrameHeight>
        </HTMLQuestion>"""

# Question template shared by every HIT; the `${tweet}` and `${topic}`
# placeholders are substituted per row when the HITs are created.
question_xml = QUESTION_XML.format(html_layout)

In [79]:
# Attributes shared by every HIT created in this notebook (passed as keyword
# arguments to `client.create_hit`).
# NOTE(review): the HIT lifetime (3 minutes) is shorter than the time a Worker
# gets per assignment (10 minutes) -- confirm the short lifetime is intentional
# and not a leftover from testing.
TaskAttributes = {
    'MaxAssignments': 5,                    # Number of Workers that may submit a response per HIT
    'LifetimeInSeconds': 3 * 60,            # How long the task will be available on the MTurk website (3 minutes)
    'AssignmentDurationInSeconds': 10 * 60, # How long Workers have to complete each item (10 minutes)
    'Reward': '0.05',                       # The reward you will offer Workers for each response
    'Title': 'Classify tweets',
    'Keywords': 'classify, tweet',
    'Description': 'Classify tweets claims'
}


In [80]:
# `results` collects one record per created HIT; `hit_type_id` holds the
# HITTypeId of the most recently created HIT (empty until one is created).
results, hit_type_id = [], ''


def encode_tweet(tweet):
    """Return `tweet` as ASCII-only HTML that is safe to splice into the question markup.

    HTML special characters are escaped, line breaks become `<br/>`, and every
    non-ASCII character (emoji, accents, ...) becomes a numeric character
    reference such as `&#233;`.

    Args:
        tweet: The raw text to encode (a `str`).

    Returns:
        The encoded text as a `str` containing only ASCII characters.
    """
    escaped = html.escape(tweet)
    # Normalise CRLF / bare CR first so every kind of line break maps to one <br/>.
    with_breaks = escaped.replace('\r\n', '\n').replace('\r', '\n').replace('\n', '<br/>')
    # Decode the ASCII bytes instead of slicing `str(bytes)`: the bytes repr
    # doubles backslashes and spells control characters as literal escapes.
    return with_breaks.encode('ascii', 'xmlcharrefreplace').decode('ascii')


# Create one HIT for each of the first five rows of the sample.
for row in df.head(5).to_dict('records'):
    tweet_markup = encode_tweet(row['tweet'])
    topic_markup = encode_tweet(row['topic'])

    # Fill the shared question template with this row's encoded tweet and topic.
    question = question_xml.replace('${tweet}', tweet_markup)
    question = question.replace('${topic}', topic_markup)

    response = client.create_hit(Question=question, **TaskAttributes)
    created_hit = response['HIT']

    # Remember the HIT type of the latest HIT; it is used for the preview link below.
    hit_type_id = created_hit['HITTypeId']
    results.append({
        'tweet': row['tweet'],
        'hit_id': created_hit['HITId'],
    })

# Link to the group of HITs in the Worker-facing preview site.
preview_link = mturk_environment['preview'] + "?groupId={}".format(hit_type_id)
print("You can view the HITs here:")
print(preview_link)




You can view the HITs here:
https://workersandbox.mturk.com/mturk/preview?groupId=34LHDG0NE61Q6J157QTWQ94XOW4SAS


In [70]:
# For every HIT created above: record its status, collect the submitted
# answers, and approve any assignment that is still awaiting review.
for item in results:

    # Get the status of the HIT
    hit = client.get_hit(HITId=item['hit_id'])
    item['status'] = hit['HIT']['HITStatus']

    # Get a list of the Assignments that have been submitted by Workers
    assignmentsList = client.list_assignments_for_hit(
        HITId=item['hit_id'],
        AssignmentStatuses=['Submitted', 'Approved'],
        MaxResults=10
    )

    assignments = assignmentsList['Assignments']
    item['assignments_submitted_count'] = len(assignments)

    answers = []
    for assignment in assignments:

        # Retrieve the identifier of each Assignment
        assignment_id = assignment['AssignmentId']

        # Parse the XML submitted by the Worker and keep the whole parsed
        # document. No single field is extracted here: 'Answer' is a list
        # when the form has several fields, so indexing it by key would fail.
        answer_dict = xmltodict.parse(assignment['Answer'])
        answers.append(answer_dict)

        # Approve the Assignment (if it hasn't already been approved)
        if assignment['AssignmentStatus'] == 'Submitted':
            client.approve_assignment(
                AssignmentId=assignment_id,
                OverrideRejection=False
            )

    # Add the answers that have been retrieved for this item
    item['answers'] = answers

print(json.dumps(results, indent=2))

[
  {
    "tweet": "When u rent shit out, u still need permission on shit u bring. Food. Cakes. Balloons.  Bitch A BODY!! Yall need to get a fuckin clue. Green asses",
    "hit_id": "3087LXLJ7X31C9QIFCRNAPGEGSA0F3",
    "status": "Reviewable",
    "assignments_submitted_count": 0,
    "answers": []
  },
  {
    "tweet": "<MENTION> <MENTION> 2/2 the cattle after all are eating plant material that just pulled a stack of carbon out of the atmosphere in that annual production cycle where emission were created. Very different to car that is generating emissions from fossil fuels . \ud83e\udd37\ud83c\udffc\u200d\u2642\ufe0f",
    "hit_id": "33EEIIWHLIUEWZH7WPQ67HAIHQVQVE",
    "status": "Reviewable",
    "assignments_submitted_count": 1,
    "answers": [
      {
        "QuestionFormAnswers": {
          "@xmlns": "http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2005-10-01/QuestionFormAnswers.xsd",
          "Answer": {
            "QuestionIdentifier": "argumentative",
    