In [1]:
import json
import os

import boto3
import numpy as np
import pandas as pd
import xmltodict

In [2]:
def get_config(filepath='secrets.json'):
    """Load a JSON file and return its parsed contents.

    Args:
        filepath: Path of the JSON file to read. Defaults to 'secrets.json'.

    Returns:
        The object decoded from the file's JSON text.
    """
    with open(filepath, 'r') as handle:
        return json.loads(handle.read())

In [3]:
# Build the MTurk client used by the rest of the notebook.
# Credentials are read from the JSON config returned by get_config()
# (by default the file 'secrets.json'), so they are not hardcoded here.
region_name = 'us-east-1'
config = get_config()
aws_access_key_id = config['aws_access_key_id']
aws_secret_access_key = config['aws_secret_access_key']

# Default to the sandbox endpoint.
endpoint_url = 'https://mturk-requester-sandbox.us-east-1.amazonaws.com'

# Uncomment this line to use in production
# endpoint_url = 'https://mturk-requester.us-east-1.amazonaws.com'

client = boto3.client(
    'mturk',
    endpoint_url=endpoint_url,
    region_name=region_name,
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
)

# Sanity check that the client can reach the endpoint with these credentials.
# This will return $10,000.00 in the MTurk Developer Sandbox
print(client.get_account_balance()['AvailableBalance'])

10000.00


In [4]:
# HIT whose assignments are downloaded; copy the Id from the create HIT notebook.
hitId = '3NC6WP7WKPSYEY4BOLBJQBXSHT0WWK'

# Accumulates the raw assignment records from every response page.
hitAssignments = np.array([])

# Request fields accepted by list_assignments_for_hit:
#   "HITId": String,
#   "AssignmentStatuses": String,
#   "NextToken": String,
#   "MaxResults": Integer
nextToken = ''
fetchNext = True
while fetchNext:
    # 10 results per page is good; larger pages carry a danger of timeout.
    requestArgs = {'HITId': hitId, 'MaxResults': 10}
    if nextToken != '':
        # Every page after the first resumes from the previous page's token.
        requestArgs['NextToken'] = nextToken
    assignmentList = client.list_assignments_for_hit(**requestArgs)
    hitAssignments = np.append(hitAssignments, assignmentList['Assignments'])

    # A response without 'NextToken' ends the loop.
    fetchNext = 'NextToken' in assignmentList
    if fetchNext:
        nextToken = assignmentList['NextToken']
        print('---', nextToken)
    else:
        nextToken = ''
    

--- p1:IAOTJuDNU9JtVGe6NrqMT6ySflZn8tu0YKGFy00O9+j+1Y1HzOZeaTtW1WcQDw==


In [5]:
# Flatten every raw assignment in hitAssignments into one flat dict:
# the assignment metadata plus one entry per answered question, keyed by the
# question's identifier. The dicts are collected in allAssignments.
allAssignments = np.array([])

for hitAssignment in hitAssignments:
    singleAssignment = {}

    # Metadata copied through unchanged.
    for field in ('AssignmentId', 'WorkerId', 'HITId', 'AssignmentStatus'):
        singleAssignment[field] = hitAssignment[field]
    # Timestamps are stored as strings.
    for field in ('AutoApprovalTime', 'AcceptTime', 'SubmitTime'):
        singleAssignment[field] = str(hitAssignment[field])

    answer = xmltodict.parse(hitAssignment['Answer'])
    answerStr = json.dumps(answer) #print this to see structure

    # xmltodict only produces a list when an element repeats: a form with a
    # single <Answer> yields one mapping (iterating it would walk its keys and
    # fail on a['QuestionIdentifier']), and a form with no <Answer> has no
    # 'Answer' key at all. Normalise all three cases to a list.
    answerItems = (answer['QuestionFormAnswers'] or {}).get('Answer') or []
    if isinstance(answerItems, dict):
        answerItems = [answerItems]

    for a in answerItems:
        #print(json.dumps(a)) #print this to see structure  -- {"QuestionIdentifier": "ref_1_listened", "FreeText": "true"}
        singleAssignment[a['QuestionIdentifier']] = a['FreeText']
    allAssignments = np.append(allAssignments, singleAssignment)

In [6]:
# Build the results table in one step instead of growing it row by row.
#
# Keying the records by AssignmentId keeps one row per assignment (the last
# record wins if an id repeats, at the position of its first occurrence).
recordsById = {assignment['AssignmentId']: assignment for assignment in allAssignments}

# Columns are the union of keys over all assignments, so an assignment that
# lacks a field (or has an extra one) no longer raises KeyError or loses data;
# missing values show up as NaN. An empty allAssignments gives an empty frame.
# The index holds the AssignmentId values and is left unnamed, and the
# 'AssignmentId' column is kept as well.
assignmentsDF = pd.DataFrame(
    list(recordsById.values()),
    index=list(recordsById.keys()),
)

# Column names of the table, kept as a plain list.
colNames = list(assignmentsDF.columns)

assignmentsDF.head()

Unnamed: 0,AssignmentId,WorkerId,HITId,AssignmentStatus,AutoApprovalTime,AcceptTime,SubmitTime,sample,turkSubmitTo,workerId,...,clip_5_listened,ref_2_listened,distance_arrangement_listened,ordering_checkbox,distance_checkbox,postsurvey_speaker_headphones,postsurvey_past_audiotests,postsurvey_environment,postsurvey_confusing,postsurvey_additional_comments
3IFS6Q0HJRRPT61R5125GFHEQY3SI6,3IFS6Q0HJRRPT61R5125GFHEQY3SI6,AJ50O8TZ96CYQ,3VLL1PIEOYWU3K0E8M4G8LEOVRIOZZ,Approved,2022-03-14 15:23:24+08:00,2022-03-07 15:22:16+08:00,2022-03-07 15:23:24+08:00,0,https://workersandbox.mturk.com,AJ50O8TZ96CYQ,...,True,True,True,True,True,Test animated sound,0,Test animated sound,Test animated sound,Test animated sound
3OWEPKL08IKVO9SQEEHMQWHZPJKN7U,3OWEPKL08IKVO9SQEEHMQWHZPJKN7U,A3AB5MMZPUTO91,3VLL1PIEOYWU3K0E8M4G8LEOVRIOZZ,Approved,2022-03-14 15:28:11+08:00,2022-03-07 15:27:02+08:00,2022-03-07 15:28:11+08:00,0,https://workersandbox.mturk.com,A3AB5MMZPUTO91,...,True,True,True,True,True,Purnima Kamath test,3,Purnima Kamath test,Purnima Kamath test,Purnima Kamath test


In [136]:
# Save the table as data/<hitId>.csv.
# to_csv does not create missing parent directories, so make sure the output
# directory exists first (no-op when it is already there).
os.makedirs('data', exist_ok=True)
assignmentsDF.to_csv('data/{}.csv'.format(hitId))