## Read input and convert to pandas df

In [None]:
import pandas as pd
import chardet

# CSV file to load into a DataFrame.
Raw_Response = "xxx.csv"

# Detect the text encoding from the file's raw bytes so read_csv can decode it.
# The whole file is read here; sample only part of it if the file is large.
with open(Raw_Response, 'rb') as raw_file:
    raw_bytes = raw_file.read()
enc = chardet.detect(raw_bytes)

detected_encoding = enc['encoding']
df_raw_response = pd.read_csv(Raw_Response, encoding=detected_encoding)
df_raw_response

In [None]:
# Remove the column at position 1 (the second column), selected by position
# rather than by label.
# Background: https://sparkbyexamples.com/pandas/pandas-drop-multiple-columns-by-index/
labels_to_drop = df_raw_response.columns[[1]]
df_raw_response = df_raw_response.drop(columns=labels_to_drop)
df_raw_response

In [None]:
# DataFrame.shape is (number of rows, number of columns).
rows, cols = df_raw_response.shape

print("Number of Rows: ", rows)
print("Number of Columns: ", cols)

In [4]:
# Replace the long question text used as a column header with a short name
# that is easier to reference in later cells.
worker_id_question = 'Worker ID: Please provide your MTurk Worker ID for authentication purpose'
df_raw_response = df_raw_response.rename(columns={worker_id_question: 'Worker_ID'})

In [None]:
# Show the column labels (iterating a DataFrame yields its column labels).
list(df_raw_response.columns)

## Find the MTurk worker IDs in the Google Form responses

### Import MTurk results

In [None]:
# CSV file containing the MTurk batch results.
MTurk_Results = "Batch_xxx_batch_results.csv"

# Detect the text encoding from the file's raw bytes so read_csv can decode it.
# The whole file is read here; sample only part of it if the file is large.
with open(MTurk_Results, 'rb') as results_file:
    results_bytes = results_file.read()
enc = chardet.detect(results_bytes)

detected_encoding = enc['encoding']
df_MTurk_results = pd.read_csv(MTurk_Results, encoding=detected_encoding)
df_MTurk_results

### Loop through each result and look for a matching worker ID in the Google Form responses

In [None]:
# Normalise the Google Form worker IDs once (spaces removed) so that each
# lookup below is a set membership test instead of a scan of the DataFrame.
# Blank answers are read by pandas as NaN, which has no `.replace` method and
# can never match a worker, so they are dropped up front.
form_worker_ids = {
    str(form_id).replace(" ", "")
    for form_id in df_raw_response['Worker_ID'].dropna()
}

# Number of MTurk workers for whom no Google Form response was found.
count = 0

# Check every worker listed in the MTurk results against the form responses.
for raw_worker_id in df_MTurk_results['WorkerId']:

    Worker_ID_MTurk_Result = str(raw_worker_id).replace(" ", "")
    print('\nLooking for matching ID:',Worker_ID_MTurk_Result)

    if Worker_ID_MTurk_Result in form_worker_ids:
        print('Matching response found for: ',Worker_ID_MTurk_Result)
    else:
        print('Cannot find matching response for worker: ',Worker_ID_MTurk_Result)
        count += 1


print(count,'malicious workers have been detected in total')

# Approve results based on Google Form submission

##### Reference: [boto3 `MTurk.Client.approve_assignment`](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/mturk.html#MTurk.Client.approve_assignment)

In [8]:
import boto3
# HIT whose submitted assignments are reviewed in the cells below.
HIT_ID = 'xxxxxxxxxxxxxxx'
# Placeholder; overwritten with each assignment's worker ID during review.
Worker_ID = ''

# MTurk API client used for every request in the cells below.
client = boto3.client('mturk')

In [9]:
# Fetch every assignment of the HIT that is still in the 'Submitted' state.
# A single list_assignments_for_hit call returns at most MaxResults
# assignments, so keep following NextToken until all pages have been merged
# into the first response's 'Assignments' list.
list_assignments_kwargs = {
    'HITId': HIT_ID,
    'MaxResults': 100,
    'AssignmentStatuses': ['Submitted'],
}

All_Submitted_Assignment_Results = client.list_assignments_for_hit(
    **list_assignments_kwargs
)

next_token = All_Submitted_Assignment_Results.pop('NextToken', None)
while next_token:
    next_page = client.list_assignments_for_hit(
        NextToken=next_token,
        **list_assignments_kwargs
    )
    # An empty page means there is nothing left to fetch, even if a token
    # is still present.
    if not next_page['Assignments']:
        break
    All_Submitted_Assignment_Results['Assignments'].extend(next_page['Assignments'])
    next_token = next_page.get('NextToken')

# Keep the summary count consistent with the merged list.
All_Submitted_Assignment_Results['NumResults'] = len(
    All_Submitted_Assignment_Results['Assignments']
)

## Loop through each assignment: approve those with a matching ID in the Google Form responses and reject the rest

In [None]:
# Normalise the Google Form worker IDs once (spaces removed) so each
# assignment is checked with a set lookup. Blank answers are read by pandas as
# NaN, which has no `.replace` method and can never match, so they are dropped.
form_worker_ids = {
    str(form_id).replace(" ", "")
    for form_id in df_raw_response['Worker_ID'].dropna()
}

count_approve_assignment = 0        # approvals that succeeded
count_approve_assignment_error = 0  # approvals that failed
count_malicious = 0                 # assignments with no matching form response

for assignment in All_Submitted_Assignment_Results['Assignments']:

    Worker_ID = assignment['WorkerId']
    Assignment_ID = assignment['AssignmentId']

    if Worker_ID in form_worker_ids:
        print('Matching response found for: ',Worker_ID)

        # boto3 raises ClientError when the service rejects a call, so catch
        # it here: one failed approval must not abort the rest of the batch.
        try:
            response = client.approve_assignment(
                AssignmentId = Assignment_ID,
                RequesterFeedback = 'Thank you for submitting survey response.'
            )
            approved = response["ResponseMetadata"]["HTTPStatusCode"] == 200
        except client.exceptions.ClientError as error:
            print('approve_assignment raised an error: ',error)
            approved = False

        if approved:
            print('Successfully approve_assignment for WorkerId: ',Worker_ID)
            count_approve_assignment += 1
        else:
            print('Failed to approve_assignment for WorkerId: ',Worker_ID)
            count_approve_assignment_error += 1

    else:
        print('Cannot find matching response for worker: ',Worker_ID)

        try:
            response = client.reject_assignment(
                AssignmentId = Assignment_ID,
                RequesterFeedback = 'Unfortunately we did not receive your survey response.'
            )
            rejected = response["ResponseMetadata"]["HTTPStatusCode"] == 200
        except client.exceptions.ClientError as error:
            print('reject_assignment raised an error: ',error)
            rejected = False

        if rejected:
            print('Successfully reject_assignment for WorkerId: ',Worker_ID)
        else:
            print('Failed to reject_assignment for WorkerId: ',Worker_ID)

        # Counted for every unmatched assignment, whether or not the
        # rejection call itself succeeded.
        count_malicious += 1

print(count_approve_assignment,'were approved in total')
print(count_approve_assignment_error,'encountered errors in approval attempts')
print(count_malicious,'malicious submissions were rejected')