# Qualitative Worker Answers Analysis


In [None]:
import datetime
import json
import os

import dateutil.parser
from sklearn.metrics import cohen_kappa_score, f1_score, precision_score, recall_score, accuracy_score

In [None]:
# Feedback-survey questions of the component-annotation HIT.
# Each entry is keyed "question_<n>" (n starting at 1, in the order given below)
# and holds the question text plus its own, initially empty, list of responses.
survey_questions = {
    "question_{}".format(number): {"question": text, "responses": []}
    for number, text in enumerate(
        [
            "How clear was it what the task was about?",
            "In general, how difficult/easy was the task?",
            "How difficult/easy was it to understand the difference between own claims, background claims and data?",
            "How difficult/easy was it to distinguish own claims from background claims?",
            "How difficult/easy was it to distinguish own claims from data?",
            "How difficult/easy was it to distinguish background claims from data?",
            "How difficult/easy was it to understand the content of the texts?",
            "Seeing more context would have been helpful to accurately annotate the given paragraphs.",
            "More background knowledge about computer graphics is required to be able to annotate accurately.",
            "The payment I received was appropriate for the work I did.",
            "It took me less/more time than indicated to properly annotate all paragraphs.",
        ],
        start=1,
    )
}

# Feedback-survey questions of the relation-annotation HIT (same layout as above).
survey_questions_relations = {
    "question_{}".format(number): {"question": text, "responses": []}
    for number, text in enumerate(
        [
            "How clear was it what the task was about?",
            "In general, how difficult/easy was the task?",
            "How difficult/easy was it to understand the difference between Supports, Contradicts and Parts of Same?",
            "How difficult/easy was it to distinguish Supports from Contradicts relations?",
            "How difficult/easy was it to distinguish Supports from Parts of Same relations?",
            "How difficult/easy was it to distinguish Contradicts from Parts of Same relations?",
            "How difficult/easy was it to understand the content of the texts?",
            "Seeing more context would have been helpful to accurately annotate the relations in the given paragraphs.",
            "More background knowledge about computer graphics is required to be able to annotate the relations accurately.",
            "Definitions of the different argument types (own claims, background claims, data) would have been helpful to identify the relations between them.",
            "The payment I received was appropriate for the work I did.",
            "It took me less/more time than indicated to properly annotate the relations in all paragraphs.",
        ],
        start=1,
    )
}

# Free-text feedback of the workers, filled while the assignments are analysed.
feedbacks = []


Load the answers for one specific HIT. Then analyse the answer of each worker individually, i.e. the number of annotations, the logs (incl. timestamps) and the survey answers.

In [None]:
# ----- Configuration of the analysis run -----

# ID of the HIT whose saved answers should be retrieved.
hit_id = ''
#assignment_id = '3LKC68YZ3D1A9SIVQ9WPGPSOVSSOW6'

# Type of that HIT; keep exactly one of the following assignments active.
#hit_type = 'componentAnnotation'
hit_type = 'relationAnnotation'
#hit_type = 'payment_for_non_submitted_HIT'

# Assignment statuses that should be considered for the analysis: 'Submitted'|'Approved'|'Rejected'
assignment_status_to_include_in_analysis = ['Submitted', 'Approved', 'Rejected']

# Name of the txt file that holds the answers of this HIT.
filename = 'HIT_[3]_{}.txt'.format(hit_id)

# True selects the 'production' environment, False the 'sandbox' environment.
create_hits_in_production = True
environment_name = 'production' if create_hits_in_production else 'sandbox'

# Counters that are updated while the individual assignments are analysed.
nr_of_workers_with_zero_annotations = 0
nr_of_workers_who_passed_attention_task = 0

# Load the worker answers of this HIT from the saved txt file (its content is JSON).
with open(os.path.join('WorkerAnswers', environment_name, hit_type, filename), "r") as f:
    all_answers = json.load(f)

# Everything below only works on the parsed data, so the file is already closed here.

# Keep only the assignments whose status was selected for the analysis.
all_answers_included_in_analysis = [
    (x, y) for (x, y) in all_answers.items()
    if y["AssignmentStatus"] in assignment_status_to_include_in_analysis
]
nr_of_answers_included_in_analysis = len(all_answers_included_in_analysis)
print("In total, " + str(nr_of_answers_included_in_analysis) + " assignments are included in the analysis.")

# Survey whose responses are collected for this HIT type. Other HIT types have no
# survey handled here, so nothing is collected for them.
if hit_type == 'componentAnnotation':
    survey_of_this_hit_type = survey_questions
elif hit_type == 'relationAnnotation':
    survey_of_this_hit_type = survey_questions_relations
else:
    survey_of_this_hit_type = {}

for assignment_id, answer in all_answers_included_in_analysis:

    worker_answer = answer["worker_answer"]

    print("")
    print("")
    print("##############################################################################")
    print("ANSWERS FOR ASSIGNMNENT WITH ID=" + str(assignment_id))
    print("##############################################################################")
    print("")

    # The annotations are stored as a JSON string inside the worker answer.
    annotations = json.loads(worker_answer['submit_annotations'])
    print("")

    # ----- number of annotations per paragraph -----
    # Reset for every assignment so that an unhandled HIT type can neither fail on an
    # undefined name nor reuse the count of the previous assignment.
    total_nr_of_annotations = 0
    if hit_type == 'componentAnnotation':
        print("Annotated argument components: ", worker_answer["submit_annotations"])
        print("")
        print("Number of annotated argument components:")
        # iterated directly: one collection of annotations per paragraph
        annotations_per_paragraph = annotations
    elif hit_type == 'relationAnnotation':
        print("Annotated argument relations: ", worker_answer["submit_annotations"])
        print("")
        print("Number of annotated argument relations:")
        # a mapping whose values are the annotations of one paragraph each
        annotations_per_paragraph = annotations.values()
    else:
        annotations_per_paragraph = []

    for paragraph_counter, paragraph_annotations in enumerate(annotations_per_paragraph, start=1):
        nr_of_annotations = len(paragraph_annotations)
        print("Paragraph " + str(paragraph_counter) + ": " + str(nr_of_annotations))
        total_nr_of_annotations += nr_of_annotations

    print("Total number of annotations: " + str(total_nr_of_annotations))
    if total_nr_of_annotations == 0:
        nr_of_workers_with_zero_annotations += 1

    print("")
    print("Filter data:")
    print("   ", worker_answer["submit_filter_data_step1"])
    print("")

    print("Assignment Status: " + answer["AssignmentStatus"])

    """
    # only used for pilots
    attention_task = str(answer["worker_answer"]["submit_attention_task"])
    print("Attention task: " + attention_task)
    passed_attention_task = attention_task.strip().lower() == ("thank you")
    print("Passed attention task: " + ("Yes" if passed_attention_task else "No"))
    if passed_attention_task:
        nr_of_workers_who_passed_attention_task += 1
    """

    print("")
    print("")
    # This key is not present in every answer; print it only when it exists.
    if "submit_annotations_OLD_WithoutCharacterIndices" in worker_answer:
        print("submit_annotations_OLD_WithoutCharacterIndices: ", worker_answer["submit_annotations_OLD_WithoutCharacterIndices"])

    # ----- timeline of the logged events -----
    # NOTE(review): timestamp() treats a datetime without UTC offset as local time;
    # confirm that the stored AcceptTime carries an offset.
    AcceptTime = dateutil.parser.parse(answer['AcceptTime'])
    AcceptTime_unix = AcceptTime.timestamp()

    # Log entries are separated by "//"; each entry is "<time in milliseconds>/<message>".
    logger = worker_answer['submit_logger'].split("//")
    print("")
    time_last_log = AcceptTime_unix
    page_time = 0
    for log in logger:
        if log == "":
            continue

        log_parts = log.split("/")
        message = log_parts[1]

        time_milliseconds = float(log_parts[0])
        time_in_seconds = time_milliseconds / 1000.0
        time_since_accept_in_seconds = round(time_in_seconds - AcceptTime_unix, 2)
        time_since_accept_in_minutes = round((time_in_seconds - AcceptTime_unix) / 60.0, 2)

        time_since_last_log_in_seconds = round(time_in_seconds - time_last_log, 2)
        time_since_last_log_in_minutes = round((time_in_seconds - time_last_log) / 60.0, 2)

        page_time += time_since_last_log_in_seconds

        # A message containing "page" closes the current page: report the time that
        # was accumulated on it and start counting again.
        if "page" in message:
            time_on_this_page_in_seconds = round(page_time, 2)
            # rounded like all other minute values that are printed
            time_on_this_page_in_minutes = round(page_time / 60.0, 2)
            print("")
            print("----- Time spent on this page: " + str(time_on_this_page_in_seconds) + " s | " + str(time_on_this_page_in_minutes) + " min -----")
            print("")
            print("----- NEW PAGE -----")
            print("")
            page_time = 0

        print("")
        print("log message: " + message)

        print("  time since accept: " + str(time_since_accept_in_seconds) + " s | " + str(time_since_accept_in_minutes) + " min")
        print("  time since last log: " + str(time_since_last_log_in_seconds) + " s | " + str(time_since_last_log_in_minutes) + " min")

        time_last_log = time_in_seconds

    print("")
    print("")

    # ----- answers of the finish-survey -----
    # "question_<n>" of the survey corresponds to "feedbackSurvey_question<n>" in the
    # worker answer; questions the worker did not answer are skipped.
    for question_id, question_entry in survey_of_this_hit_type.items():
        answer_key = "feedbackSurvey_" + question_id.replace("_", "")
        if answer_key in worker_answer:
            question_entry["responses"].append(worker_answer[answer_key])

    feedbacks.append(worker_answer["feedback"])

"""
# only used for pilots
print("")
print("##############################################################################")
print("FEEDBACK SURVEY ANSWERS FROM ALL ASSIGNMENTS")
print("##############################################################################")
print("")

print("")
print("----- Survey Answers -----")
print("")

if hit_type == 'componentAnnotation':
    survey_questions_and_answers = survey_questions
elif hit_type == 'relationAnnotation':
    survey_questions_and_answers = survey_questions_relations

for questionID, question_workerAnswer in survey_questions_and_answers.items():
    print("[" + str(questionID) + "]")
    print("  " + str(question_workerAnswer["question"]))
    for response in question_workerAnswer["responses"]:
        print("    " + str(response))
    print("")
"""

# Print the free-text feedback that was collected from all analysed assignments.
print("")
print("")
print("----- Feedbacks -----")
print("")
for feedback in feedbacks:
    print(" - " + str(feedback))
print("")

# NOTE: the attention-task counter is only incremented by the pilot-only code, which is
# currently disabled; while it stays disabled this line always reports 0.
print(str(nr_of_workers_who_passed_attention_task) + " of " + str(nr_of_answers_included_in_analysis) + " passed the attention task.")
print(str(nr_of_workers_with_zero_annotations) + " of " + str(nr_of_answers_included_in_analysis) + " did zero annotations.")

print("")
print("-------- Worker Answer Analysis FINISHED!!! --------")

