In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Let pandas display up to 30 columns before it truncates wide frames
pd.set_option('display.max_columns', 30)

In [None]:
# Load the MTurk assignments (one frame per study group) and the matching answer keys.
# NOTE(review): with its default settings pandas.read_csv parses cells such as
# 'null', 'NA' or the empty string as NaN, so missing answers probably arrive here
# as NaN rather than as the string 'null' — confirm against the CSV files.
df_assignments_ctrl = pd.read_csv('../XPlanningEvaluation/resources/mobilerobot/study/prefalign/mturk/assignments/assignments_control_full.csv')
df_assignments_expr = pd.read_csv('../XPlanningEvaluation/resources/mobilerobot/study/prefalign/mturk/assignments/assignments_experimental_full.csv')
df_answers_ctrl = pd.read_csv('../XPlanningEvaluation/resources/mobilerobot/study/prefalign/mturk/assignments/answerKey_control.csv')
df_answers_expr = pd.read_csv('../XPlanningEvaluation/resources/mobilerobot/study/prefalign/mturk/assignments/answerKey_experimental.csv')

In [None]:
# Preview the first rows of the control-group assignments
df_assignments_ctrl.head()

In [None]:
# Preview the first rows of the experimental-group assignments
df_assignments_expr.head()

In [None]:
# Preview the first rows of the control-group answer key
df_answers_ctrl.head()

In [None]:
# Preview the first rows of the experimental-group answer key
df_answers_expr.head()

In [None]:
def create_columns(data_types, num_questions):
    """Build the column names for every data type of every question.

    Names have the form ``question[i]-[data_type_name]`` with ``i`` counting
    from 0. They are ordered question by question, and within a question in
    the order given by ``data_types``.
    """
    return [
        'question' + str(question_idx) + "-" + type_name
        for question_idx in range(num_questions)
        for type_name in data_types
    ]

In [None]:
def create_columns_for_data_type(data_type, num_questions):
    """Build the column names of one data type for every question.

    Returns ``['question0-<data_type>', 'question1-<data_type>', ...]`` with
    ``num_questions`` entries.
    """
    names = []
    for question_idx in range(num_questions):
        names.append('question' + str(question_idx) + '-' + data_type)
    return names

In [None]:
# Remove any assignment (i.e., any row) that contains missing data of a given data type of any question
def remove_assignments_with_missing_data(df_assignments, data_type, num_questions):
    """Drop every row that lacks a value of ``data_type`` for any question.

    A cell counts as missing when it is NaN/None or the literal string
    'null'. Checking NaN as well matters because ``pandas.read_csv`` converts
    'null' and empty cells to NaN by default, in which case a test for the
    string 'null' alone never matches.

    Parameters
    ----------
    df_assignments : pandas.DataFrame
        Assignments table, one row per assignment.
    data_type : str
        Data type suffix of the columns to check (e.g. 'total-cost').
    num_questions : int
        Number of questions; columns ``question0-...`` up to
        ``question[num_questions-1]-...`` are checked. Columns that do not
        exist in ``df_assignments`` are ignored.

    Returns
    -------
    pandas.DataFrame
        A new frame (a copy, so later column assignments do not touch the
        input) containing only the complete rows, with their original index.
    """
    columns = [
        column
        for column in create_columns_for_data_type(data_type, num_questions)
        if column in df_assignments.columns
    ]

    keep = np.ones(len(df_assignments), dtype=bool)
    for column in columns:
        values = df_assignments[column]
        missing = values.isnull().values
        # Only non-numeric columns can hold the 'null' marker; skipping numeric
        # ones avoids comparing numbers with a string.
        if not pd.api.types.is_numeric_dtype(values):
            missing = missing | (values.astype(object) == 'null').values
        keep &= ~missing

    return df_assignments[keep].copy()

In [None]:
def set_columns_type_to_numeric(df_assignments, data_type, num_questions):
    """Return a copy of ``df_assignments`` with the ``data_type`` columns made numeric.

    Parameters
    ----------
    df_assignments : pandas.DataFrame
        Assignments table; must contain ``question[i]-<data_type>`` for every
        ``i`` in ``range(num_questions)``.
    data_type : str
        Data type suffix of the columns to convert (e.g. 'total-cost').
    num_questions : int
        Number of questions.

    Returns
    -------
    pandas.DataFrame
        A new frame in which the selected columns went through ``pd.to_numeric``.

    Raises
    ------
    KeyError
        If one of the expected columns is missing.
    ValueError
        If a value cannot be parsed as a number (``pd.to_numeric`` default).
    """
    columns = create_columns_for_data_type(data_type, num_questions)
    # Work on a copy: the input is usually a filtered slice of another frame,
    # so assigning into it raises SettingWithCopyWarning and silently changes
    # the caller's data.
    df_assignments = df_assignments.copy()
    df_assignments[columns] = df_assignments[columns].apply(pd.to_numeric)
    return df_assignments

In [None]:
# Keep only control-group assignments that have a total-cost for each of the 4 questions
# (the name is rebound to the filtered frame), then preview the result
df_assignments_ctrl = remove_assignments_with_missing_data(df_assignments_ctrl, 'total-cost', 4)
df_assignments_ctrl.head()

In [None]:
# Keep only experimental-group assignments that have a total-cost for each of the 4 questions
# (the name is rebound to the filtered frame), then preview the result
df_assignments_expr = remove_assignments_with_missing_data(df_assignments_expr, 'total-cost', 4)
df_assignments_expr.head()

In [None]:
# Compute accuracy of total-cost (in %error) and answer (correct/incorrect) for each question, from each worker
def compute_accuracy(df_assignments, df_answers, num_questions=4):
    """Grade every worker's total-cost estimate and answer against the answer key.

    Parameters
    ----------
    df_assignments : pandas.DataFrame
        One row per assignment. Must contain 'HIT ID', 'Worker ID' and, for
        each question i, 'question[i]-ref', 'question[i]-total-cost' and
        'question[i]-answer'.
    df_answers : pandas.DataFrame
        Answer key with the columns 'ref', 'total-cost' and 'answer'. When a
        ref occurs more than once, its first row is used.
    num_questions : int, optional
        Number of questions per assignment (default 4).

    Returns
    -------
    pandas.DataFrame
        One row per assignment. For each question: 'question[i]-ref' (copied),
        'question[i]-total-cost' (absolute relative error w.r.t. the answer
        key) and 'question[i]-answer' (1 if correct, else 0); followed by
        'HIT ID' and 'Worker ID'.

    Raises
    ------
    IndexError
        If a question ref has no row in ``df_answers``.
    """
    # Data columns of interest for each question: ref, total-cost, answer
    ref_columns = create_columns_for_data_type('ref', num_questions)
    total_cost_columns = create_columns_for_data_type('total-cost', num_questions)
    answer_columns = create_columns_for_data_type('answer', num_questions)

    df_assignments = set_columns_type_to_numeric(df_assignments, 'total-cost', num_questions)

    # Collect one record per assignment and build the frame once at the end:
    # DataFrame.append was removed in pandas 2.0 and re-copies the whole frame
    # on every call.
    records = []
    for _, row in df_assignments.iterrows():
        record = {'HIT ID': row['HIT ID'], 'Worker ID': row['Worker ID']}

        for ref_column, total_cost_column, answer_column in zip(ref_columns, total_cost_columns, answer_columns):
            # ref: question-mission[i]-agent[j] is shared between assignments and answerKey tables
            ref = row[ref_column]
            key_row = df_answers[df_answers['ref'] == ref].iloc[0]

            # total-cost and %error
            correct_total_cost = key_row['total-cost']
            total_cost_err = abs((correct_total_cost - row[total_cost_column]) / correct_total_cost)

            record[ref_column] = ref
            record[total_cost_column] = total_cost_err
            # answer: correct (1) / incorrect (0)
            record[answer_column] = 1 if row[answer_column] == key_row['answer'] else 0

        records.append(record)

    accuracy_columns = create_columns(['ref', 'total-cost', 'answer'], num_questions)
    return pd.DataFrame(records, columns=accuracy_columns + ['HIT ID', 'Worker ID'])

In [None]:
# Grade the control group against its answer key and preview the result
df_accuracy_ctrl = compute_accuracy(df_assignments_ctrl, df_answers_ctrl)
df_accuracy_ctrl.head()

In [None]:
# Grade the experimental group against its answer key and preview the result
df_accuracy_expr = compute_accuracy(df_assignments_expr, df_answers_expr)
df_accuracy_expr.head()

In [None]:
def compute_score(df_assignments, df_answers, num_questions=4):
    """Compute the confidence-weighted score of every answer of every worker.

    The stated confidence is mapped to 0..4 ('none', 'slightly', 'somewhat',
    'fairly', 'completely'). The score is that value when the answer is
    correct and its negative when the answer is wrong.

    Parameters
    ----------
    df_assignments : pandas.DataFrame
        One row per assignment. Must contain 'HIT ID', 'Worker ID' and, for
        each question i, 'question[i]-ref', 'question[i]-answer' and
        'question[i]-confidence'.
    df_answers : pandas.DataFrame
        Answer key with the columns 'ref' and 'answer'. When a ref occurs more
        than once, its first row is used.
    num_questions : int, optional
        Number of questions per assignment (default 4).

    Returns
    -------
    pandas.DataFrame
        One row per assignment. For each question: 'question[i]-ref' (copied)
        and 'question[i]-confidence' (the signed score); followed by 'HIT ID'
        and 'Worker ID'.

    Raises
    ------
    IndexError
        If a question ref has no row in ``df_answers``.
    KeyError
        If a confidence value is not one of the five known labels.
    """
    confidence_abs_scores = {'none': 0, 'slightly': 1, 'somewhat': 2, 'fairly': 3, 'completely': 4}

    # Data columns of interest for each question: ref, answer, confidence
    ref_columns = create_columns_for_data_type('ref', num_questions)
    answer_columns = create_columns_for_data_type('answer', num_questions)
    confidence_columns = create_columns_for_data_type('confidence', num_questions)

    # Collect one record per assignment and build the frame once at the end:
    # DataFrame.append was removed in pandas 2.0 and re-copies the whole frame
    # on every call.
    records = []
    for _, row in df_assignments.iterrows():
        record = {'HIT ID': row['HIT ID'], 'Worker ID': row['Worker ID']}

        for ref_column, answer_column, confidence_column in zip(ref_columns, answer_columns, confidence_columns):
            # ref: question-mission[i]-agent[j] is shared between assignments and answerKey tables
            ref = row[ref_column]
            correct_answer = df_answers[df_answers['ref'] == ref].iloc[0]['answer']

            # Sign the confidence magnitude by the correctness of the answer
            magnitude = confidence_abs_scores[row[confidence_column]]
            score = magnitude if row[answer_column] == correct_answer else -1 * magnitude

            record[ref_column] = ref
            record[confidence_column] = score

        records.append(record)

    score_columns = create_columns(['ref', 'confidence'], num_questions)
    return pd.DataFrame(records, columns=score_columns + ['HIT ID', 'Worker ID'])

In [None]:
# Confidence-weighted scores of the control group, then a preview
df_score_ctrl = compute_score(df_assignments_ctrl, df_answers_ctrl)
df_score_ctrl.head()

In [None]:
# Confidence-weighted scores of the experimental group, then a preview
df_score_expr = compute_score(df_assignments_expr, df_answers_expr)
df_score_expr.head()

In [None]:
def compute_statistics(df, data_type, num_questions, ignore_columns):
    """Average one data type over the questions of each row and summarise it.

    The per-row average is taken over the ``question[i]-<data_type>`` columns
    that are not listed in ``ignore_columns``. It is also stored in ``df`` as
    a new column named ``'average-' + data_type`` (``df`` is modified in place).

    Returns a tuple ``(mean, std, averages)``: the mean and standard deviation
    of the per-row averages, and the averages themselves as a Series.
    """
    kept_columns = []
    for name in create_columns_for_data_type(data_type, num_questions):
        if name not in ignore_columns:
            kept_columns.append(name)

    average_name = 'average-' + data_type
    df[average_name] = df[kept_columns].mean(axis=1)

    row_averages = df[average_name]
    return row_averages.mean(), row_averages.std(), row_averages

In [None]:
# Mean/std of the per-worker average score for both groups, with question1 left out
# (presumably the validation question — see ignore_columns_validation; TODO confirm).
# NOTE: compute_statistics also adds an 'average-confidence' column to each score frame.
score_mean_ctrl, score_std_ctrl, scores_ctrl = compute_statistics(df_score_ctrl, 'confidence', 4, ['question1-confidence'])
score_mean_expr, score_std_expr, scores_expr = compute_statistics(df_score_expr, 'confidence', 4, ['question1-confidence'])
print('Score statistics:')
print('Control group: mean=%f, std=%f' % (score_mean_ctrl, score_std_ctrl))
print('Experimental group: mean=%f, std=%f' % (score_mean_expr, score_std_expr))

In [None]:
# Put both groups' per-worker average scores side by side (the two Series are
# aligned on their row index) and plot a kernel density estimate per group
df_scores = pd.DataFrame({'scores_ctrl': scores_ctrl, 'scores_expr': scores_expr})
ax_scores = df_scores.plot.kde()

In [None]:
# Overlaid, semi-transparent histograms (20 bins) of the per-worker average scores
axhist_scores = df_scores.plot.hist(bins=20, alpha=0.3)

In [None]:
# Mean/std of the per-worker average answer accuracy for both groups, with question1 left out
# (presumably the validation question — see ignore_columns_validation; TODO confirm).
# NOTE: compute_statistics also adds an 'average-answer' column to each accuracy frame.
acc_mean_ctrl, acc_std_ctrl, accs_ctrl = compute_statistics(df_accuracy_ctrl, 'answer', 4, ['question1-answer'])
acc_mean_expr, acc_std_expr, accs_expr = compute_statistics(df_accuracy_expr, 'answer', 4, ['question1-answer'])
print('Accuracy statistics:')
print('Control group: mean=%f, std=%f' % (acc_mean_ctrl, acc_std_ctrl))
print('Experimental group: mean=%f, std=%f' % (acc_mean_expr, acc_std_expr))

In [None]:
# Put both groups' per-worker average accuracies side by side (the two Series are
# aligned on their row index) and plot a kernel density estimate per group
df_accs = pd.DataFrame({'accs_ctrl': accs_ctrl, 'accs_expr': accs_expr})
ax_accs = df_accs.plot.kde()

In [None]:
# Overlaid, semi-transparent histograms (20 bins) of the per-worker average accuracies
axhist_accs = df_accs.plot.hist(bins=20, alpha=0.3)

In [None]:
def filter_columns(columns, ignore_columns):
    """Return the entries of ``columns`` that are not in ``ignore_columns``, in order."""
    kept = []
    for name in columns:
        if name in ignore_columns:
            continue
        kept.append(name)
    return kept

In [None]:
def create_mixed_effect_table(df_accuracy, df_score, ignore_columns, num_questions=4):
    """Reshape per-worker accuracy and score tables into one row per answered question.

    Parameters
    ----------
    df_accuracy : pandas.DataFrame
        Table with 'Worker ID' and, per question, 'question[i]-ref' and
        'question[i]-answer'.
    df_score : pandas.DataFrame
        Table with 'Worker ID' and, per question, 'question[i]-ref' and
        'question[i]-confidence'. Rows are paired with ``df_accuracy`` by
        position, so both tables must list the same assignments in the same
        order.
    ignore_columns : list of str
        Column names to leave out (e.g., data from the validation question).
        A question is skipped entirely when its ref column is listed.
    num_questions : int, optional
        Number of questions per assignment (default 4).

    Returns
    -------
    pandas.DataFrame
        Columns 'question-ref' (ref with '-explanation' removed), 'group'
        ('experimental' if the ref contained '-explanation', else 'control'),
        'participant' (the Worker ID), 'accuracy' and 'score'.

    Raises
    ------
    KeyError
        If a non-ignored column is missing from its table.
    """
    mixed_effect_columns = ['question-ref', 'group', 'participant', 'accuracy', 'score']

    ref_columns = create_columns_for_data_type('ref', num_questions) # question[i]-ref
    answer_columns = create_columns_for_data_type('answer', num_questions) # question[i]-answer (accuracy)
    confidence_columns = create_columns_for_data_type('confidence', num_questions) # question[i]-confidence (accuracy-confidence score)

    # Filter out some columns (e.g., data from validation question)
    selected_ref_columns = filter_columns(ref_columns, ignore_columns)
    selected_answer_columns = filter_columns(answer_columns, ignore_columns)
    selected_confidence_columns = filter_columns(confidence_columns, ignore_columns)

    worker_column = ['Worker ID']
    selected_df_accuracy = df_accuracy[selected_ref_columns + selected_answer_columns + worker_column]
    selected_df_score = df_score[selected_ref_columns + selected_confidence_columns + worker_column]

    # Collect the long-format rows and build the frame once at the end:
    # DataFrame.append was removed in pandas 2.0 and re-copies the whole frame
    # on every call.
    records = []
    for row_position in range(selected_df_accuracy.shape[0]):
        row_accuracy = selected_df_accuracy.iloc[row_position]
        row_score = selected_df_score.iloc[row_position]
        participant = row_accuracy.get('Worker ID')

        for ref_column, answer_column, confidence_column in zip(ref_columns, answer_columns, confidence_columns):
            if ref_column in ignore_columns:
                continue
            ref = row_accuracy.get(ref_column)
            records.append({
                'question-ref': ref.replace('-explanation', ''),
                'group': 'experimental' if '-explanation' in ref else 'control',
                'participant': participant,
                # .get yields None when only the answer/confidence column was ignored
                'accuracy': row_accuracy.get(answer_column),
                'score': row_score.get(confidence_column),
            })

    return pd.DataFrame(records, columns=mixed_effect_columns)

In [None]:
# Long-format tables for each group with no column ignored (every question is kept)
df_mixed_effect_ctrl = create_mixed_effect_table(df_accuracy_ctrl, df_score_ctrl, [])
df_mixed_effect_expr = create_mixed_effect_table(df_accuracy_expr, df_score_expr, [])

In [None]:
# Preview the control-group long-format table
df_mixed_effect_ctrl.head()

In [None]:
# Preview the experimental-group long-format table
df_mixed_effect_expr.head()

In [None]:
# Stack both groups into one table with a fresh 0..n-1 index.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_mixed_effect_all = pd.concat([df_mixed_effect_ctrl, df_mixed_effect_expr], ignore_index=True)

In [None]:
# Preview the combined long-format table
df_mixed_effect_all.head()

In [None]:
# df_mixed_effect_all.to_csv('data_all.csv', encoding='utf-8', index=False)

In [None]:
# Rebuild the long-format tables without any question1 data, leaving three questions
# per worker (the variable name suggests question1 is the validation question — TODO confirm)
ignore_columns_validation = ['question1-ref', 'question1-answer', 'question1-confidence']
df_mixed_effect_3qs_ctrl = create_mixed_effect_table(df_accuracy_ctrl, df_score_ctrl, ignore_columns_validation)
df_mixed_effect_3qs_expr = create_mixed_effect_table(df_accuracy_expr, df_score_expr, ignore_columns_validation)

In [None]:
# Preview the control-group table without question1
df_mixed_effect_3qs_ctrl.head()

In [None]:
# Preview the experimental-group table without question1
df_mixed_effect_3qs_expr.head()

In [None]:
# Stack both three-question tables into one with a fresh 0..n-1 index.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_mixed_effect_3qs_all = pd.concat([df_mixed_effect_3qs_ctrl, df_mixed_effect_3qs_expr], ignore_index=True)

In [None]:
# Preview the combined table without question1
df_mixed_effect_3qs_all.head()

In [None]:
# df_mixed_effect_3qs_all.to_csv('data_3qs.csv', encoding='utf-8', index=False)

In [None]:
# Rows whose question ref contains '-agent0'
# (presumably the preference-aligned agent, going by the variable name — TODO confirm)
df_mixed_effect_3qs_aligned = df_mixed_effect_3qs_all[df_mixed_effect_3qs_all['question-ref'].str.contains('-agent0')]
# df_mixed_effect_3qs_aligned.to_csv('data_3qs_aligned.csv', encoding='utf-8', index=False)

In [None]:
# Complement of the '-agent0' selection: rows whose question ref does not contain '-agent0'
df_mixed_effect_3qs_unaligned = df_mixed_effect_3qs_all[~df_mixed_effect_3qs_all['question-ref'].str.contains('-agent0')]
# df_mixed_effect_3qs_unaligned.to_csv('data_3qs_unaligned.csv', encoding='utf-8', index=False)

In [None]:
# Mean accuracy and score per question, sorted by ascending accuracy.
# Only the numeric columns are selected before averaging: since pandas 2.0,
# groupby().mean() raises a TypeError on string columns such as 'group' and
# 'participant' instead of dropping them silently.
df_questions_acc_ctrl = (
    df_mixed_effect_3qs_ctrl[['question-ref', 'accuracy', 'score']]
    .groupby(['question-ref'], as_index=False)
    .mean()
    .sort_values('accuracy')
)
df_questions_acc_expr = (
    df_mixed_effect_3qs_expr[['question-ref', 'accuracy', 'score']]
    .groupby(['question-ref'], as_index=False)
    .mean()
    .sort_values('accuracy')
)

In [None]:
# Per-question means of the control group
df_questions_acc_ctrl

In [None]:
# Per-question means of the experimental group
df_questions_acc_expr

In [None]:
# Bar chart of mean accuracy per question, control group
ax_questions_acc_ctrl = df_questions_acc_ctrl.plot.bar(x='question-ref', y='accuracy')

In [None]:
# Bar chart of mean accuracy per question, experimental group
ax_questions_acc_expr = df_questions_acc_expr.plot.bar(x='question-ref', y='accuracy')

In [None]:
# Reorder the experimental per-question means so that their rows follow the
# question order of the control table; 'question-ref' ends up as a column again
df_questions_acc_expr_reindexed = (
    df_questions_acc_expr
    .set_index('question-ref')
    .reindex(index=df_questions_acc_ctrl['question-ref'])
    .reset_index()
)

In [None]:
# Accuracy values of both groups as plain lists; both follow the control table's question order
ls_questions_acc_ctrl = df_questions_acc_ctrl['accuracy'].tolist()
ls_questions_acc_expr = df_questions_acc_expr_reindexed['accuracy'].tolist()

In [None]:
# One table indexed by question ref with a 'control' and an 'experimental' accuracy column
questions_order = df_questions_acc_ctrl['question-ref'].tolist()
df_questions_acc_all = pd.DataFrame({'control': ls_questions_acc_ctrl, 'experimental': ls_questions_acc_expr}, index=questions_order)

In [None]:
# Per-question accuracy of both groups side by side
df_questions_acc_all

In [None]:
# Grouped bar chart (control vs. experimental) of per-question accuracy on a 25x25 inch figure
ax_questions_acc_all = df_questions_acc_all.plot.bar(figsize=(25,25))

In [None]:
# Keep a handle on the figure so it can be saved (the export below is disabled)
fig_questions_acc_all = ax_questions_acc_all.get_figure()
# fig_questions_acc_all.savefig('questions_acc_all.pdf')