In [1]:
%autoreload 2

In [2]:
from collections import defaultdict, Counter
from itertools import combinations, chain
import json
from types import SimpleNamespace
import typing


import colorcet as cc
import krippendorff
from IPython.display import display, HTML, Markdown
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
import statsmodels.api as sm
import statsmodels.formula.api as smf
from tabulate import tabulate


In [3]:
class ExtendedSimpleNamespace(SimpleNamespace):
    """A ``SimpleNamespace`` that can also be read like a mapping.

    Fields are reachable both as attributes (``ns.field``) and by key
    (``ns['field']``); key access also reaches fields whose names are not
    valid identifiers, such as ``'human-likeness'``. Because the class
    provides ``keys()`` and ``__getitem__``, instances can be unpacked with
    ``**``.

    The constructor is deliberately inherited from ``SimpleNamespace``: a
    Python-level ``__init__(self, **kwargs)`` override adds nothing and raises
    ``TypeError`` when one of the fields is itself named ``'self'``.
    """

    def __contains__(self, key):
        """Return True if ``key`` is the name of an attribute of this object.

        This uses ``hasattr``, so it is also True for method names such as
        ``'get'`` and ``'keys'``, not only for stored fields.
        """
        return hasattr(self, key)

    def __getitem__(self, key):
        """Return the attribute named ``key``.

        Raises:
            AttributeError: if no such attribute exists (not ``KeyError``).
        """
        return getattr(self, key)

    def get(self, key, default=None):
        """Return the attribute named ``key``, or ``default`` if it is missing."""
        return getattr(self, key, default)

    def keys(self):
        """Return the stored field names, skipping any that start with ``_``."""
        return [k for k in vars(self) if not k.startswith("_")]
        

In [4]:
# Two snapshots of the same study data; the later cell that loads them reports
# which Prolific participants appear only in the current one.
PREVIOUS_DATA_PATH = './human_evals_data/real-complete_only-main-pilot_40_data.json'
CURRENT_DATA_PATH = './human_evals_data/real-complete_only-main-pilot_01-27_data.json'

# Fields copied out of every per-game result record, in column order.
GAME_KEYS = [
    'id',
    'real',
    'matched',
    'confident',
    'fun_play',
    'fun_watch',
    'capability',
    'goldilocks',
    'creativity',
    'human-likeness',
    'explain',
    'overall',
]

# Raw field name -> DataFrame column name, for the fields that get renamed.
RENAMED_GAME_KEYS = {
    'id': 'game_id',
    'human-likeness': 'human_likeness',
}

# Per-game fields that hold a numeric rating (column names after renaming).
NUMBER_SCORE_ATTRIBUTES = [
    'confident',
    'fun_play',
    'fun_watch',
    'capability',
    'goldilocks',
    'creativity',
    'human_likeness',
]

# Human-readable label for each numeric rating, keyed by column name.
# NOTE(review): the 'capability' label has no trailing '?' and reads as if it
# was cut short -- confirm the intended wording.
NUMBER_SCORE_AXIS_NAMES = dict(
    confident='How confident are you that you understand the game?',
    fun_play='How fun would it be to play the game yourself?',
    fun_watch='How fun would it be to watch someone else play the game?',
    capability='How helpful would it be for learning to interact',
    goldilocks='Too easy, appropriately difficult, or too hard for you?',
    creativity='How creatively designed is the game?',
    human_likeness='How human-like do you think this game is?',
)


def load_data(data_path: str) -> typing.Tuple[typing.List[ExtendedSimpleNamespace], typing.List[ExtendedSimpleNamespace]]:
    """Load participant records from a JSON file and split them by recruitment source.

    Every JSON object in the file is decoded into an ``ExtendedSimpleNamespace``,
    so nested fields can be read as attributes or by key.

    Args:
        data_path: Path to a JSON file whose top-level value is a sequence of
            participant records, each exposing ``data.recruitment_info``.

    Returns:
        A ``(prolific, non_prolific)`` pair of lists. A participant goes into
        the first list when ``data.recruitment_info`` has a non-empty
        ``prolific_id``, and into the second list otherwise. File order is
        preserved within each list.
    """
    # Pin the encoding: JSON text is UTF-8, whereas open() would otherwise fall
    # back to the platform's locale encoding and could mis-decode (or fail on)
    # non-ASCII characters in the free-text answers.
    with open(data_path, 'r', encoding='utf-8') as f:
        raw_data = json.load(f, object_hook=lambda d: ExtendedSimpleNamespace(**d))

    prolific_participants_data = []
    non_prolific_participants_data = []

    for participant_data in raw_data:
        # A missing or empty prolific_id both count as "not recruited via Prolific".
        if participant_data.data.recruitment_info.get('prolific_id', ''):
            prolific_participants_data.append(participant_data)
        else:
            non_prolific_participants_data.append(participant_data)

    return prolific_participants_data, non_prolific_participants_data


def real_matched_to_str(series: pd.Series) -> str:
    """Map a row's ``real`` / ``matched`` flags to a game-type label.

    ``real`` takes precedence: when it is truthy the result is ``'real'`` and
    ``matched`` is not consulted. Otherwise the result is ``'matched'`` or
    ``'unmatched'`` depending on the ``matched`` flag.
    """
    if not series.real:
        return 'matched' if series.matched else 'unmatched'

    return 'real'


def full_game_id_to_str(series: pd.Series) -> str:
    """Build a row's full game identifier as ``"<game_id>-<game_type>"``."""
    game_id = series.game_id
    game_type = series.game_type
    return f"{game_id}-{game_type}"


def loaded_data_to_df(loaded_data: typing.List[ExtendedSimpleNamespace]):
    """Flatten participant records into a DataFrame with one row per rated game.

    Args:
        loaded_data: Participant records as returned by ``load_data``. Each
            must expose ``id``, ``data.recruitment_info`` and
            ``data.single_game_results``; every game result must provide all
            of ``GAME_KEYS``.

    Returns:
        A DataFrame with the columns ``participant_id``, ``prolific_id``, the
        ``GAME_KEYS`` fields (renamed via ``RENAMED_GAME_KEYS``), plus:

        * ``game_type``: ordered categorical ``real < matched < unmatched``
          derived from the ``real`` / ``matched`` flags;
        * ``full_game_id``: ``"<game_id>-<game_type>"``.

        ``prolific_id`` is ``None`` for participants without one. An empty
        ``loaded_data`` yields an empty frame that still has every column.
    """
    # Spelled out so that an empty input still produces the full schema.
    columns = ['participant_id', 'prolific_id'] + [RENAMED_GAME_KEYS.get(key, key) for key in GAME_KEYS]
    rows = []

    for participant in loaded_data:
        participant_id = participant.id
        # load_data() treats prolific_id as optional, and load_data_to_df() can
        # pass non-Prolific participants here, so a missing field must not raise.
        prolific_id = participant.data.recruitment_info.get('prolific_id')

        for game_result in participant.data.single_game_results:
            rows.append({
                'participant_id': participant_id,
                'prolific_id': prolific_id,
                **{RENAMED_GAME_KEYS.get(key, key): game_result[key] for key in GAME_KEYS}
            })

    df = pd.DataFrame(rows, columns=columns)

    # Row-wise list comprehensions instead of DataFrame.apply(axis=1): on an
    # empty frame apply() returns a DataFrame rather than a Series, which breaks
    # the Categorical construction below.
    game_types = [real_matched_to_str(row) for _, row in df.iterrows()]
    df = df.assign(game_type=pd.Categorical(game_types, categories=['real', 'matched', 'unmatched'], ordered=True))

    # Computed after game_type has been added because the id embeds it.
    full_game_ids = [full_game_id_to_str(row) for _, row in df.iterrows()]
    df = df.assign(full_game_id=full_game_ids)

    return df


def load_data_to_df(data_path: str, prolific_only: bool = True):
    """Load a data file and return both the raw records and their flat DataFrame.

    Args:
        data_path: Path of the JSON data file, passed on to ``load_data``.
        prolific_only: When True (the default) keep only participants with a
            Prolific ID; when False, the remaining participants are appended
            after the Prolific ones.

    Returns:
        A ``(participants, df)`` pair: the selected participant records and the
        result of ``loaded_data_to_df`` applied to them.
    """
    prolific, non_prolific = load_data(data_path)
    participants = prolific if prolific_only else prolific + non_prolific

    return participants, loaded_data_to_df(participants)


# Load both data snapshots. load_data_to_df() is called with its default
# prolific_only=True, so only participants with a non-empty prolific_id are kept.
current_participants_raw_data, current_participants_df = load_data_to_df(CURRENT_DATA_PATH)
previous_participants_raw_data, previous_participants_df = load_data_to_df(PREVIOUS_DATA_PATH)
print(f'Loaded current participant data with shape {current_participants_df.shape} from {CURRENT_DATA_PATH}')
print(f'Loaded previous participant data with shape {previous_participants_df.shape} from {PREVIOUS_DATA_PATH}')

# Prolific IDs that occur in the current snapshot but not in the previous one.
new_prolific_participant_ids = set(current_participants_df.prolific_id) - set(previous_participants_df.prolific_id)
print(f"{len(new_prolific_participant_ids)} new prolific participants:")
# NOTE(review): these are participant identifiers -- consider clearing this
# cell's output before sharing or committing the notebook.
for prolific_id in sorted(new_prolific_participant_ids):
    print(f"\t{prolific_id}")

# Repeats the shape of the current-snapshot frame already printed above.
print(f'Dataframe shape: {current_participants_df.shape}')
# Last expression of the cell, so the first rows are rendered as its output.
current_participants_df.head()


Loaded current participant data with shape (489, 16) from ./human_evals_data/real-complete_only-main-pilot_01-27_data.json
Loaded previous participant data with shape (370, 16) from ./human_evals_data/real-complete_only-main-pilot_40_data.json
12 new prolific participants:
	5769a352f48c3b00016737f9
	5c1c74700036a80001193c24
	5cfb459229a6c2001764e10d
	5d350282cec7150015d16494
	5d453e8723a5bb0001492546
	5e5e623737be5e1762ad4418
	5eeaa8ee94a7f8046a02a20d
	60fdd725b2cd02db32f9c718
	6168d40137ffaa2e184d02da
	63614899ad49a91f1ef09067
	6564a4689e58a4504f5ee238
	656f18a4879aba85a3134f00
Dataframe shape: (489, 16)


Unnamed: 0,participant_id,prolific_id,game_id,real,matched,confident,fun_play,fun_watch,capability,goldilocks,creativity,human_likeness,explain,overall,game_type,full_game_id
0,00c9bf44-28f3-469a-8a71-ea972af61bab-p102,5ef4d8b87faaa00c0dea0c4e,"(1, 1, 4, 0, 2, 0, 0, 0, 1, 0, 0, 0)",False,False,4,3,3,5,3,2,2,"The game is composed of two components. First,...",The instructions seem to be pretty straightfor...,unmatched,"(1, 1, 4, 0, 2, 0, 0, 0, 1, 0, 0, 0)-unmatched"
1,00c9bf44-28f3-469a-8a71-ea972af61bab-p102,5ef4d8b87faaa00c0dea0c4e,"(1, 0, 2, 0, 1, 0, 0, 0, 0, 1, 0, 0)",False,False,4,3,3,4,3,3,2,"First, you need to place the hexagon bin near ...",This games seems slightly more difficult that ...,unmatched,"(1, 0, 2, 0, 1, 0, 0, 0, 0, 1, 0, 0)-unmatched"
2,00c9bf44-28f3-469a-8a71-ea972af61bab-p102,5ef4d8b87faaa00c0dea0c4e,"(1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0)",True,True,4,2,2,5,4,3,2,"First, you need to put the bin next to the bed...",This game is more challenging than the prior t...,real,"(1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0)-real"
3,00c9bf44-28f3-469a-8a71-ea972af61bab-p102,5ef4d8b87faaa00c0dea0c4e,"(1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0)",False,True,5,4,4,3,2,2,2,Place the bin next to the wall. Throw balls t...,This game has the simplest instructions. One c...,matched,"(1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0)-matched"
4,00c9bf44-28f3-469a-8a71-ea972af61bab-p102,5ef4d8b87faaa00c0dea0c4e,"(1, 1, 3, 0, 0, 0, 0, 2, 0, 0, 0, 0)",False,False,1,1,1,1,5,1,3,Move chairs and then let them go so they end u...,I don't like this game. I don't really unders...,unmatched,"(1, 1, 3, 0, 0, 0, 0, 2, 0, 0, 0, 0)-unmatched"
