In [1]:

import json
import pandas as pd
from IPython.display import display, Markdown, Latex

import textstat
import tabulate

In [43]:
# Dotted paths pulled out of every participant document by participant_dict_to_row.
# Each path is resolved by recursive_extract_value; a 'game' segment is redirected there to
# 'editedGame' when the document has one, otherwise to 'initialGame'.
KEYS_TO_EXTRACT = ('participantID', 'timestamp', 'scene', 
    'game.setup', 'game.gameplay', 'game.scoring', 'game.difficulty', 'game.firstTimeScore', 
    'gameScore.score', 'gameScore.thoughts', 
    'debrief.strategy', 'debrief.difficulties', 'debrief.questions', 'debrief.external_aids')

# Game fields compared between 'initialGame' and 'editedGame' by is_edited_and_fields.
GAME_KEYS = ('setup', 'gameplay', 'scoring', 'difficulty', 'firstTimeScore')

# Full scene identifier -> short label. When loading from Firestore, scenes that are not
# listed here are mapped to None and the corresponding rows are dropped.
SHORT_SCENE_NAMES = {
    'FloorPlan326_physics_semi_sparse_few_new_objects': 'few_objects',
    'FloorPlan326_physics_semi_sparse_new_objects': 'medium_objects',
    'FloorPlan326_physics_semi_sparse_many_new_objects': 'many_objects',
}

# Empty game skeleton printed by print_participant; str.format fills in
# {participantID}, {room} and {index}.
GAME_TEMPLATE = """(define (game {participantID}) (:domain {room}-objects-room-v1)  ; {index}
(:setup (and 

))
(:constraints (and 

))
(:scoring maximize

))"""

def recursive_extract_value(d, key):
    """Look up a (possibly dotted) key such as ``'debrief.strategy'`` in a nested dict.

    Each dot-separated segment descends one level. A segment equal to ``'game'`` is
    redirected to ``'editedGame'`` when that key exists at the current level and to
    ``'initialGame'`` otherwise.

    Args:
        d: The (nested) dictionary to search.
        key: A plain key or a dot-separated path of keys.

    Returns:
        The value found at the end of the path, or ``None`` when any segment is
        missing, maps to ``None``, or would require descending into a non-dict value.
    """
    value = d
    for key_part in key.split('.'):
        # Only dicts can be descended into. Without this guard a string value would be
        # substring-tested by `in` and then indexed with a str, raising TypeError.
        if not isinstance(value, dict):
            return None

        if key_part == 'game':
            key_part = 'editedGame' if 'editedGame' in value else 'initialGame'

        value = value.get(key_part)
        if value is None:
            return None

    return value


def is_edited_and_fields(doc_dict):
    """Report whether a participant edited their game, and which fields changed.

    Args:
        doc_dict: A participant document as a dict, optionally holding
            ``'initialGame'`` and ``'editedGame'`` sub-dicts.

    Returns:
        A ``(edited, edited_fields)`` pair:
        * ``(None, [])`` when the document has no ``'initialGame'``;
        * ``(False, [])`` when it has no ``'editedGame'``;
        * otherwise ``edited_fields`` lists the GAME_KEYS whose values differ between
          the two versions and ``edited`` is True when that list is non-empty.
    """
    if 'initialGame' not in doc_dict:
        return None, []

    if 'editedGame' not in doc_dict:
        return False, []

    initial_game = doc_dict['initialGame']
    edited_game = doc_dict['editedGame']

    # .get() treats a field that is absent from one version as None instead of raising
    # KeyError, matching how recursive_extract_value handles missing keys.
    edited_fields = [key for key in GAME_KEYS if initial_game.get(key) != edited_game.get(key)]
    return bool(edited_fields), edited_fields


def participant_dict_to_row(doc, keys=KEYS_TO_EXTRACT):
    """Flatten one participant document into a single-level row dict.

    Args:
        doc: An object exposing ``.to_dict()`` and ``.id`` (the loading cell passes the
            documents streamed from the Firestore collection).
        keys: Dotted paths to extract; each becomes a column named after the path with
            ``'.'`` replaced by ``'_'``.

    Returns:
        A dict with the document ``'id'``, one entry per extracted key, plus
        ``'game_edited'`` and ``'edited_game_fields'`` (comma-joined field names).
    """
    doc_dict = doc.to_dict()

    row = {'id': doc.id}
    for dotted_key in keys:
        column_name = dotted_key.replace('.', '_')
        row[column_name] = recursive_extract_value(doc_dict, dotted_key)

    was_edited, changed_fields = is_edited_and_fields(doc_dict)
    row['game_edited'] = was_edited
    row['edited_game_fields'] = ','.join(changed_fields)
    return row


def print_participant(df, index, game_fields=('game_setup', 'game_gameplay', 'game_scoring', 'game_difficulty', 'game_firstTimeScore')):
    """Render one participant's game description and print stubs to fill in for it.

    Displays a Markdown header (participant ID, document id, scene), the collection
    timestamp and every field in ``game_fields``. It then prints an empty GAME_TEMPLATE
    for the participant followed by a JSON metadata stub.

    Args:
        df: Participant table with at least the columns ``participantID``, ``id``,
            ``scene``, ``timestamp`` and everything listed in ``game_fields``.
        index: Row label, looked up with ``df.loc``.
        game_fields: Columns to display, in order; a leading ``'game_'`` is stripped
            from each section title.
    """
    p = df.loc[index]
    display(Markdown(f'## {p.participantID} ({p.id}) ({p.scene})'))
    display(Markdown(f'Collected at {p.timestamp}'))

    for game_field in game_fields:
        display(Markdown(f'### {game_field.replace("game_", "")}:'))
        display(Markdown(str(p[game_field])))

    # A missing scene is None straight from Firestore but NaN (a float) once the table
    # has been round-tripped through CSV, so test for "is a string" rather than "is not None".
    room = p.scene.split('_')[0] if isinstance(p.scene, str) else ''
    print(GAME_TEMPLATE.format(participantID=p.participantID, room=room, index=index))
    print()

    schema_template = dict(metadata=dict(prolific_id=p.participantID, id=p.id, index=index, room=p.scene, notes='')) 
    print(json.dumps(schema_template, indent=4))
    

# Columns (and their order) kept by df_to_statistics_csv.
STATISTICS_CSV_COLUMNS = (
    'id', 'participantID', 'timestamp', 'scene', 
    'game_setup', 'game_gameplay', 'game_scoring', 'game_difficulty',
    'game_firstTimeScore', 'gameScore_score', 'gameScore_thoughts',
    'game_edited', 'edited_game_fields'
)

# Column renames applied to the statistics CSV after the columns are selected.
STATISTICS_CSV_COLUMN_REMAPPING = {'participantID': 'prolific_id'}

# Default destination of the statistics CSV.
STATISTICS_CSV_OUT_PATH = '../data/interactive_beta_firestore_statistics.csv'

# Rows whose `id` is listed here are left out of the statistics CSV.
# NOTE(review): the reason for excluding these two documents is not recorded here.
STATISTICS_IDS_TO_SKIP = ('GLPtcvJUaHkUYK7iEPRq', 'zhq2iVuBVQxs15gj7Blw')


def df_to_statistics_csv(df, out_path=STATISTICS_CSV_OUT_PATH):
    """Write the statistics view of the participant table to ``out_path``.

    Keeps only STATISTICS_CSV_COLUMNS (in that order), drops the rows whose ``id`` is
    in STATISTICS_IDS_TO_SKIP, renames columns per STATISTICS_CSV_COLUMN_REMAPPING,
    prints the resulting column index and saves the frame as CSV.
    """
    selected = df.reindex(columns=STATISTICS_CSV_COLUMNS)
    keep_mask = ~selected['id'].isin(STATISTICS_IDS_TO_SKIP)
    renamed = selected.loc[keep_mask.to_numpy(), :].rename(columns=STATISTICS_CSV_COLUMN_REMAPPING)
    print(renamed.columns)
    renamed.to_csv(out_path)


In [44]:
# Set to True to re-download the participants from Firestore instead of reading the cached CSV.
LOAD_DATA_FROM_FIRESTORE = False
# When downloading, also overwrite the cached CSV and the statistics CSV.
WRITE_DATA_TO_CSV = False
PARTICIPANTS_CSV_PATH = '../data/interactive_beta.csv'
COLLECTION_NAME = 'participants-v2'
# Service-account key used to authenticate against Firestore; keep this file out of version control.
FIREBASE_CREDENTIALS_PATH = './game-generation-6db9c-aef76f1917f8.json'
# Only rows whose participantID is strictly longer than this are kept
# (presumably this filters out test / non-Prolific IDs -- TODO confirm).
PARTICIPANT_ID_LENGTH_THRESHOLD = 10
# Participant IDs explicitly excluded from the analysis.
EXCLUDED_PARTICIPANT_IDS = ('5f63a8f17e0e2f0c5aebfc0b',)


if LOAD_DATA_FROM_FIRESTORE:
    # Imported here so the notebook still runs from the cached CSV when firebase_admin is not installed.
    import firebase_admin
    from firebase_admin import credentials
    from firebase_admin import firestore

    # initialize_app() raises ValueError when the default app already exists, which made
    # re-running this cell in the same kernel fail; reuse the existing app instead.
    try:
        firebase_admin.get_app()
    except ValueError:
        # Use a service account
        cred = credentials.Certificate(FIREBASE_CREDENTIALS_PATH)
        firebase_admin.initialize_app(cred)

    db = firestore.client()

    participants_with_replays = (
        db.collection(COLLECTION_NAME)
        .order_by('timestamp', direction=firestore.Query.ASCENDING)
        .order_by('replays', direction=firestore.Query.DESCENDING)
        .stream()
    )
    participant_rows = [participant_dict_to_row(doc) for doc in participants_with_replays]
    participant_df = pd.DataFrame(participant_rows)

    # Shorten the scene names; unknown scenes become None and are dropped by the filter below.
    participant_df.scene = [SHORT_SCENE_NAMES.get(s) for s in participant_df.scene]
    participant_df = participant_df[
        participant_df.scene.notna()
        & participant_df.game_setup.notna()
        & participant_df.game_gameplay.notna()
        & participant_df.game_scoring.notna()
    ]
    participant_df = participant_df[participant_df.participantID.str.len() > PARTICIPANT_ID_LENGTH_THRESHOLD]
    participant_df = participant_df[~participant_df.participantID.isin(EXCLUDED_PARTICIPANT_IDS)]
    # reset_index() keeps the pre-filter position of every row in an 'index' column.
    participant_df = participant_df.reset_index()

    if WRITE_DATA_TO_CSV:
        participant_df.to_csv(PARTICIPANTS_CSV_PATH)
        df_to_statistics_csv(participant_df)

else:
    # NOTE(review): the CSV is written with its index, so reading it back adds an
    # 'Unnamed: 0' column; confirm nothing downstream relies on it before changing that.
    participant_df = pd.read_csv(PARTICIPANTS_CSV_PATH)



In [41]:
# Sanity check of the loaded table: (rows, columns), then the first five rows.
print(participant_df.shape)
participant_df.head()

(54, 19)


Unnamed: 0.1,Unnamed: 0,index,id,participantID,timestamp,scene,game_setup,game_gameplay,game_scoring,game_difficulty,game_firstTimeScore,gameScore_score,gameScore_thoughts,debrief_strategy,debrief_difficulties,debrief_questions,debrief_external_aids,game_edited,edited_game_fields
0,0,17,RfP7c4trKFGmDp6xpiG8,6172feb1665491d1efbce164,2021-10-22 20:57:12.168000+00:00,medium_objects,Place small ramp in front of bin where there i...,"Take ball (any), and try to get it into the bi...",1 point per successful hit.,1,6,,,,,,,False,
1,1,18,eYM9NgeUlg9mGpwlGemD,613a4662d21e1715cc079587,2021-10-22 20:58:20.433000+00:00,many_objects,,The game I can play in this room it is call me...,the scoring system can be the more hits you ha...,1,"from 1 to 10. I would say an 8 haha, I'm not g...",I scored 7,I think it might be better if the objects disa...,Starting by memorizing the shapes of objects a...,"yes, it could be that the game programming is ...",I would like to play the game when it is fully...,no,False,
2,2,19,SFpazSkgQ7MFSwDEa3c9,5f77754ba932fb2c4ba181d8,2021-10-22 21:02:15.391000+00:00,many_objects,Open the top drawer beside your bed.,First you pick up a dodgeball or a golf ball a...,To score in this game you just have to throw a...,1,10,,,,,,,False,
3,3,20,lchV8TQjaHcYtHAqR31Q,614b603d4da88384282967a7,2021-10-23 20:36:08.420000+00:00,many_objects,,Create a tower with the largest number of figu...,Each level of the tower will count as 1 point,3,I would score 6 points,5,It is very hard,I thought as if I were a child playing with to...,No.,It would be nice to let the user change the ke...,"No, just my imagination",False,
4,4,21,tM0MidzjBJBLkOEPYr2F,616a7fe177d97578e592113b,2021-10-23 22:33:04.491000+00:00,medium_objects,The ball must roll down the ramp and hit as ma...,Minni ball bash,The more blocks it hits the more points you get,2,4,2,I dont like this game that mutch,All making use of grafity and placement of the...,No. The game is unprodictable and that was the...,No,No,False,


In [45]:
# Write the statistics subset of participant_df to STATISTICS_CSV_OUT_PATH (prints the output columns).
# NOTE(review): this runs unconditionally, unlike the WRITE_DATA_TO_CSV-guarded call in the loading cell.
df_to_statistics_csv(participant_df)

Index(['id', 'prolific_id', 'timestamp', 'scene', 'game_setup',
       'game_gameplay', 'game_scoring', 'game_difficulty',
       'game_firstTimeScore', 'gameScore_score', 'gameScore_thoughts',
       'game_edited', 'edited_game_fields'],
      dtype='object')


In [14]:

# Show the game written by the participant in the row labelled 17, followed by an
# empty game template and a metadata stub for that participant.
print_participant(participant_df, 17)

## 613e4bf960ca68f8de00e5e7 (GLPtcvJUaHkUYK7iEPRq) (medium_objects)

Collected at 2021-11-08 21:14:13.145000+00:00

### setup:



### gameplay:

The pieces on the shelf between the two windows must be stacked in such a way as to create a kind of castle. You have to create two equal castles, the colors are not important but if you choose certain colors points are added.

To create each castle you have to follow this order of pieces (from bottom to top):
- Bridge
- Flat piece
- Cylinder
- Square
- Pyramid

### scoring:

For each castle built in the correct order of pieces will be awarded 10 points.

If the castles follow this order of colors extra points will be awarded:

First castle:
- Green bridge
- Glat yellow piece
- Yellow cylinder
- Green square
- Orange pyramid

Second castle: 
- Bridge color wood
- Flat gray piece
- Cylinder color wood
- Blue cylinder
- Red pyramid

If 3 of the pieces have the correct color order (in the corresponding village) 5 points per castle will be awarded, if 4 colors are good it will be 7 points per castle and if all colors are good, 10 points per castle. The maximum score is 40 points.

### difficulty:

0

### firstTimeScore:

40

(define (game 613e4bf960ca68f8de00e5e7) (:domain medium-objects-room-v1)  ; 17
(:setup (and 

))
(:constraints (and 

))
(:scoring maximize

))

{
    "metadata": {
        "prolific_id": "613e4bf960ca68f8de00e5e7",
        "id": "GLPtcvJUaHkUYK7iEPRq",
        "index": 17,
        "room": "medium_objects",
        "notes": ""
    }
}


# Statistics on plaintext versions

In [6]:
DEFAULT_STATS_FIELDS = ('game_setup', 'game_gameplay', 'game_scoring')

def run_textstat_func(textstat_func, df, fields=DEFAULT_STATS_FIELDS):
    """Score every row's concatenated text and summarise the scores.

    The selected columns of each row are joined with newlines (missing values count as
    empty strings) and the resulting text is passed to ``textstat_func``.

    Args:
        textstat_func: Callable mapping a string to a numeric score.
        df: DataFrame holding the text columns.
        fields: Either a ``slice`` (columns selected by position), a single column
            name, or an iterable of column names.

    Returns:
        ``(mean, standard_error)`` of the per-row scores. The standard error is
        ``Series.sem()``, i.e. the sample standard deviation divided by the square root
        of the number of non-missing scores.
    """
    if isinstance(fields, slice):
        subset = df.iloc[:, fields]
    elif isinstance(fields, str):
        subset = df.loc[:, [fields]]
    else:
        subset = df.loc[:, list(fields)]

    # Build the texts cell by cell: str() keeps numeric cells from breaking the join
    # (str.join only accepts strings) and missing cells contribute an empty string.
    texts = [
        '\n'.join('' if pd.isna(value) else str(value) for value in row)
        for row in subset.itertuples(index=False, name=None)
    ]
    scores = pd.Series([textstat_func(text) for text in texts], index=subset.index, dtype=float)
    return scores.mean(), scores.sem()

In [7]:
# Mean and standard error of the Flesch reading-ease score over participant_df,
# computed on the DEFAULT_STATS_FIELDS text of each row.
run_textstat_func(textstat.flesch_reading_ease, participant_df)

(75.63380952380952, 4.418337517351245)

In [8]:
# Survey responses, one CSV per room configuration (few / medium / many objects).
SURVEY_CSV_PATHS = (
    '../data/few_objects.csv',
    '../data/medium_objects.csv',
    '../data/many_objects.csv',
)
few_objects_df, medium_objects_df, many_objects_df = (pd.read_csv(path) for path in SURVEY_CSV_PATHS)

In [9]:
# Scratch check: textstat functions expose __name__, which the comparison table uses for its column headers.
textstat.flesch_reading_ease.__name__

'flesch_reading_ease'

In [11]:
TEXTSTAT_FUNCS = (textstat.flesch_reading_ease, textstat.flesch_kincaid_grade, textstat.gunning_fog)
NAMES = ['survey - few objects', 'survey - medium objects', 'survey - many objects', 'interactive beta']

# The survey frames are scored on columns 1-3 (selected by position); the interactive
# beta frame is scored on its named game-text columns.
SURVEY_FIELDS = slice(1, 4)
datasets = (
    (few_objects_df, SURVEY_FIELDS),
    (medium_objects_df, SURVEY_FIELDS),
    (many_objects_df, SURVEY_FIELDS),
    (participant_df, DEFAULT_STATS_FIELDS),
)

# One table row per dataset: its name followed by "mean ± standard error" for each metric.
rows = []
for name, (dataset_df, dataset_fields) in zip(NAMES, datasets):
    row = [name]
    for func in TEXTSTAT_FUNCS:
        mean, sem = run_textstat_func(func, dataset_df, fields=dataset_fields)
        row.append(f'$ {mean:.2f} \\pm {sem:.2f} $')
    rows.append(row)

headers = ['name', *(func.__name__ for func in TEXTSTAT_FUNCS)]

display(Markdown(tabulate.tabulate(rows, headers=headers, tablefmt='github')))

| name                    | flesch_reading_ease   | flesch_kincaid_grade   | gunning_fog        |
|-------------------------|-----------------------|------------------------|--------------------|
| survey - few objects    | $ 61.48 \pm 7.66 $    | $ 14.73 \pm 2.96 $     | $ 17.11 \pm 3.06 $ |
| survey - medium objects | $ 61.76 \pm 5.55 $    | $ 13.73 \pm 2.08 $     | $ 16.03 \pm 2.12 $ |
| survey - many objects   | $ 56.42 \pm 5.95 $    | $ 15.56 \pm 2.23 $     | $ 17.65 \pm 2.30 $ |
| interactive beta        | $ 75.63 \pm 4.42 $    | $ 9.39 \pm 1.59 $      | $ 11.83 \pm 1.57 $ |