In [39]:
import os
import sys
import urllib, io
import pickle

import numpy as np
import scipy.stats as stats
import pandas as pd
from sklearn.metrics import euclidean_distances, jaccard_score, pairwise_distances

import pymongo as pm
from collections import Counter
import json
import re
import ast

from PIL import Image, ImageOps, ImageDraw, ImageFont 
from IPython.core.display import HTML 

from io import BytesIO
import base64
import requests

import  matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42

import seaborn as sns
sns.set_context('talk')
sns.set_style('darkgrid')

from IPython.display import clear_output

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")

sys.path.append("../../stimuli/block_utils/")
import blockworld_utils as utils

In [40]:
experiment_name = 'build_components'

## directory & file hierarchy
proj_dir = os.path.abspath('../..')
datavol_dir = os.path.join(proj_dir,'data')
analysis_dir = os.path.abspath(os.path.join(os.getcwd(),'..'))
results_dir = os.path.join(analysis_dir,'results')

# paths specific to this experiment
experiment_results_dir = os.path.join(results_dir, experiment_name)
plot_dir = os.path.join(experiment_results_dir,'plots')
csv_dir = os.path.join(experiment_results_dir,'csv')
json_dir = os.path.join(experiment_results_dir,'json')

png_dir = os.path.abspath(os.path.join(datavol_dir,'png'))
jefan_dir = os.path.join(analysis_dir,'jefan')
will_dir = os.path.join(analysis_dir,'will')

## add helpers to python path
if os.path.join(proj_dir,'stimuli') not in sys.path:
    sys.path.append(os.path.join(proj_dir,'stimuli'))
    
if not os.path.exists(results_dir):
    os.makedirs(results_dir)
    
if not os.path.exists(plot_dir):
    os.makedirs(plot_dir)   
    
if not os.path.exists(csv_dir):
    os.makedirs(csv_dir)       

In [41]:
# set vars 
auth = pd.read_csv(os.path.join(proj_dir, 'auth.txt'), header = None) # this auth.txt file contains the password for the sketchloop user
pswd = auth.values[0][0]
user = 'sketchloop'
host = 'cogtoolslab.org' ## cocolab ip address

# have to fix this to be able to analyze from local
import pymongo as pm
conn = pm.MongoClient('mongodb://sketchloop:' + pswd + '@127.0.0.1')
db = conn['block_construction']
coll = db['build_components']

# which iteration name should we use?
# iteration_name = 'build_components_build_recall_prolific_pilot_12_towers'
iteration_name = 'build_components_build_recall_prolific_pilot_6_towers_2_rep'
iteration_names = [iteration_name]

In [42]:
# all data
query = coll.find({"$and":[
                        {'iterationName': { '$in': iteration_names }},
                        ]})
df_all = pd.DataFrame(query)
print(len(df_all))

8625


In [43]:
# load metadata
query = coll.find({"$and":[
                        {'datatype':'metadata'},
                        {'iterationName': { '$in': iteration_names }},
                        ]})
df_meta = pd.DataFrame(query)
print(len(df_meta))

12


In [44]:
# learning/ exposure trials
learn_conditions = ['block-tower-viewing','block-tower-building-undo']

query = coll.find({"$and":[
                        {'iterationName': { '$in': iteration_names }},
                        {'trial_type':{ '$in': learn_conditions }},
                        ]})
df_learn = pd.DataFrame(query)
print(len(df_learn))

627


In [45]:
df_all.datatype.unique()

array(['trial_end', 'reset', 'block_placement', 'block_undo_placement',
       'recalled_tower', 'block_redo_placement', 'metadata'], dtype=object)

In [46]:
# block placements
query = coll.find({"$and":[
                        {'datatype':'block_placement'},
                        {'iterationName': { '$in': iteration_names }},
                        ]})
df_blocks = pd.DataFrame(query)
print(len(df_blocks))

6037


In [47]:
# old-new judgements
query = coll.find({"$and":[
                        {'iterationName': { '$in': iteration_names }},
                        {'trial_type':'block-tower-build-recall'},
                        ]})
df_recall_trial = pd.DataFrame(query)
print(len(df_recall_trial))

50


In [48]:
# old-new judgements
query = coll.find({"$and":[
                        {'iterationName': { '$in': iteration_names }},
                        {'datatype':'recalled_tower'},
                        ]})
df_recalled_towers = pd.DataFrame(query)
print(len(df_recalled_towers))

217


In [49]:
# look at survey responses
list(df_all[df_all.trial_type == 'survey-text']['response'])

[{'technical': '', 'confused': 'I struggled to recall', 'comments': ''},
 {'technical': 'No', 'confused': 'No', 'comments': 'No'},
 {'technical': 'no', 'confused': 'no', 'comments': 'no'},
 {'technical': '', 'confused': '', 'comments': ''},
 {'technical': 'No',
  'confused': 'No, although I struggled to recreate the buildings I had in mind',
  'comments': 'n/a'},
 {'technical': '', 'confused': '', 'comments': ''},
 {'technical': '', 'confused': '', 'comments': ''},
 {'technical': 'No', 'confused': 'No', 'comments': 'None'},
 {'technical': 'no', 'confused': 'no', 'comments': ''},
 {'technical': 'no', 'confused': 'yes', 'comments': 'no'},
 {'technical': 'No', 'confused': 'No', 'comments': 'No'},
 {'technical': '', 'confused': '', 'comments': ''},
 {'technical': 'no', 'confused': 'no', 'comments': 'none'},
 {'technical': 'no', 'confused': 'no', 'comments': 'no'},
 {'technical': 'None',
  'confused': "I got slightly confused at first because I couldn't place horziontal blocks and have them

### Exclusion criteria

In [50]:
df_all_trial = df_learn.append(df_recall_trial, ignore_index=True)

  df_all_trial = df_learn.append(df_recall_trial, ignore_index=True)


In [51]:
# remove experimenter data
remove_tests = False

if remove_tests:
    df_build = df_build[~df_build.workerID.isna()]
    df_survey = df_survey[~df_survey.workerID.isna()]
    df_learn = df_learn[~df_learn.workerID.isna()]
    df_recall = df_recall[~df_recall.workerID.isna()]

In [52]:
# remove incomplete datasets
remove_incomplete_datasets = True
n_expected_trials = 13

if remove_incomplete_datasets:
    a = df_all_trial.groupby('gameID').apply(len) == n_expected_trials
    complete_zipping_set_gameIDs = list(a[a].index)
    df_trials = df_all_trial[df_all_trial.gameID.isin(complete_zipping_set_gameIDs)]
    df_learn = df_learn[df_learn.gameID.isin(complete_zipping_set_gameIDs)]
    df_recalled_towers = df_recalled_towers[df_recalled_towers.gameID.isin(complete_zipping_set_gameIDs)]
    
    incomplete_zipping_set_gameIDs = list(a[~a].index)
    print(str(len(incomplete_zipping_set_gameIDs)) + ' ppts removed for incomplete data')
    print(str(len(complete_zipping_set_gameIDs)) + ' ppts left')
else: 
    print('No ppts removed')

10 ppts removed for incomplete data
50 ppts left


In [55]:
df_all_trial.groupby('workerID').apply(len)

workerID
5739549114f69e00067becd8    12
5780d9a1900cc80001d2d1c2    13
57dd3b0e91b7fc0001493db9    13
587bfa12f89eee0001b31187     5
588a59813e1f290001aa78c1    13
5980f157413c230001291b22    13
59c9cc4846f7210001906928    13
5a638e5c31b87a0001c6ed7e    13
5b13da851e55760001ba009a     2
5b1ba337c9d3cb00011030ca    13
5b1efc25bdf4e1000163af46    13
5b2a72f7c293b90001732b89    13
5b47e287bc06c90001f8b67d     1
5b98f31b9e2ffe0001c23910    13
5bc07659b8d38c0001de0516    14
5c096cc8b57e9500015a18c2    13
5c37ecbfbef9a20001e3b0d9    13
5c72aec2aa7a7700018f25c8    14
5c7eb988ac63960015004d03    13
5d49bae2428aab001910c21f    13
5d91e1e72a66dc00195db7dd    13
5e5f684cd6bfcc29d1e5da67    13
5e722d54811ec3046b5defbd     1
5e9838342f9c6001611f9b50    14
5ea16cb48366db01843ff38b    13
5eca536f9aa4f658d641a7d9    13
5ed2543942283e0366dadbdc    13
5ee683596f9b570626faa11c    13
5eeaff91fd89901efebf84b4    13
5eee761287fdd331fc618449    13
5f3013e31c8a690aacb02c31    13
5f4412624052727181839e3a    13

In [54]:
5d91e1e72a66dc00195db7dd

SyntaxError: invalid syntax (1372949129.py, line 1)

#### Flag potentially weird behaviors

Flag:
- < 50% accuracy?
- button bashing on recall trials?
- fast rts on recall trials?

In [56]:
df_trial = df_learn.append(df_recall_trial, ignore_index=True)

  df_trial = df_learn.append(df_recall_trial, ignore_index=True)


### Save data to csv

In [57]:
df_learn.to_csv(experiment_results_dir + '/df_learn_{}.csv'.format(iteration_name))
df_trial.to_csv(experiment_results_dir + '/df_trial_{}.csv'.format(iteration_name))
df_recall_trial.to_csv(experiment_results_dir + '/df_recall_trial_{}.csv'.format(iteration_name))
df_recalled_towers.to_csv(experiment_results_dir + '/df_recalled_towers_{}.csv'.format(iteration_name))
df_blocks.to_csv(experiment_results_dir + '/df_blocks_{}.csv'.format(iteration_name))

In [58]:
df_trial = pd.read_csv(experiment_results_dir + '/df_trial_{}.csv'.format(iteration_name))
df_trial

Unnamed: 0.1,Unnamed: 0,_id,trial_start_time,trial_finish_time,condition,stimulus,response,trial_num,block_str,tower_id,...,eventType,endReason,relative_time,rep,n_resets,absolute_time,total_recall_time,submitted_towers,concatenated_towers,n_towers
0,0,64274b0ee158fd5aa12435c5,1.680297e+12,1.680297e+12,view,"{'blocks': [{'x': 0, 'y': 0, 'height': 1, 'wid...",,1,0000000000000000101000001010000011110000100100...,talls_114_111,...,,,,,,,,,,
1,1,64274b4ee158fd5aa12435d2,1.680297e+12,,build,"{'blocks': [{'x': 0, 'y': 0, 'height': 1, 'wid...",,2,0000000000000000010100000101000011110000011000...,talls_097_100,...,trial_end,perfect-reconstruction-translation,59538.0,1.0,1.0,,,,,
2,2,64274b5ee158fd5aa12435d3,1.680297e+12,1.680297e+12,view,"{'blocks': [{'x': 1, 'y': 0, 'height': 2, 'wid...",,3,0000000000000000110100000101000001110000111100...,talls_121_101,...,,,,,,,,,,
3,3,64274b6fe158fd5aa12435d4,1.680297e+12,1.680297e+12,view,"{'blocks': [{'x': 0, 'y': 0, 'height': 2, 'wid...",,4,0000000000000000011100000110000011100000111100...,talls_102_126,...,,,,,,,,,,
4,4,64274b96e158fd5aa12435e8,1.680297e+12,,build,"{'blocks': [{'x': 0, 'y': 0, 'height': 1, 'wid...",,5,0000000000000000111000000110000001110000011000...,talls_118_114,...,trial_end,perfect-reconstruction-translation,34840.0,1.0,0.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
645,645,6446e103da7c3649c9c2a376,1.682366e+12,,,,,13,,,...,,,,,1.0,1.682367e+12,492318.0,"[{'towerDetails': {'blocks': [{'x': 4, 'y': 0,...","[{'height': 2, 'width': 1, 'x': 0, 'y': 0}, {'...",6.0
646,646,6446e20fda7c3649c9c2a419,1.682367e+12,,,,,61,,,...,,,,,1.0,1.682367e+12,283988.0,"[{'towerDetails': {'blocks': [{'x': 3, 'y': 0,...","[{'height': 1, 'width': 2, 'x': 0, 'y': 0}, {'...",4.0
647,647,6446e36bda7c3649c9c2a47a,1.682367e+12,,,,,13,,,...,,,,,1.0,1.682367e+12,733328.0,"[{'towerDetails': {'blocks': [{'x': 4, 'y': 0,...","[{'height': 2, 'width': 1, 'x': 1, 'y': 0}, {'...",6.0
648,648,6446e395da7c3649c9c2a485,1.682367e+12,,,,,13,,,...,,,,,0.0,1.682367e+12,135673.0,"[{'towerDetails': {'blocks': [{'x': 4, 'y': 0,...","[{'height': 2, 'width': 1, 'x': 1, 'y': 0}, {'...",4.0
