In [437]:
import os
import sys
import urllib, io
import pickle

import numpy as np
import scipy.stats as stats
import pandas as pd
from sklearn.metrics import euclidean_distances, jaccard_score, pairwise_distances

import pymongo as pm
from collections import Counter
import json
import re
import ast

from PIL import Image, ImageOps, ImageDraw, ImageFont 
from IPython.core.display import HTML 

from io import BytesIO
import base64
import requests

import  matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42

import seaborn as sns
sns.set_context('talk')
sns.set_style('darkgrid')

from IPython.display import clear_output

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")

sys.path.append("../../stimuli/block_utils/")
import blockworld_utils as utils

In [533]:
experiment_name = 'build_components'

## directory & file hierarchy
proj_dir = os.path.abspath('../..')
datavol_dir = os.path.join(proj_dir,'data')
analysis_dir = os.path.abspath(os.path.join(os.getcwd(),'..'))
results_dir = os.path.join(proj_dir,'results')

# paths specific to this experiment
experiment_results_dir = os.path.join(results_dir, experiment_name)
plot_dir = os.path.join(experiment_results_dir,'plots')
csv_dir = os.path.join(experiment_results_dir,'csv')
json_dir = os.path.join(experiment_results_dir,'json')

png_dir = os.path.abspath(os.path.join(datavol_dir,'png'))
jefan_dir = os.path.join(analysis_dir,'jefan')
will_dir = os.path.join(analysis_dir,'will')

## add helpers to python path
if os.path.join(proj_dir,'stimuli') not in sys.path:
    sys.path.append(os.path.join(proj_dir,'stimuli'))
    
if not os.path.exists(results_dir):
    os.makedirs(results_dir)
    
if not os.path.exists(plot_dir):
    os.makedirs(plot_dir)   
    
if not os.path.exists(csv_dir):
    os.makedirs(csv_dir)       

In [762]:
# set vars 
auth = pd.read_csv(os.path.join(proj_dir, 'auth.txt'), header = None) # this auth.txt file contains the password for the sketchloop user
pswd = auth.values[0][0]
user = 'sketchloop'
host = 'cogtoolslab.org' ## cocolab ip address

# have to fix this to be able to analyze from local
import pymongo as pm
conn = pm.MongoClient('mongodb://sketchloop:' + pswd + '@127.0.0.1')
db = conn['block_construction']
coll = db['build_components']

In [827]:
# plugin names ({'datatype': 'trial_end'} & {'trial_type': xxxxxxxx})

# encode
BUILD_COPY = 'block-tower-building-undo'
TOWER_VIEWING = 'block-tower-viewing'
MATCH = 'block-tower-match-to-sample'
BUILD_WM = 'block-tower-building-undo-nostim'

ENCODE_TASKS = [BUILD_COPY, TOWER_VIEWING, MATCH, BUILD_WM]

# decode 
OLD_NEW = 'block-tower-old-new-img'
BUILD_RECALL = 'block-tower-building-recall-choose-color'

DECODE_TASKS = [OLD_NEW, BUILD_RECALL]

# additional data types ({'datatype': xxxxxx})
BLOCK = 'block_placement' # check that this is saved from all building plugins (BUILD_COPY, BUILD_WM, BUILD_RECALL)
RESET = 'reset' # check that this is saved from all building plugins (BUILD_COPY, BUILD_WM, BUILD_RECALL)
UNDO = 'block_undo_placement' # check that this is saved from all building plugins (BUILD_COPY, BUILD_WM, BUILD_RECALL)
REDO = 'block_redo_placement' # check that this is saved from all building plugins (BUILD_COPY, BUILD_WM, BUILD_RECALL)

In [828]:
# iteration names

# iteration_name = 'build_components_cogsci_ve_old_new_data_run_through_2'
# iteration_name = 'build_components_cogsci_ve_recall_data_run_through'
# iteration_name = 'build_components_cogsci_wm_old_new_data_run_through'
# iteration_name = 'build_components_cogsci_wm_recall_data_run_through'


iteration_name = "build_components_cogsci_ve_old_new_prolific_pilot_0"
# iteration_name = "build_components_cogsci_ve_recall_prolific_pilot_0"
# iteration_name = "build_components_cogsci_wm_old_new_prolific_pilot_0"
# iteration_name = "build_components_cogsci_wm_recall_prolific_pilot_0"


iteration_names = [iteration_name]

# dataframe plan

df_encode: encode phase from all iterations

df_encode_ve: all visual exposure trials
df_encode_wm: all working memory trials

df_recall: recall only 
df_recog: old-new only


We rarely compare between VE and WM.
It's more important for us to compare conditions within recog and within recall


In [829]:
# all data
query = coll.find({"$and":[
                        {'iterationName': { '$in': iteration_names }},
                        ]})
df_all = pd.DataFrame(query)
print(len(df_all))

517


In [830]:
df_all.columns

Index(['_id', 'rt', 'url', 'trial_type', 'trial_index', 'time_elapsed',
       'internal_node_id', 'experimentName', 'iterationName', 'workerID',
       'gameID', 'studyLocation', 'datatype', 'view_history', 'success',
       'timeout', 'failed_images', 'failed_audio', 'failed_video',
       'n_blocks_when_reset', 'block_str', 'tower_id', 'tower_A_tall_id',
       'tower_A_wide_id', 'tower_B_tall_id', 'tower_B_wide_id',
       'tower_id_tall', 'composite_id', 'trial_num', 'absolute_time',
       'trial_start_time', 'relative_time', 'stimulus', 'condition', 'n_block',
       'n_resets', 'towerColor', 'timeAbsolute', 'timeRelative', 'blocks',
       'discreteWorld', 'eventType', 'block', 'endReason', 'trial_finish_time',
       'rep', 'response', 'novelty', 'response_meaning', 'response_correct',
       'key_presses', 'distractorKind'],
      dtype='object')

In [831]:
df_all.trial_type.unique()

array(['external-html', 'instructions', 'preload',
       'block-tower-building-undo', 'block-tower-viewing',
       'block-tower-old-new-img', 'survey-text'], dtype=object)

In [832]:
# I don't think metadata is saved anywhere.
query = coll.find({"$and":[
                        {'datatype':'metadata'},
                        {'iterationName': { '$in': iteration_names }},
                        ]})
df_meta = pd.DataFrame(query)
print(len(df_meta))

0


In [833]:
# exit survey responses
query = coll.find({"$and":[
                        {'iterationName': { '$in': iteration_names }},
                        {'trial_type': {'$in': ['survey-text']}}
                        ]})
df_survey = pd.DataFrame(query)
print(len(df_survey))
_ = [print(response) for response in df_survey.response]

5
{'technical': 'No', 'confused': 'No', 'comments': 'Thank you'}
{'technical': 'no', 'confused': 'no', 'comments': ''}
{'technical': 'No', 'confused': 'No', 'comments': 'Very difficult!'}
{'technical': 'no', 'confused': 'at the start but made more sense was i placed the blocks', 'comments': 'no'}
{'technical': 'no', 'confused': 'no', 'comments': 'i found remembering the colors more difficult than remembering the shapes'}


# trial end data

In [834]:
df_trial

Unnamed: 0,_id,trial_start_time,trial_finish_time,relative_time,rep,condition,stimulus,response,trial_num,towerColor,...,studyLocation,datatype,timeAbsolute,timeRelative,blocks,discreteWorld,eventType,endReason,n_resets,color
0,659cb6b7c5c2296bd578d285,1.704769e+12,1.704769e+12,15025,1,view,"{'blocks': [{'x': 1, 'y': 0, 'height': 2, 'wid...",,1,"[214, 19, 87, 255]",...,Prolific,trial_end,,,,,,,,
1,659cb6fcc5c2296bd578d2ab,1.704769e+12,1.704769e+12,15027,1,view,"{'blocks': [{'x': 0, 'y': 0, 'height': 1, 'wid...",,1,"[214, 186, 241, 255]",...,Prolific,trial_end,,,,,,,,
2,659cb707c5c2296bd578d2b8,1.704769e+12,1.704769e+12,74690,1,build,"{'blocks': [{'x': 0, 'y': 0, 'height': 1, 'wid...",,2,"[214, 186, 241, 255]",...,Prolific,trial_end,1.704769e+12,217851.0,"[{'x': 4, 'y': 0, 'width': 2, 'height': 1}, {'...","[[True, True, True, True, True, True, True, Tr...",trial_end,perfect-reconstruction-translation,2.0,
3,659cb709c5c2296bd578d2bc,1.704769e+12,1.704769e+12,15042,1,view,"{'blocks': [{'x': 0, 'y': 0, 'height': 2, 'wid...",,1,"[214, 186, 241, 255]",...,Prolific,trial_end,,,,,,,,
4,659cb723c5c2296bd578d2dd,1.704769e+12,1.704769e+12,33925,1,build,"{'blocks': [{'x': 0, 'y': 0, 'height': 2, 'wid...",,2,"[101, 101, 101, 255]",...,Prolific,trial_end,1.704769e+12,137270.0,"[{'x': 4, 'y': 0, 'width': 1, 'height': 2}, {'...","[[True, True, True, True, True, True, True, Tr...",trial_end,perfect-reconstruction-translation,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78,659cb966c5c2296bd578d90f,1.704770e+12,,40551,0,,,,16,,...,Prolific,trial_end,1.704770e+12,737000.0,"[{'x': 4, 'y': 0, 'width': 1, 'height': 2}, {'...","[[True, True, True, True, True, True, True, Tr...",trial_end,submit-pressed,1.0,"[214, 186, 241, 255]"
79,659cb971c5c2296bd578d93b,1.704770e+12,,8818,0,,,,17,,...,Prolific,trial_end,,,,,,give-up,2.0,"[85, 111, 243, 255]"
80,659cb972c5c2296bd578d93f,1.704770e+12,,46801,0,,,,16,,...,Prolific,trial_end,,,,,,give-up,3.0,"[85, 111, 243, 255]"
81,659cb97bc5c2296bd578d957,1.704770e+12,,32258,0,,,,17,,...,Prolific,trial_end,1.704770e+12,660893.0,"[{'x': 4, 'y': 0, 'width': 2, 'height': 1}, {'...","[[True, True, True, True, True, True, True, Tr...",trial_end,submit-pressed,2.0,"[101, 101, 101, 255]"


In [835]:
# trial-end
query = coll.find({"$and":[
                        {'iterationName': { '$in': iteration_names }},
                        {'datatype':'trial_end'},
                        {'trial_type': {'$nin': ['instructions','preload','external-html','survey-text']}}
                        ]})
df_trial = pd.DataFrame(query)
print(len(df_trial))

120


In [836]:
df_trial.relative_time

0       76748
1       15031
2       35913
3       40226
4      105872
        ...  
115      2244
116      4803
117      4849
118      1972
119      2740
Name: relative_time, Length: 120, dtype: int64

## encode phase

In [837]:
# learning/ exposure trials

query = coll.find({"$and":[
                        {'iterationName': { '$in': iteration_names }},
                        {'datatype': 'trial_end'},
                        {'trial_type':{ '$in': ENCODE_TASKS }},
                        ]})
df_encode = pd.DataFrame(query)
print(len(df_encode))
if len(df_encode) > 0:
    print('encode trials found:', list(df_encode.trial_type.unique()))

60
encode trials found: ['block-tower-building-undo', 'block-tower-viewing']


In [838]:
# in the WM versions, 'block-tower-viewing' trials appear in both conditions as the 'STUDY' part of both tasks

## decode phase

In [839]:
# old-new judgements
query = coll.find({"$and":[
                        {'iterationName': { '$in': iteration_names }},
                        {'datatype': 'trial_end'},
                        {'trial_type':{ '$in': DECODE_TASKS }},
                        ]})
df_decode = pd.DataFrame(query)
print(len(df_decode))
if len(df_decode) > 0:
    print('decode trials found:', list(df_decode.trial_type.unique()))

60
decode trials found: ['block-tower-old-new-img']


In [840]:
# recalled towers are saved one per trial, in up to 6 trials
query = coll.find({"$and":[
                        {'iterationName': { '$in': iteration_names }},
                        {'datatype': 'trial_end'},
                        {'trial_type': BUILD_RECALL},
                        ]})
df_recalled_towers = pd.DataFrame(query)
print(len(df_recalled_towers))

0


## additional data

In [841]:
# block placements
query = coll.find({"$and":[
                        {'datatype': BLOCK},
                        {'iterationName': { '$in': iteration_names }},
                        ]})
df_block = pd.DataFrame(query)
print(len(df_block))
print('individual block data found in:', list(df_block.trial_type.unique()))

287
individual block data found in: ['block-tower-building-undo']


In [842]:
# resets
query = coll.find({"$and":[
                        {'datatype': RESET},
                        {'iterationName': { '$in': iteration_names }},
                        ]})
df_reset = pd.DataFrame(query)
print(len(df_reset))
if len(df_reset) > 0:
    print('reset data found in:', list(df_reset.trial_type.unique()))

33
reset data found in: ['block-tower-building-undo']


In [843]:
# undos
query = coll.find({"$and":[
                        {'datatype': UNDO},
                        {'iterationName': { '$in': iteration_names }},
                        ]})
df_undo = pd.DataFrame(query)
print(len(df_undo))
if len(df_undo) > 0:
    print('undo data found in:', list(df_undo.trial_type.unique()))

42
undo data found in: ['block-tower-building-undo']


In [844]:
# redos
query = coll.find({"$and":[
                        {'datatype': REDO},
                        {'iterationName': { '$in': iteration_names }},
                        ]})
df_redo = pd.DataFrame(query)
print(len(df_redo))
if len(df_redo) > 0:
    print('redo data found in:', list(df_redo.trial_type.unique()))

0


In [845]:
df_undo_redo = pd.concat([df_undo, df_redo], ignore_index=True)
df_construction_procedure = pd.concat([df_block, df_undo, df_redo, df_reset], ignore_index=True)\
                              .sort_values(['gameID','trial_num','relative_time'], ascending=True).reset_index()

In [846]:
df_encode.trial_type.unique()

array(['block-tower-building-undo', 'block-tower-viewing'], dtype=object)

In [847]:
df_decode.trial_type.unique()

array(['block-tower-old-new-img'], dtype=object)

In [848]:
df_block.trial_type.unique()

array(['block-tower-building-undo'], dtype=object)

In [849]:
df_reset.trial_type.unique()

array(['block-tower-building-undo'], dtype=object)

In [850]:
df_construction_procedure.datatype.unique()

array(['reset', 'block_placement', 'block_undo_placement'], dtype=object)

In [851]:
df_construction_procedure.trial_type.unique()

array(['block-tower-building-undo'], dtype=object)

In [852]:
df_construction_procedure[['gameID','trial_num','relative_time','datatype','trial_type']]

Unnamed: 0,gameID,trial_num,relative_time,datatype,trial_type
0,2225-44e783ae-b85d-4333-b0f5-b41e38786391,1,48,reset,block-tower-building-undo
1,2225-44e783ae-b85d-4333-b0f5-b41e38786391,1,31333,reset,block-tower-building-undo
2,2225-44e783ae-b85d-4333-b0f5-b41e38786391,1,41834,block_placement,block-tower-building-undo
3,2225-44e783ae-b85d-4333-b0f5-b41e38786391,1,55269,block_placement,block-tower-building-undo
4,2225-44e783ae-b85d-4333-b0f5-b41e38786391,1,65402,block_placement,block-tower-building-undo
...,...,...,...,...,...
357,9458-19ad828d-8101-4a85-bb7d-682c2d3be804,11,11729,block_placement,block-tower-building-undo
358,9458-19ad828d-8101-4a85-bb7d-682c2d3be804,11,14366,block_placement,block-tower-building-undo
359,9458-19ad828d-8101-4a85-bb7d-682c2d3be804,11,16640,block_placement,block-tower-building-undo
360,9458-19ad828d-8101-4a85-bb7d-682c2d3be804,11,23673,block_placement,block-tower-building-undo


## export data

In [853]:
df_encode.to_csv(experiment_results_dir + '/cogsci24/df_encode_{}.csv'.format(iteration_name))
df_decode.to_csv(experiment_results_dir + '/cogsci24/df_decode_{}.csv'.format(iteration_name))
df_block.to_csv(experiment_results_dir + '/cogsci24/df_block_{}.csv'.format(iteration_name))
df_reset.to_csv(experiment_results_dir + '/cogsci24/df_reset_{}.csv'.format(iteration_name))
df_construction_procedure.to_csv(experiment_results_dir + '/cogsci24/df_construction_procedure_{}.csv'.format(iteration_name))
if len(df_recalled_towers) > 0:
    df_recalled_towers.to_csv(experiment_results_dir + '/cogsci24/df_recalled_towers_{}.csv'.format(iteration_name))

In [854]:
! open ~/zipping/results/build_components/cogsci24/

### Exclusion criteria (implement in analysis scripts)

In [855]:
# df_all_trial = pd.concat([df_learn, df_recalled_towers], ignore_index=True)

In [856]:
# # remove experimenter data
# remove_tests = False

# if remove_tests:
#     df_build = df_build[~df_build.workerID.isna()]
#     df_survey = df_survey[~df_survey.workerID.isna()]
#     df_learn = df_learn[~df_learn.workerID.isna()]
#     df_recall = df_recall[~df_recall.workerID.isna()]

In [857]:
# df_learn.groupby(['workerID','gameID']).apply(len)

In [858]:
# # remove incomplete datasets (build recall)
# remove_incomplete_datasets = True
# n_expected_learn_trials = 18

# if remove_incomplete_datasets:
#     a = df_learn.groupby('gameID').apply(len) == n_expected_learn_trials
#     complete_zipping_set_gameIDs = list(a[a].index)
#     df_trials = df_all_trial[df_all_trial.gameID.isin(complete_zipping_set_gameIDs)]
#     df_learn = df_learn[df_learn.gameID.isin(complete_zipping_set_gameIDs)]
#     df_recalled_towers = df_recalled_towers[df_recalled_towers.gameID.isin(complete_zipping_set_gameIDs)]
    
#     incomplete_zipping_set_gameIDs = list(a[~a].index)
#     print(str(len(incomplete_zipping_set_gameIDs)) + ' ppts removed for incomplete data')
#     print(str(len(complete_zipping_set_gameIDs)) + ' ppts left')
# else: 
#     print('No ppts removed')