In [865]:
import os
import sys
import urllib, io
import pickle

import numpy as np
import scipy.stats as stats
import pandas as pd
from sklearn.metrics import euclidean_distances, jaccard_score, pairwise_distances

import pymongo as pm
from collections import Counter
import json
import re
import ast

from PIL import Image, ImageOps, ImageDraw, ImageFont 
from IPython.core.display import HTML 

from io import BytesIO
import base64
import requests

import  matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
# Expose pyplot under the conventional short alias.
plt = pyplot
import matplotlib as mpl
# pdf.fonttype 42 selects Type 42 (TrueType) fonts for PDF output.
mpl.rcParams['pdf.fonttype'] = 42

import seaborn as sns
# Global seaborn context and style applied to figures drawn from here on.
sns.set_context('talk')
sns.set_style('darkgrid')

from IPython.display import clear_output

import warnings
# Silence DeprecationWarnings and the two numpy "size changed" messages.
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")

# NOTE(review): this is a relative path, so it is resolved against the current
# working directory; the import below only succeeds when the notebook is run
# from a folder two levels below the project root.
sys.path.append("../../stimuli/block_utils/")
import blockworld_utils as utils

In [866]:
experiment_name = 'build_components'

## directory & file hierarchy
# Everything is derived from the current working directory, which is expected
# to sit two levels below the project root.
proj_dir = os.path.abspath('../..')
datavol_dir = os.path.join(proj_dir,'data')
analysis_dir = os.path.abspath(os.path.join(os.getcwd(),'..'))
results_dir = os.path.join(proj_dir,'results')

# paths specific to this experiment
experiment_results_dir = os.path.join(results_dir, experiment_name)
plot_dir = os.path.join(experiment_results_dir,'plots')
csv_dir = os.path.join(experiment_results_dir,'csv')
json_dir = os.path.join(experiment_results_dir,'json')

png_dir = os.path.abspath(os.path.join(datavol_dir,'png'))
jefan_dir = os.path.join(analysis_dir,'jefan')
will_dir = os.path.join(analysis_dir,'will')

## add helpers to python path
if os.path.join(proj_dir,'stimuli') not in sys.path:
    sys.path.append(os.path.join(proj_dir,'stimuli'))

## create the output folders
# exist_ok=True replaces the separate "exists?" check: the cell can be re-run
# freely and there is no gap between checking for and creating a folder.
for _output_dir in (results_dir, plot_dir, csv_dir):
    os.makedirs(_output_dir, exist_ok=True)

In [867]:
# set vars
from urllib.parse import quote_plus

# auth.txt contains the password for the sketchloop user.
# dtype=str keeps the password as text even if it happens to look numeric.
auth = pd.read_csv(os.path.join(proj_dir, 'auth.txt'), header = None, dtype = str)
pswd = auth.values[0][0]
user = 'sketchloop'
host = 'cogtoolslab.org' ## cocolab ip address

# have to fix this to be able to analyze from local
# NOTE(review): the client connects to 127.0.0.1 rather than `host`
# (presumably through a tunnel to the database server — confirm).
# Username and password are percent-escaped so that reserved characters
# (':', '/', '@', '%', ...) in the password cannot corrupt the URI.
conn = pm.MongoClient('mongodb://' + quote_plus(user) + ':' + quote_plus(pswd) + '@127.0.0.1')
db = conn['block_construction']
coll = db['build_components']

In [1120]:
# Plugin names. Trial-level records are selected with
# {'datatype': 'trial_end'} together with {'trial_type': <plugin name>}.

# --- encode phase ---
BUILD_COPY = 'block-tower-building-undo'
TOWER_VIEWING = 'block-tower-viewing'
MATCH = 'block-tower-match-to-sample'
BUILD_WM = 'block-tower-building-undo-nostim'

ENCODE_TASKS = [
    BUILD_COPY,
    TOWER_VIEWING,
    MATCH,
    BUILD_WM,
]

# --- decode phase ---
OLD_NEW = 'block-tower-old-new-img'
BUILD_RECALL = 'block-tower-building-recall-choose-color'

DECODE_TASKS = [
    OLD_NEW,
    BUILD_RECALL,
]

# --- additional data types, selected with {'datatype': <value>} ---
# TODO (applies to all four): check that this is saved from all building
# plugins (BUILD_COPY, BUILD_WM, BUILD_RECALL).
BLOCK = 'block_placement'
RESET = 'reset'
UNDO = 'block_undo_placement'
REDO = 'block_redo_placement'

In [1121]:
# iteration names
# Exactly one `iteration_name` assignment should be left uncommented; it selects
# the dataset retrieved by the queries below.

# data run-throughs
# iteration_name = 'build_components_cogsci_ve_old_new_data_run_through_2'
# iteration_name = 'build_components_cogsci_ve_recall_data_run_through'
# iteration_name = 'build_components_cogsci_wm_old_new_data_run_through'
# iteration_name = 'build_components_cogsci_wm_recall_data_run_through'


# prolific pilots
# iteration_name = "build_components_cogsci_ve_old_new_prolific_pilot_0"
iteration_name = "build_components_cogsci_ve_recall_prolific_pilot_0"
# iteration_name = "build_components_cogsci_wm_old_new_prolific_pilot_0"
# iteration_name = "build_components_cogsci_wm_recall_prolific_pilot_0"


# wrapped in a list because the queries filter with {'$in': iteration_names}
iteration_names = [iteration_name]

# dataframe plan

df_encode: encode phase from all iterations

df_encode_ve: all visual exposure trials
df_encode_wm: all working memory trials

df_recall: recall only 
df_recog: old-new only


We rarely compare between the visual exposure (VE) and working memory (WM) versions.
It's more important for us to compare conditions within recognition (old-new) and within recall.


In [1122]:
# Every record logged under the selected iteration(s), whatever its datatype.
query = coll.find(
    {"$and": [
        {'iterationName': {'$in': iteration_names}},
    ]}
)
df_all = pd.DataFrame(query)
print(len(df_all))  # number of records retrieved

In [1123]:
# Column names present across all retrieved records.
df_all.columns

In [1124]:
# Distinct trial_type values among all retrieved records.
df_all.trial_type.unique()

In [1125]:
# Records with datatype 'metadata' for the selected iteration(s).
# I don't think metadata is saved anywhere (if so, the count printed below is 0).
query = coll.find(
    {"$and": [
        {'datatype': 'metadata'},
        {'iterationName': {'$in': iteration_names}},
    ]}
)
df_meta = pd.DataFrame(query)
print(len(df_meta))

In [1126]:
# exit survey responses
query = coll.find({"$and":[
                        {'iterationName': { '$in': iteration_names }},
                        {'trial_type': {'$in': ['survey-text']}}
                        ]})
df_survey = pd.DataFrame(query)
print(len(df_survey))

# When no survey records come back the frame has no 'response' column at all,
# so only read it when there is at least one row.
if len(df_survey) > 0:
    for response in df_survey.response:
        print(response)

# trial end data

In [1127]:
df_trial

In [1128]:
# trial-end
query = coll.find({"$and":[
                        {'iterationName': { '$in': iteration_names }},
                        {'datatype':'trial_end'},
                        {'trial_type': {'$nin': ['instructions','preload','external-html','survey-text']}}
                        ]})
df_trial = pd.DataFrame(query)
print(len(df_trial))

In [1129]:
# Inspect the relative_time column of the trial-end records.
df_trial.relative_time

## encode phase

In [1130]:
# Encode phase: trial-end records from the learning / exposure tasks
# listed in ENCODE_TASKS.
query = coll.find(
    {"$and": [
        {'iterationName': {'$in': iteration_names}},
        {'datatype': 'trial_end'},
        {'trial_type': {'$in': ENCODE_TASKS}},
    ]}
)
df_encode = pd.DataFrame(query)
print(len(df_encode))

# trial_type only exists as a column when at least one record was returned
if len(df_encode) > 0:
    print('encode trials found:', list(df_encode['trial_type'].unique()))

In [1131]:
# in the WM versions, 'block-tower-viewing' trials appear in both conditions as the 'STUDY' part of both tasks

## decode phase

In [1132]:
# Decode phase: trial-end records from every task in DECODE_TASKS
# (old-new judgements as well as recall building).
query = coll.find(
    {"$and": [
        {'iterationName': {'$in': iteration_names}},
        {'datatype': 'trial_end'},
        {'trial_type': {'$in': DECODE_TASKS}},
    ]}
)
df_decode = pd.DataFrame(query)
print(len(df_decode))

# trial_type only exists as a column when at least one record was returned
if len(df_decode) > 0:
    print('decode trials found:', list(df_decode['trial_type'].unique()))

In [1133]:
# Recall building trials only: recalled towers are saved one per trial,
# in up to 6 trials.
query = coll.find(
    {"$and": [
        {'iterationName': {'$in': iteration_names}},
        {'datatype': 'trial_end'},
        {'trial_type': BUILD_RECALL},
    ]}
)
df_recalled_towers = pd.DataFrame(query)
print(len(df_recalled_towers))

## additional data

In [1134]:
# block placements
query = coll.find({"$and":[
                        {'datatype': BLOCK},
                        {'iterationName': { '$in': iteration_names }},
                        ]})
df_block = pd.DataFrame(query)
print(len(df_block))
# Same guard as the reset / undo / redo cells: an empty result has no
# trial_type column, so only report on it when records were found.
if len(df_block) > 0:
    print('individual block data found in:', list(df_block.trial_type.unique()))

In [1135]:
# Reset events for the selected iteration(s).
query = coll.find(
    {"$and": [
        {'datatype': RESET},
        {'iterationName': {'$in': iteration_names}},
    ]}
)
df_reset = pd.DataFrame(query)
print(len(df_reset))

# report which plugins produced resets, if there were any
if len(df_reset) > 0:
    print('reset data found in:', list(df_reset['trial_type'].unique()))

In [1136]:
# Undo events for the selected iteration(s).
query = coll.find(
    {"$and": [
        {'datatype': UNDO},
        {'iterationName': {'$in': iteration_names}},
    ]}
)
df_undo = pd.DataFrame(query)
print(len(df_undo))

# report which plugins produced undos, if there were any
if len(df_undo) > 0:
    print('undo data found in:', list(df_undo['trial_type'].unique()))

In [1137]:
# Redo events for the selected iteration(s).
query = coll.find(
    {"$and": [
        {'datatype': REDO},
        {'iterationName': {'$in': iteration_names}},
    ]}
)
df_redo = pd.DataFrame(query)
print(len(df_redo))

# report which plugins produced redos, if there were any
if len(df_redo) > 0:
    print('redo data found in:', list(df_redo['trial_type'].unique()))

In [1138]:
# Undo and redo events stacked into one frame.
df_undo_redo = pd.concat([df_undo, df_redo], ignore_index=True)

# Full event log (placements, undos, redos, resets), ordered by game, trial and
# relative_time. reset_index() keeps each row's pre-sort position in an
# 'index' column.
df_construction_procedure = (
    pd.concat([df_block, df_undo, df_redo, df_reset], ignore_index=True)
      .sort_values(['gameID','trial_num','relative_time'], ascending=True)
      .reset_index()
)

In [1139]:
# Distinct encode-phase plugins; an empty frame has no trial_type column.
df_encode.trial_type.unique() if len(df_encode) > 0 else []

In [1140]:
# Distinct decode-phase plugins; an empty frame has no trial_type column.
df_decode.trial_type.unique() if len(df_decode) > 0 else []

In [1141]:
# Plugins that logged block placements; an empty frame has no trial_type column.
df_block.trial_type.unique() if len(df_block) > 0 else []

In [1142]:
# Plugins that logged resets; an empty frame has no trial_type column.
df_reset.trial_type.unique() if len(df_reset) > 0 else []

In [1143]:
# Event datatypes present in the merged event log.
df_construction_procedure.datatype.unique()

In [1144]:
# Plugins that contributed events to the merged event log.
df_construction_procedure.trial_type.unique()

In [1145]:
# Identifying columns of the merged event log, in its sorted order.
df_construction_procedure[['gameID','trial_num','relative_time','datatype','trial_type']]

## export data

In [1146]:
# Write each frame to <experiment_results_dir>/cogsci24/<frame name>_<iteration_name>.csv
export_dir = os.path.join(experiment_results_dir, 'cogsci24')
# to_csv does not create missing folders, so make sure the target exists first
os.makedirs(export_dir, exist_ok=True)

frames_to_export = {
    'df_encode': df_encode,
    'df_decode': df_decode,
    'df_block': df_block,
    'df_reset': df_reset,
    'df_construction_procedure': df_construction_procedure,
}
# recalled towers are only written when there is at least one of them
if len(df_recalled_towers) > 0:
    frames_to_export['df_recalled_towers'] = df_recalled_towers

for frame_name, frame in frames_to_export.items():
    frame.to_csv(os.path.join(export_dir, '{}_{}.csv'.format(frame_name, iteration_name)))

In [1147]:
! open ~/zipping/results/build_components/cogsci24/

### Exclusion criteria (implement in analysis scripts)

In [1148]:
# df_all_trial = pd.concat([df_learn, df_recalled_towers], ignore_index=True)

In [1149]:
# # remove experimenter data
# remove_tests = False

# if remove_tests:
#     df_build = df_build[~df_build.workerID.isna()]
#     df_survey = df_survey[~df_survey.workerID.isna()]
#     df_learn = df_learn[~df_learn.workerID.isna()]
#     df_recall = df_recall[~df_recall.workerID.isna()]

In [1150]:
# df_learn.groupby(['workerID','gameID']).apply(len)

In [1151]:
# # remove incomplete datasets (build recall)
# remove_incomplete_datasets = True
# n_expected_learn_trials = 18

# if remove_incomplete_datasets:
#     a = df_learn.groupby('gameID').apply(len) == n_expected_learn_trials
#     complete_zipping_set_gameIDs = list(a[a].index)
#     df_trials = df_all_trial[df_all_trial.gameID.isin(complete_zipping_set_gameIDs)]
#     df_learn = df_learn[df_learn.gameID.isin(complete_zipping_set_gameIDs)]
#     df_recalled_towers = df_recalled_towers[df_recalled_towers.gameID.isin(complete_zipping_set_gameIDs)]
    
#     incomplete_zipping_set_gameIDs = list(a[~a].index)
#     print(str(len(incomplete_zipping_set_gameIDs)) + ' ppts removed for incomplete data')
#     print(str(len(complete_zipping_set_gameIDs)) + ' ppts left')
# else: 
#     print('No ppts removed')