In [1]:
import os
import sys
import urllib, io
import pickle

import numpy as np
import scipy.stats as stats
import pandas as pd
from sklearn.metrics import euclidean_distances, jaccard_score, pairwise_distances

import pymongo as pm
from collections import Counter
import json
import re
import ast

from PIL import Image, ImageOps, ImageDraw, ImageFont 
from IPython.core.display import HTML 

from io import BytesIO
import base64
import requests

import  matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42

import seaborn as sns
sns.set_context('talk')
sns.set_style('darkgrid')

from IPython.display import clear_output

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")

sys.path.append("../../stimuli/block_utils/")
import blockworld_utils as utils

In [24]:
experiment_name = 'build_components'

## directory & file hierarchy
proj_dir = os.path.abspath('../..')
datavol_dir = os.path.join(proj_dir,'data')
analysis_dir = os.path.abspath(os.path.join(os.getcwd(),'..'))
results_dir = os.path.join(analysis_dir,'results')

# paths specific to this experiment
experiment_results_dir = os.path.join(results_dir, experiment_name)
plot_dir = os.path.join(experiment_results_dir,'plots')
csv_dir = os.path.join(experiment_results_dir,'csv')
json_dir = os.path.join(experiment_results_dir,'json')

png_dir = os.path.abspath(os.path.join(datavol_dir,'png'))
jefan_dir = os.path.join(analysis_dir,'jefan')
will_dir = os.path.join(analysis_dir,'will')

## add helpers to python path
if os.path.join(proj_dir,'stimuli') not in sys.path:
    sys.path.append(os.path.join(proj_dir,'stimuli'))
    
if not os.path.exists(results_dir):
    os.makedirs(results_dir)
    
if not os.path.exists(plot_dir):
    os.makedirs(plot_dir)   
    
if not os.path.exists(csv_dir):
    os.makedirs(csv_dir)       

In [3]:
# set vars 
auth = pd.read_csv(os.path.join(proj_dir, 'auth.txt'), header = None) # this auth.txt file contains the password for the sketchloop user
pswd = auth.values[0][0]
user = 'sketchloop'
host = 'cogtoolslab.org' ## cocolab ip address

# have to fix this to be able to analyze from local
import pymongo as pm
conn = pm.MongoClient('mongodb://sketchloop:' + pswd + '@127.0.0.1')
db = conn['block_construction']
coll = db['build_components']

# which iteration name should we use?
iteration_name = 'build_components_pilot_2'
iteration_names = [iteration_name]

In [5]:
# all data
query = coll.find({"$and":[
                        {'iterationName': { '$in': iteration_names }},
                        ]})
df_all = pd.DataFrame(query)
print(len(df_all))

6376


In [4]:
# load metadata
query = coll.find({"$and":[
                        {'datatype':'metadata'},
                        {'iterationName': { '$in': iteration_names }},
                        ]})
df_meta = pd.DataFrame(query)
print(len(df_meta))

64


In [9]:
# learning/ exposure trials
learn_conditions = ['block-tower-viewing','block-tower-building']

query = coll.find({"$and":[
                        {'iterationName': { '$in': iteration_names }},
                        {'trial_type':{ '$in': learn_conditions }},
                        ]})
df_learn = pd.DataFrame(query)
print(len(df_learn))

631


In [51]:
# block placements
query = coll.find({"$and":[
                        {'datatype':'block_placement'},
                        {'iterationName': { '$in': iteration_names }},
                        ]})
df_blocks = pd.DataFrame(query)
print(len(df_blocks))

3626


In [11]:
# old-new judgements
query = coll.find({"$and":[
                        {'iterationName': { '$in': iteration_names }},
                        {'trial_type':'block-tower-old-new'},
                        ]})
df_recall = pd.DataFrame(query)
print(len(df_recall))

1200


In [13]:
# look at survey responses
# list(df_all[df_all.trial_type == 'survey-text']['response'])

In [18]:
# look at columns
# list(np.sort(df_all.columns))

### Exclusion criteria

In [46]:
df_all_trial = df_learn.append(df_recall, ignore_index=True)

In [47]:
# remove experimenter data
remove_tests = False

if remove_tests:
    df_build = df_build[~df_build.workerID.isna()]
    df_survey = df_survey[~df_survey.workerID.isna()]
    df_learn = df_learn[~df_learn.workerID.isna()]
    df_recall = df_recall[~df_recall.workerID.isna()]

In [48]:
# remove incomplete datasets
remove_incomplete_datasets = True
n_expected_trials = 36

if remove_incomplete_datasets:
    a = df_all_trial.groupby('gameID').apply(len) == n_expected_trials
    complete_zipping_set_gameIDs = list(a[a].index)
    df_trials = df_all_trial[df_all_trial.gameID.isin(complete_zipping_set_gameIDs)]
    df_learn = df_learn[df_learn.gameID.isin(complete_zipping_set_gameIDs)]
    df_recall = df_recall[df_recall.gameID.isin(complete_zipping_set_gameIDs)]
    
    incomplete_zipping_set_gameIDs = list(a[~a].index)
    print(str(len(incomplete_zipping_set_gameIDs)) + ' ppts removed for incomplete data')
    print(str(len(complete_zipping_set_gameIDs)) + ' ppts left')
else: 
    print('No ppts removed')

0 ppts removed for incomplete data
50 ppts left


#### Flag potentially weird behaviors

Flag:
- < 50% accuracy?
- button bashing on recall trials?
- fast rts on recall trials?

In [49]:
df_trial = df_learn.append(df_recall, ignore_index=True)

In [50]:
df_trial

Unnamed: 0,_id,trial_start_time,trial_finish_time,condition,stimulus,response,trial_num,block_str,tower_id,tower_A_tall_id,...,eventType,endReason,relative_time,rep,n_resets,rt,novelty,response_meaning,response_correct,key_presses
0,637e7b77c178f27685c4f608,1.669234e+12,1.669234e+12,view,"{'blocks': [{'x': 1, 'y': 0, 'height': 2, 'wid...",,1,0000000000000000010100000101000011110000111000...,talls_101_100,tall_101,...,,,,,,,,,,
1,637e7b88c178f27685c4f60a,1.669234e+12,1.669234e+12,view,"{'blocks': [{'x': 0, 'y': 0, 'height': 1, 'wid...",,2,0000000000000000010100000101000011110000100100...,talls_100_097,tall_100,...,,,,,,,,,,
2,637e7b98c178f27685c4f60c,1.669234e+12,1.669234e+12,view,"{'blocks': [{'x': 0, 'y': 0, 'height': 1, 'wid...",,3,0000000000000000011100000110000011100000101000...,talls_100_125,tall_100,...,,,,,,,,,,
3,637e7bb3c178f27685c4f61c,1.669234e+12,,build,"{'blocks': [{'x': 1, 'y': 0, 'height': 2, 'wid...",,4,0000000000000000110100000101000001110000111000...,talls_121_100,tall_121,...,trial_end,perfect-reconstruction-translation,21617.0,0.0,0.0,,,,,
4,637e7bc3c178f27685c4f61e,1.669234e+12,1.669234e+12,view,"{'blocks': [{'x': 0, 'y': 0, 'height': 2, 'wid...",,5,0000000000000000101000001010000011110000111000...,talls_116_114,tall_116,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1795,637ea9a9c178f27685c50ea5,1.669245e+12,1.669245e+12,build,"{'blocks': [{'x': 1, 'y': 0, 'height': 2, 'wid...",z,32,0000000000000000010100000101000011110000111000...,talls_101_100,tall_101,...,,,,,,2649.0,old,new,0.0,1.0
1796,637ea9afc178f27685c50ea6,1.669245e+12,1.669245e+12,foil,"{'blocks': [{'x': 1, 'y': 0, 'height': 2, 'wid...",z,33,0000000000000000011000000110000011110000110100...,talls_101_111,tall_101,...,,,,,,2220.6,new,new,1.0,1.0
1797,637ea9b6c178f27685c50ea7,1.669245e+12,1.669245e+12,foil,"{'blocks': [{'x': 1, 'y': 0, 'height': 2, 'wid...",z,34,0000000000000000110100000101000001110000111000...,talls_121_100,tall_121,...,,,,,,2261.4,new,new,1.0,1.0
1798,637ea9bbc178f27685c50ea8,1.669245e+12,1.669245e+12,view,"{'blocks': [{'x': 0, 'y': 0, 'height': 2, 'wid...",z,35,0000000000000000011100000110000011100000111100...,talls_102_127,tall_102,...,,,,,,1791.2,old,new,0.0,1.0


### Save data to csv

In [52]:
df_trial.to_csv(analysis_dir + '/df_trial_{}.csv'.format(iteration_name))
df_blocks.to_csv(analysis_dir + '/df_blocks_{}.csv'.format(iteration_name))

In [32]:
df_trial = pd.read_csv(analysis_dir + '/df_trial_{}.csv'.format(iteration_name))
df_trial

Unnamed: 0.1,Unnamed: 0,_id,trial_start_time,trial_finish_time,condition,stimulus,response,trial_num,block_str,tower_id,...,eventType,endReason,relative_time,rep,n_resets,rt,novelty,response_meaning,response_correct,key_presses
0,0,637e7b77c178f27685c4f608,1.669234e+12,1.669234e+12,view,"{'blocks': [{'x': 1, 'y': 0, 'height': 2, 'wid...",,1,0000000000000000010100000101000011110000111000...,talls_101_100,...,,,,,,,,,,
1,1,637e7b88c178f27685c4f60a,1.669234e+12,1.669234e+12,view,"{'blocks': [{'x': 0, 'y': 0, 'height': 1, 'wid...",,2,0000000000000000010100000101000011110000100100...,talls_100_097,...,,,,,,,,,,
2,2,637e7b98c178f27685c4f60c,1.669234e+12,1.669234e+12,view,"{'blocks': [{'x': 0, 'y': 0, 'height': 1, 'wid...",,3,0000000000000000011100000110000011100000101000...,talls_100_125,...,,,,,,,,,,
3,3,637e7bb3c178f27685c4f61c,1.669234e+12,,build,"{'blocks': [{'x': 1, 'y': 0, 'height': 2, 'wid...",,4,0000000000000000110100000101000001110000111000...,talls_121_100,...,trial_end,perfect-reconstruction-translation,21617.0,0.0,0.0,,,,,
4,4,637e7bc3c178f27685c4f61e,1.669234e+12,1.669234e+12,view,"{'blocks': [{'x': 0, 'y': 0, 'height': 2, 'wid...",,5,0000000000000000101000001010000011110000111000...,talls_116_114,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1795,1795,637ea9a9c178f27685c50ea5,1.669245e+12,1.669245e+12,build,"{'blocks': [{'x': 1, 'y': 0, 'height': 2, 'wid...",z,32,0000000000000000010100000101000011110000111000...,talls_101_100,...,,,,,,2649.0,old,new,0.0,1.0
1796,1796,637ea9afc178f27685c50ea6,1.669245e+12,1.669245e+12,foil,"{'blocks': [{'x': 1, 'y': 0, 'height': 2, 'wid...",z,33,0000000000000000011000000110000011110000110100...,talls_101_111,...,,,,,,2220.6,new,new,1.0,1.0
1797,1797,637ea9b6c178f27685c50ea7,1.669245e+12,1.669245e+12,foil,"{'blocks': [{'x': 1, 'y': 0, 'height': 2, 'wid...",z,34,0000000000000000110100000101000001110000111000...,talls_121_100,...,,,,,,2261.4,new,new,1.0,1.0
1798,1798,637ea9bbc178f27685c50ea8,1.669245e+12,1.669245e+12,view,"{'blocks': [{'x': 0, 'y': 0, 'height': 2, 'wid...",z,35,0000000000000000011100000110000011100000111100...,talls_102_127,...,,,,,,1791.2,old,new,0.0,1.0
