In [1]:
import os
import sys
import urllib, io
import pickle

import numpy as np
import scipy.stats as stats
import pandas as pd
from sklearn.metrics import euclidean_distances, jaccard_score, pairwise_distances

import pymongo as pm
from collections import Counter
import json
import re
import ast

from PIL import Image, ImageOps, ImageDraw, ImageFont 
from IPython.core.display import HTML 

from io import BytesIO
import base64
import requests

import  matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42

import seaborn as sns
sns.set_context('talk')
sns.set_style('darkgrid')

from IPython.display import clear_output

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")

sys.path.append("../../stimuli/block_utils/")
import blockworld_utils as utils

In [2]:
experiment_name = 'build_components'

## directory & file hierarchy
proj_dir = os.path.abspath('../..')
datavol_dir = os.path.join(proj_dir,'data')
analysis_dir = os.path.abspath(os.path.join(os.getcwd(),'..'))
results_dir = os.path.join(analysis_dir,'results')

# paths specific to this experiment
experiment_results_dir = os.path.join(results_dir, experiment_name)
plot_dir = os.path.join(experiment_results_dir,'plots')
csv_dir = os.path.join(experiment_results_dir,'csv')
json_dir = os.path.join(experiment_results_dir,'json')

png_dir = os.path.abspath(os.path.join(datavol_dir,'png'))
jefan_dir = os.path.join(analysis_dir,'jefan')
will_dir = os.path.join(analysis_dir,'will')

## add helpers to python path
if os.path.join(proj_dir,'stimuli') not in sys.path:
    sys.path.append(os.path.join(proj_dir,'stimuli'))
    
if not os.path.exists(results_dir):
    os.makedirs(results_dir)
    
if not os.path.exists(plot_dir):
    os.makedirs(plot_dir)   
    
if not os.path.exists(csv_dir):
    os.makedirs(csv_dir)       

In [3]:
# set vars 
auth = pd.read_csv(os.path.join(proj_dir, 'auth.txt'), header = None) # this auth.txt file contains the password for the sketchloop user
pswd = auth.values[0][0]
user = 'sketchloop'
host = 'cogtoolslab.org' ## cocolab ip address

# have to fix this to be able to analyze from local
import pymongo as pm
conn = pm.MongoClient('mongodb://sketchloop:' + pswd + '@127.0.0.1')
db = conn['block_construction']
coll = db['build_components']

# which iteration name should we use?
iteration_name = 'build_components_build_recall_prolific_pilot_12_towers'
iteration_names = [iteration_name]

In [4]:
# all data
query = coll.find({"$and":[
                        {'iterationName': { '$in': iteration_names }},
                        ]})
df_all = pd.DataFrame(query)
print(len(df_all))

1811


In [5]:
# load metadata
query = coll.find({"$and":[
                        {'datatype':'metadata'},
                        {'iterationName': { '$in': iteration_names }},
                        ]})
df_meta = pd.DataFrame(query)
print(len(df_meta))

0


In [6]:
# learning/ exposure trials
learn_conditions = ['block-tower-viewing','block-tower-building-undo']

query = coll.find({"$and":[
                        {'iterationName': { '$in': iteration_names }},
                        {'trial_type':{ '$in': learn_conditions }},
                        ]})
df_learn = pd.DataFrame(query)
print(len(df_learn))

120


In [7]:
df_all.datatype.unique()

array(['trial_end', 'reset', 'block_placement', 'block_undo_placement',
       'recalled_tower', 'block_redo_placement'], dtype=object)

In [8]:
# block placements
query = coll.find({"$and":[
                        {'datatype':'block_placement'},
                        {'iterationName': { '$in': iteration_names }},
                        ]})
df_blocks = pd.DataFrame(query)
print(len(df_blocks))

1343


In [9]:
# old-new judgements
query = coll.find({"$and":[
                        {'iterationName': { '$in': iteration_names }},
                        {'trial_type':'block-tower-build-recall'},
                        ]})
df_recall_trial = pd.DataFrame(query)
print(len(df_recall_trial))

10


In [10]:
# old-new judgements
query = coll.find({"$and":[
                        {'iterationName': { '$in': iteration_names }},
                        {'datatype':'recalled_tower'},
                        ]})
df_recalled_towers = pd.DataFrame(query)
print(len(df_recalled_towers))

65


In [11]:
# look at survey responses
list(df_all[df_all.trial_type == 'survey-text']['response'])

[{'technical': '', 'confused': '', 'comments': ''},
 {'technical': 'No technical difficulties',
  'confused': 'The instructions were quite clear ',
  'comments': 'My short term memory seem to have become a slight blocker for the final part of the experiment'},
 {'technical': 'no',
  'confused': 'no',
  'comments': 'this was excellent thank you '},
 {'technical': 'no', 'confused': 'no', 'comments': 'It was difficult!'},
 {'technical': 'No', 'confused': 'No', 'comments': 'NA'},
 {'technical': '',
  'confused': '',
  'comments': 'Difficult to recall initial shapes are just a few others'},
 {'technical': '', 'confused': '', 'comments': ''},
 {'technical': 'No technical difficulties',
  'confused': "Having to look at various  towers and then build different ones was confusing. I also didn't figure out how to use the undo ctrl+z until the very end",
  'comments': ''},
 {'technical': 'sometimes the click didnt work at first',
  'confused': 'No. ',
  'comments': 'Thanks for the opportunity, I 

### Exclusion criteria

In [None]:
df_all_trial = df_learn.append(df_recall, ignore_index=True)

In [None]:
# remove experimenter data
remove_tests = False

if remove_tests:
    df_build = df_build[~df_build.workerID.isna()]
    df_survey = df_survey[~df_survey.workerID.isna()]
    df_learn = df_learn[~df_learn.workerID.isna()]
    df_recall = df_recall[~df_recall.workerID.isna()]

In [None]:
# remove incomplete datasets
remove_incomplete_datasets = True
n_expected_trials = 36

if remove_incomplete_datasets:
    a = df_all_trial.groupby('gameID').apply(len) == n_expected_trials
    complete_zipping_set_gameIDs = list(a[a].index)
    df_trials = df_all_trial[df_all_trial.gameID.isin(complete_zipping_set_gameIDs)]
    df_learn = df_learn[df_learn.gameID.isin(complete_zipping_set_gameIDs)]
    df_recall = df_recall[df_recall.gameID.isin(complete_zipping_set_gameIDs)]
    
    incomplete_zipping_set_gameIDs = list(a[~a].index)
    print(str(len(incomplete_zipping_set_gameIDs)) + ' ppts removed for incomplete data')
    print(str(len(complete_zipping_set_gameIDs)) + ' ppts left')
else: 
    print('No ppts removed')

#### Flag potentially weird behaviors

Flag:
- < 50% accuracy?
- button bashing on recall trials?
- fast rts on recall trials?

In [21]:
df_trial = df_learn.append(df_recall_trial, ignore_index=True)

  df_trial = df_learn.append(df_recall_trial, ignore_index=True)


### Save data to csv

In [22]:
df_learn.to_csv(experiment_results_dir + '/df_learn_{}.csv'.format(iteration_name))
df_trial.to_csv(experiment_results_dir + '/df_trial_{}.csv'.format(iteration_name))
df_recall_trial.to_csv(experiment_results_dir + '/df_recall_trial_{}.csv'.format(iteration_name))
df_recalled_towers.to_csv(experiment_results_dir + '/df_recalled_towers_{}.csv'.format(iteration_name))
df_blocks.to_csv(experiment_results_dir + '/df_blocks_{}.csv'.format(iteration_name))

In [None]:
df_trial = pd.read_csv(experiment_results_dir + '/df_trial_{}.csv'.format(iteration_name))
df_trial