In [112]:
import os
import sys
import urllib, io

import numpy as np
import scipy.stats as stats
import pandas as pd

import pymongo as pm
from collections import Counter
import json
import re
import ast

from PIL import Image, ImageOps, ImageDraw, ImageFont 

from io import BytesIO
import base64

import  matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42

import seaborn as sns
sns.set_context('talk')
sns.set_style('darkgrid')

from IPython.display import clear_output

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")

In [142]:
## directory & file hierarchy
proj_dir = os.path.abspath('..')
datavol_dir = os.path.join(proj_dir,'data')
analysis_dir = os.path.abspath(os.path.join(os.getcwd(),'..'))
results_dir = os.path.join(proj_dir,'results')
plot_dir = os.path.join(results_dir,'plots')
csv_dir = os.path.join(results_dir,'csv')
json_dir = os.path.join(results_dir,'json')
exp_dir = os.path.abspath(os.path.join(proj_dir,'experiments'))
png_dir = os.path.abspath(os.path.join(datavol_dir,'png'))
jefan_dir = os.path.join(analysis_dir,'jefan')
will_dir = os.path.join(analysis_dir,'will')

## add helpers to python path
if os.path.join(proj_dir,'stimuli') not in sys.path:
    sys.path.append(os.path.join(proj_dir,'stimuli'))
    
if not os.path.exists(results_dir):
    os.makedirs(results_dir)
    
if not os.path.exists(plot_dir):
    os.makedirs(plot_dir)   
    
if not os.path.exists(csv_dir):
    os.makedirs(csv_dir)       

In [153]:
## set vars 
auth = pd.read_csv(os.path.join(analysis_dir,'auth.txt'), header = None) # this auth.txt file contains the password for the sketchloop user
pswd = auth.values[0][0]
user = 'sketchloop'
host = 'cogtoolslab.org' ## cocolab ip address

## have to fix this to be able to analyze from local
import pymongo as pm
conn = pm.MongoClient('mongodb://sketchloop:' + pswd + '@127.0.0.1')
db = conn['block_construction']
coll = db['silhouette']

## which iteration name should we use?
iterationName = 'pilot4'

## Sanity Checks

In [154]:
# Ensure one to one gameID and workerId 
# Should only happen if a repeat worker gets through

query = coll.find({"$and":[
                        {'workerId':{'$exists':True}},
                        {'condition':{'$ne':'practice'}},
                        {'eventType':'trial_end'},
                        {"$or":[{'iterationName':'pilot2'},
                                {'iterationName':'pilot3'},
                                {'iterationName':'pilot4'}]},
                        {'trialNum':0}]
                     })

df_trial_end_full = pd.DataFrame(list(query.sort('timeAbsolute')))
#df_trial_end_full[['workerId','gameID']]


assert (np.mean(df_trial_end_full['workerId'].value_counts()) == np.mean(df_trial_end_full['gameID'].value_counts()))

In [155]:
# get ids of people with trial 15 data
query = coll.find({"$and":[
                        {'condition':{'$ne':'practice'}},
                        {'eventType':'trial_end'},
                        {'iterationName': iterationName},
                        {'trialNum':15}]
                     })
complete_data_df = pd.DataFrame(query)
complete_data_ids = list(complete_data_df['workerId'])

In [156]:
complete_data_df['workerId'].nunique()

35

## Collect data from db and filter with sanity checks

In [157]:
query = coll.find({"$and":[
                        {'condition':{'$ne':'practice'}},
                        {'eventType':'trial_end'},
                        {'iterationName':iterationName}]
                     })

df_trial_end_full = pd.DataFrame(list(query.sort('timeAbsolute')))



# filter dataframe for complete datasets
df_trial_end_full_filtered = df_trial_end_full[df_trial_end_full.workerId.isin(complete_data_ids)]


# reduce to crucial information
df_trial_end_reduced_filtered = df_trial_end_full_filtered[['gameID','trialNum','phase','condition',
                                                            'eventType','score','normedScore','numBlocks',
                                                            'timeAbsolute','timeRelative','buildTime',
                                                            'currBonus','exploreResets','buildResets',
                                                            'allVertices','nPracticeAttempts','exploreStartTime',
                                                            'buildStartTime','buildFinishTime','targetName','numBlocksExplore']]

df_for_analysis = df_trial_end_reduced_filtered.sort_values(by=['gameID', 'timeAbsolute'])
df_for_analysis.head()


Unnamed: 0,gameID,trialNum,phase,condition,eventType,score,normedScore,numBlocks,timeAbsolute,timeRelative,...,currBonus,exploreResets,buildResets,allVertices,nPracticeAttempts,exploreStartTime,buildStartTime,buildFinishTime,targetName,numBlocksExplore
498,0468-bb3f0e39-c829-430d-9c75-c538d48abe22,0,build,mental,trial_end,0.03,0.882519,5,1575564000000.0,320950.72,...,0.03,0,2,"[[{'x': 496.5844199549109, 'y': 592.4785727811...",,1575563000000.0,1575563000000.0,1575564000000.0,hand_selected_015,0
506,0468-bb3f0e39-c829-430d-9c75-c538d48abe22,1,build,physical,trial_end,0.03,0.401916,2,1575564000000.0,416116.955,...,0.0,0,1,"[[{'x': 501.0962572421163, 'y': 592.4968160271...",,1575564000000.0,1575564000000.0,1575564000000.0,hand_selected_006,0
512,0468-bb3f0e39-c829-430d-9c75-c538d48abe22,2,build,physical,trial_end,0.06,0.879605,7,1575564000000.0,511102.92,...,0.03,0,0,"[[{'x': 273.84901599820495, 'y': 592.692045004...",,1575564000000.0,1575564000000.0,1575564000000.0,hand_selected_004,2
518,0468-bb3f0e39-c829-430d-9c75-c538d48abe22,3,build,mental,trial_end,0.06,0.557653,10,1575564000000.0,606017.875,...,0.0,0,0,"[[{'x': 414.4062149949713, 'y': 482.4785754458...",,1575564000000.0,1575564000000.0,1575564000000.0,hand_selected_001,0
523,0468-bb3f0e39-c829-430d-9c75-c538d48abe22,4,build,mental,trial_end,0.11,0.976224,7,1575564000000.0,701017.77,...,0.05,0,0,"[[{'x': 444.9290823724809, 'y': 592.8611021306...",,1575564000000.0,1575564000000.0,1575564000000.0,hand_selected_005,0


In [158]:
## save out to csv dir, where all the csv's go to live
out_path = os.path.join(csv_dir,'block_silhouette_{}.csv'.format(iterationName))
df_for_analysis.to_csv(out_path)

## Settled Block Data

In [159]:
print('Loading iteration: ' + iterationName)
query = coll.find({"$and":[
                        {'condition':{'$ne':'practice'}},
                        {'eventType':'settled'},
                        {'iterationName':iterationName}]
                     })

df_settled_full = pd.DataFrame(list(query))


# filter dataframe for complete datasets
df_settled_full_filtered = df_settled_full[df_settled_full.workerId.isin(complete_data_ids)]

print('Loaded ' + str(df_settled_full_filtered.shape[0]) + ' complete sets of settled blocks')
# reduce to crucial information

Loading iteration: pilot4


Unnamed: 0,F1Score,_id,aID,allBlockBodyProperties,allBlockDims,allVertices,blockBodyProperties,blockCenterX,blockCenterY,blockDimUnits,...,timeAbsolute,timeBlockPlaced,timeRelative,trialNum,version,vertices,workerId,worldHeightUnits,worldScale,worldWidthUnits
0,0.000000,5de923421c99bf1b2bc6f3cd,3STRJBFXOX59WHD2CT0BRRA3KA8KT7,"[{'id': 39, 'angle': 0, 'position': {'x': 552....","[[1, 2], [2, 1], [2, 2], [2, 4], [4, 2]]","[[{'x': 497.3750000000001, 'y': 482.4785714285...","{'id': 39, 'type': 'body', 'label': 'Rectangle...",552.375000,592.478571,"[2, 4]",...,1.575560e+12,1.575560e+12,129846.430,0,81,"[{'x': -247.50000000000017, 'y': 702.428571428...",A1ZB2NY0F9QNP0,8,2.2,8
1,0.000000,5de923461c99bf1b2bc6f3d9,3STRJBFXOX59WHD2CT0BRRA3KA8KT7,"[{'id': 39, 'angle': 0, 'position': {'x': 552....","[[1, 2], [2, 1], [2, 2], [2, 4], [4, 2]]","[[{'x': 497.3750000000001, 'y': 482.4785714285...","{'id': 42, 'type': 'body', 'label': 'Rectangle...",331.875000,592.478571,"[2, 4]",...,1.575560e+12,1.575560e+12,133007.680,0,81,"[{'x': -247.50000000000017, 'y': 702.428571428...",A1ZB2NY0F9QNP0,8,2.2,8
2,0.000000,5de923461c99bf1b2bc6f3de,3I33IC7ZWGG905A3HN3QWQYHFL1A2Q,"[{'id': 31, 'angle': 0, 'position': {'x': 668....","[[1, 2], [2, 1], [2, 2], [2, 4], [4, 2]]","[[{'x': 613.25, 'y': 482.4785714285739}, {'x':...","{'id': 31, 'type': 'body', 'label': 'Rectangle...",668.250000,592.478571,"[2, 4]",...,1.575560e+12,1.575560e+12,136073.630,0,81,"[{'x': -247.50000000000017, 'y': 702.428571428...",A27ZE20JZ3VDUP,8,2.2,8
3,0.000000,5de923491c99bf1b2bc6f3e3,3STRJBFXOX59WHD2CT0BRRA3KA8KT7,"[{'id': 39, 'angle': 5.900663664268233e-06, 'p...","[[1, 2], [2, 1], [2, 2], [2, 4], [4, 2]]","[[{'x': 497.37639313496567, 'y': 482.496220347...","{'id': 44, 'type': 'body', 'label': 'Rectangle...",446.626476,427.546761,"[4, 2]",...,1.575560e+12,1.575560e+12,136257.510,0,81,"[{'x': -247.50000000000017, 'y': 702.428571428...",A1ZB2NY0F9QNP0,8,2.2,8
4,0.000000,5de9234b1c99bf1b2bc6f3e7,3I33IC7ZWGG905A3HN3QWQYHFL1A2Q,"[{'id': 31, 'angle': 0, 'position': {'x': 668....","[[1, 2], [2, 1], [2, 2], [2, 4], [4, 2]]","[[{'x': 613.25, 'y': 482.47857142857515}, {'x'...","{'id': 33, 'type': 'body', 'label': 'Rectangle...",425.250000,647.478571,"[2, 2]",...,1.575560e+12,1.575560e+12,141691.875,0,81,"[{'x': -247.50000000000017, 'y': 702.428571428...",A27ZE20JZ3VDUP,8,2.2,8
5,0.000000,5de9234c1c99bf1b2bc6f3ed,3STRJBFXOX59WHD2CT0BRRA3KA8KT7,"[{'id': 39, 'angle': 1.4427832123646153e-05, '...","[[1, 2], [2, 1], [2, 2], [2, 4], [4, 2]]","[[{'x': 497.3777917881998, 'y': 482.5350679289...","{'id': 46, 'type': 'body', 'label': 'Rectangle...",586.135504,427.584865,"[1, 2]",...,1.575560e+12,1.575560e+12,139420.665,0,81,"[{'x': -247.50000000000017, 'y': 702.428571428...",A1ZB2NY0F9QNP0,8,2.2,8
6,0.000000,5de923501c99bf1b2bc6f3f6,3I33IC7ZWGG905A3HN3QWQYHFL1A2Q,"[{'id': 31, 'angle': 0, 'position': {'x': 668....","[[1, 2], [2, 1], [2, 2], [2, 4], [4, 2]]","[[{'x': 613.25, 'y': 482.4785714285764}, {'x':...","{'id': 35, 'type': 'body', 'label': 'Rectangle...",308.250000,647.478571,"[2, 2]",...,1.575560e+12,1.575560e+12,145946.340,0,81,"[{'x': -247.50000000000017, 'y': 702.428571428...",A27ZE20JZ3VDUP,8,2.2,8
7,0.000000,5de923511c99bf1b2bc6f3f8,3STRJBFXOX59WHD2CT0BRRA3KA8KT7,"[{'id': 39, 'angle': 2.4781415772939872e-05, '...","[[1, 2], [2, 1], [2, 2], [2, 4], [4, 2]]","[[{'x': 497.376769552052, 'y': 482.61805967870...","{'id': 48, 'type': 'body', 'label': 'Rectangle...",586.147514,317.926857,"[1, 2]",...,1.575560e+12,1.575560e+12,144274.145,0,81,"[{'x': -247.50000000000017, 'y': 702.428571428...",A1ZB2NY0F9QNP0,8,2.2,8
8,0.000000,5de923521c99bf1b2bc6f3ff,3I33IC7ZWGG905A3HN3QWQYHFL1A2Q,"[{'id': 31, 'angle': 0, 'position': {'x': 668....","[[1, 2], [2, 1], [2, 2], [2, 4], [4, 2]]","[[{'x': 613.25, 'y': 482.47857142857765}, {'x'...","{'id': 37, 'type': 'body', 'label': 'Rectangle...",306.004516,537.545656,"[2, 2]",...,1.575560e+12,1.575560e+12,148464.805,0,81,"[{'x': -247.50000000000017, 'y': 702.428571428...",A27ZE20JZ3VDUP,8,2.2,8
9,0.000000,5de923581c99bf1b2bc6f40a,3I33IC7ZWGG905A3HN3QWQYHFL1A2Q,"[{'id': 31, 'angle': 0, 'position': {'x': 668....","[[1, 2], [2, 1], [2, 2], [2, 4], [4, 2]]","[[{'x': 613.25, 'y': 482.4785714285789}, {'x':...","{'id': 39, 'type': 'body', 'label': 'Rectangle...",425.250000,537.547242,"[2, 2]",...,1.575560e+12,1.575560e+12,154523.690,0,81,"[{'x': -247.50000000000017, 'y': 702.428571428...",A27ZE20JZ3VDUP,8,2.2,8


In [163]:

df_settled_reduced_filtered = df_settled_full_filtered[['gameID','trialNum','phase','condition',
                                                            'eventType','numBlocks', 'timeAbsolute','timeRelative',
                                                            'normedScore','currBonus','score','incrementalScore','normedIncrementalScore',
                                                            'currBonus','allVertices','targetName','relativePlacementTime','iterationName',
                                                            'blockKind'
                                                           ]]

df_settled_reduced_filtered = df_settled_reduced_filtered.sort_values(by=['gameID', 'timeAbsolute'])

buildstart = df_for_analysis[['gameID','trialNum','buildStartTime','exploreStartTime']]

# copy across time variables that were not saved in the correct place in pilot 3
df_settled_reduced_filtered = df_settled_reduced_filtered.merge(buildstart, on=['gameID', 'trialNum'], how='left')
df_settled_reduced_filtered['timePlaced'] = df_settled_reduced_filtered['timeAbsolute'] - df_settled_reduced_filtered['buildStartTime']
df_settled_reduced_filtered.loc[(df_settled_reduced_filtered.timePlaced < 0),'timePlaced'] = df_settled_reduced_filtered[df_settled_reduced_filtered.timePlaced < 0]['timeAbsolute'] - df_settled_reduced_filtered[df_settled_reduced_filtered.timePlaced < 0]['exploreStartTime']

# 
df_settled_reduced_filtered.loc[(df_settled_reduced_filtered.iterationName == 'pilot4'),'timePlaced'] = df_settled_reduced_filtered[(df_settled_reduced_filtered.iterationName == 'pilot4')]['relativePlacementTime'] 
df_settled_reduced_filtered.loc[(df_settled_reduced_filtered.timePlaced <= 0),'timePlaced'] = 0



In [164]:
## save out to csv dir, where all the csv's go to live
out_path = os.path.join(csv_dir,'block_silhouette_settled_{}.csv'.format(iterationName))
df_settled_reduced_filtered.to_csv(out_path)

In [132]:
## which iteration name should we use?
iterationName = 'pilot4'

## load in dataframe
data_path = os.path.join(csv_dir,'block_silhouette_settled_{}.csv'.format(iterationName))
df = pd.read_csv(data_path)
df

Unnamed: 0.1,Unnamed: 0,gameID,trialNum,phase,condition,eventType,numBlocks,timeAbsolute,timeRelative,normedScore,...,incrementalScore,normedIncrementalScore,currBonus.1,allVertices,targetName,relativePlacementTime,iterationName,buildStartTime,exploreStartTime,timePlaced
0,0,0468-bb3f0e39-c829-430d-9c75-c538d48abe22,0,build,mental,settled,1,1.575564e+12,304021.925,0.000000,...,0.518171,0.406914,0.00,"[[{'x': 496.5999450683595, 'y': 592.4785714285...",hand_selected_015,38477,pilot4,1.575563e+12,1.575563e+12,38477
1,1,0468-bb3f0e39-c829-430d-9c75-c538d48abe22,0,build,mental,settled,2,1.575564e+12,309074.700,0.000000,...,0.666667,0.589698,0.00,"[[{'x': 496.5999450683595, 'y': 592.4785714285...",hand_selected_015,43294,pilot4,1.575563e+12,1.575563e+12,43294
2,2,0468-bb3f0e39-c829-430d-9c75-c538d48abe22,0,build,mental,settled,3,1.575564e+12,314450.495,0.000000,...,0.789323,0.740677,0.00,"[[{'x': 496.60188322094706, 'y': 592.494480207...",hand_selected_015,48380,pilot4,1.575563e+12,1.575563e+12,48380
3,3,0468-bb3f0e39-c829-430d-9c75-c538d48abe22,0,build,mental,settled,4,1.575564e+12,317490.745,0.000000,...,0.906115,0.884437,0.00,"[[{'x': 496.5941205868618, 'y': 592.5468387980...",hand_selected_015,53738,pilot4,1.575563e+12,1.575563e+12,53738
4,4,0468-bb3f0e39-c829-430d-9c75-c538d48abe22,0,build,mental,settled,5,1.575564e+12,320888.670,0.882519,...,0.904557,0.882519,0.03,"[[{'x': 496.5844199549109, 'y': 592.4785727811...",hand_selected_015,56779,pilot4,1.575563e+12,1.575563e+12,56779
5,5,0468-bb3f0e39-c829-430d-9c75-c538d48abe22,1,build,physical,settled,1,1.575564e+12,372582.030,0.000000,...,0.400000,0.285125,0.00,"[[{'x': 498.8499450683594, 'y': 592.4785714285...",hand_selected_006,10837,pilot4,1.575564e+12,1.575564e+12,10837
6,6,0468-bb3f0e39-c829-430d-9c75-c538d48abe22,1,build,physical,settled,2,1.575564e+12,381065.565,0.000000,...,0.561538,0.477591,0.00,"[[{'x': 498.8499450683594, 'y': 592.4785714285...",hand_selected_006,16695,pilot4,1.575564e+12,1.575564e+12,16695
7,7,0468-bb3f0e39-c829-430d-9c75-c538d48abe22,1,build,physical,settled,3,1.575564e+12,386365.910,0.000000,...,0.692108,0.633159,0.00,"[[{'x': 498.8499450683594, 'y': 592.4785714285...",hand_selected_006,25184,pilot4,1.575564e+12,1.575564e+12,25184
8,8,0468-bb3f0e39-c829-430d-9c75-c538d48abe22,1,build,physical,settled,4,1.575564e+12,392854.330,0.000000,...,0.686757,0.626784,0.00,"[[{'x': 498.8499450683594, 'y': 592.4785714285...",hand_selected_006,30484,pilot4,1.575564e+12,1.575564e+12,30484
9,9,0468-bb3f0e39-c829-430d-9c75-c538d48abe22,1,build,physical,settled,1,1.575564e+12,410077.425,0.000000,...,0.487562,0.389452,0.00,"[[{'x': 501.0999450683595, 'y': 592.4785714285...",hand_selected_006,50687,pilot4,1.575564e+12,1.575564e+12,50687


In [133]:
df[df.targetName=='0']

Unnamed: 0.1,Unnamed: 0,gameID,trialNum,phase,condition,eventType,numBlocks,timeAbsolute,timeRelative,normedScore,...,incrementalScore,normedIncrementalScore,currBonus.1,allVertices,targetName,relativePlacementTime,iterationName,buildStartTime,exploreStartTime,timePlaced
