In [1]:
import os
import sys
import urllib, io

import numpy as np
import scipy.stats as stats
import pandas as pd

import pymongo as pm
from collections import Counter
import json
import re
import ast

from PIL import Image, ImageOps, ImageDraw, ImageFont 

from io import BytesIO
import base64

import  matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42

import seaborn as sns
sns.set_context('talk')
sns.set_style('darkgrid')

from IPython.display import clear_output

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")

In [133]:
## directory & file hierarchy
proj_dir = os.path.abspath('..')
datavol_dir = os.path.join(proj_dir,'data')
analysis_dir = os.path.abspath(os.path.join(os.getcwd(),'..'))
results_dir = os.path.join(proj_dir,'results')
plot_dir = os.path.join(results_dir,'plots')
csv_dir = os.path.join(results_dir,'csv')
json_dir = os.path.join(results_dir,'json')
exp_dir = os.path.abspath(os.path.join(proj_dir,'experiments'))
png_dir = os.path.abspath(os.path.join(datavol_dir,'png'))
jefan_dir = os.path.join(analysis_dir,'jefan')
will_dir = os.path.join(analysis_dir,'will')

## add helpers to python path
if os.path.join(proj_dir,'stimuli') not in sys.path:
    sys.path.append(os.path.join(proj_dir,'stimuli'))
    
if not os.path.exists(results_dir):
    os.makedirs(results_dir)
    
if not os.path.exists(plot_dir):
    os.makedirs(plot_dir)   
    
if not os.path.exists(csv_dir):
    os.makedirs(csv_dir)       

In [134]:
# set vars 
auth = pd.read_csv(os.path.join(analysis_dir,'auth.txt'), header = None) # this auth.txt file contains the password for the sketchloop user
pswd = auth.values[0][0]
user = 'sketchloop'
host = 'cogtoolslab.org' ## cocolab ip address

# have to fix this to be able to analyze from local
import pymongo as pm
conn = pm.MongoClient('mongodb://sketchloop:' + pswd + '@127.0.0.1')
db = conn['block_construction']
coll = db['silhouette']

# which iteration name should we use?
iterationName = 'Exp2Pilot3'

# variables to check integrity of data
numTrials = 24

## Sanity Checks

In [160]:
# Ensure one to one gameID and workerId 
# Should only happen if a repeat worker gets through

query = coll.find({"$and":[
                        {'workerId':{'$exists':True}},
                        {'condition':{'$ne':'practice'}},
                        {'eventType':'trial_end'},
                        {"$or":[{'iterationName':'pilot2'},
                                {'iterationName':'pilot3'},
                                {'iterationName':'pilot4'},
                                {'iterationName':'Exp2Pilot1'},
                                {'iterationName':'Exp2Pilot1_turk'},
                                {'iterationName':'Exp2Pilot1_turk'}]},
                        {'trialNum':0}]
                     })

df_trial_end_full = pd.DataFrame(list(query.sort('timeAbsolute')))
#df_trial_end_full[['workerId','gameID']]


assert (np.mean(df_trial_end_full['workerId'].value_counts()) == np.mean(df_trial_end_full['gameID'].value_counts()))

## Trial Level Data

In [161]:
# Assuming that if trial 23 saves, then 0-22 have also saved 
# get ids of people with trial 23 data
query = coll.find({"$and":[
                        {'condition':{'$ne':'practice'}},
                        {'eventType':'trial_end'},
                        {"$or":[{'iterationName':'Exp2Pilot3'},
                                {'iterationName':'Exp2Pilot3_batch2'}]},
                        #{'iterationName': iterationName}, #use this if one iteration name
                        {'trialNum': numTrials-1}]
                     })
complete_data_df = pd.DataFrame(query)
complete_data_ids = list(complete_data_df['workerId'])

In [162]:
# Filter for full datasets
query = coll.find({"$and":[
                        {'condition':{'$ne':'practice'}},
                        {'eventType':'trial_end'},
                        #{'iterationName': iterationName}, #use this if one iteration name
                        {"$or":[{'iterationName':'Exp2Pilot3'},
                                {'iterationName':'Exp2Pilot3_batch2'}]}]
                     })

df_trial_end_full = pd.DataFrame(list(query.sort('timeAbsolute')))


# filter dataframe for complete datasets
df_trial_end_full_filtered = df_trial_end_full[df_trial_end_full.workerId.isin(complete_data_ids)]

# reduce to crucial information
df_trial_end_reduced_filtered = df_trial_end_full_filtered[[
    'gameID','trialNum','phase','condition','eventType','targetName','repetition','targetID', #trial identifiers
    'nullScore','F1Score','normedScore','rawScoreDiscrete','nullScoreDiscrete','normedScoreDiscrete','scoreGapDiscrete', #scoring
    'numBlocks','nPracticeAttempts','blockColor','blockColorID','blockFell','doNothingRepeats',#misc. trial info
    'score','currBonus','timeBonus', #bonusing
    'timeAbsolute','timeRelative','buildTime','buildStartTime','buildFinishTime','timeToBuild', #timing 
    'discreteWorld','allVertices', #world reconstruction
    'browser','browserVersion','os','devMode', #developer info
    #below here should be the same for every trial in a dataset
    'iterationName',
    'numTargets', 'prePostSetSize','numRepetitions', #pre-post info
    'bonusThresholdLow','bonusThresholdMid','bonusThresholdHigh','timeThresholdYellow','timeThresholdRed', #bonus info
    ]]

#Fix error in data-saving- normedScoreDiscrete saved as rawScoreDiscrete
df_trial_end_reduced_filtered['normedScoreDiscrete'] = df_trial_end_reduced_filtered['rawScoreDiscrete']
df_trial_end_reduced_filtered.drop(['rawScoreDiscrete'], axis=1)


df = df_trial_end_reduced_filtered.sort_values(by=['gameID', 'timeAbsolute'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


### Compute Spatial Reconstruction Accuracy

In [163]:
targetMaps = {}

with open(os.path.join(csv_dir,'targetMaps.txt')) as json_file:
    targetMaps = json.load(json_file)

In [164]:
def getPrecision(arr1,arr2):
    prod = np.multiply(arr1,arr2)
    false_pos = np.subtract(arr2,prod)
    numerator = np.sum(prod)
    denominator = np.add(numerator,np.sum(false_pos))
    recall = numerator/denominator
    return recall

def getRecall(arr1,arr2):
    prod = np.multiply(arr1,arr2)
    false_neg = np.subtract(arr1,prod)
    numerator = np.sum(prod)
    denominator = np.add(np.sum(prod),np.sum(false_neg))
    recall = numerator/denominator
    return recall

def getF1Score(targetName, discreteWorld):
    targetMap = targetMaps[targetName]
    arr1 = 1*np.logical_not(np.array(targetMap))
    arr2 = 1*np.logical_not(np.array(discreteWorld))
    recall = getRecall(arr1, arr2)
    precision = getPrecision(arr1, arr2)
    numerator = np.multiply(precision, recall)
    denominator = np.add(precision, recall)
    if (denominator>0):
        quotient = np.divide(numerator, denominator)
        f1Score = np.multiply(2, quotient)
    else:
        f1Score = 0
#     print('recall ' + str(recall))
#     print('precision ' + str(precision))
    return f1Score

def getF1ScoreLambda(row):
    return(getF1Score(row['targetName'], row['discreteWorld']))


def getJaccard(targetName, discreteWorld):
    targetMap = targetMaps[targetName]
    arr1 = 1*np.logical_not(np.array(targetMap))
    arr2 = 1*np.logical_not(np.array(discreteWorld))
    
    prod = np.multiply(arr1,arr2)
    true_pos = np.sum(prod)
    false_pos = np.sum(np.subtract(arr2,prod))
    false_neg = np.sum(np.subtract(arr1,prod))
#     print(true_pos)
#     print(false_pos)
#     print(false_neg)

    denomenator = np.add(false_neg,np.add(false_pos,true_pos))
    jaccard = np.divide(true_pos,denomenator)
    #print('recall ' + recall);
    return jaccard

def getJaccardLambda(row):
    return(getJaccard(row['targetName'], row['discreteWorld']))

# def getNullScore(targetName):
#     targetMap = targetMaps[targetName]
#     arr1 = 1*np.logical_not(np.array(targetMap))
#     arr2 = 1*np.zeros(arr1.shape)
#     recall = getRecall(arr1, arr2)
#     precision = getPrecision(arr1, arr2)
#     numerator = np.multiply(precision, recall)
#     denominator = np.add(precision, recall)
#     quotient = np.divide(numerator, denominator)
#     f1Score = np.multiply(2, quotient)
#     print('recall ', str(recall));
#     print('precision ', str(precision));
#     print('quotient ', str(quotient));
#     return f1Score

In [165]:
df['rawF1DiscreteScore'] =  df.apply(getF1ScoreLambda, axis=1)

In [166]:
df['jaccardDiscrete'] = df.apply(getJaccardLambda, axis=1)

In [167]:
# Make new column: phase_extended
# Same as phase but with 'repeated' split into 'repetition 1' and 'repetition 2'

phase_dict = {
    'pre':0,
    'repetition 1':1,
    'repetition 2':2,
    'post':3
}

ordered_phases = ['pre','repetition 1','repetition 2','post']


df['phase_extended'] = df['phase']
df.loc[(df.phase=='repeated') & (df.repetition==1),'phase_extended'] = 'repetition 1'
df.loc[(df.phase=='repeated') & (df.repetition==2),'phase_extended'] = 'repetition 2'


df['phase_number'] = df.phase_extended.astype("category").cat.set_categories(ordered_phases).cat.codes + 1

In [168]:
#Add useful variables for graphing

df['targetNumber'] = df['targetName'].apply(lambda x: x[-2:])
df['perfectScore'] = df.rawF1DiscreteScore == 1

In [169]:
df['gameID'].nunique()

60

## Initial Block Data
Initial block placements (before physics, after snapping, before falling)

In [170]:
query = coll.find({"$and":[
                        {'condition':{'$ne':'practice'}},
                        {'eventType':'initial'},
                        #{'iterationName': iterationName}, #use this if one iteration name
                        {"$or":[{'iterationName':'Exp2Pilot3'},
                                {'iterationName':'Exp2Pilot3_batch2'}]}]
                     })

df_initial_full = pd.DataFrame(list(query))

# filter dataframe for complete datasets
df_initial_full_filtered = df_initial_full[df_initial_full.workerId.isin(complete_data_ids)]

print('Loaded ' + str(df_initial_full_filtered.shape[0]) + ' complete sets of initial blocks')
# reduce to crucial information

Loaded 13029 complete sets of initial blocks


In [171]:
df_initial_full_filtered.columns

Index(['_id', 'iterationName', 'workerId', 'hitID', 'aID', 'gameID', 'version',
       'randID', 'timeRelative', 'timeAbsolute', 'phase', 'condition',
       'trialNum', 'nullScore', 'scoreGap', 'F1Score', 'normedScore',
       'rawScoreDiscrete', 'normedScoreDiscrete', 'nullScoreDiscrete',
       'scoreGapDiscrete', 'currBonus', 'score', 'points', 'numTrials',
       'targetName', 'targetBlocks', 'prompt', 'blockColors', 'blockColor',
       'blockColorID', 'numTargets', 'prePostSetSize', 'numRepetitions',
       'repetition', 'targetID', 'practiceDuration', 'buildDuration',
       'timeThresholdYellow', 'timeThresholdRed', 'devMode',
       'discreteEnvHeight', 'discreteEnvWidth', 'browser', 'browserVersion',
       'os', 'canvasHeight', 'canvasWidth', 'menuHeight', 'menuWidth',
       'floorY', 'stimCanvasWidth', 'stimCanvasHeight', 'stimX', 'stimY',
       'scalingFactor', 'worldScale', 'stim_scale', 'allBlockDims',
       'worldWidthUnits', 'worldHeightUnits', 'blockOptions', 'flo

In [172]:
df_initial_reduced_filtered = df_initial_full_filtered[[
    'gameID','trialNum','phase','condition','eventType','targetName','repetition','targetID','blockNum', #trial identifiers
    'nullScore','incrementalScore','normedIncrementalScore','rawScoreDiscrete','incrementalNormedScoreDiscretePrevious', #scoring
    'score','currBonus', #bonusing
    'timeAbsolute','timeRelative','timeBlockSelected','timeBlockPlaced','relativePlacementTime', #timing 
    'discreteWorld','vertices','blockKind','blockColorID','blockColor','blockCenterX', 'blockCenterY', #world reconstruction
    'x_index','y_index','x_discrete','y_discrete','width_discrete','height_discrete'
    ]]

df_initial_reduced_filtered = df_initial_reduced_filtered.sort_values(by=['gameID', 'timeAbsolute'])

In [173]:
dfi = df_initial_reduced_filtered

In [174]:
dfi['phase_extended'] = dfi['phase']
dfi.loc[(dfi.phase=='repeated') & (dfi.repetition==1),'phase_extended'] = 'repetition 1'
dfi.loc[(dfi.phase=='repeated') & (dfi.repetition==2),'phase_extended'] = 'repetition 2'


# dfi['phase_number'] = dfi.phase_extended.astype("category",
#                                               ordered=True,
#                                               categories=ordered_phases).cat.codes


In [175]:
dfi['rawF1DiscreteScore'] =  dfi.apply(getF1ScoreLambda, axis=1)

## Settled Block Data
Block data after coming to rest (after physics)

In [176]:
query = coll.find({"$and":[
                        {'condition':{'$ne':'practice'}},
                        {'eventType':'settled'},
                        #{'iterationName': iterationName}, #use this if one iteration name
                        {"$or":[{'iterationName':'Exp2Pilot3'},
                                {'iterationName':'Exp2Pilot3_batch2'}]}]
                     })

df_settled_full = pd.DataFrame(list(query))


# filter dataframe for complete datasets
df_settled_full_filtered = df_settled_full[df_settled_full.workerId.isin(complete_data_ids)]

print('Loaded ' + str(df_settled_full_filtered.shape[0]) + ' complete sets of settled blocks')
# reduce to crucial information

Loaded 13059 complete sets of settled blocks


In [177]:
df_settled_full_filtered.columns

Index(['_id', 'iterationName', 'workerId', 'hitID', 'aID', 'gameID', 'version',
       'randID', 'timeRelative', 'timeAbsolute', 'phase', 'condition',
       'trialNum', 'nullScore', 'scoreGap', 'F1Score', 'normedScore',
       'rawScoreDiscrete', 'normedScoreDiscrete', 'nullScoreDiscrete',
       'scoreGapDiscrete', 'currBonus', 'score', 'points', 'numTrials',
       'targetName', 'targetBlocks', 'prompt', 'blockColors', 'blockColor',
       'blockColorID', 'numTargets', 'prePostSetSize', 'numRepetitions',
       'repetition', 'targetID', 'practiceDuration', 'buildDuration',
       'timeThresholdYellow', 'timeThresholdRed', 'devMode',
       'discreteEnvHeight', 'discreteEnvWidth', 'browser', 'browserVersion',
       'os', 'canvasHeight', 'canvasWidth', 'menuHeight', 'menuWidth',
       'floorY', 'stimCanvasWidth', 'stimCanvasHeight', 'stimX', 'stimY',
       'scalingFactor', 'worldScale', 'stim_scale', 'allBlockDims',
       'worldWidthUnits', 'worldHeightUnits', 'blockOptions', 'flo

In [178]:
df_settled_reduced_filtered = df_settled_full_filtered[[
    'gameID','trialNum','phase','condition','eventType','targetName','repetition','targetID', #trial identifiers
    'nullScore','incrementalScore','normedIncrementalScore','rawScoreDiscrete','incrementalNormedScoreDiscrete','numBlocks','blockFell', #scoring
    'score','currBonus', #bonusing
    'timeAbsolute','timeRelative',#timing 
    'discreteWorld','allVertices','blockKind','blockColorID','blockColor','blockCenterX', 'blockCenterY',#world reconstruction
    'x_index','y_index','x_discrete','y_discrete'
    ]]

df_settled_reduced_filtered = df_settled_reduced_filtered.sort_values(by=['gameID', 'timeAbsolute'])

In [179]:
dfs = df_settled_reduced_filtered

In [180]:
dfs['rawF1DiscreteScore'] =  dfs.apply(getF1ScoreLambda, axis=1)

## Survey Data

In [181]:
query = coll.find({"$and":[
                        {'eventType':'survey_data'},
                        #{'iterationName': iterationName}, #use this if one iteration name
                        {"$or":[{'iterationName':'Exp2Pilot3'},
                                {'iterationName':'Exp2Pilot3_batch2'}]}]
                     })
df_survey = pd.DataFrame(list(query.sort('absoluteTime')))
df_survey[['gameID','age','comments','difficulty','fun','strategies','inputDevice','sex','score']]

Unnamed: 0,gameID,age,comments,difficulty,fun,strategies,inputDevice,sex,score
0,1691-3673775e-bf17-46c9-b775-79afc1355b02,28,"Hard! Much harder than I expected! I loved it,...",6,7,Unfortunately I didn't.,Mouse,Male,0.325
1,6834-b18d5535-d036-4aa4-bd46-e31404e1a9b0,50,It was vry diificult nut fun I like to use ny ...,6,7,A good base and trying to pay attention to det...,Trackpad,Female,0.09
2,9415-40177ff4-0b4d-494c-96b4-3409ff007cdb,28,I enjoyed the hit.,4,6,I just saw which piece will be suitable for th...,Mouse,Female,0.07
3,6679-27a62967-b9d8-4a25-8b63-74e16e2f7f32,30,"It was fine, no issues",2,5,redrawing the pattern by imagining that I was ...,Mouse,Female,0.635
4,0823-6fe08630-db1c-47e9-b895-7a2bf343bb77,43,"Frustrating at first, but got the hang of it a...",5,6,Not really. Started from the bottom and would...,Mouse,Male,0.805
5,4739-25f27c31-0d4c-46ae-a515-02351c69042d,25,It was horrible. Unless you got it 100% accura...,7,1,I tried my best at first and realized that unl...,Mouse,Male,0.06
6,0622-493f8d2b-4aa3-4b1c-ad92-f6014b9fc709,24,,7,5,,Mouse,Female,0.05
7,1106-c12ab2b8-d38b-4e8f-9244-5adfbadd1093,31,,5,6,just eyeballed it,Mouse,Female,0.405
8,3988-e15c8e2e-0b53-43fd-a2d3-686d3efd6923,50,I thought it was fun no problems with it,6,6,Just tried to follow picture the best I could,Mouse,Female,0.0
9,9995-aa7dd1d6-ff3f-41e8-ade6-da416e7366c3,30,,4,5,no,Mouse,Female,0.17


## Data Cleaning (bugs)

In [182]:
# Remove two block placements (potentially from refreshing?)
# These were recorded but don't seem to be a part of the final structure
# Believe they are from refreshing

dfi = dfi[~(((dfi.gameID == '4611-415301bd-3cd2-4751-9911-e530d1bce758') & 
        (dfi.trialNum==1) & 
        (dfi.blockNum == 1) & 
        (dfi.blockKind=='D')) |
    ((dfi.gameID == '2328-cf96d18d-a95b-4d1b-bc43-602ee1bf5835') & 
        (dfi.trialNum==0) & 
        (dfi.blockNum == 1) & 
        (dfi.blockKind=='E')))]

dfs = dfs[~(((dfi.gameID == '4611-415301bd-3cd2-4751-9911-e530d1bce758') & 
        (dfs.trialNum==1) & 
        (dfs.numBlocks == 1) & 
        (dfs.blockKind=='D')) |
    ((dfs.gameID == '2328-cf96d18d-a95b-4d1b-bc43-602ee1bf5835') & 
        (dfs.trialNum==0) & 
        (dfs.numBlocks == 1) & 
        (dfs.blockKind=='E')))]



In [183]:
# Mark a participant as buggy

df['buggy'] = False
dfs['buggy'] = False
dfi['buggy'] = False
df_survey['buggy'] = False

#Mark this participant as bugs found leading to >60s build time. Perhaps a very slow computer?
df.loc[df.gameID=="3988-e15c8e2e-0b53-43fd-a2d3-686d3efd6923",'buggy'] = True 
dfs.loc[dfs.gameID=="3988-e15c8e2e-0b53-43fd-a2d3-686d3efd6923",'buggy'] = True 
dfi.loc[dfi.gameID=="3988-e15c8e2e-0b53-43fd-a2d3-686d3efd6923",'buggy'] = True
df_survey.loc[df_survey.gameID=="3988-e15c8e2e-0b53-43fd-a2d3-686d3efd6923",'buggy'] = True

#Mark this participant as NaNs found for two scores.
df.loc[df.gameID=="4739-25f27c31-0d4c-46ae-a515-02351c69042d",'buggy'] = True 
dfs.loc[dfs.gameID=="4739-25f27c31-0d4c-46ae-a515-02351c69042d",'buggy'] = True 
dfi.loc[dfi.gameID=="4739-25f27c31-0d4c-46ae-a515-02351c69042d",'buggy'] = True 
df_survey.loc[df_survey.gameID=="4739-25f27c31-0d4c-46ae-a515-02351c69042d",'buggy'] = True

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


## Inter-block-interval

In [184]:
def getMeanIBI(values):
    '''Obtain mean time between block placements'''
    ibis = [] 
    for x, y in zip(values[0::], values[1::]): 
        #print(x,y)
        ibi = y-x
        assert(ibi >= 0)
        ibis.append(y-x)
    return np.mean(ibis)

def getMedianIBI(values):
    '''Obtain mean time between block placements'''
    ibis = [] 
    for x, y in zip(values[0::], values[1::]): 
        #print(x,y)
        ibi = y-x
        assert(ibi >= 0)
        ibis.append(y-x)
    return np.median(ibis)


def getSDIBI(values):
    '''Obtain mean time between block placements'''
    ibis = [] 
    for x, y in zip(values[0::], values[1::]): 
        #print(x,y)
        ibi = y-x
        assert(ibi >= 0)
        ibis.append(y-x)
    return np.std(ibis)

def getMinIBI(values):
    '''Obtain mean time between block placements'''
    ibis = [] 
    for x, y in zip(values[0::], values[1::]): 
        #print(x,y)
        ibi = y-x
        assert(ibi >= 0)
        ibis.append(y-x)
    return np.min(ibis)

In [185]:
dfi = dfi.drop_duplicates(subset=['gameID','trialNum','blockNum'], keep='last')

dfIBIMean = dfi.sort_values('timeAbsolute').groupby(['gameID','trialNum'])['relativePlacementTime']\
                    .agg(getMeanIBI).reset_index()
dfIBIMean = dfIBIMean.rename(columns = {'relativePlacementTime':'meanIBI'})
df = pd.merge(df, dfIBIMean, how='left', on=['gameID','trialNum'])

dfIBIMin = dfi.sort_values('timeAbsolute').groupby(['gameID','trialNum'])['relativePlacementTime']\
                    .agg(getMinIBI).reset_index()
dfIBIMin = dfIBIMin.rename(columns = {'relativePlacementTime':'minIBI'})
df = pd.merge(df, dfIBIMin, how='left', on=['gameID','trialNum'])

thinking_time = dfi[dfi.blockNum==1][['gameID','trialNum','relativePlacementTime']]
thinking_time = thinking_time.rename(columns = {'relativePlacementTime':'thinkingTime'})
df = pd.merge(df, thinking_time, how='left', on=['gameID','trialNum'])

dfIBIMedian = dfi.sort_values('timeAbsolute').groupby(['gameID','trialNum'])['relativePlacementTime']\
                    .agg(getMedianIBI).reset_index()
dfIBIMedian = dfIBIMedian.rename(columns = {'relativePlacementTime':'medianIBI'})
df = pd.merge(df, dfIBIMedian, how='left', on=['gameID','trialNum'])

dfIBISD = dfi.sort_values('timeAbsolute').groupby(['gameID','trialNum'])['relativePlacementTime']\
                    .agg(getSDIBI).reset_index()
dfIBISD = dfIBISD.rename(columns = {'relativePlacementTime':'sdIBI'})
df = pd.merge(df, dfIBISD, how='left', on=['gameID','trialNum'])

In [186]:
df_trial_end_full_filtered

Unnamed: 0,_id,iterationName,workerId,hitID,aID,gameID,version,randID,timeRelative,timeAbsolute,...,timeBonus,buildResets,nPracticeAttempts,doNothingRepeats,bonusThresholdHigh,bonusThresholdMid,bonusThresholdLow,allVertices,blockFell,discreteWorld
0,5e29e4af280c353fe062def4,Exp2Pilot3,A2ZDEERVRN5AMC,3GKAWYFRAP703ZGRDT02TKJV75BDPZ,3TK8OJTYM2ZPK0E7UG5YOQKND83PVA,1691-3673775e-bf17-46c9-b775-79afc1355b02,82,bf1s3yl8mc71q5nq87437,205017.000,1.579804e+12,...,0.000,0,,0,0.95,0.88,0.75,"[[{'x': 495.03332757742515, 'y': 482.758686556...",False,"[[True, True, True, True, True, True, True, Tr..."
1,5e29e4f5280c353fe062deff,Exp2Pilot3,A2ZDEERVRN5AMC,3GKAWYFRAP703ZGRDT02TKJV75BDPZ,3TK8OJTYM2ZPK0E7UG5YOQKND83PVA,1691-3673775e-bf17-46c9-b775-79afc1355b02,82,bf1s3yl8mc71q5nq87437,274513.000,1.579804e+12,...,0.000,0,,0,0.95,0.88,0.75,"[[{'x': 495.1287875876538, 'y': 592.5522983949...",True,"[[True, True, True, True, True, True, True, Tr..."
2,5e29e535280c353fe062df16,Exp2Pilot3,A2ZDEERVRN5AMC,3GKAWYFRAP703ZGRDT02TKJV75BDPZ,3TK8OJTYM2ZPK0E7UG5YOQKND83PVA,1691-3673775e-bf17-46c9-b775-79afc1355b02,82,bf1s3yl8mc71q5nq87437,338985.000,1.579804e+12,...,0.000,0,,0,0.95,0.88,0.75,"[[{'x': 440, 'y': 592.5931362597067}, {'x': 66...",False,"[[True, True, True, True, True, True, True, Tr..."
3,5e29e576280c353fe062df2f,Exp2Pilot3,A2ZDEERVRN5AMC,3GKAWYFRAP703ZGRDT02TKJV75BDPZ,3TK8OJTYM2ZPK0E7UG5YOQKND83PVA,1691-3673775e-bf17-46c9-b775-79afc1355b02,82,bf1s3yl8mc71q5nq87437,403762.000,1.579804e+12,...,0.000,0,,0,0.95,0.88,0.75,"[[{'x': 384.91373972159863, 'y': 592.776560928...",False,"[[True, True, True, True, True, True, True, Tr..."
4,5e29e5b8280c353fe062df40,Exp2Pilot3,A2ZDEERVRN5AMC,3GKAWYFRAP703ZGRDT02TKJV75BDPZ,3TK8OJTYM2ZPK0E7UG5YOQKND83PVA,1691-3673775e-bf17-46c9-b775-79afc1355b02,82,bf1s3yl8mc71q5nq87437,469553.000,1.579804e+12,...,0.000,0,,0,0.95,0.88,0.75,"[[{'x': 439.98034412015824, 'y': 592.818840595...",False,"[[True, True, True, True, True, True, True, Tr..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1462,5eb446ba346be0546b2922e1,Exp2Pilot3_batch2,A17LZ3WK12T7CJ,31ANT7FQN8GDM53AFAAEMZTCAR35H4,3Z7ISHFUH19LNSK09WSQNA2ULJ4Z8Q,0873-2851611f-42e5-4913-979c-1148321ce97a,82,o8u3wicj9nuynz5xya8ak,1544658.790,1.588873e+12,...,0.000,0,,0,0.95,0.88,0.75,"[[{'x': 549.8424054564126, 'y': 593.0763798294...",False,"[[True, True, True, True, True, True, True, Tr..."
1463,5eb446fa346be0546b2922ea,Exp2Pilot3_batch2,A17LZ3WK12T7CJ,31ANT7FQN8GDM53AFAAEMZTCAR35H4,3Z7ISHFUH19LNSK09WSQNA2ULJ4Z8Q,0873-2851611f-42e5-4913-979c-1148321ce97a,82,o8u3wicj9nuynz5xya8ak,1608964.355,1.588873e+12,...,0.000,0,,0,0.95,0.88,0.75,"[[{'x': 440.07474003845846, 'y': 592.148612905...",True,"[[True, True, True, True, True, True, True, Tr..."
1464,5eb44722346be0546b2922f9,Exp2Pilot3_batch2,A17LZ3WK12T7CJ,31ANT7FQN8GDM53AFAAEMZTCAR35H4,3Z7ISHFUH19LNSK09WSQNA2ULJ4Z8Q,0873-2851611f-42e5-4913-979c-1148321ce97a,82,o8u3wicj9nuynz5xya8ak,1648566.480,1.588873e+12,...,0.005,0,,0,0.95,0.88,0.75,"[[{'x': 439.8840621744056, 'y': 592.7538103770...",False,"[[True, True, True, True, True, True, True, Tr..."
1465,5eb44752346be0546b29230e,Exp2Pilot3_batch2,A17LZ3WK12T7CJ,31ANT7FQN8GDM53AFAAEMZTCAR35H4,3Z7ISHFUH19LNSK09WSQNA2ULJ4Z8Q,0873-2851611f-42e5-4913-979c-1148321ce97a,82,o8u3wicj9nuynz5xya8ak,1696869.870,1.588873e+12,...,0.005,0,,0,0.95,0.88,0.75,"[[{'x': 439.94859898558695, 'y': 593.072511449...",False,"[[True, True, True, True, True, True, True, Tr..."


## Export Data

In [187]:
iterationName = 'Exp2Pilot3_all'

In [57]:
out_path = os.path.join(csv_dir,'block_silhouette_{}.csv'.format(iterationName))
df.to_csv(out_path)

In [58]:
out_path = os.path.join(csv_dir,'block_silhouette_initial_{}.csv'.format(iterationName))
dfi.to_csv(out_path)

In [59]:
out_path = os.path.join(csv_dir,'block_silhouette_settled_{}.csv'.format(iterationName))
dfs.to_csv(out_path)

In [188]:
out_path = os.path.join(csv_dir,'block_silhouette_{}_good.csv'.format(iterationName))
df[~df.buggy].to_csv(out_path)

In [189]:
out_path = os.path.join(csv_dir,'block_silhouette_initial_{}_good.csv'.format(iterationName))
dfi[~dfi.buggy].to_csv(out_path)

In [190]:
out_path = os.path.join(csv_dir,'block_silhouette_settled_{}_good.csv'.format(iterationName))
dfs[~dfs.buggy].to_csv(out_path)

In [129]:
out_path = os.path.join(csv_dir,'block_silhouette_survey_{}.csv'.format(iterationName))
df_survey.to_csv(out_path)

In [120]:
df_survey[~df_survey.buggy][['gameID','age','comments','difficulty','fun','strategies','inputDevice','sex','score']]

Unnamed: 0,gameID,age,comments,difficulty,fun,strategies,inputDevice,sex,score
0,1691-3673775e-bf17-46c9-b775-79afc1355b02,28,"Hard! Much harder than I expected! I loved it,...",6,7,Unfortunately I didn't.,Mouse,Male,0.325
1,6834-b18d5535-d036-4aa4-bd46-e31404e1a9b0,50,It was vry diificult nut fun I like to use ny ...,6,7,A good base and trying to pay attention to det...,Trackpad,Female,0.09
2,9415-40177ff4-0b4d-494c-96b4-3409ff007cdb,28,I enjoyed the hit.,4,6,I just saw which piece will be suitable for th...,Mouse,Female,0.07
3,6679-27a62967-b9d8-4a25-8b63-74e16e2f7f32,30,"It was fine, no issues",2,5,redrawing the pattern by imagining that I was ...,Mouse,Female,0.635
4,0823-6fe08630-db1c-47e9-b895-7a2bf343bb77,43,"Frustrating at first, but got the hang of it a...",5,6,Not really. Started from the bottom and would...,Mouse,Male,0.805
6,0622-493f8d2b-4aa3-4b1c-ad92-f6014b9fc709,24,,7,5,,Mouse,Female,0.05
7,1106-c12ab2b8-d38b-4e8f-9244-5adfbadd1093,31,,5,6,just eyeballed it,Mouse,Female,0.405
9,9995-aa7dd1d6-ff3f-41e8-ade6-da416e7366c3,30,,4,5,no,Mouse,Female,0.17
10,4611-415301bd-3cd2-4751-9911-e530d1bce758,39,I was not able to reset,7,6,no,Mouse,Female,0.265
11,9153-0ba62326-2435-45d8-962d-97de391c7fc4,49,Everything appeared to run smoothly.,4,6,Tried to use as few blocks as possible (so I u...,Mouse,Male,0.555


In [43]:
print('age mean: ', df_survey[~df_survey.buggy]['age'].apply(int).mean())
print('age std: ', df_survey[~df_survey.buggy]['age'].apply(int).std())

age mean:  35.51020408163265
age std:  8.246520602097368


In [44]:
df_survey[~df_survey.buggy]['sex'].value_counts()

Female    28
Male      21
Name: sex, dtype: int64

In [45]:
print('bonus mean: ', df_survey[~df_survey.buggy]['score'].mean())
print('bonus std: ', df_survey[~df_survey.buggy]['score'].std())

bonus mean:  0.4346938775510205
bonus std:  0.3270900872390786
