In [3]:
import os, sys

import pymongo as pm
import numpy as np
import scipy.stats as stats
import pandas as pd
import json
import re
from io import BytesIO
from PIL import Image
from skimage import io, img_as_float
import base64

import matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot
import seaborn as sns
sns.set_context('talk')
sns.set_style('white')

from IPython.display import clear_output
import importlib

import utils

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")

In [4]:
# Directory & file hierarchy.
# proj_dir is the parent of the current working directory; every other
# location used by this notebook is derived from it.
analysis_dir = os.getcwd()
proj_dir = os.path.abspath('..')
results_dir = os.path.join(proj_dir, 'results')
plot_dir, csv_dir = (os.path.join(results_dir, sub) for sub in ('plots', 'csv'))
exp_dir = os.path.abspath(os.path.join(proj_dir, 'experiments'))

## make modules under <proj_dir>/utils importable (appended only once)
utils_dir = os.path.join(proj_dir, 'utils')
if utils_dir not in sys.path:
    sys.path.append(utils_dir)

def make_dir_if_not_exists(dir_name):
    """Ensure the directory `dir_name` exists and return its path.

    Missing parent directories are created as well. Calling this on a
    directory that already exists is a no-op, so the function is safe to
    call repeatedly (e.g. when the notebook cell is re-run).

    Parameters
    ----------
    dir_name : str or path-like
        Path of the directory to create.

    Returns
    -------
    The `dir_name` argument, unchanged, so calls can be used inline.

    Raises
    ------
    FileExistsError
        If `dir_name` exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of
    # `if not os.path.exists(...): os.makedirs(...)`.
    os.makedirs(dir_name, exist_ok=True)
    return dir_name

## make sure the results, plots and csv output directories exist
## (`result` holds the three paths, in that order)
result = list(map(make_dir_if_not_exists, (results_dir, plot_dir, csv_dir)))

## establish connection to mongo
Establish an SSH tunnel to the server so that requests to MongoDB can be made as if the MongoDB server were running on your local computer. Run the following command in a terminal before executing the next cell:

`ssh -fNL 27017:127.0.0.1:27017 hhuey@cogtoolslab.org`

In [27]:
# set vars
# auth.txt holds the password for the sketchloop user on its first line.
# dtype=str keeps an all-digit password as text instead of letting pandas
# convert it to a number (which would break the string formatting below).
auth = pd.read_csv(os.path.join(analysis_dir,'auth.txt'), header = None, dtype = str)
pswd = auth.values[0][0]
user = 'sketchloop'
host = 'cogtoolslab.org'  # not used for the connection below, which targets 127.0.0.1:27017

import pymongo as pm
import socket  # no longer needed in this cell; kept in case later cells rely on it
from urllib.parse import quote_plus

# The connection always goes to 127.0.0.1:27017 (the original hostname check
# had two identical branches, so it has been collapsed).
# Username and password are percent-escaped so that characters such as
# '@', ':', '/' or '%' in the password cannot corrupt the connection URI.
conn = pm.MongoClient('mongodb://{}:{}@127.0.0.1:27017'.format(quote_plus(user), quote_plus(pswd)))
db = conn['causaldraw']
coll = db['identification']

# which iteration name should we use?
iterationName = 'pilot2'

In [28]:
# list every iterationName value stored in the collection
# (the current iteration should be among them)
coll.distinct('iterationName')

['debugging', 'debugging1', 'debugging2', 'debugging3', 'pilot1', 'pilot2']

In [29]:
# how many records does the collection hold?
# (no filter is applied, so this is not restricted to iterationName)
n_records = coll.estimated_document_count()
print('We have {} records in mongo.'.format(n_records))

We have 17712 records in mongo.


### Generate dataframes

In [30]:
## pull every 'identification' event of the current iteration into a dataframe
label_query = {'iterationName':iterationName, 'eventType':'identification'}
d = coll.find(label_query)
D = pd.DataFrame(d)

# report the number of rows and the number of distinct gameIDs
n_label_records = D.shape[0]
n_games = D['gameID'].nunique()
print('We have {} unique label records in all {} of our games.'.format(n_label_records, n_games))

We have 15213 unique label records in all 75 of our games.


In [31]:
# column cleanup: remove the listed columns from D.
# errors='ignore' makes this cell safe to re-run: once the columns are gone,
# a second execution would otherwise raise KeyError.
cols_to_drop = ['circle_radius', 'fixation_image', 'fixation_size', 'sketch_size',
                'targets', 'target_delay_max', 'target_delay_min', 'targets_size']
D = D.drop(columns=cols_to_drop, errors='ignore')

In [32]:
# peek at the last five identification records
D.tail(n=5)

Unnamed: 0,_id,aID,blockNum,browser_height,browser_width,condition,eventType,gameID,hitID,iterationName,...,rt,selectedTarget,sketch_id,sketch_nav,timeSketchPresented,timeTargetClicked,toy_id,trialNum,type,wID
15208,5fab153b9b97fb4be745661c,,practice,900,1440,practice,identification,6744-5f12eacf-ed18-4ed4-a3b1-90612679e567,,pilot2,...,1576,levers_2,levers_2_practice.png,data/run1/levers_2_practice.png,1605048000000.0,1605050000000.0,levers_2,,jspsych-custom-visual-search,
15209,5fab15429b97fb4be745661d,,practice,900,1440,practice,identification,6744-5f12eacf-ed18-4ed4-a3b1-90612679e567,,pilot2,...,1948,pulleys_2,pulleys_2_practice.png,data/run1/pulleys_2_practice.png,1605048000000.0,1605050000000.0,pulleys_2,,jspsych-custom-visual-search,
15210,5fab15499b97fb4be745661e,,practice,900,1440,practice,identification,6744-5f12eacf-ed18-4ed4-a3b1-90612679e567,,pilot2,...,1716,gears_1,gears_1_practice.png,data/run1/gears_1_practice.png,1605048000000.0,1605050000000.0,gears_1,,jspsych-custom-visual-search,
15211,5fab15529b97fb4be745661f,,practice,900,1440,practice,identification,6744-5f12eacf-ed18-4ed4-a3b1-90612679e567,,pilot2,...,1338,pulleys_1,pulleys_1_practice.png,data/run1/pulleys_1_practice.png,1605048000000.0,1605050000000.0,pulleys_1,,jspsych-custom-visual-search,
15212,5fab15599b97fb4be7456620,,practice,900,1440,practice,identification,6744-5f12eacf-ed18-4ed4-a3b1-90612679e567,,pilot2,...,2035,gears_2,gears_2_practice.png,data/run1/gears_2_practice.png,1605048000000.0,1605050000000.0,gears_2,,jspsych-custom-visual-search,


In [33]:
# column names that remain in D
D.columns

Index(['_id', 'aID', 'blockNum', 'browser_height', 'browser_width',
       'condition', 'eventType', 'gameID', 'hitID', 'iterationName',
       'numTrials', 'orig_gameID', 'phase', 'recruitmentPlatform', 'rt',
       'selectedTarget', 'sketch_id', 'sketch_nav', 'timeSketchPresented',
       'timeTargetClicked', 'toy_id', 'trialNum', 'type', 'wID'],
      dtype='object')

In [34]:
# count participants: number of distinct gameID values in D
game_ids = D['gameID']
game_ids.nunique()

75

In [35]:
## pull every 'survey' event of the current iteration into a dataframe
survey_query = {'iterationName':iterationName, 'eventType':'survey'}
w = coll.find(survey_query)
W = pd.DataFrame(w)

In [36]:
# peek at the last five survey records
W.tail(n=5)

Unnamed: 0,_id,aID,eventType,gameID,hitID,internal_node_id,iterationName,question_order,responses,rt,time_elapsed,trial_index,trial_type,workerId
168,5fa64ced9b97fb4be74564cb,,survey,0999-e7327b2c-5527-4703-99b0-5337a33523e7,,0.0-1.0,pilot2,,"{""Q0"":""tho@ucsd.edu""}",5831.6,15118,1,survey-text,
169,5faae98d9b97fb4be74564e5,,survey,5005-efff3894-49d8-48a2-9934-3a7b6eb8aa75,,0.0-1.0,pilot2,,"{""Q0"":""jjchang@ucsd.edu""}",20598.775,60447,1,survey-text,
170,5faaf4109b97fb4be7456618,,survey,5005-efff3894-49d8-48a2-9934-3a7b6eb8aa75,,0.0-321.0,pilot2,"[0,1,2,3]","{""participantSex"":""Female"",""judgedDifficulty"":...",13853.32,2751646,321,survey-multi-choice,
171,5faaf4269b97fb4be7456619,,survey,5005-efff3894-49d8-48a2-9934-3a7b6eb8aa75,,0.0-322.0,pilot2,,"{""TechnicalDifficultiesFreeResp"":"""",""participa...",20543.025,2773205,322,survey-text,
172,5fab15079b97fb4be745661a,,survey,6744-5f12eacf-ed18-4ed4-a3b1-90612679e567,,0.0-1.0,pilot2,,"{""Q0"":""jjchang@ucsd.edu""}",28091.995,39758,1,survey-text,


In [37]:
# raw JSON-encoded survey responses, one string per survey event
# NOTE: these strings include participant email addresses -- clear this
# cell's output before sharing the notebook
W.loc[:, 'responses']

0                               {"Q0":"h1kong@ucsd.edu"}
1      {"participantSex":"Female","judgedDifficulty":...
2      {"TechnicalDifficultiesFreeResp":"","participa...
3                            {"Q0":"aharyanto@ucsd.edu"}
4                             {"Q0":"jnovilla@ucsd.edu"}
5                               {"Q0":"pap022@ucsd.edu"}
6      {"participantSex":"Female","judgedDifficulty":...
7      {"TechnicalDifficultiesFreeResp":"","participa...
8                            {"Q0":"aharyanto@ucsd.edu"}
9      {"participantSex":"Female","judgedDifficulty":...
10     {"TechnicalDifficultiesFreeResp":"","participa...
11                                {"Q0":"eblu@ucsd.edu"}
12                              {"Q0":"yuw091@ucsd.edu"}
13                              {"Q0":"yuw091@ucsd.edu"}
14                              {"Q0":"yuw091@ucsd.edu"}
15                              {"Q0":"yuw091@ucsd.edu"}
16     {"participantSex":"Male","judgedDifficulty":"5...
17     {"TechnicalDifficultiesF

In [38]:
## write both dataframes to csv_dir; file names carry the iteration name
test_data_csv = os.path.join(csv_dir, 'causaldraw_identification_test_data_{}.csv'.format(iterationName))
survey_data_csv = os.path.join(csv_dir, 'causaldraw_identification_survey_data_{}.csv'.format(iterationName))

D.to_csv(test_data_csv, index=False)
W.to_csv(survey_data_csv, index=False)