### setup

In [2]:
from __future__ import division

import os
import urllib, cStringIO

import pymongo as pm
import numpy as np
import scipy.stats as stats
import pandas as pd
import json
import re
import ast

from PIL import Image
import base64
import sys

from IPython.display import clear_output

## plotting
import matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot
import seaborn as sns
sns.set_context('talk')
sns.set_style('white')

from collections import Counter

## svg rendering 
# import ast
# from svgpathtools import parse_path, wsvg, svg2paths

#### paths etc.


In [20]:
# directory & file hierarchy (everything is anchored on the parent of the cwd)
proj_dir = os.path.abspath('../')
stimulus_dir = os.getcwd()
analysis_dir = os.path.join(proj_dir,'analysis')
results_dir = os.path.join(proj_dir,'results')
plot_dir = os.path.join(results_dir,'plots')
csv_dir = os.path.join(results_dir,'csv')
exp_dir = os.path.abspath(os.path.join(proj_dir,'experiments'))
sketch_dir = os.path.abspath(os.path.join(proj_dir,'sketches'))

## add helpers to python path (only once, so re-running the cell is harmless)
helper_dir = os.path.join(analysis_dir,'python')
if helper_dir not in sys.path:
    sys.path.append(helper_dir)

## create output directories that do not exist yet (parent first)
for out_dir in (results_dir, plot_dir, csv_dir):
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

# Import the analysis helpers, then reload so edits to the module are picked up
import df_generation_helpers as h
if sys.version_info[0]>=3:
    from importlib import reload  # reload is a builtin only on Python 2
reload(h)

<module 'df_generation_helpers' from '/Users/judithfan/graphical_conventions/analysis/python/df_generation_helpers.pyc'>

#### load in group data csv

In [32]:
## which experiment are we building stimuli for?
this_experiment = 'refgame2.0'

## nest sketches from each experiment under their own subdirectory
sketch_dir = os.path.abspath(os.path.join(proj_dir,'sketches',this_experiment))

## filename suffix of the group data csv, keyed by experiment name
experiment_dict = {'refgame1.2':'run3run4','refgame2.0':'run5_submitButton'}
exp_ext = experiment_dict[this_experiment]

## read the group data and pass it through the helper's preprocessing
## (handles missing data, i.e., missing draw duration measurements)
group_data_fname = 'graphical_conventions_group_data_{}.csv'.format(exp_ext)
path_to_group_data = os.path.join(csv_dir,group_data_fname)
X = h.preprocess_dataframe(pd.read_csv(path_to_group_data))

## remove index columns left over from an earlier csv export
if 'Unnamed: 0' in X.columns:
    X = X.drop(['Unnamed: 0','row_index'], axis=1)

Dataframe initially contained 65 unique games. Now contains 65 games.
There were 0 outlier games: []. Now filtered.


### render sketches using svg data (can be skipped if already rendered)

In [33]:
import svg_rendering_helpers as srh

In [34]:
## pull out the columns that identify each sketch, as strings
## (trial number and repetition are zero-padded to two characters)
gseries = X['gameID'].map(str)
nseries = X['trialNum'].map(str).str.zfill(2)
rseries = X['repetition'].map(str).str.zfill(2)
tseries = X['target'].map(str)

## image filenames have the form <gameID>_<repetition>_<target>
fname_list = ['_'.join(parts) for parts in zip(gseries,rseries,tseries)]

## each svgString cell holds a python-literal string; parse it back into an object
svg_string_list = list(map(ast.literal_eval, X.svgString.values))

In [27]:
## render out svg & convert to png
reload(srh)  ## pick up any edits made to the rendering helpers
reallyRun = 1
if reallyRun:
    ## write one svg file per sketch, named after its identifying info
    for this_fname,this_svg in zip(fname_list,svg_string_list):
        svg_fname = '{}.svg'.format(this_fname)
        srh.render_svg(this_svg,base_dir=sketch_dir,out_fname=svg_fname)
        clear_output(wait=True)

    ## collect the paths of the svg files that were just rendered
    svg_dir = os.path.join(sketch_dir,'svg')
    svg_paths = srh.generate_svg_path_list(svg_dir)

    ## convert all svg to png
    srh.svg_to_png(svg_paths,base_dir=sketch_dir)

convert /Users/judithfan/graphical_conventions/sketches/refgame2.0/svg/9834-afb725fc-5a68-4c1b-a6f1-5f45c61e3776_07_waiting_05.svg /Users/judithfan/graphical_conventions/sketches/refgame2.0/png/9834-afb725fc-5a68-4c1b-a6f1-5f45c61e3776_07_waiting_05.png


### upload stims to s3 (can be skipped if already uploaded)

In [28]:
import boto
bucket_name = 'graphical-conventions-sketches'
path_to_png = os.path.join(sketch_dir,'png')
runThis = 0  ## set to 1 to actually push the pngs to S3
if runThis:
    conn = boto.connect_s3()
    b = conn.create_bucket(bucket_name) ### if bucket already exists, then get_bucket, else create_bucket
    for ind,im in enumerate(os.listdir(path_to_png)):
        ## only upload rendered pngs; skip any other file in the directory
        if not im.endswith('png'):
            continue
        print('{} {}'.format(ind, im))
        png_key = b.new_key(im)
        png_key.set_contents_from_filename(os.path.join(path_to_png,im))
        png_key.set_acl('public-read')  ## sketches are fetched by URL, so make them public
        clear_output(wait=True)

2599 9834-afb725fc-5a68-4c1b-a6f1-5f45c61e3776_07_waiting_05.png


### build stimulus dictionary

**FYI**: `recog_id` refers to a unique session type in the recognition experiment, where all the sketches are guaranteed to have been generated by different participants in different repetition cycles 

#### from refgame1.2 (run3run4)
- `Meta_yoked` : 67 bundles of 40 trials (each bundle corresponding to 1 refgame)
- `Meta_scrambled` : 67 bundles of 40 trials (the four trials of each repetition coming from different games)
- `Meta` : 268 bundles of 10 trials (each trial corresponding to some repetition (8 repeated + 2 control))

#### from refgame2.0  (run5)
- `Meta_yoked_refgame2.0` : 65 bundles of 40 trials (each bundle corresponding to 1 refgame)
- `Meta_scrambled_refgame2.0` : 65 bundles of 40 trials (the four trials of each repetition coming from different games)
- `Meta_refgame2.0` : 260 bundles of 10 trials (each trial corresponding to some repetition (8 repeated + 2 control))

In [51]:
## sanity check: EVERY recog_id must contribute exactly this many trials.
## (Comparing only the smallest per-session count, as np.unique(...)[0] does,
## would let sessions with too many trials slip through.)
num_trials_per_recog_session = 10
trials_per_recog_id = X['recog_id'].value_counts()
bad_recog_ids = trials_per_recog_id[trials_per_recog_id != num_trials_per_recog_session]
assert len(bad_recog_ids)==0, \
    'recog_ids without exactly {} trials: {}'.format(num_trials_per_recog_session, bad_recog_ids.to_dict())

In [52]:
## metadata that goes into the stimuli database for the recognition experiment:
## keep every column except the bulky payloads (png and svgString)
X2 = X.drop(['png','svgString'], axis=1)

### yoked

In [43]:
## do we only want to upload what is remaining for the yoked?
upload_targeted = False
print('Do we only want to upload what is remaining for the yoked? {}'.format(upload_targeted))

## load in remaining gameIDs to run in yoked
remaining_gameIDs = pd.read_csv('orig_gameIDs_remaining_yoked.csv')
remaining_gameIDs = remaining_gameIDs.gameID.values

## choose the rows to bundle. In both branches _X2 is an independent copy, so
## columns that later cells add to X2 in place do not leak into the yoked bundles
if upload_targeted:
    _X2 = X2[X2['gameID'].isin(remaining_gameIDs)].copy()
    print('Subsetting by remaining gameIDs and assigning to new dataframe variable.')
else:
    _X2 = X2.copy()
    print('Do not subset, and assign copy of old dataframe variable to new variable.')


Do we only want to upload what is remaining for the yoked? False
Do not subset, and assign copy of old dataframe variable to new variable.


In [44]:
## Build one stimulus bundle per original game ("yoked" condition).
## Each bundle is a dict with:
##   'meta'   : list of trial dicts (one per row of that game)
##   'games'  : empty list
##   'gameID' : the original game's ID
shapenet_url_base = 'https://s3.amazonaws.com/shapenet-graphical-conventions/'
sketch_url_base = 'https://s3.amazonaws.com/graphical-conventions-sketches/'

Meta_yoked = []
for name, group in _X2.groupby('gameID'):
    print('{}'.format(name))
    stimdict = group.to_dict(orient='records')
    for trial in stimdict:
        target_shapenet = trial['target_shapenet']
        ## distractor columns are stored as stringified dicts in the csv
        distractors_shapenet = ast.literal_eval(trial['distractors_shapenet'])
        distractors = ast.literal_eval(trial['distractors'])
        ## replace the plain target name by a richer record (id, name, image url)
        trial['target'] = {'shapenetid': target_shapenet,
                           'objectname': trial['target'],
                           'url': shapenet_url_base + target_shapenet + '.png'}
        for key in ('distractor1','distractor2','distractor3'):
            trial[key] = {'shapenetid': distractors_shapenet[key],
                          'objectname': distractors[key],
                          'url': shapenet_url_base + distractors_shapenet[key] + '.png'}
        ## sketch name follows the <gameID>_<repetition>_<target> filename pattern
        trial['sketch'] = '_'.join([str(trial['gameID']),
                                    str(trial['repetition']).zfill(2),
                                    str(trial['target']['objectname'])])
        trial['sketch_url'] = sketch_url_base + trial['sketch'] + '.png'
        trial['orig_gameID'] = trial.pop('gameID') ## rename gameID as orig_gameID
    Stimdict = {'meta': stimdict,
                'games': [],
                'gameID': np.unique(group['gameID'].values)[0]}
    Meta_yoked.append(Stimdict)
    clear_output(wait=True)

## ensure that we built something, and that every yoked bundle (not just the
## first one) is associated with exactly one original gameID
assert len(Meta_yoked)>0
for bundle in Meta_yoked:
    assert len(set(trial['orig_gameID'] for trial in bundle['meta']))==1

9834-afb725fc-5a68-4c1b-a6f1-5f45c61e3776


### sequence-scrambled 

In [45]:
## do we only want to upload what is remaining for the scrambled40?
upload_targeted = False
print('Do we only want to upload what is remaining for the scrambled40? {}'.format(upload_targeted))

## create new column called grouped_recog_id: four consecutive recog_ids share one
## grouped id (true division via the __future__ import, then truncation to int)
X2['grouped_recog_id'] = np.int64(X2['recog_id']/4)

## load in remaining gameIDs to run in scrambled40
remaining_ids = pd.read_csv('grouped_recog_ids_remaining_scrambled40.csv')
remaining_ids = remaining_ids.grouped_recog_id.values
print('There are {} remaining grouped recog ids to run.'.format(len(remaining_ids)))

## choose the rows to bundle. In both branches __X2 is an independent copy, so
## later in-place changes to X2 cannot silently alter the scrambled40 data
if upload_targeted:
    __X2 = X2[X2['grouped_recog_id'].isin(remaining_ids)].copy()
    print('Subsetting by remaining gameIDs and assigning to new dataframe variable.')
else:
    __X2 = X2.copy()
    print('Do not subset, and assign copy of old dataframe variable to new variable.')

Do we only want to upload what is remaining for the scrambled40? False
There are 0 remaining grouped recog ids to run.
Do not subset, and assign copy of old dataframe variable to new variable.


In [46]:
## Build one stimulus bundle per grouped_recog_id ("scrambled40" condition).
## Each bundle is a dict with:
##   'meta'     : list of trial dicts, sorted by trialNum
##   'recog_id' : the grouped_recog_id of the bundle
##   'games'    : empty list
shapenet_url_base = 'https://s3.amazonaws.com/shapenet-graphical-conventions/'
sketch_url_base = 'https://s3.amazonaws.com/graphical-conventions-sketches/'

Meta_scrambled = []
for name, group in __X2.groupby('grouped_recog_id'):
    print('{}'.format(name))
    stimdict = group.to_dict(orient='records')
    for trial in stimdict:
        target_shapenet = trial['target_shapenet']
        ## distractor columns are stored as stringified dicts in the csv
        distractors_shapenet = ast.literal_eval(trial['distractors_shapenet'])
        distractors = ast.literal_eval(trial['distractors'])
        ## replace the plain target name by a richer record (id, name, image url)
        trial['target'] = {'shapenetid': target_shapenet,
                           'objectname': trial['target'],
                           'url': shapenet_url_base + target_shapenet + '.png'}
        for key in ('distractor1','distractor2','distractor3'):
            trial[key] = {'shapenetid': distractors_shapenet[key],
                          'objectname': distractors[key],
                          'url': shapenet_url_base + distractors_shapenet[key] + '.png'}
        ## sketch name follows the <gameID>_<repetition>_<target> filename pattern
        trial['sketch'] = '_'.join([str(trial['gameID']),
                                    str(trial['repetition']).zfill(2),
                                    str(trial['target']['objectname'])])
        trial['sketch_url'] = sketch_url_base + trial['sketch'] + '.png'
        trial['orig_gameID'] = trial.pop('gameID') ## rename gameID as orig_gameID
    ## present trials in trialNum order within the bundle
    stimdict_sorted = sorted(stimdict, key=lambda k: k['trialNum'])
    Stimdict = {'meta': stimdict_sorted,
                'recog_id': np.unique(group['grouped_recog_id'].values)[0],
                'games': []}
    Meta_scrambled.append(Stimdict)
    clear_output(wait=True)

## make sure that (in the first bundle) there are 4 reps per gameID, and 10 unique gameID's.
## Plain python containers are used so the check also works where dict.values()/keys()
## are views, and every count is compared to 4 rather than only the smallest one.
first_bundle_counts = Counter(trial['orig_gameID'] for trial in Meta_scrambled[0]['meta'])
assert all(count==4 for count in first_bundle_counts.values())
assert len(first_bundle_counts)==10

64


In [47]:
## display the distinct grouped_recog_id values present in the scrambled40 dataframe
np.unique(__X2['grouped_recog_id'].values)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64])

### scrambled10

In [54]:
# Build one stimulus bundle per original recog_id ("scrambled10" condition).
# Each bundle is a dict with:
#   'meta'     : list of trial dicts (one per row with that recog_id)
#   'recog_id' : the recog_id of the bundle
#   'games'    : empty list
shapenet_url_base = 'https://s3.amazonaws.com/shapenet-graphical-conventions/'
sketch_url_base = 'https://s3.amazonaws.com/graphical-conventions-sketches/'

Meta = []
for name,group in X2.groupby(['recog_id']):
    print('{}'.format(name))
    stimdict = group.to_dict(orient='records')
    for trial in stimdict:
        target_shapenet = trial['target_shapenet']
        ## distractor columns are stored as stringified dicts in the csv
        distractors_shapenet = ast.literal_eval(trial['distractors_shapenet'])
        distractors = ast.literal_eval(trial['distractors'])
        ## replace the plain target name by a richer record (id, name, image url)
        trial['target'] = {'shapenetid': target_shapenet,
                           'objectname': trial['target'],
                           'url': shapenet_url_base + target_shapenet + '.png'}
        for key in ('distractor1','distractor2','distractor3'):
            trial[key] = {'shapenetid': distractors_shapenet[key],
                          'objectname': distractors[key],
                          'url': shapenet_url_base + distractors_shapenet[key] + '.png'}
        ## sketch name follows the <gameID>_<repetition>_<target> filename pattern
        trial['sketch'] = '_'.join([str(trial['gameID']),
                                    str(trial['repetition']).zfill(2),
                                    str(trial['target']['objectname'])])
        trial['sketch_url'] = sketch_url_base + trial['sketch'] + '.png'
        trial['orig_gameID'] = trial.pop('gameID') ## rename gameID as orig_gameID
    Stimdict = {'meta': stimdict,
                'recog_id': np.unique(group['recog_id'].values)[0],
                'games': []}
    Meta.append(Stimdict)
    clear_output(wait=True)

259


### upload to mongo

**FYI**: `recog_id` refers to a unique session type in the recognition experiment, where all the sketches are guaranteed to have been generated by different participants in different repetition cycles 

`graphical_conventions_sketches_yoked` : 67 bundles of 40 trials (each bundle corresponding to 1 refgame)

`graphical_conventions_sketches_scrambled40` : 67 bundles of 40 trials (the four trials of each repetition coming from different games)

`graphical_conventions_sketches_scrambled10` : 268 bundles of 10 trials (each trial corresponding to some repetition (8 repeated + 2 control))

Each collection also has a `_dev` version (the same name with `_dev` appended).

In [55]:
## define dataset (mongo collection) names for each condition
## NOTE(review): the targeted ("remaining") names carry no experiment suffix, and
## upload_targeted holds whatever value the most recently run targeting cell set
## -- confirm both are intended.
if upload_targeted:
    yoked = 'graphical_conventions_sketches_yoked_remaining'
    scrambled40 = 'graphical_conventions_sketches_scrambled40_remaining'
else:
    yoked = 'graphical_conventions_sketches_yoked_{}'.format(this_experiment)
    scrambled40 = 'graphical_conventions_sketches_scrambled40_{}'.format(this_experiment)
scrambled10 = 'graphical_conventions_sketches_scrambled10_{}'.format(this_experiment)

In [56]:
## write out each list of stimulus bundles to its own json file,
## named <dataset name>_<experiment>.js
import json
for dataset_name, bundles in ((yoked, Meta_yoked),
                              (scrambled40, Meta_scrambled),
                              (scrambled10, Meta)):
    with open('{}_{}.js'.format(dataset_name,this_experiment), 'w') as fout:
        json.dump(bundles, fout)

In [57]:
### next todo is to upload this JSON to initialize the new stimulus collection
## Read the bundles back from disk. Files are opened in plain text mode inside
## `with` blocks so the handles are closed again ('ru' is not a valid mode on
## Python 3, and the bare open(...).read() never closed the files).
import json
with open('{}_{}.js'.format(yoked,this_experiment)) as fin:
    J_yoked = json.load(fin)
with open('{}_{}.js'.format(scrambled40,this_experiment)) as fin:
    J_scrambled40 = json.load(fin)
with open('{}_{}.js'.format(scrambled10,this_experiment)) as fin:
    J_scrambled10 = json.load(fin)

In [58]:
## report how many bundles were loaded for each dataset
for dataset_name, J_bundles in ((yoked, J_yoked),
                                (scrambled40, J_scrambled40),
                                (scrambled10, J_scrambled10)):
    print('dataset_name: {}'.format(dataset_name))
    print('Length of J is: {}'.format(len(J_bundles)))

dataset_name: graphical_conventions_sketches_yoked_refgame2.0
Length of J is: 65
dataset_name: graphical_conventions_sketches_scrambled40_refgame2.0
Length of J is: 65
dataset_name: graphical_conventions_sketches_scrambled10_refgame2.0
Length of J is: 260


In [60]:
# set vars
try:
    from urllib import quote_plus        # Python 2
except ImportError:
    from urllib.parse import quote_plus  # Python 3

auth = pd.read_csv('.auth.txt', header = None) # this auth.txt file contains the password for the sketchloop user
pswd = auth.values[0][0]
user = 'sketchloop'
host = 'stanford-cogsci.org' ## cocolab ip address

# have to fix this to be able to analyze from local
# (the client connects to 127.0.0.1, not to `host`)
# Username and password are percent-escaped so that characters such as '@', ':'
# or '/' in the password cannot corrupt the connection URI.
conn = pm.MongoClient('mongodb://{}:{}@127.0.0.1'.format(quote_plus(user), quote_plus(str(pswd))))
db = conn['stimuli']

# production collections
coll_yoked = db[yoked]
coll_scrambled40 = db[scrambled40]
coll_scrambled10 = db[scrambled10]

# development ('_dev') counterparts
yoked_dev = yoked + '_dev'
scrambled40_dev = scrambled40 + '_dev'
scrambled10_dev = scrambled10 + '_dev'
coll_yoked_dev = db[yoked_dev]
coll_scrambled40_dev = db[scrambled40_dev]
coll_scrambled10_dev = db[scrambled10_dev]

In [68]:
## actually add data now to the database
## recommended sequence: J_yoked, J_yoked_dev, J_scrambled40, J_scrambled40_dev
## NOTE: this is not idempotent -- every run inserts all records again, so leave
## reallyRun at 0 unless the target collections still need to be populated.
reallyRun = 0
if reallyRun:
    Y = zip([J_yoked, J_yoked, J_scrambled40, J_scrambled40],\
            [coll_yoked, coll_yoked_dev, coll_scrambled40, coll_scrambled40_dev])
    for J, coll in Y:
        ## count from 1 and report after each insert, so that the last message
        ## reads "N of N" once everything has been uploaded
        for num_uploaded, j in enumerate(J, 1):
            coll.insert_one(j)
            print ('%d of %d uploaded ...' % (num_uploaded,len(J)))
            clear_output(wait=True)

64 of 65 uploaded ...


In [75]:
## Count the records in the collection the upload loop visits last. It is named
## explicitly rather than through the loop variable `coll`, which only exists
## when the upload cell was run with reallyRun switched on.
print('We have {} records in the database.'.format(coll_scrambled40_dev.count()))

We have 65 records in the database.


In [81]:
## verify that each uploaded collection for this experiment holds one record per bundle
## (currently disabled: the '*_remaining_{}' collections, expected to match len(J_yoked) /
##  len(J_scrambled40), and the scrambled10 collections and their _dev versions, expected 260)
expected_num_bundles = 65
for coll_name_template in ('graphical_conventions_sketches_yoked_{}',
                           'graphical_conventions_sketches_yoked_{}_dev',
                           'graphical_conventions_sketches_scrambled40_{}_dev',
                           'graphical_conventions_sketches_scrambled40_{}'):
    assert db[coll_name_template.format(this_experiment)].count()==expected_num_bundles

In [82]:
## display the names of all collections in the stimuli database, alphabetically
sorted(db.collection_names())

[u'chairs140',
 u'chairs1k',
 u'chairs1k_archived',
 u'chairs1k_expansion_only',
 u'chairs2k',
 u'chairs2k_expansion_only',
 u'graphical_conventions_sketches_scrambled10',
 u'graphical_conventions_sketches_scrambled10_dev',
 u'graphical_conventions_sketches_scrambled40',
 u'graphical_conventions_sketches_scrambled40_dev',
 u'graphical_conventions_sketches_scrambled40_refgame2.0',
 u'graphical_conventions_sketches_scrambled40_refgame2.0_dev',
 u'graphical_conventions_sketches_scrambled40_remaining',
 u'graphical_conventions_sketches_yoked',
 u'graphical_conventions_sketches_yoked_dev',
 u'graphical_conventions_sketches_yoked_refgame2.0',
 u'graphical_conventions_sketches_yoked_refgame2.0_dev',
 u'graphical_conventions_sketches_yoked_remaining',
 u'kiddraw_tracing_eval',
 u'kiddraw_tracing_eval2',
 u'kiddraw_tracing_eval_dev',
 u'kiddraw_tracing_eval_square_copy',
 u'kiddraw_tracing_eval_square_copy_dev',
 u'photodraw2',
 u'shapenet_chairs_speaker_eval',
 u'sketchpad_basic_pilot2_sketche

In [78]:
## number of records in the yoked collection that has no experiment suffix
db['graphical_conventions_sketches_yoked'].count()

67