### setup

In [None]:
from __future__ import division

import os
import urllib, cStringIO

import pymongo as pm
import numpy as np
import scipy.stats as stats
import pandas as pd
import json
import re
import ast

from PIL import Image
import base64
import sys

from IPython.display import clear_output

## plotting
import matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot
import seaborn as sns
sns.set_context('talk')
sns.set_style('white')

## svg rendering 
# import ast
# from svgpathtools import parse_path, wsvg, svg2paths

#### paths etc.


In [None]:
# directory & file hierarchy: every project path hangs off the parent of the
# current working directory
stimulus_dir = os.getcwd()
proj_dir = os.path.abspath('../')
analysis_dir = os.path.join(proj_dir, 'analysis')
results_dir = os.path.join(proj_dir, 'results')
plot_dir = os.path.join(results_dir, 'plots')
csv_dir = os.path.join(results_dir, 'csv')
exp_dir = os.path.abspath(os.path.join(proj_dir, 'experiments'))
sketch_dir = os.path.abspath(os.path.join(proj_dir, 'sketches'))

## make the analysis helpers importable (appended to sys.path at most once)
helper_dir = os.path.join(analysis_dir, 'python')
if helper_dir not in sys.path:
    sys.path.append(helper_dir)

## create the output directories if they do not exist yet
for out_dir in (results_dir, plot_dir, csv_dir):
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

## import the dataframe helpers and reload them right away;
## `reload` is a builtin on Python 2 and lives in importlib on Python 3
import df_generation_helpers as h
if sys.version_info[0] >= 3:
    from importlib import reload
reload(h)

#### load in group data csv

In [None]:
## read the group-level data frame from the results directory
path_to_group_data = os.path.join(results_dir,'graphical_conventions.csv')
X = pd.read_csv(path_to_group_data)

## remove unnecessary columns (presumably index columns written by an earlier
## save -- confirm). errors='ignore' drops whichever of the two are present,
## so a csv that has 'Unnamed: 0' but no 'row_index' no longer raises KeyError.
if 'Unnamed: 0' in X.columns:
    X = X.drop(labels=['Unnamed: 0','row_index'], axis=1, errors='ignore')

### render sketches using svg data

In [None]:
import svg_rendering_helpers as srh

In [None]:
## extract sketch identifying info as string series; trial number and
## repetition are zero-padded to a width of two
gseries = X['gameID'].map(str)
tseries = X['target'].map(str)
nseries = X['trialNum'].map(lambda value: str(value).zfill(2))
rseries = X['repetition'].map(lambda value: str(value).zfill(2))

## build list of image filenames: <gameID>_<repetition>_<target>
fname_list = ['_'.join(parts) for parts in zip(gseries, rseries, tseries)]

## parse each stored svg string (a python literal) back into a python object
svg_string_list = [ast.literal_eval(raw_svg) for raw_svg in X['svgString'].values]

In [None]:
## render every sketch out as svg, then convert the svgs to png
## (only runs when the reallyRun flag below is switched on)
reload(srh)
reallyRun = 0
if reallyRun:
    svg_dir = os.path.join(sketch_dir, 'svg')
    for sketch_name, svg_data in zip(fname_list, svg_string_list):
        srh.render_svg(svg_data, base_dir=sketch_dir, out_fname=sketch_name + '.svg')
        clear_output(wait=True)

    ## list the svg files under <sketch_dir>/svg
    ## (presumably where render_svg wrote them -- confirm in srh)
    svg_paths = srh.generate_svg_path_list(svg_dir)

    ## convert all svg to png
    srh.svg_to_png(svg_paths, base_dir=sketch_dir)

### upload stims to s3

In [None]:
## upload every rendered png in <sketch_dir>/png to S3 and make it publicly readable
## (only runs when the runThis flag below is switched on)
import boto
bucket_name = 'graphical-conventions-sketches'
path_to_png = os.path.join(sketch_dir,'png')
runThis = 0
if runThis:
    conn = boto.connect_s3()
    ## reuse the bucket if it already exists, otherwise create it
    ## (lookup returns None instead of raising when the bucket is missing)
    b = conn.lookup(bucket_name)
    if b is None:
        b = conn.create_bucket(bucket_name)
    for ind,im in enumerate(os.listdir(path_to_png)):
        if im.endswith('png'):
            ## single formatted string so the output is the same on Python 2 and 3
            print('{} {}'.format(ind, im))
            k = b.new_key(im)
            k.set_contents_from_filename(os.path.join(path_to_png,im))
            k.set_acl('public-read')
            clear_output(wait=True)

### build stimulus dictionary

**FYI**: `recog_id` refers to a unique session type in the recognition experiment, where all the sketches are guaranteed to have been generated by different participants in different repetition cycles 

In [None]:
## sanity check: EVERY recognition session must contain exactly
## `num_trials_per_recog_session` trials. Checking only the smallest count
## would let oversized sessions slip through, so all counts are compared.
num_trials_per_recog_session = 10
trials_per_session = X['recog_id'].value_counts(dropna=False)
assert len(trials_per_session) > 0, 'no recog sessions found in X'
assert (trials_per_session == num_trials_per_recog_session).all(), \
    'recog sessions with unexpected trial counts: {}'.format(
        trials_per_session[trials_per_session != num_trials_per_recog_session].to_dict())

In [None]:
## columns kept out of the stimuli database for the recognition experiment:
## everything is retained except the bigger pieces of data listed here
bulky_columns = ['png', 'svgString']
X2 = X.drop(labels=bulky_columns, axis=1)

In [None]:
## build one stimulus dict per recognition session (recog_id) and collect them in Meta
shapenet_url_prefix = 'https://s3.amazonaws.com/shapenet-graphical-conventions/'
sketch_url_prefix = 'https://s3.amazonaws.com/graphical-conventions-sketches/'

def _object_entry(shapenetid, objectname):
    ## bundle an object's shapenet id, its name, and the url of its png
    return {'shapenetid': shapenetid,
            'objectname': objectname,
            'url': shapenet_url_prefix + shapenetid + '.png'}

Meta = []
for name, group in X2.groupby(['recog_id']):
    print('{}'.format(name))
    stimdict = group.to_dict(orient='records')  ## one dict per trial in this session
    for trial in stimdict:
        target_id = trial['target_shapenet']
        distractor_ids = ast.literal_eval(trial['distractors_shapenet'])
        distractor_names = ast.literal_eval(trial['distractors'])
        ## the plain 'target' name is replaced by a dict describing the object
        trial['target'] = _object_entry(target_id, trial['target'])
        for slot in ('distractor1', 'distractor2', 'distractor3'):
            trial[slot] = _object_entry(distractor_ids[slot], distractor_names[slot])
        ## sketch name: <gameID>_<zero-padded repetition>_<target name>
        trial['sketch'] = '_'.join([str(trial['gameID']),
                                    str(trial['repetition']).zfill(2),
                                    str(trial['target']['objectname'])])
        trial['sketch_url'] = sketch_url_prefix + trial['sketch'] + '.png'
    Stimdict = {}
    Stimdict['meta'] = stimdict
    Stimdict['recog_id'] = np.unique(group['recog_id'].values)[0]
    Stimdict['games'] = []
    Meta.append(Stimdict)
    clear_output(wait=True)

### upload to mongo

In [None]:
## base name used for both the exported '<dataset_name>.js' file and the
## mongo collection inside the 'stimuli' database
dataset_name = 'graphical_conventions_sketches'

In [None]:
## serialize the list of per-session stimulus dicts (Meta) as JSON
## into '<dataset_name>.js' in the current working directory
import json
json_path = dataset_name + '.js'
with open(json_path, 'w') as fout:
    json.dump(Meta, fout)

In [None]:
### next todo is to upload this JSON to initialize the new stimulus collection
print('next todo is to upload this JSON to initialize the new stimulus collection...')
import json
## read the exported JSON back in. The context manager closes the file handle
## (the old bare open() never did), and plain 'r' replaces the invalid 'ru'
## mode, which Python 3 rejects with ValueError.
with open('{}.js'.format(dataset_name), 'r') as fin:
    J = json.load(fin)

In [None]:
## report which dataset was loaded and how many entries it holds
print('dataset_name: {}'.format(dataset_name))
print('Length of J is: {}'.format(len(J)))

In [None]:
# credentials: the first cell of .auth.txt holds the password for the sketchloop user
auth = pd.read_csv('.auth.txt', header=None)
pswd = auth.values[0, 0]
user = 'sketchloop'
host = 'rxdhawkins.me'  ## cocolab ip address (not used by the connection below)

# have to fix this to be able to analyze from local
# NOTE(review): the password goes into the URI verbatim -- if it contains
# reserved characters it may need percent-escaping; confirm against .auth.txt
mongo_uri = 'mongodb://' + user + ':' + pswd + '@127.0.0.1'
conn = pm.MongoClient(mongo_uri)
db = conn['stimuli']
coll = db[dataset_name]

In [None]:
## insert every entry of J into the collection, one document at a time,
## printing progress on every tenth entry
reallyRun = 1
if reallyRun:
    n_records = len(J)
    for idx, record in enumerate(J):
        if idx % 10 == 0:
            print('%d of %d uploaded ...' % (idx, n_records))
            clear_output(wait=True)
        coll.insert_one(record)

In [None]:
## report how many documents the collection holds
n_in_db = coll.count()
print('We have {} records in the database.'.format(n_in_db))