In [1]:
from __future__ import division
import os
import numpy as np
from PIL import Image
import matplotlib
from matplotlib import pyplot,pylab
plt = pyplot
import scipy
import seaborn as sns
sns.set_style('white')
import string
import pandas as pd
import json
import pymongo as pm
from glob import glob

### helper funcs

In [2]:
## this helps to sort in human order
import re

def tryint(s):
    """Return int(s) when s parses as an integer; otherwise return s unchanged."""
    try:
        as_number = int(s)
    except ValueError:
        # not an integer literal (e.g. 'abc' or ''): hand the input back as-is
        return s
    else:
        return as_number
     
def alphanum_key(s):
    """ Split a string into alternating text and integer chunks (a natural-sort key).
        "z23a" -> ["z", 23, "a"]
    """
    # the capturing group makes re.split keep the digit runs in the result
    chunks = re.split('([0-9]+)', s)
    return list(map(tryint, chunks))

def sort_nicely(l):
    """ Reorder the list in place into natural ("human") order, so that
        embedded numbers compare numerically. Nothing is returned.
    """
    l.sort(key=alphanum_key)
    
def load_text(path):
    """ Return the first line of the text file at `path` with special tokens made readable.

        '<DIA>' is replaced by '-' and '<UKN>' by '___'. The trailing newline of the
        line, if present, is kept. An empty file yields ''.
    """
    with open(path, 'r') as f:
        # only the first line is used, so do not read the rest of the file;
        # readline() returns '' for an empty file instead of raising
        utt = f.readline()
    # str.replace is a no-op when the token is absent, so no membership test is needed
    return utt.replace('<DIA>', '-').replace('<UKN>', '___')

def list_files(path, ext='svg'):
    """ Recursively collect the paths under `path` whose names match '*.<ext>'.

        path: root directory to walk.
        ext:  file extension to match, without the leading dot.

        Returns a sorted list of paths. Sorting matters: neither os.walk nor glob
        promises an order, and callers pair this list with a seeded shuffle, so an
        unsorted result would make that pairing differ between runs or machines.
    """
    pattern = '*.%s' % ext
    result = [y for x in os.walk(path)
              for y in glob(os.path.join(x[0], pattern))]
    return sorted(result)

### setup

In [19]:
# paths
# root folder of the stimulus images, given relative to the current working directory
path_to_images = './photodraw2'

In [27]:
# deborkify image path names: give every image the canonical name '<category>_<index>.jpg'
# NOTE: every file in a category folder is given a '.jpg' suffix, whatever its original extension
# only sub-folders are treated as categories, so stray files (e.g. .DS_Store) are skipped
cats = sorted(i for i in os.listdir(path_to_images)
              if os.path.isdir(os.path.join(path_to_images, i)))
for cat in cats:
    cat_dir = os.path.join(path_to_images, cat)
    ims = sorted(i for i in os.listdir(cat_dir) if i != '.DS_Store')
    # files that already carry a canonical name are left alone, which makes this cell
    # safe to re-run: no file changes its id and nothing is renamed twice
    canonical = re.compile(r'%s_(0|[1-9][0-9]*)\.jpg$' % re.escape(cat))
    next_ind = 0
    for im in ims:
        if canonical.match(im):
            continue
        # os.rename silently replaces an existing target on POSIX, so advance to the
        # lowest index whose file name is still free before renaming
        while os.path.exists(os.path.join(cat_dir, '{}_{}.jpg'.format(cat, next_ind))):
            next_ind += 1
        os.rename(os.path.join(cat_dir, im),
                  os.path.join(cat_dir, '{}_{}.jpg'.format(cat, next_ind)))

In [35]:
# sanity check: number of .jpg files found anywhere under path_to_images
len(list_files(path_to_images,ext='jpg'))

34

### generate stimulus dataframe


In [4]:
print('Generating dataframe with each cue and their attributes...')

# path to images
path_to_images = './photodraw2'
# specify list of conditions
conditions = ['photo','label']
# get list of image paths; sorted so that the seeded shuffle below gives the same
# index to the same stimulus on every run (directory listing order is arbitrary)
image_paths = sorted(list_files(path_to_images,ext='jpg'))
# bucket name
bucket_name = 'drawbase-demo'
# specify dataset name
dataset_name = 'photodraw2'

# one entry per (condition, image) pair is appended to each of these parallel lists
condition = [] # photo vs. label
category = []
image_id = []
image_url = []
games = [] # this field keeps track of which games this stimulus has been shown in
shuffler_ind = []

## generate permuted list of stimulus indices in order to be able to retrieve stimuli pseudorandomly
inds = np.arange(len(conditions)*len(image_paths))
shuffled_inds = np.random.RandomState(0).permutation(inds)
counter = 0
for this_condition in conditions:
    for this_img in image_paths:
        condition.append(this_condition)
        # category is the name of the folder that directly contains the image
        category.append(os.path.basename(os.path.dirname(this_img)))
        # image id is the file name without its extension (dots inside the name are kept)
        _image_id = os.path.splitext(os.path.basename(this_img))[0]
        image_id.append(_image_id)
        image_url.append('https://s3.amazonaws.com/{}/{}.jpg'.format(bucket_name,_image_id))
        games.append([])
        # plain int rather than a numpy integer, so the record can be serialised to JSON
        shuffler_ind.append(int(shuffled_inds[counter]))
        counter += 1

Generating dataframe with each cue and their attributes...


In [5]:
print('Generating pandas dataframe...')
# each parallel list becomes one labelled row, and the transpose turns those rows into
# columns, giving one dataframe row per (condition, image) stimulus
headers = ['condition','category','image_id','image_url','games','shuffler_ind']
table = [condition,category,image_id,image_url,games,shuffler_ind]
df = pd.DataFrame(table, index=headers).T

Generating pandas dataframe...


In [6]:
## write the stimulus records to '<dataset_name>_meta.js' as a JSON list
print('Saving out json dictionary out to file...')
# one dict per dataframe row, keyed by column name
stimdict = df.to_dict(orient='records')
with open('{}_meta.js'.format(dataset_name), 'w') as fout:
    fout.write(json.dumps(stimdict))

Saving out json dictionary out to file...


In [7]:
### next todo is to upload this JSON to initialize the new stimulus collection
print('next todo is to upload this JSON to initialize the new stimulus collection...')
# read the stimulus records back from disk; the with-block closes the file handle, and
# plain 'r' replaces the mode string 'ru', which Python 3 rejects as an invalid mode
with open('{}_meta.js'.format(dataset_name), 'r') as fin:
    J = json.load(fin)

next todo is to upload this JSON to initialize the new stimulus collection...


In [8]:
# the file on disk should hold exactly one record per (condition, image) pair
assert len(J)==len(image_paths)*len(conditions), 'unexpected number of stimulus records'
# single-argument print(...) produces the same output under Python 2 and Python 3
print('dataset_name: {}'.format(dataset_name))
print('num entries in stim dictionary: {}'.format(len(J)))

dataset_name: photodraw2
num entries in stim dictionary: 32


In [9]:
## remember to establish tunnel to mongodb on remote server first

try:
    from urllib.parse import quote_plus  # Python 3
except ImportError:
    from urllib import quote_plus  # Python 2

# set vars
# this auth.txt file contains the password for the sketchloop user;
# dtype=str / keep_default_na=False stop pandas from turning a numeric-looking or
# 'NA'-like password into a number or NaN
auth = pd.read_csv('auth.txt', header = None, dtype = str, keep_default_na = False)
pswd = auth.values[0][0]
user = 'sketchloop'
host = 'rxdhawkins.me' ## cocolab ip address (not used below: the connection goes through the local tunnel)

# have to fix this to be able to analyze from local
# credentials are percent-escaped because characters such as '@', ':' or '/' in a
# password would otherwise corrupt the connection URI
conn = pm.MongoClient('mongodb://{}:{}@127.0.0.1'.format(quote_plus(user), quote_plus(pswd)))
db = conn['stimuli']
coll = db[dataset_name]

In [14]:
## actually add data now to the database
## each record is upserted on its identifying fields, so re-running this cell does not
## create duplicates and does not overwrite the 'games' history of existing records
reallyRun = 1
if reallyRun:
    for (i,j) in enumerate(J):
        if i%10==0:
            print ('%d of %d' % (i,len(J)))
        key = {'condition': j['condition'],
               'category': j['category'],
               'image_id': j['image_id']}
        # $setOnInsert only takes effect when no matching document exists yet
        coll.update_one(key, {'$setOnInsert': j}, upsert=True)

0 of 32
10 of 32
20 of 32
30 of 32


In [15]:
## check what fraction of the stimulus records has been shown in at least one game
a = coll.find({'shuffler_ind':{'$gte':0}})
# number of games recorded for each retrieved record
numGames = [len(rec['games']) for rec in a]
b = np.array(numGames)
# mean of the boolean mask = proportion of records with a non-empty 'games' list
print(np.mean(b>0))

0.0


In [11]:
# number of documents currently in the stimulus collection
# NOTE(review): newer PyMongo releases deprecate Collection.count() in favour of
# count_documents({}) — confirm against the installed PyMongo version
coll.count()

68