### What this notebook does
**Step 1:** Create metadata file, containing a bunch of JSON-formatted trial metadata objects

**Step 2:**  Insert each trial as a record into a mongo database

In [16]:
import os
import numpy as np
from PIL import Image
import pandas as pd
import json
import pymongo as pm
from glob import glob
from IPython.display import clear_output
import ast

### Step 1: Create metadata file, containing a bunch of JSON-formatted trial metadata objects

In [17]:
## where are your stimulus images stored?
data_dir = 'pngTower'
bucket_name = 'curiotower'
stim_version = 'curiodrop'
dataset_name = '{}_{}'.format(bucket_name, stim_version)

## get a list of absolute paths to each stimulus file
## The '.DS_Store' check is done on the bare directory entry (not on the
## absolute path split on '/'), so it does not depend on the OS path separator.
full_stim_paths = [os.path.abspath(os.path.join(data_dir, fname))
                   for fname in os.listdir(data_dir)
                   if fname != '.DS_Store'] ## bleh
print('We have {} images to evaluate.'.format(len(full_stim_paths)))

FileNotFoundError: [Errno 2] No such file or directory: 'pngTower'

In [None]:
## helper to build image urls
def build_s3_url(path, bucket_name = 'curiotower'):
    """Build the S3 URL of a stimulus file.

    Only the last component of `path` (the file name) ends up in the URL;
    any leading directories are discarded.

    Parameters
    ----------
    path : str
        Local path, or bare file name, of the stimulus image.
    bucket_name : str
        Name used as the subdomain of `s3.amazonaws.com`.

    Returns
    -------
    str
        'https://<bucket_name>.s3.amazonaws.com/<file name>'
    """
    # os.path.basename follows the platform's separator rules, whereas a
    # hard-coded split('/') only works for POSIX-style paths
    return 'https://{}.s3.amazonaws.com/{}'.format(bucket_name, os.path.basename(path))


In [None]:
## basic metadata lists
image_urls = [build_s3_url(p) for p in full_stim_paths]
## tower ID = file name up to its first '.'; os.path.basename is used so the
## file name is isolated correctly whatever the platform's path separator is
towerIDs = [os.path.basename(p).split('.')[0] for p in full_stim_paths]


In [None]:
## assemble the metadata table: one row per tower, paired with its image url
M = pd.DataFrame(list(zip(towerIDs, image_urls)), columns=['towerID', 'image_url'])
M['stim_version'] = stim_version
## empty games list for marking records when retrieved from mongo (see store.js)
## the literal '[]' is parsed once per row, so each row gets its own list object
M['games'] = '[]'
M['games'] = M['games'].map(ast.literal_eval)

In [None]:
## turn M into a list of per-row dictionaries (J), then write J to <dataset_name>_meta.js
J = M.to_dict(orient='records')

meta_path = '{}_meta.js'.format(dataset_name)
with open(meta_path, 'w') as meta_file:
    json.dump(J, meta_file)

### Step 2: Insert each trial as a record into a mongo database

In [13]:
## remember to establish tunnel to mongodb on remote server first
#### e.g. by running at the terminal, `ssh -fNL 27017:127.0.0.1:27017 USERNAME@cogtoolslab.org`

# set vars
# this auth.txt file contains the password for the sketchloop user;
# dtype=str keeps an all-digit password as text instead of parsing it as a number
auth = pd.read_csv('../analysis/auth.txt', header = None, dtype = str)
pswd = auth.values[0][0]
user = 'sketchloop'
host = 'cogtoolslab.org' ## cogtoolslab address; not used below because the client goes through the tunnel on 127.0.0.1

# have to fix this to be able to analyze from local
# Credentials go in as keyword arguments rather than being concatenated into
# the URI: inside a URI, reserved characters in the password ('@', ':', '/', '%')
# would have to be percent-escaped, otherwise the connection string is misparsed.
conn = pm.MongoClient('mongodb://127.0.0.1', username = user, password = pswd)
db = conn['stimuli']
coll = db[dataset_name]

In [None]:
## actually add data now to the database
## A tower is only inserted when the collection holds no document with the same
## towerID, so running this cell again does not store a second copy of each record.
n_inserted = 0
for (i,m) in enumerate(J):
    if coll.find_one({'towerID': m['towerID']}, {'_id': 1}) is None:
        coll.insert_one(m)
        n_inserted += 1
        action = 'Inserting'
    else:
        action = 'Skipping existing'
    print('{} of {}| {} tower {}'.format(i+1, len(J), action, m['towerID']))
    clear_output(wait=True)

print('Done inserting records into mongo!')
print('{} new, {} already present'.format(n_inserted, len(J) - n_inserted))


In [None]:
## check collection to see what records look like
## (spot check: displays a single document fetched without any filter)
coll.find_one()

In [None]:
## how many documents the collection currently reports (an estimate, per the method name)
coll.estimated_document_count()

In [None]:
# TODO: create a function to test insertion of records
#   - is there anything in the collection right now, and if so, does its size match the number of records that we want?
#     i.e. should we add/overwrite?
#   - how many records do we expect there to be in this collection?
#       - if this matches the number we want, and we do not want to overwrite, then leave it alone
#       - if these numbers are different and you want to update/change the collection, then: drop old and insert new

In [None]:
# number of documents the collection currently reports (estimated count)
current_record_count = coll.estimated_document_count()
# number of records held in J
new_records_count = len(J)



# Alternative method: bring in all trials as a single packet (rather than each individually)


### This part is the same as in the first method

In [80]:
## where are your stimulus images stored?
data_dir = 'tdw_png'
project_name = 'curiotower'
stim_version = 'tdw-height3Jitter3' #'curiodrop'
dataset_name = '{}-{}'.format(project_name, stim_version)

## get a list of absolute paths to each stimulus file
## The '.DS_Store' check is done on the bare directory entry (not on the
## absolute path split on '/'), so it does not depend on the OS path separator.
full_stim_paths = [os.path.abspath(os.path.join(data_dir, fname))
                   for fname in os.listdir(data_dir)
                   if fname != '.DS_Store'] ## bleh
print('We have {} images to evaluate.'.format(len(full_stim_paths)))

We have 146 images to evaluate.


In [84]:
## helper to build image urls
def build_s3_url(path, dataset_name = 'curiotower'):
    """Build the S3 URL of a stimulus file.

    Only the last component of `path` (the file name) ends up in the URL;
    any leading directories are discarded.

    Parameters
    ----------
    path : str
        Local path, or bare file name, of the stimulus image.
    dataset_name : str
        Name used as the subdomain of `s3.amazonaws.com`. Despite the
        parameter name, this is the value placed where the bucket goes in
        the URL, and it is independent of the notebook-level `dataset_name`.

    Returns
    -------
    str
        'https://<dataset_name>.s3.amazonaws.com/<file name>'
    """
    # os.path.basename follows the platform's separator rules, whereas a
    # hard-coded split('/') only works for POSIX-style paths
    return 'https://{}.s3.amazonaws.com/{}'.format(dataset_name, os.path.basename(path))

In [85]:
## basic metadata lists
image_urls = [build_s3_url(p,dataset_name = 'curiotower-tdw') for p in full_stim_paths]
## tower ID = file name up to its first '.'; os.path.basename is used so the
## file name is isolated correctly whatever the platform's path separator is
towerIDs = [os.path.basename(p).split('.')[0] for p in full_stim_paths]

In [86]:
## quick look at the bare towerID / image_url table
## (catch trials, i.e. towers with num_blocks = 1, are not flagged in this cell)
M = pd.DataFrame(list(zip(towerIDs, image_urls)), columns=['towerID', 'image_url'])
M.head()

Unnamed: 0,towerID,image_url
0,curiotower_2_high_0002_1,https://curiotower-tdw.s3.amazonaws.com/curiot...
1,curiotower_4_high_0003_0,https://curiotower-tdw.s3.amazonaws.com/curiot...
2,curiotower_4_high_0003_1,https://curiotower-tdw.s3.amazonaws.com/curiot...
3,curiotower_2_high_0002_0,https://curiotower-tdw.s3.amazonaws.com/curiot...
4,curiotower_4_high_0001_1,https://curiotower-tdw.s3.amazonaws.com/curiot...


In [87]:
## build the stimulus table: one row per tower image
M = pd.DataFrame(list(zip(towerIDs, image_urls)), columns=['towerID', 'image_url'])
## block count = second '_'-separated token of the image file name
M['num_blocks'] = [pd.to_numeric(url.split("/")[-1].split("_")[1]) for url in M['image_url']]
M['stim_version'] = stim_version
## catch trials are those with only one block
M = M.assign(catch_trial = M['num_blocks'].eq(1))
## empty games list for marking records when retrieved from mongo (see store.js)
## the literal '[]' is parsed once per row, so each row gets its own list object
M['games'] = '[]'
M['games'] = M['games'].map(ast.literal_eval)


is_catch = M['catch_trial']
print("Number of catch trials:", int(is_catch.sum()))
## sanity check: the number of non-catch stimuli is hard-coded to 144 here
assert int((~is_catch).sum()) == 144

Number of catch trials: 2


## The key difference is treating all the records as a single "meta" record

In [88]:
## wrap every stimulus row into one "meta" dictionary for this experiment
stimList = M.to_dict(orient='records')
stimDict = {
    'meta': stimList,
    'games': [],
    'experimentName': dataset_name,
}
## list of all version dictionaries (only this one version is added here)
Meta = [stimDict]


In [89]:
## display the assembled list to eyeball its structure
Meta

[{'meta': [{'towerID': 'curiotower_2_high_0002_1',
    'image_url': 'https://curiotower-tdw.s3.amazonaws.com/curiotower_2_high_0002_1.png',
    'num_blocks': 2,
    'stim_version': 'tdw-height3Jitter3',
    'catch_trial': False,
    'games': []},
   {'towerID': 'curiotower_4_high_0003_0',
    'image_url': 'https://curiotower-tdw.s3.amazonaws.com/curiotower_4_high_0003_0.png',
    'num_blocks': 4,
    'stim_version': 'tdw-height3Jitter3',
    'catch_trial': False,
    'games': []},
   {'towerID': 'curiotower_4_high_0003_1',
    'image_url': 'https://curiotower-tdw.s3.amazonaws.com/curiotower_4_high_0003_1.png',
    'num_blocks': 4,
    'stim_version': 'tdw-height3Jitter3',
    'catch_trial': False,
    'games': []},
   {'towerID': 'curiotower_2_high_0002_0',
    'image_url': 'https://curiotower-tdw.s3.amazonaws.com/curiotower_2_high_0002_0.png',
    'num_blocks': 2,
    'stim_version': 'tdw-height3Jitter3',
    'catch_trial': False,
    'games': []},
   {'towerID': 'curiotower_4_high_00

In [98]:
## write Meta to <dataset_name>_meta.js as JSON
print('Saving out json dictionary out to file...')
meta_path = '{}_meta.js'.format(dataset_name)
with open(meta_path, 'w') as meta_file:
    json.dump(Meta, meta_file)
print('Done!')

Saving out json dictionary out to file...
Done!


In [99]:
## remember to establish tunnel to mongodb on remote server first
#### e.g. by running at the terminal, `ssh -fNL 27017:127.0.0.1:27017 USERNAME@cogtoolslab.org`

# set vars
# this auth.txt file contains the password for the sketchloop user;
# dtype=str keeps an all-digit password as text instead of parsing it as a number
auth = pd.read_csv('../analysis/auth.txt', header = None, dtype = str)
pswd = auth.values[0][0]
user = 'sketchloop'
host = 'cogtoolslab.org' ## cogtoolslab address; not used below because the client goes through the tunnel on 127.0.0.1

# have to fix this to be able to analyze from local
# Credentials go in as keyword arguments rather than being concatenated into
# the URI: inside a URI, reserved characters in the password ('@', ':', '/', '%')
# would have to be percent-escaped, otherwise the connection string is misparsed.
conn = pm.MongoClient('mongodb://127.0.0.1', username = user, password = pswd)
db = conn['stimuli']
coll = db[dataset_name]

In [100]:
#reload JSON back in to the new stimulus collection
## read inside a context manager so the file handle is closed as soon as parsing is done
with open('{}_meta.js'.format(dataset_name), mode='r') as fin:
    J = json.load(fin)
print('dataset_name: {}'.format(dataset_name))
print('Length of J is: {}'.format(len(J)))

dataset_name: curiotower-tdw-height3Jitter3
Length of J is: 1


In [101]:
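## uncomment to drop the existing collection before re-inserting (destructive: removes the whole collection)
#db.drop_collection('curiotower-tdw-height3Jitter3')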

In [102]:
#get list of current collections in the database, sorted by name for easier scanning
sorted(db.list_collection_names())

['block-construction-silhouette-exp01',
 'block-construction-silhouette-exp02',
 'causaldraw',
 'causaldraw_annotations',
 'causaldraw_annotations_patching',
 'causaldraw_identification',
 'causaldraw_intervention',
 'causaldraw_intervention_patching',
 'collabdraw_collab8_recog',
 'curiotower-tdw',
 'curiotower-tdw-height3Jitter3',
 'curiotower_curiodrop',
 'graphical_conventions_object_annotation',
 'graphical_conventions_semantic_mapping',
 'graphical_conventions_semantic_mapping_patching',
 'graphical_conventions_semantic_mapping_spline_version_old',
 'iternum_classification',
 'photodraw2',
 'semantic_parts_graphical_conventions',
 'svg_annotation_sketchpad_basic_allcats']

In [103]:
## actually add data now to the database
## A record is only inserted when the collection holds no document with the same
## experimentName, so running this cell again does not store a duplicate meta record.
n_inserted = 0
for (i,m) in enumerate(J):
    if coll.find_one({'experimentName': m['experimentName']}, {'_id': 1}) is None:
        coll.insert_one(m)
        n_inserted += 1
    print('{} of {}'.format(i+1, len(J)))
    clear_output(wait=True)

print('Done inserting records into mongo!')
print('{} new, {} already present'.format(n_inserted, len(J) - n_inserted))

Done inserting records into mongo!


In [104]:
## NOTE(review): the recorded output is 2 although J held a single record, and the
## collection was already listed before the insert cell ran -- check for a duplicate meta record
coll.estimated_document_count()

2

In [105]:
## spot check: display one stored document to confirm the 'meta' list came through
coll.find_one()

{'_id': ObjectId('5fffb6af6a37d285d63173ec'),
 'meta': [{'towerID': 'curiotower_2_high_0002_1',
   'image_url': 'https://curiotower-tdw.s3.amazonaws.com/curiotower_2_high_0002_1.png',
   'num_blocks': 2,
   'stim_version': 'tdw-height3Jitter3',
   'catch_trial': False,
   'games': []},
  {'towerID': 'curiotower_4_high_0003_0',
   'image_url': 'https://curiotower-tdw.s3.amazonaws.com/curiotower_4_high_0003_0.png',
   'num_blocks': 4,
   'stim_version': 'tdw-height3Jitter3',
   'catch_trial': False,
   'games': []},
  {'towerID': 'curiotower_4_high_0003_1',
   'image_url': 'https://curiotower-tdw.s3.amazonaws.com/curiotower_4_high_0003_1.png',
   'num_blocks': 4,
   'stim_version': 'tdw-height3Jitter3',
   'catch_trial': False,
   'games': []},
  {'towerID': 'curiotower_2_high_0002_0',
   'image_url': 'https://curiotower-tdw.s3.amazonaws.com/curiotower_2_high_0002_0.png',
   'num_blocks': 2,
   'stim_version': 'tdw-height3Jitter3',
   'catch_trial': False,
   'games': []},
  {'towerID': 