### What this notebook does
**Step 1:** Create metadata file, containing a bunch of JSON-formatted trial metadata objects

**Step 2:**  Insert each trial as a record into a mongo database

In [1]:
import os
import numpy as np
from PIL import Image
import pandas as pd
import json
import pymongo as pm
from glob import glob
from IPython.display import clear_output
import ast

### Step 1: Create metadata file, containing a bunch of JSON-formatted trial metadata objects

In [2]:
## where are your stimulus images stored?
data_dir = 'pngTower'
bucket_name = 'curiotower'
stim_version = 'curiodrop'
dataset_name = '{}_{}'.format(bucket_name, stim_version)

## get a list of paths to each one
full_stim_paths = [os.path.abspath(os.path.join(data_dir,i)) for i in os.listdir(data_dir)]
full_stim_paths = [i for i in full_stim_paths if i.split('/')[-1] != '.DS_Store'] ## bleh
print('We have {} images to evaluate.'.format(len(full_stim_paths)))

We have 7 images to evaluate.


In [None]:
## helper to build image urls
def build_s3_url(path, bucket_name = 'curiotower'):    
    return 'https://{}.s3.amazonaws.com/{}'.format(bucket_name, path.split('/')[-1])


In [None]:
## basic metadata lists
image_urls = [build_s3_url(p) for p in full_stim_paths]
towerIDs = [p.split('/')[-1].split('.')[0] for p in full_stim_paths]


In [None]:
## convert to pandas dataframe
M = pd.DataFrame([towerIDs,image_urls]).transpose()
M.columns = ['towerID', 'image_url']
M['stim_version'] = stim_version
M['games'] = '[]' ## empty games list for marking records when retrieved from mongo (see store.js)
M['games'] = M['games'].apply(lambda x: ast.literal_eval(x))

In [None]:
## convert M to dictionary J and then save out to meta.js file
J = M.to_dict(orient='records') 

with open('{}_meta.js'.format(dataset_name), 'w') as fout:
    json.dump(J, fout)

### Step 2: Insert each trial as a record into a mongo database

In [13]:
## remember to establish tunnel to mongodb on remote server first
#### e.g. by running at the terminal, `ssh -fNL 27017:127.0.0.1:27017 USERNAME@cogtoolslab.org`

# set vars 
auth = pd.read_csv('../analysis/auth.txt', header = None) # this auth.txt file contains the password for the sketchloop user
pswd = auth.values[0][0]
user = 'sketchloop'
host = 'cogtoolslab.org' ## cogtoolslab ip address

# have to fix this to be able to analyze from local
conn = pm.MongoClient('mongodb://sketchloop:' + pswd + '@127.0.0.1')
db = conn['stimuli']
coll = db[dataset_name]

In [None]:
## actually add data now to the database
for (i,m) in enumerate(J):
    coll.insert_one(m)
    print('{} of {}| Inserting tower {}'.format(i+1, len(J), m['towerID']))
    clear_output(wait=True)

print('Done inserting records into mongo!')


In [None]:
## check collection to see what records look like
coll.find_one()

In [None]:
coll.estimated_document_count()

In [None]:
# create function to test insertion of records 
    # is there anything in it right now, and if so, does it match the number of records that we want
    # i.e. should we add/overwrite
    # how many records do we expect there to be in this collection
        #if this matches the number we want, and we do not want to overwrite, then leave alone
        # if these numbers are different and you want to update/change collection, then: drop old and insert new

In [None]:
current_record_count = coll.estimated_document_count()
new_records_count = len(J)



# Alternative method: bring in all trials as single packet (rather than each individually)


### This part is the same

In [2]:
## where are your stimulus images stored?
data_dir = 'pngTower'
bucket_name = 'curiotower'
stim_version = 'curiodrop'
dataset_name = '{}_{}'.format(bucket_name, stim_version)

## get a list of paths to each one
full_stim_paths = [os.path.abspath(os.path.join(data_dir,i)) for i in os.listdir(data_dir)]
full_stim_paths = [i for i in full_stim_paths if i.split('/')[-1] != '.DS_Store'] ## bleh
print('We have {} images to evaluate.'.format(len(full_stim_paths)))

We have 10 images to evaluate.


In [3]:
## helper to build image urls
def build_s3_url(path, bucket_name = 'curiotower'):    
    return 'https://{}.s3.amazonaws.com/{}'.format(bucket_name, path.split('/')[-1])

In [4]:
## basic metadata lists
image_urls = [build_s3_url(p) for p in full_stim_paths]
towerIDs = [p.split('/')[-1].split('.')[0] for p in full_stim_paths]

In [18]:
## convert to pandas dataframe
M = pd.DataFrame([towerIDs,image_urls]).transpose()
M.columns = ['towerID', 'image_url']
M['stim_version'] = stim_version
M['catch_trial'] = M['image_url'].apply(lambda x: 'catch' in x)
M['games'] = '[]' ## empty games list for marking records when retrieved from mongo (see store.js)
M['games'] = M['games'].apply(lambda x: ast.literal_eval(x))


In [19]:
M.head()

Unnamed: 0,towerID,image_url,stim_version,catch_trial,games
0,121619_09,https://curiotower.s3.amazonaws.com/121619_09.png,curiodrop,False,[]
1,121119_10b,https://curiotower.s3.amazonaws.com/121119_10b...,curiodrop,False,[]
2,121119_09,https://curiotower.s3.amazonaws.com/121119_09.png,curiodrop,False,[]
3,catch_01,https://curiotower.s3.amazonaws.com/catch_01.jpg,curiodrop,True,[]
4,catch_02,https://curiotower.s3.amazonaws.com/catch_02.jpg,curiodrop,True,[]


## The key difference is treating all the records as a single "meta" record

In [20]:
#initalize list of all version dictionaries
Meta = []
stimList = M.to_dict(orient='records')
stimDict = {}
stimDict['meta'] = stimList
stimDict['games'] = [] 
stimDict['experimentName'] = dataset_name
Meta.append(stimDict)


In [21]:
Meta

[{'meta': [{'towerID': '121619_09',
    'image_url': 'https://curiotower.s3.amazonaws.com/121619_09.png',
    'stim_version': 'curiodrop',
    'catch_trial': False,
    'games': []},
   {'towerID': '121119_10b',
    'image_url': 'https://curiotower.s3.amazonaws.com/121119_10b.png',
    'stim_version': 'curiodrop',
    'catch_trial': False,
    'games': []},
   {'towerID': '121119_09',
    'image_url': 'https://curiotower.s3.amazonaws.com/121119_09.png',
    'stim_version': 'curiodrop',
    'catch_trial': False,
    'games': []},
   {'towerID': 'catch_01',
    'image_url': 'https://curiotower.s3.amazonaws.com/catch_01.jpg',
    'stim_version': 'curiodrop',
    'catch_trial': True,
    'games': []},
   {'towerID': 'catch_02',
    'image_url': 'https://curiotower.s3.amazonaws.com/catch_02.jpg',
    'stim_version': 'curiodrop',
    'catch_trial': True,
    'games': []},
   {'towerID': 'catch_03',
    'image_url': 'https://curiotower.s3.amazonaws.com/catch_03.jpg',
    'stim_version': 'curi

In [22]:
print('Saving out json dictionary out to file...') 
with open('{}_meta.js'.format(dataset_name), 'w') as fout:
    json.dump(Meta, fout)
print('Done!')

Saving out json dictionary out to file...
Done!


In [25]:
## remember to establish tunnel to mongodb on remote server first
#### e.g. by running at the terminal, `ssh -fNL 27017:127.0.0.1:27017 USERNAME@cogtoolslab.org`

# set vars 
auth = pd.read_csv('../analysis/auth.txt', header = None) # this auth.txt file contains the password for the sketchloop user
pswd = auth.values[0][0]
user = 'sketchloop'
host = 'cogtoolslab.org' ## cogtoolslab ip address

# have to fix this to be able to analyze from local
conn = pm.MongoClient('mongodb://sketchloop:' + pswd + '@127.0.0.1')
db = conn['stimuli']
coll = db[dataset_name]

In [28]:
#reload JSON back in to the new stimulus collection
J = json.loads(open('{}_meta.js'.format(dataset_name),mode='r').read())
print('dataset_name: {}'.format(dataset_name))
print('Length of J is: {}'.format(len(J)))

dataset_name: curiotower_curiodrop
Length of J is: 1


In [27]:
#db.drop_collection('curiotower_curiodrop')

{'ns': 'stimuli.curiotower_curiodrop', 'nIndexesWas': 1, 'ok': 1.0}

In [30]:
## actually add data now to the database
for (i,m) in enumerate(J):
    coll.insert_one(m)
    print('{} of {}'.format(i+1, len(J)))
    clear_output(wait=True)

print('Done inserting records into mongo!')

Done inserting records into mongo!


In [31]:
coll.estimated_document_count()

1

In [32]:
coll.find_one()

{'_id': ObjectId('5fbddcb6c8054a3d48fa1fad'),
 'meta': [{'towerID': '121619_09',
   'image_url': 'https://curiotower.s3.amazonaws.com/121619_09.png',
   'stim_version': 'curiodrop',
   'catch_trial': False,
   'games': []},
  {'towerID': '121119_10b',
   'image_url': 'https://curiotower.s3.amazonaws.com/121119_10b.png',
   'stim_version': 'curiodrop',
   'catch_trial': False,
   'games': []},
  {'towerID': '121119_09',
   'image_url': 'https://curiotower.s3.amazonaws.com/121119_09.png',
   'stim_version': 'curiodrop',
   'catch_trial': False,
   'games': []},
  {'towerID': 'catch_01',
   'image_url': 'https://curiotower.s3.amazonaws.com/catch_01.jpg',
   'stim_version': 'curiodrop',
   'catch_trial': True,
   'games': []},
  {'towerID': 'catch_02',
   'image_url': 'https://curiotower.s3.amazonaws.com/catch_02.jpg',
   'stim_version': 'curiodrop',
   'catch_trial': True,
   'games': []},
  {'towerID': 'catch_03',
   'image_url': 'https://curiotower.s3.amazonaws.com/catch_03.jpg',
   'st