### What this notebook does
**Step 1:** Create a metadata file containing one JSON-formatted metadata object per trial (i.e., per stimulus image)

**Step 2:** Insert each trial as a record into a MongoDB database

In [1]:
import json
import os
from glob import glob
from urllib.parse import quote, quote_plus

import numpy as np
import pandas as pd
import pymongo as pm
from IPython.display import clear_output
from PIL import Image

### Step 1: Create a metadata file containing one JSON-formatted metadata object per trial

In [47]:
## where are your stimulus images stored?
data_dir = 'pngTower'         # local directory holding the stimulus images
bucket_name = 'curiotower'    # combined with stim_version to name the dataset
stim_version = 'curiodrop'
dataset_name = '{}_{}'.format(bucket_name, stim_version)

## get a list of absolute paths to each stimulus image
## - sorted(): os.listdir returns entries in arbitrary order, so sort to keep
##   the order of the records stable across runs and machines
## - os.path.basename rather than split('/') so the .DS_Store filter also
##   works where the path separator is not '/'
full_stim_paths = [os.path.abspath(os.path.join(data_dir, fname))
                   for fname in sorted(os.listdir(data_dir))]
full_stim_paths = [p for p in full_stim_paths if os.path.basename(p) != '.DS_Store']
print('We have {} images to evaluate.'.format(len(full_stim_paths)))

We have 69 images to evaluate.


In [48]:
## helper to build image urls
def build_s3_url(path, bucket_name = 'curiotower'):
    """Return the S3 URL for the file at `path`.

    Only the final path component (the file name) is used as the object key;
    any leading directories are dropped.

    Parameters
    ----------
    path : str
        Local path (absolute or relative) to the stimulus file.
    bucket_name : str
        Name of the S3 bucket that appears in the URL's host name.

    Returns
    -------
    str
        'https://<bucket_name>.s3.amazonaws.com/<file name>', with the file
        name percent-encoded.
    """
    # os.path.basename follows the platform's path separator, unlike split('/')
    filename = os.path.basename(path)
    # percent-encode so names with spaces or reserved characters stay valid in a URL
    return 'https://{}.s3.amazonaws.com/{}'.format(bucket_name, quote(filename))


In [49]:
## basic metadata lists (one entry per stimulus path, in the same order)
image_urls = [build_s3_url(p) for p in full_stim_paths]
## tower ID = file name without its extension; basename/splitext follow the
## platform's path separator and keep any dots that are part of the name itself
towerIDs = [os.path.splitext(os.path.basename(p))[0] for p in full_stim_paths]


In [50]:
## assemble the metadata table: one row per tower (its ID and image URL),
## plus a constant column recording the stimulus version
M = pd.DataFrame(
    list(zip(towerIDs, image_urls)),
    columns=['towerID', 'imageURL'],
).assign(stim_version=stim_version)

In [55]:
## turn M into a list of per-row dicts (J), then write that list as JSON to the meta.js file
meta_path = '{}_meta.js'.format(dataset_name)
J = M.to_dict(orient='records')

with open(meta_path, 'w') as meta_file:
    json.dump(J, meta_file)

{'towerID': '121319_05',
 'imageURL': 'https://curiotower.s3.amazonaws.com/121319_05.png',
 'stim_version': 'curiodrop'}

### Step 2: Insert each trial as a record into a mongo database

In [54]:
## remember to establish tunnel to mongodb on remote server first
#### e.g. by running at the terminal, `ssh -fNL 27017:127.0.0.1:27017 USERNAME@cogtoolslab.org`

# set vars
# auth.txt contains the password for the sketchloop user; dtype=str keeps an
# all-digit password as text so it can be placed in the connection URI below
auth = pd.read_csv('auth.txt', header = None, dtype = str)
pswd = auth.values[0][0]
user = 'sketchloop'
host = 'cogtoolslab.org' ## cogtoolslab ip address (not used below; the connection goes through 127.0.0.1)

# have to fix this to be able to analyze from local
# percent-escape the credentials: reserved characters such as '@', ':' or '/'
# in a raw password would otherwise break the connection URI
conn = pm.MongoClient('mongodb://{}:{}@127.0.0.1'.format(quote_plus(user), quote_plus(pswd)))
db = conn['stimuli']
coll = db[dataset_name]

In [60]:
## actually add data now to the database
## upsert keyed on towerID instead of insert_one: insert_one adds an '_id' to each
## dict in J, so re-running this cell raised DuplicateKeyError (and J could no longer
## be dumped to JSON), while a fresh-kernel re-run duplicated every record.
## With the upsert, re-running simply refreshes the existing records.
for (i,m) in enumerate(J):
    coll.replace_one({'towerID': m['towerID']}, m, upsert=True)
    print('{} of {}| Inserting tower {}'.format(i+1, len(J), m['towerID']))
    clear_output(wait=True)  # keep a single, updating progress line

print('Done inserting records into mongo!')


Done inserting records into mongo!


In [62]:
## peek at a single stored document to confirm what the records look like
coll.find_one({})

{'_id': ObjectId('5f644566d4e9aa975700e9db'),
 'towerID': '121319_05',
 'imageURL': 'https://curiotower.s3.amazonaws.com/121319_05.png',
 'stim_version': 'curiodrop'}