### this notebook is to generate stims for graph-comprehension-benchmarking
Before this notebook, `gcb_data.csv` was manually generated by selecting the original stimuli data from the VLAT and GGR papers


In [1]:
## generally useful
import os
import numpy as np
import pandas as pd
import pymongo as pm
from glob import glob
import json
import ast

## handling communication with Amazon Web Services using Python
import boto3 
import botocore

## general jupyter notebook utilities
from IPython.display import clear_output

## image downloading & processing
from PIL import Image
import requests
from io import BytesIO

In [2]:
# directory & file hierarchy
proj_dir = os.path.abspath('..')
stimuli_dir = os.getcwd()

### load data

In [17]:
## load in pre-generated metadata CSV file
data = {
    "0": {
        "class": "rabbit",
        "photo_id": 0,
        "sketch_id": 0,
        "keypoint_id": 0,
        "url1": "https://sketchy.eye.gatech.edu/explore/photos/rabbit/n02325366_10628.jpg",
        "url2": "https://sketchy.eye.gatech.edu/explore/sketches/rabbit/n02325366_10628-1.png",
        "trial_num": "25",
        "trial_seed": "1025",
        "zone_id": "1"
    },
    "1": {
        "class": "banana",
        "photo_id": 0,
        "sketch_id": 0,
        "keypoint_id": 0,
        "url1": "https://sketchy.eye.gatech.edu/explore/photos/banana/n07753592_10692.jpg",
        "url2": "https://sketchy.eye.gatech.edu/explore/sketches/banana/n07753592_10692-1.png",
        "trial_num": "6",
        "trial_seed": "5006",
        "zone_id": "1"
    },
    "2": {
        "class": "cat",
        "photo_id": 0,
        "sketch_id": 0,
        "keypoint_id": 0,
        "url1": "https://sketchy.eye.gatech.edu/explore/photos/cat/n02121620_11995.jpg",
        "url2": "https://sketchy.eye.gatech.edu/explore/sketches/cat/n02121620_11995-2.png",
        "trial_num": "14",
        "trial_seed": "4014",
        "zone_id": "1"
    },
    "3": {
        "class": "helicopter",
        "photo_id": 0,
        "sketch_id": 0,
        "keypoint_id": 0,
        "url1": "https://sketchy.eye.gatech.edu/explore/photos/helicopter/n03512147_1201.jpg",
        "url2": "https://sketchy.eye.gatech.edu/explore/sketches/helicopter/n03512147_1201-1.png",
        "trial_num": "20",
        "trial_seed": "5020",
        "zone_id": "1"
    },
}

meta = pd.DataFrame(data).transpose()
meta['experimentName'] = 'sketchy_test_split_human_keypoint'
meta['gcbID'] = 0

In [35]:
meta[0].to_json()

KeyError: 0

In [19]:
## make copy
final = meta.copy()

In [20]:
## initalize list of all version dictionaries
Meta = [] 

printed=False
for name, group in final.groupby('experimentName'): 
    display(name)
    print('Adding {} to Meta'.format(name)) 
    clear_output(wait=True)
    Batch = {} # initialize batch-level dictionary 
    Trials = [] # list of trials

    for n,g in group.groupby('gcbID'): # looping over trials within a batch
        stims = g.to_dict(orient='records')
        Trials.append(stims) 
    Batch['meta'] = Trials 
    Batch['games'] = []
    Meta.append(Batch)
    
# print('Done!') 

'sketchy_test_split_human_keypoint'

Adding sketchy_test_split_human_keypoint to Meta


In [21]:
# Meta

In [22]:
## check how many Meta are being inserted
len(Meta)

1

In [23]:
dataset_name = 'sketchy_test_split_input'
print('Saving out json dictionary out to file...') 
with open('{}_meta.js'.format(dataset_name), 'w') as fout:
    json.dump(Meta, fout)
print('Done!')

Saving out json dictionary out to file...
Done!


In [24]:
#reload JSON back in to the new stimulus collection
J = json.loads(open('{}_meta.js'.format(dataset_name),mode='r').read())
print('dataset_name: {}'.format(dataset_name))
print('Length of J is: {}'.format(len(J)))

dataset_name: sketchy_test_split_input
Length of J is: 1


### insert metadata into mongo
Run this command at the terminal, `ssh -fNL 27017:127.0.0.1:27017 hhuey@cogtoolslab.org`

In [38]:
# set vars 
auth = pd.read_csv('auth.txt', header = None) # this auth.txt file contains the password for the sketchloop user
pswd = auth.values[0][0]
user = 'cabUser'
host = 'cogtoolslab.org' ## cogtoolslab ip address

# have to fix this to be able to analyze from local
conn = pm.MongoClient('mongodb://cabUser:' + pswd + '@cogtoolslab.org')
print(conn)
db = conn['sketch_rgb_input'] ## everyone in the lab shares this stimulus database
coll = db["sketchy_test_split"]
coll

MongoClient(host=['cogtoolslab.org:27017'], document_class=dict, tz_aware=False, connect=True)


Collection(Database(MongoClient(host=['cogtoolslab.org:27017'], document_class=dict, tz_aware=False, connect=True), 'sketch_rgb_input'), 'sketchy_test_split')

In [37]:
## now really insert data
reallyRun = True
if reallyRun:
    for (i,j) in enumerate(J):
        print ('%d of %d uploaded ...' % (i+1,len(J)))
        clear_output(wait=True)
        coll.insert_one(j)
print('Done!')

KeyboardInterrupt: 

In [27]:
## check how many records were inserted
coll.estimated_document_count()

KeyboardInterrupt: 

In [17]:
## inspect one of these annotation sessions
coll.find_one()

{'_id': ObjectId('6275991bcfb15b1f96f61375'),
 'meta': [[{'gcbID': 'item_1',
    'chart_cat': 'bar_chart_level_1',
    'origStudy': 'GGR',
    'blockNum': 1,
    'origStudy_order_num': 'q_01',
    'prompt': 'What percentage of patients recovered after chemotherapy?',
    'options': ['write your answer'],
    'corrAns': '“35” , “35%”',
    'graphType': 'bar_chart',
    'origStudy_taskCategorization': 'level_1',
    'promptTitle': 'Here is some information about cancer therapies',
    'questionType': 'fill_in_blank',
    'pluginType': 'survey-text-custom',
    'graphFilename': 'ggr_g1.png',
    'VLAT_base': 'custom_answer',
    'experimentName': 'graph_comprehension_benchmarking',
    'filename_navpath': '/Users/hl/ucsd/fyp/graph-comprehension-benchmarking/stimuli/images/ggr_g1.png'}],
  [{'gcbID': 'item_10',
    'chart_cat': 'bar_chart_level_3',
    'origStudy': 'GGR',
    'blockNum': 10,
    'origStudy_order_num': 'q_10',
    'prompt': 'Compared to the placebo, which treatment leads to

In [18]:
## check dir names in mongoDB
db.list_collection_names()

['iternum2_recog2',
 'lax_drawing_nuts-bolts_all',
 'physics_explanations',
 'physics_trust_agent',
 'environment_structuring_tan_search_post',
 'lax_drawing_wheels_all',
 'human-physics-benchmarking-containment-pilot_example',
 'human-physics-benchmarking-linking-pilot_example',
 'human-physics-benchmarking-towers-pilot_example',
 'kiddraw_annotations_small_animals',
 'human-physics-benchmarking-clothiness-pilot_iteration_1',
 'tools_for_block_construction_playground_action_and_subgoal_generation',
 'human-physics-benchmarking-dominoes-redyellow-pilot_production_1',
 'causaldraw_annotations_patching',
 'causaldraw',
 'lax_structures_castle_all',
 'lax_structures_city',
 'zipping_3_counterbalanced',
 'curiotower_curiodrop',
 'human-physics-benchmarking-collision_iteration_2',
 'zipping_test',
 'environment_structuring_tan_arrangement_select_all_rt_spread',
 'graphical_conventions_semantic_mapping',
 'kiddraw_annotations_vehicles',
 'physics_trust_agent_study2_v2',
 'human-physics-bench

In [13]:
## BE EXTRA CAREFUL WITH THIS -- this is to drop records in the causaldraw_annotations collection, which is stored in
## in the db. Because all grads' records are stored in the stimuli db, we want to be extra careful with this!!!

#db.drop_collection('graph_comprehension_benchmarking')

{'ns': 'stimuli.graph_comprehension_benchmarking', 'nIndexesWas': 1, 'ok': 1.0}