# Generating metadata

### What this notebook does
**Step 1:** Create metadata file, containing a bunch of JSON-formatted trial metadata objects

**Step 2:**  Insert each trial as a record into a mongo database

This assumes that the stimuli have been uploaded to the S3 bucket using `upload_stims_to_s3.ipynb`.

In [1]:
#Which experiment? bucket_name is the name of the experiment and will be name of the databases both on mongoDB and S3
bucket_name = 'human-physics-benchmarking-dominoes-redyellow-pilot' #CHANGE THIS ⚡️
stim_version = 'production_1' #CHANGE THIS ⚡️
random_seed = 42

In [2]:
import os
import numpy as np
from PIL import Image
import pandas as pd
import json
import pymongo as pm
from glob import glob
from IPython.display import clear_output
import ast
import itertools
import random
import h5py

In [3]:
#set random number state
rng = np.random.RandomState(seed=random_seed)

In [4]:
def list_files(paths, ext='mp4'):
    """Pass list of folders if there are stimuli in multiple folders. 
    Make sure that the containing folder is informative, as the rest of the path is ignored in naming. 
    Also returns filenames as uploaded to S3"""
    if type(paths) is not list:
        paths = [paths]
    results = []
    names = []
    for path in paths:
        results += [y for x in os.walk(path) for y in glob(os.path.join(x[0], '*.%s' % ext))]
        names += [os.path.basename(os.path.dirname(y))+'_'+os.path.split(y)[1] for x in os.walk(path) for y in glob(os.path.join(x[0], '*.%s' % ext))]
    hdf5s = [r.split("_img.")[0]+".hdf5" for r in results]
    return results,names,hdf5s

In [5]:
## helper to build stim urls
def build_s3_url(filename, bucket_name):    
    return 'https://{}.s3.amazonaws.com/{}'.format(bucket_name, filename)

In [6]:
local_stem = '/Users/dbear/neuroailab/physics_benchmarking/stimuli/dominoes_ry2/' #CHANGE THIS ⚡️ # needs trailing /
dirnames = [d.split('/')[-1] for d in glob(local_stem+'/*')]
data_dirs = [local_stem + d for d in dirnames]

dataset_name = '{}_{}'.format(bucket_name, stim_version)
stimulus_extension = "mp4" #what's the file extension for the stims? Provide without dot

## get a list of paths to each one
full_stim_paths,filenames, _  = list_files(data_dirs,stimulus_extension)
full_map_paths, mapnames, _ = list_files(data_dirs, ext = 'png') #generate filenames and stimpaths for target/zone map
full_hdf5_paths,hdf5_names, _ = list_files(data_dirs,ext = 'hdf5')
print('We have {} stimuli to evaluate.'.format(len(full_stim_paths)))

We have 554 stimuli to evaluate.


In [8]:
# make sure to only up the _img pass
full_stim_paths = [p for p in full_stim_paths if '_img' in p] 
filenames = [p for p in filenames if '_img' in p] 
print('We have {} stimuli to upload.'.format(len(full_stim_paths)))   

We have 277 stimuli to upload.


In [9]:
assert all('_img' in p for p in full_stim_paths), "Make sure to only pass in `_img` passes!"

In [10]:
## basic metadata lists
stim_urls = [build_s3_url(p,bucket_name) for p in filenames]
stim_map_urls = [build_s3_url(p,bucket_name) for p in mapnames]
stim_IDs = [name.split('.')[0] for name in filenames]
stim_hdf5_urls = [build_s3_url(p,bucket_name) for p in hdf5_names]
hdf5_paths = ['/'.join(p.split('/')[-2:]) for p in full_hdf5_paths]
stim_sets = [p.split('/')[-2:-1][0] for p in full_stim_paths]

In [11]:
## convert to pandas dataframe
M = pd.DataFrame([stim_IDs,stim_urls, stim_map_urls, hdf5_paths, stim_hdf5_urls, stim_sets]).transpose()
M.columns = ['stim_ID', 'stim_url', 'map_url', 'hdf5_path','hdf5_url', 'stim_set']

Add metadata to the stimuli

From `metadata.json`:

In [31]:
# if needed, add code to add additional columns
# Add trial labels to the metadata using the stimulus metadata.json
target_hit_zone_labels = dict()
for _dir in data_dirs:
    with open(_dir + '/metadata.json', 'rb') as f:
        trial_metas = json.load(f)
        
    for i,meta in enumerate(trial_metas):
        stim_name = meta['stimulus_name']
        ## for dominoes only
#         stim_name = stim_name.split('_')
#         stim_name[-2] += '-redyellow'
#         stim_name = '_'.join(stim_name)
        ## \end for dominoes only
        if stim_name == 'None': #recreate stimname from order in metadata
            stim_name = str(i).zfill(4)
            stim_name = _dir.split('/')[-1] + '_' + stim_name
        if stim_name[-4:] != "_img": stim_name+='_img' #stimnames need to end in "_img"
        label = meta['does_target_contact_zone']
        target_hit_zone_labels[stim_name] = label
        
print("num positive labels: %d" % sum(list(target_hit_zone_labels.values())))
print("num negative labels: %d" % (len(target_hit_zone_labels) - sum(list(target_hit_zone_labels.values()))))

num positive labels: 145
num negative labels: 132


In [32]:
# make new df with all metadata
GT = pd.DataFrame([list(target_hit_zone_labels.keys()), list(target_hit_zone_labels.values())]).transpose()
GT.columns = ['stim_ID', 'target_hit_zone_label']

In [34]:
#sanity check—this should be empty save for maybe leftover familiarization stims
set(M['stim_ID']).symmetric_difference(GT['stim_ID'])

set()

In [35]:
# merge with M
# if all([col not in M.columns for col in GT.columns[1:]]):
M = M.merge(GT, on='stim_ID')
print("added labels %s" % list(GT.columns[1:]))

added labels ['target_hit_zone_label']


We also want to have a number of familiarization trials. The stims are expected in the S3 bucket with that filename. 

In [38]:
familiarization_stem = 'pilot_dominoes_default_boxroom-redyellow' #CHANGE THIS ⚡️

NUM_FAM_TRIALS = 5 #how many familiarizationt trials per condition?

In [39]:
#sample an equal amount of positive and negative trials from the familiarization stem
positive_rows_fam = M[(M['target_hit_zone_label'] == True) & (M['stim_set'] == familiarization_stem)].to_dict(orient='records')
positive_rows_fam = list(rng.choice(positive_rows_fam,NUM_FAM_TRIALS,replace=False))
negative_rows_fam = M[(M['target_hit_zone_label'] == False) & (M['stim_set'] == familiarization_stem)].to_dict(orient='records')
negative_rows_fam = list(rng.choice(negative_rows_fam,NUM_FAM_TRIALS,replace=False))
all_rows_fam = positive_rows_fam + negative_rows_fam
rng.shuffle(all_rows_fam)

In [40]:
familiarization_M = pd.DataFrame(all_rows_fam)
# save the familiariaziation dict
familiarization_trials = familiarization_M.transpose().to_dict()
# needs to have strings as keys
familiarization_trials = {str(key):value for key, value in familiarization_trials.items()}

In [41]:
familiarization_M

Unnamed: 0,stim_ID,stim_url,map_url,hdf5_path,hdf5_url,stim_set,target_hit_zone_label
0,pilot_dominoes_default_boxroom-redyellow_0000_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow/0016....,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow,False
1,pilot_dominoes_default_boxroom-redyellow_0010_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow/0000....,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow,True
2,pilot_dominoes_default_boxroom-redyellow_0013_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow/0012....,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow,False
3,pilot_dominoes_default_boxroom-redyellow_0015_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow/0019....,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow,False
4,pilot_dominoes_default_boxroom-redyellow_0008_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow/0010....,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow,False
5,pilot_dominoes_default_boxroom-redyellow_0016_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow/0014....,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow,False
6,pilot_dominoes_default_boxroom-redyellow_0017_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow/0005....,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow,True
7,pilot_dominoes_default_boxroom-redyellow_0014_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow/0002....,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow,True
8,pilot_dominoes_default_boxroom-redyellow_0003_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow/0004....,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow,True
9,pilot_dominoes_default_boxroom-redyellow_0007_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow/0013....,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow,True


In [42]:
assert len(familiarization_M) == len(familiarization_M['stim_ID'].unique())

In [43]:
len(M),len(familiarization_M)

(277, 10)

Get a set of main stimuli

In [44]:
## drop the stims that have the same stem as the familiarization trials
M = M[~(M['stim_set'] == familiarization_stem)]

In [45]:
len(M),len(familiarization_M)

(257, 10)

In [46]:
# remove some bad stimuli -- regenerate these
bad_stimuli = [
    #filename of bad stimuli #CHANGE THIS IF NEEDED ⚡️
]

for nm in bad_stimuli:
    M = M[~M['stim_ID'].str.contains(nm)]

In [47]:
len(M)

257

We want to make sure that we have the same amount of stimuli. To prevent data from being too sparse, we also might want to sample a subset of stimuli.

In [48]:
STIM_SET_SIZE = 150 #how many total stimuli do we want?

In [49]:
per_label_num = min(len(M[M['target_hit_zone_label'] == False]),
                   len(M[M['target_hit_zone_label'] == True]),
                   int(STIM_SET_SIZE/2))
print("We get", per_label_num, "stimuli per label for a total of", per_label_num * 2)

We get 75 stimuli per label for a total of 150


In [50]:
positive_rows = M[M['target_hit_zone_label'] == True].to_dict(orient='records')
positive_rows = list(rng.choice(positive_rows,per_label_num,replace=False))
negative_rows = M[M['target_hit_zone_label'] == False].to_dict(orient='records')
negative_rows = list(rng.choice(negative_rows,per_label_num,replace=False))
all_rows = positive_rows + negative_rows

In [51]:
# recreate M
M = pd.DataFrame(all_rows)

In [52]:
M

Unnamed: 0,stim_ID,stim_url,map_url,hdf5_path,hdf5_url,stim_set,target_hit_zone_label
0,pilot_dominoes_2mid_J020R15_d3chairs_o1plants_...,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_2mid_J020R15_d3chairs_o1plants_...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_2mid_J020R15_d3chairs_o1plants_...,True
1,pilot_dominoes_4mid_tdwroom-redyellow_0002_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_4mid_tdwroom-redyellow/0004.hdf5,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_4mid_tdwroom-redyellow,True
2,pilot_dominoes_2mid_J020R15_d3chairs_o1plants_...,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_2mid_J020R15_d3chairs_o1plants_...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_2mid_J020R15_d3chairs_o1plants_...,True
3,pilot_dominoes_4mid_boxroom_2-redyellow_0002_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_4mid_boxroom_2-redyellow/0004.hdf5,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_4mid_boxroom_2-redyellow,True
4,pilot_dominoes_1mid_J025R45_o1full_tdwroom-red...,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_1mid_J025R45_o1full_tdwroom-red...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_1mid_J025R45_o1full_tdwroom-red...,True
...,...,...,...,...,...,...,...
145,pilot_dominoes_1mid_J025R45_o1full_tdwroom-red...,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_1mid_J025R45_o1full_tdwroom-red...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_1mid_J025R45_o1full_tdwroom-red...,False
146,pilot_dominoes_4midRM1_boxroom-redyellow_0002_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_4midRM1_boxroom-redyellow/0004....,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_4midRM1_boxroom-redyellow,False
147,pilot_dominoes_SJ020_d3chairs_o1plants_tdwroom...,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_SJ020_d3chairs_o1plants_tdwroom...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_SJ020_d3chairs_o1plants_tdwroom...,False
148,pilot_dominoes_2mid_J020R15_d3chairs_o1plants_...,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_2mid_J020R15_d3chairs_o1plants_...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_2mid_J020R15_d3chairs_o1plants_...,False


In [53]:
assert len(M) == len(M['stim_ID'].unique())

In [54]:
#lets save the experimental stims out into a json
M.to_json(bucket_name+'_'+stim_version+'_experimental_stims'+".json")

From `hdf5s` get all the metadata:

In [55]:
filenames

['pilot_dominoes_0mid_d3chairs_o1plants_tdwroom-redyellow_0010_img.mp4',
 'pilot_dominoes_0mid_d3chairs_o1plants_tdwroom-redyellow_0000_img.mp4',
 'pilot_dominoes_0mid_d3chairs_o1plants_tdwroom-redyellow_0026_img.mp4',
 'pilot_dominoes_0mid_d3chairs_o1plants_tdwroom-redyellow_0009_img.mp4',
 'pilot_dominoes_0mid_d3chairs_o1plants_tdwroom-redyellow_0019_img.mp4',
 'pilot_dominoes_0mid_d3chairs_o1plants_tdwroom-redyellow_0027_img.mp4',
 'pilot_dominoes_0mid_d3chairs_o1plants_tdwroom-redyellow_0018_img.mp4',
 'pilot_dominoes_0mid_d3chairs_o1plants_tdwroom-redyellow_0008_img.mp4',
 'pilot_dominoes_0mid_d3chairs_o1plants_tdwroom-redyellow_0001_img.mp4',
 'pilot_dominoes_0mid_d3chairs_o1plants_tdwroom-redyellow_0011_img.mp4',
 'pilot_dominoes_0mid_d3chairs_o1plants_tdwroom-redyellow_0025_img.mp4',
 'pilot_dominoes_0mid_d3chairs_o1plants_tdwroom-redyellow_0013_img.mp4',
 'pilot_dominoes_0mid_d3chairs_o1plants_tdwroom-redyellow_0003_img.mp4',
 'pilot_dominoes_0mid_d3chairs_o1plants_tdwroom-red

In [56]:
metadata = {} #holds all the metadata for all stimuli

for name,hdf5_path in zip([f.split('.')[0] for f in filenames],full_hdf5_paths):
    #load hdf5
#     print("loading",hdf5_path)
    try:
        hdf5 = h5py.File(hdf5_path,'r') #get the static part of the HDF5
        stim_name = str(np.array(hdf5['static']['stimulus_name']))
        metadatum = {} #metadata for the current stimulus
        for key in hdf5['static'].keys():
            datum = np.array(hdf5['static'][key])
            if datum.shape == (): datum = datum.item() #unwrap non-arrays
            metadatum[key] = datum
        #close file
        hdf5.close()
        metadata[name] = metadatum
    except Exception as e:
        print("Error with",hdf5_path,":",e)
        continue


In [57]:
metadata[list(metadata.keys())[0]]

{'bounciness': array([0., 0., 0.]),
 'color': array([[0.09072573, 0.79650046, 0.16743275],
        [0.88516131, 0.97733569, 0.93571481],
        [0.26029119, 0.57819323, 0.38789493],
        [0.1307142 , 0.07118967, 0.66801897],
        [0.9302648 , 0.48598503, 0.17492277],
        [0.50785269, 0.29808587, 0.31162042],
        [0.13239958, 0.65690075, 0.36832202]]),
 'distractors': array([b'coffee_maker', b'coffee_005_13', b'b04_dog'], dtype=object),
 'dynamic_friction': array([0.1 , 0.5 , 0.01]),
 'git_commit': b'7e1051a0fbd62347c739bec27b7b2d9e3d26efba',
 'initial_position': array([[ 0.85      ,  0.        ,  0.        ],
        [ 0.25      ,  0.        ,  0.        ],
        [-0.25      ,  0.        ,  0.        ],
        [ 1.074305  ,  0.        , -1.        ],
        [-0.17080982,  0.        ,  1.        ],
        [-0.25      ,  0.        ,  1.        ],
        [-1.7528123 ,  0.        , -0.75      ]], dtype=float32),
 'initial_rotation': array([[   0.      ,    0.      ,   

Insert those metadatas into M:

In [58]:
for index in M.index:
    stim_name = M.at[index,'stim_ID']
    for key,value in metadata[stim_name].items():
        M.at[index,key] = str(value) #insert every item as string

In [59]:
meta

{'stimulus_name': 'pilot_dominoes_1mid_J025R45_o1full_tdwroom_0029',
 'controller_name': 'MultiDominoes',
 'git_commit': '7e1051a0fbd62347c739bec27b7b2d9e3d26efba',
 'num_frames': 151,
 'is_trial_valid': True,
 'does_target_move': False,
 'does_target_contact_zone': False,
 'does_target_miss_zone': False,
 'does_target_hit_ground': False,
 'first_target_move_frame': None,
 'first_target_contact_zone_frame': None,
 'first_target_hit_ground_frame': None,
 'final_target_displacement': {'x': 0.0, 'y': 0.004, 'z': 0.0},
 'final_target_mask_displacement': [0.001, -0.008],
 'target_visible_area': 0.001,
 'zone_visible_area': 0.012,
 'probe_visible_area': 0.03,
 'is_any_object_fully_occluded': True,
 'room': "b'tdw'",
 'trial_seed': 1029,
 'push_time': 24,
 'num_distractors': 0,
 'num_occluders': 1,
 'num_middle_objects': 1,
 'remove_middle': False}

Ok, so we now have a list of all the stimuli to evaluate. Let's create random orders of them and submit them to the database. Set the number of stimuli shown to one participant and the number of different orders to generate (`num_of_permutations`). Each of these sets can be shown to a participant, with the least often shown set being shown (so we can get away with fewer sets than subjects). 

In [60]:
M

Unnamed: 0,stim_ID,stim_url,map_url,hdf5_path,hdf5_url,stim_set,target_hit_zone_label,bounciness,color,distractors,...,scale_z,seed,static_friction,stimulus_name,target_id,target_rotation,target_type,trial_num,trial_seed,zone_id
0,pilot_dominoes_2mid_J020R15_d3chairs_o1plants_...,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_2mid_J020R15_d3chairs_o1plants_...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_2mid_J020R15_d3chairs_o1plants_...,True,[0. 0. 0. 0. 0.],[[0.01326196 0.72362173 0.11219857]\n [0.95215...,[b'emeco_navy_chair' b'lunar_globe_art_auctor'...,...,[2. 0.25 0.25 0.25 0...,5,[1. 0.5 0.01 0.5 0.5 ],b'pilot_dominoes_2mid_J020R15_d3chairs_o1plant...,2,[0. 0. 0.],b'cube',15,5015,1
1,pilot_dominoes_4mid_tdwroom-redyellow_0002_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_4mid_tdwroom-redyellow/0004.hdf5,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_4mid_tdwroom-redyellow,True,[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.],[[0.25675728 0.68622878 0.32427455]\n [0.07435...,[],...,[2. 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 ...,420,[1. 0.5 0.01 0.5 0.5 0.5 0.5 0.5 0.5 ...,b'pilot_dominoes_4mid_tdwroom_0004',2,[0. 0. 0.],b'cube',4,420004,1
2,pilot_dominoes_2mid_J020R15_d3chairs_o1plants_...,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_2mid_J020R15_d3chairs_o1plants_...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_2mid_J020R15_d3chairs_o1plants_...,True,[0. 0. 0. 0. 0.],[[0.11947987 0.12745932 0.57293219]\n [0.39483...,[b'vase_06' b'vase_01' b'vase_06'],...,[2. 0.25 0.25 0.25 0...,5,[1. 0.5 0.01 0.5 0.5 ],b'pilot_dominoes_2mid_J020R15_d3chairs_o1plant...,2,[0. 0. 0.],b'cube',1,5001,1
3,pilot_dominoes_4mid_boxroom_2-redyellow_0002_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_4mid_boxroom_2-redyellow/0004.hdf5,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_4mid_boxroom_2-redyellow,True,[0. 0. 0. 0. 0. 0. 0.],[[0.96887664 0.18701734 0.97022084]\n [0.94513...,[],...,[2. 0.25 0.25 0.25 0.25 0.25 0.25],42,[1. 0.5 0.01 0.5 0.5 0.5 0.5 ],b'pilot_dominoes_4mid_boxroom_2_0004',2,[0. 0. 0.],b'cube',4,42004,1
4,pilot_dominoes_1mid_J025R45_o1full_tdwroom-red...,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_1mid_J025R45_o1full_tdwroom-red...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_1mid_J025R45_o1full_tdwroom-red...,True,[0. 0. 0. 0.],[[0.46790774 0.53468374 0.97830906]\n [0.98055...,[],...,[ 2. 0.25 0.25 0.25 ...,1,[1. 0.5 0.01 0.5 ],b'pilot_dominoes_1mid_J025R45_o1full_tdwroom_0...,2,[0. 0. 0.],b'cube',0,1000,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,pilot_dominoes_1mid_J025R45_o1full_tdwroom-red...,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_1mid_J025R45_o1full_tdwroom-red...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_1mid_J025R45_o1full_tdwroom-red...,False,[0. 0. 0. 0.],[[0.22082622 0.82042387 0.45184933]\n [0.45080...,[],...,[2. 0.25 0.25 0.25 4.22518],1,[1. 0.5 0.01 0.5 ],b'pilot_dominoes_1mid_J025R45_o1full_tdwroom_0...,2,[0. 0. 0.],b'cube',26,1026,1
146,pilot_dominoes_4midRM1_boxroom-redyellow_0002_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_4midRM1_boxroom-redyellow/0004....,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_4midRM1_boxroom-redyellow,False,[0. 0. 0. 0. 0. 0.],[[0.48887057 0.06223785 0.65019905]\n [0.44953...,[],...,[2. 0.25 0.25 0.25 0.25 0.25],5,[1. 0.5 0.01 0.5 0.5 0.5 ],b'pilot_dominoes_4midRM1_boxroom_0004',2,[0. 0. 0.],b'cube',4,5004,1
147,pilot_dominoes_SJ020_d3chairs_o1plants_tdwroom...,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_SJ020_d3chairs_o1plants_tdwroom...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_SJ020_d3chairs_o1plants_tdwroom...,False,[0. 0. 0. 0. 0. 0.],[[0.54085996 0.7329319 0.79550525]\n [0.47662...,[b'animal_dog_rtsit_1280' b'animal_dog_rtsit_1...,...,[2. 0.25 0.25 0.25 0...,3,[1. 0.5 0.01 0.5 0.5 0.5 ],b'pilot_dominoes_SJ020_d3chairs_o1plants_tdwro...,2,[0. 0. 0.],b'cube',25,3025,1
148,pilot_dominoes_2mid_J020R15_d3chairs_o1plants_...,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_2mid_J020R15_d3chairs_o1plants_...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_2mid_J020R15_d3chairs_o1plants_...,False,[0. 0. 0. 0. 0.],[[0.63593721 0.26791091 0.94427953]\n [0.13866...,[b'naughtone_pinch_stool_chair' b'b04_dog' b'b...,...,[2. 0.25 0.25 0.25 0...,5,[1. 0.5 0.01 0.5 0.5 ],b'pilot_dominoes_2mid_J020R15_d3chairs_o1plant...,2,[0. 0. 0.],b'cube',23,5023,1


In [61]:
len(M)

150

In [62]:
stims_per_run = len(M) #len(M) to show all stimuli
num_of_sets = 125 #how many different orders to produce?

In [63]:
#generate list of stimuli as dicts
L = M.to_dict(orient='records')

In [64]:
#generate list of lists of shuffled, sampled w/o replacement stimuli
stim_sets = []
for i in range(num_of_sets):
    stim_set = list(rng.choice(L, stims_per_run, replace=False))
    stim_sets += [{str(num):stim for num,stim in enumerate(stim_set)}]

In [65]:
len(stim_sets)

125

In [66]:
familiarization_M

Unnamed: 0,stim_ID,stim_url,map_url,hdf5_path,hdf5_url,stim_set,target_hit_zone_label
0,pilot_dominoes_default_boxroom-redyellow_0000_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow/0016....,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow,False
1,pilot_dominoes_default_boxroom-redyellow_0010_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow/0000....,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow,True
2,pilot_dominoes_default_boxroom-redyellow_0013_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow/0012....,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow,False
3,pilot_dominoes_default_boxroom-redyellow_0015_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow/0019....,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow,False
4,pilot_dominoes_default_boxroom-redyellow_0008_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow/0010....,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow,False
5,pilot_dominoes_default_boxroom-redyellow_0016_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow/0014....,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow,False
6,pilot_dominoes_default_boxroom-redyellow_0017_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow/0005....,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow,True
7,pilot_dominoes_default_boxroom-redyellow_0014_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow/0002....,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow,True
8,pilot_dominoes_default_boxroom-redyellow_0003_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow/0004....,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow,True
9,pilot_dominoes_default_boxroom-redyellow_0007_img,https://human-physics-benchmarking-dominoes-re...,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow/0013....,https://human-physics-benchmarking-dominoes-re...,pilot_dominoes_default_boxroom-redyellow,True


In [67]:
#these will be our familiarization trials
familiarization_trials = familiarization_M.transpose().to_dict()
familiarization_trials = {str(key):value for key, value in familiarization_trials.items()}
# familiarization_trials

Now we have a list of sequences of stimuli.

Set up ssh bridge to write to mongodb. Insert your username. If you don't have an SSH secret set yet, run `ssh -fNL 27017:127.0.0.1:27017 USERNAME@cogtoolslab.org` in your shell. \
*CHANGE THIS ⚡️*

In [68]:
!ssh -fNL 27017:127.0.0.1:27017 dbear@cogtoolslab.org

In [69]:
# set vars 
auth = pd.read_csv('../auth.txt', header = None) # this auth.txt file contains the password for the sketchloop user. Place it in the toplevel of the repo
pswd = auth.values[0][0]
user = 'sketchloop'
host = 'cogtoolslab.org' ## cogtoolslab ip address

conn = pm.MongoClient('mongodb://sketchloop:' + pswd + '@127.0.0.1')
db = conn['stimuli']
coll = db[dataset_name]

In [70]:
print(dataset_name)

human-physics-benchmarking-dominoes-redyellow-pilot_production_1


In [71]:
#get list of current collections
sorted(db.list_collection_names())

['block-construction-silhouette-exp01',
 'block-construction-silhouette-exp02',
 'causaldraw',
 'causaldraw_annotations',
 'causaldraw_annotations_fix_disagreements',
 'causaldraw_annotations_patching',
 'causaldraw_identification',
 'causaldraw_intervention',
 'causaldraw_intervention_patching',
 'collabdraw_collab8_recog',
 'compositional-abstractions-prior-elicitation_example',
 'curiotower-tdw',
 'curiotower-tdw-height3Jitter3',
 'curiotower_curiodrop',
 'dominoes-pilot_example',
 'graphical_conventions_object_annotation',
 'graphical_conventions_semantic_mapping',
 'graphical_conventions_semantic_mapping_patching',
 'graphical_conventions_semantic_mapping_spline_version_old',
 'human-physics-benchmarking-clothiness-pilot_iteration_1',
 'human-physics-benchmarking-clothiness-pilot_production_1',
 'human-physics-benchmarking-collision-pilot_iteration_1',
 'human-physics-benchmarking-collision-pilot_iteration_2',
 'human-physics-benchmarking-collision-pilot_production_1',
 'human-phy

In [72]:
#⚠️drop collection if necessary. 
db.drop_collection(dataset_name) 

{'ok': 0.0,
 'errmsg': 'ns not found',
 'code': 26,
 'codeName': 'NamespaceNotFound'}

Let's **do it**!

In [73]:
#lets save the experiment structure out into a json as well
#rows are different games, columns are the full metadata for the nth stimulus
pd.DataFrame(stim_sets).to_json(bucket_name+'_'+stim_version+'_experiment'+".json")

In [74]:
## actually add data now to the database
for (i,m) in enumerate(stim_sets):
    coll.insert_one({'stims':m, 'familiarization_stims': familiarization_trials})
    print('{} of {}'.format(i+1, len(stim_sets)))
    clear_output(wait=True)

print('Done inserting records into mongo! The collection name is',dataset_name)

Done inserting records into mongo! The collection name is human-physics-benchmarking-dominoes-redyellow-pilot_production_1


In [75]:
coll.estimated_document_count()

125

Let's pull a bit from the database to check if it worked

In [76]:
coll.find_one()

{'_id': ObjectId('60aa80b51772eea9f008c800'),
 'stims': {'0': {'stim_ID': 'pilot_dominoes_2mid_J020R15_d3chairs_o1plants_tdwroom_2-redyellow_0004_img',
   'stim_url': 'https://human-physics-benchmarking-dominoes-redyellow-pilot.s3.amazonaws.com/pilot_dominoes_2mid_J020R15_d3chairs_o1plants_tdwroom_2-redyellow_0004_img.mp4',
   'map_url': 'https://human-physics-benchmarking-dominoes-redyellow-pilot.s3.amazonaws.com/pilot_dominoes_2mid_J020R15_d3chairs_o1plants_tdwroom_2-redyellow_0009_map.png',
   'hdf5_path': 'pilot_dominoes_2mid_J020R15_d3chairs_o1plants_tdwroom_2-redyellow/0002.hdf5',
   'hdf5_url': 'https://human-physics-benchmarking-dominoes-redyellow-pilot.s3.amazonaws.com/pilot_dominoes_2mid_J020R15_d3chairs_o1plants_tdwroom_2-redyellow_0002.hdf5',
   'stim_set': 'pilot_dominoes_2mid_J020R15_d3chairs_o1plants_tdwroom_2-redyellow',
   'target_hit_zone_label': False,
   'bounciness': '[0. 0. 0. 0. 0.]',
   'color': '[[0.71748306 0.44963866 0.26882626]\n [0.69987793 0.36876917 0.656

In [78]:
# list(M.stim_ID)

# Compare to original

In [85]:
orig_stem = '/Users/dbear/neuroailab/physics_benchmarking/human-physics-benchmarking/stimuli/generation/pilot-dominoes/' #CHANGE THIS ⚡️ # needs trailing /
orig_dirnames = [d.split('/')[-1] for d in glob(local_stem+'/*')]
orig_data_dirs = [orig_stem + d for d in orig_dirnames]

In [81]:
# if needed, add code to add additional columns
# Add trial labels to the metadata using the stimulus metadata.json
orig_target_hit_zone_labels = dict()
for _dir in data_dirs:
    with open(_dir + '/metadata.json', 'rb') as f:
        trial_metas = json.load(f)
        
    for i,meta in enumerate(trial_metas):
        stim_name = meta['stimulus_name']
        ## for dominoes only
#         stim_name = stim_name.split('_')
#         stim_name[-2] += '-redyellow'
#         stim_name = '_'.join(stim_name)
        ## \end for dominoes only
        if stim_name == 'None': #recreate stimname from order in metadata
            stim_name = str(i).zfill(4)
            stim_name = _dir.split('/')[-1] + '_' + stim_name
        if stim_name[-4:] != "_img": stim_name+='_img' #stimnames need to end in "_img"
        label = meta['does_target_contact_zone']
        orig_target_hit_zone_labels[stim_name] = label
        
print("num positive labels: %d" % sum(list(orig_target_hit_zone_labels.values())))
print("num negative labels: %d" % (len(orig_target_hit_zone_labels) - sum(list(orig_target_hit_zone_labels.values()))))

num positive labels: 145
num negative labels: 132


In [84]:
len(orig_data_dirs), len(data_dirs)

(15, 15)

In [94]:
assert sorted(target_hit_zone_labels.keys()) == sorted(orig_target_hit_zone_labels.keys())
orig_labels = [orig_target_hit_zone_labels[k] for k in sorted(orig_target_hit_zone_labels.keys())]
new_labels = [target_hit_zone_labels[k] for k in sorted(target_hit_zone_labels.keys())]
sum([orig_labels[i] == new_labels[i] for i in range(len(orig_labels))])

277