In [1]:
import os, sys

import pymongo as pm
import numpy as np
import scipy.stats as stats
import pandas as pd
import json
import re

import matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot
import seaborn as sns
sns.set_context('talk')
sns.set_style('white')

from IPython.display import clear_output
import importlib

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")
pd.options.mode.chained_assignment = None 

In [2]:
# directory & file hierarchy
# the project root is the parent of the current working directory;
# stimulus files live in its 'stimuli' subfolder
proj_dir = os.path.abspath(os.pardir)
stim_dir = os.path.join(proj_dir, 'stimuli')

## load in data
CDM = Children’s Discovery Museum, San Jose <br>
THU = Tsinghua University, Beijing

In [3]:
# load the SVG export for each collection site
CDM = pd.read_csv(os.path.join(stim_dir, 'CDM_photodraw_e2_svg_output2022.csv'))
THU = pd.read_csv(os.path.join(stim_dir, 'THU_photodraw_e2_svg_output2022.csv'))

# stack both sites into one frame.
# DataFrame.append was removed in pandas 2.0; pd.concat gives the same result
# and, like append, keeps each site's own row index (index values repeat
# across sites until a later reset_index).
data = pd.concat([CDM, THU])

In [4]:
# preview the first rows of the combined CDM + THU table
data.head()

Unnamed: 0.1,Unnamed: 0,session_id,subID,age,category,condition,filename,ref_image_name,stroke_count,svg
0,0,CDM_photodraw_e21540576831945,102618_1,age4,this square,S,/Users/brialong/Documents/GitHub/devphotodraw/...,images/square.png,1,"M224.71875,503.42188l-8.04107,2.01575l-3.82151..."
1,1,CDM_photodraw_e21540576907849,102618_1,age4,this square,S,/Users/brialong/Documents/GitHub/devphotodraw/...,images/square.png,1,"M224.71875,503.42188l-8.04107,2.01575l-3.82151..."
2,2,CDM_photodraw_e21540576907849,102618_1,age4,this shape,S,/Users/brialong/Documents/GitHub/devphotodraw/...,images/shape.png,1,"M397.71875,396.42188l4,8l1.73384,0.05863l3.071..."
3,3,CDM_photodraw_e21540576907849,102618_1,age4,this shape,S,/Users/brialong/Documents/GitHub/devphotodraw/...,images/shape.png,2,"M397.71875,396.42188l4,8l1.73384,0.05863l15.26..."
4,4,CDM_photodraw_e21540576907849,102618_1,age4,rectangle,P,/Users/brialong/Documents/GitHub/devphotodraw/...,images_photocues/rectangle.png,1,"M18.71875,1.42188l-15.07726,15.62757l-6.53569,..."


In [53]:
## preprocessing
## keep only the object-category rows (drop tracing / familiarization data)
object_categories = ['watch', 'bike', 'chair', 'car', 'tree', 'rabbit',
                     'house', 'cup', 'hat', 'cat', 'bird', 'airplane']
is_object_trial = data['category'].isin(object_categories)
data_test = data.loc[is_object_trial]

## site label = the part of session_id before the first underscore;
## THU session ids begin with 'Tsinghua', so that prefix is relabelled 'THU'
site_prefix = data_test['session_id'].map(lambda session: session.split('_')[0])
data_test['location'] = site_prefix.replace('Tsinghua', 'THU')

data_test = data_test.reset_index(drop=True)

In [57]:
## cleanup SVG column
## keep only the text after the last 'd=' (strings without 'd=' are unchanged)
data_test['svg'] = data_test['svg'].map(lambda svg_text: svg_text.split('d=')[-1])

In [58]:
## how many sketches in each dataset?
## a sketch is identified by its filename; rows are individual strokes
n_cdm = data_test.loc[data_test['location'] == 'CDM', 'filename'].nunique()
n_thu = data_test.loc[data_test['location'] == 'THU', 'filename'].nunique()
n_total = data_test['filename'].nunique()

print('{} CDM sketches and {} THU sketches'.format(n_cdm, n_thu))
print('{} total sketches'.format(n_total))

## the two sites must account for every sketch, with no filename shared between them
assert n_cdm + n_thu == n_total

1438 CDM sketches and 1291 THU sketches
2729 total sketches


In [59]:
## first look at things at the sketch-level
## collapse the stroke rows to one row per sketch (filename).
## NOTE: GroupBy.first() takes the first non-null value of each column within
## a group, which is not necessarily the first row as a whole.
df = data_test.groupby('filename').first().reset_index()

In [60]:
## how many of each category?
## number of sketches per category, tabulated separately for each site
counts = df.groupby(['location'])['category'].value_counts()
display(counts)

location  category
CDM       chair       121
          cup         121
          hat         121
          bike        120
          bird        120
          house       120
          tree        120
          watch       120
          car         119
          cat         119
          rabbit      119
          airplane    118
THU       bike        113
          airplane    112
          bird        110
          cat         109
          chair       109
          tree        109
          hat         108
          house       107
          rabbit      105
          watch       104
          cup         103
          car         102
Name: category, dtype: int64

In [61]:
## add groups based on 4 traits: animacy, size, familiarity, and artificiality
## The code for the selection can be found here:
# https://github.com/cogtoolslab/photodraw32/blob/master/stimuli/photodraw2x2_generate_stims/select_sketchy_classes_for_photodraw32.ipynb

# explicit category -> group lookup.
# This replaces np.select(conditions, values): np.select falls back to an
# integer default of 0 for unmatched rows, which older NumPy silently turns
# into the label '0' and recent NumPy rejects because the string choices and
# the integer default share no common dtype.
category_to_group = {
    'rabbit': 'animals',
    'cat': 'animals',
    'bird': 'animals',
    'watch': 'small_objects',
    'cup': 'small_objects',
    'hat': 'small_objects',
    'chair': 'big_objects',
    'tree': 'big_objects',
    'house': 'big_objects',
    'bike': 'vehicles',
    'car': 'vehicles',
    'airplane': 'vehicles',
}

# create the new column; a category missing from the lookup becomes NaN
df['group'] = df['category'].map(category_to_group)

# fail loudly instead of carrying an unlabeled sketch downstream
assert df['group'].notna().all(), 'categories without a group: {}'.format(
    df.loc[df['group'].isna(), 'category'].unique())

In [62]:
## IMPORTANT
## which "group" are we generating metadata for?
## change the group label below to build stimuli for another group
whichGroup = df[df['group'] == 'animals'].reset_index(drop=True)

## G is the working copy that gets binned; it stays an empty frame
## when reallyRun is switched off
reallyRun = True
G = whichGroup.copy().reset_index(drop=True) if reallyRun else pd.DataFrame()
print('Generating stims for {} dataset'.format(whichGroup['group'].unique()))

Generating stims for ['animals'] dataset


### NOTE TO HOLLY: do we also want to counterbalance by site?

In [63]:
## define array params
subgroups = G['category'].unique()          # categories in this group
l = len(subgroups)                           # sketches per bin (one column per category)
m = min((G.groupby(['category']).size()))    # size of the smallest category = number of full bins

## generate empty array
## each column will hold row labels of G, so allocate integers: the default
## float array would make the later .loc lookup depend on float labels
## matching an integer index
arr = np.zeros((m, l), dtype=int)


# ## define array params
# subgroups = G['location'].unique()
# l = len(subgroups)
# m = min((G.groupby(['location']).size()))

# ## generate empty array
# arr = np.zeros((m, l))

In [64]:
# shuffle sketches and put into array
# column k of arr receives m randomly ordered row labels from category k
# NOTE: the shuffle is unseeded, so every run produces a different assignment
for col, subgroup in enumerate(subgroups):
    category_rows = G.index[G['category'] == subgroup]
    arr[:, col] = np.random.permutation(category_rows)[:m]

# name generated bins
# every row starts in bin 0; row r of arr (one sketch per category) becomes bin r.
# Sketches that did not fit into arr are never reassigned and so stay in bin 0.
G["bin"] = 0

for bin_id, row_labels in enumerate(arr):
    G.loc[row_labels, "bin"] = bin_id

In [65]:
# number of distinct bin ids; rows never assigned to a bin keep the initial value 0
print('How many bins were made? {}'.format(G['bin'].nunique()))

How many bins were made? 224


In [66]:
## how many sketches per bin?
## count the distinct sketch files in every bin, then list the sizes that occur
checker = [members['filename'].nunique() for _, members in G.groupby('bin')]

C = pd.DataFrame(checker, columns=['numSketch'])
print('How many sketches are in each bin? {}'.format(C['numSketch'].unique()))

How many sketches are in each bin? [13  3]


##### important! split bins if necessary - if not, skip

In [67]:
## visually check the first bin
## NOTE: the number printed is the total size of bin 0, i.e. its regular
## sketches plus the leftovers that were never moved out of the initial bin 0
first_bin = G.loc[G['bin'] == 0]
print('There are {} extra sketches in this first bin, which need to be split out'.format(len(first_bin)))
first_bin.reset_index(drop=True)

There are 13 extra sketches in this first bin, which need to be split out


Unnamed: 0.1,filename,Unnamed: 0,session_id,subID,age,category,condition,ref_image_name,stroke_count,svg,location,group,bin
0,/Users/brialong/Documents/GitHub/devphotodraw/...,722,CDM_photodraw_e21541613545900,110718_1,age6,bird,P,images_photocues/bird_2.png,1,"M332.71875,237.42188l-19.6595,1.42627l-19.1492...",CDM,animals,0
1,/Users/brialong/Documents/GitHub/devphotodraw/...,6998,CDM_photodraw_e21547848462737,011819_7,age7,bird,P,images_photocues/bird_3.png,1,"M328.71875,426.42188l-14.57565,-24.38115l-15.4...",CDM,animals,0
2,/Users/brialong/Documents/GitHub/devphotodraw/...,5997,CDM_photodraw_e21547672389753,011619_6,age4,bird,S,images_photocues/bird_2.png,1,"M150.71875,118.42188l-13,9l-4.8338,9.00207l-3....",CDM,animals,0
3,/Users/brialong/Documents/GitHub/devphotodraw/...,4765,CDM_photodraw_e21547238095569,011109_5,age6,bird,S,images_photocues/bird_3.png,1,"M450.71875,355.42188l-12.30076,-21.65824l-11.6...",CDM,animals,0
4,/Users/brialong/Documents/GitHub/devphotodraw/...,2090,CDM_photodraw_e21542224706912,111418_6,age7,bird,S,images_photocues/bird_3.png,1,"M81.71875,171.42188l17.69992,-17.8237l18.40754...",CDM,animals,0
5,/Users/brialong/Documents/GitHub/devphotodraw/...,11874,CDM_photodraw_e21555102208611,041219_1,age9,cat,P,images_photocues/cat_3.png,1,"M442.71875,290.42188l-0.52135,8.35889l-1.86892...",CDM,animals,0
6,/Users/brialong/Documents/GitHub/devphotodraw/...,8043,CDM_photodraw_e21548104824451,012119_8,age8,cat,S,images_photocues/cat_2.png,1,"M193.71875,383.42188l4,-9l12.8649,-10.32489l15...",CDM,animals,0
7,/Users/brialong/Documents/GitHub/devphotodraw/...,8660,CDM_photodraw_e21548449754672,012519_3,age8,cat,S,images_photocues/cat_2.png,1,"M322.71875,220.42188l-12,11l-1.71155,3.58969l-...",CDM,animals,0
8,THU_sketches_and_metadata/bird_sketch_Ipad4_TH...,1903,Tsinghua_photodraw_production1573785392477,Ipad4_THU4F8,4.57,bird,P,images_photocues/bird_3.png,1,"""M109.21875,319.51563l2.50756,-3.40526l3.62049...",THU,animals,0
9,THU_sketches_and_metadata/bird_sketch_iPad 5-T...,167,Tsinghua_photodraw_production1577171766191,iPad 5-THU9M2,8.01,bird,S,images_photocues/bird_3.png,1,"""M151.21875,284.51563l-16.19297,11.08439l-11.2...",THU,animals,0


In [68]:
import math

# how many bins of (at most) 3 sketches are needed to hold everything in bin 0
n_in_first_bin = len(G.loc[G['bin'] == 0])
num_divide_by = math.ceil(n_in_first_bin / 3)

In [69]:
## need to split the first bin that has too many sketches
## pull out every row currently labelled bin 0 and count its distinct sketches
in_first_bin = G['bin'] == 0
need_split = G.loc[in_first_bin]
need_split['filename'].nunique()

13

In [70]:
## randomly shuffle and split into `num_divide_by` new bins
## NOTE: the shuffle is unseeded, so the split differs from run to run
shuffled = need_split.sample(frac=1)

## split row positions rather than the DataFrame itself: np.array_split on a
## DataFrame goes through DataFrame.swapaxes, which newer pandas deprecates.
## The pieces have the same sizes and order as before.
piece_positions = np.array_split(np.arange(len(shuffled)), num_divide_by)
result = [shuffled.iloc[positions] for positions in piece_positions]

In [71]:
## subset each new bin
## the first piece keeps bin 0; every later piece gets the next unused bin id
## NOTE: hard-coded for exactly five pieces - add result_6, result_7, ... here
## (and in the cells that follow) when num_divide_by is larger
result_1, result_2, result_3, result_4, result_5 = (result[k] for k in range(5))

for offset, piece in enumerate((result_2, result_3, result_4, result_5), start=1):
    piece['bin'] = G['bin'].max() + offset

## show the size of every piece
for piece in (result_1, result_2, result_3, result_4, result_5):
    display(len(piece))

3

3

3

2

2

In [72]:
## pad result_4 and result_5 with the last sketch of result_2 / result_1
## NOTE(review): the donor pieces keep their own copy of these rows, so each
## borrowed sketch ends up in two bins - confirm this duplication is intended
extra_result_1 = result_2.tail(1)
extra_result_2 = result_1.tail(1)

## DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call
result_4 = pd.concat([result_4, extra_result_1])
## the borrowed row still carries its donor's bin id, so overwrite the whole
## column with the bin id of the piece's first row
result_4['bin'] = int(result_4.iloc[0]['bin'])

result_5 = pd.concat([result_5, extra_result_2])
result_5['bin'] = int(result_5.iloc[0]['bin'])

In [73]:
## remove first bin, so that we can replace it
## NOTE: running this cell twice drops the rebuilt bin 0 and appends the pieces again
G = G.loc[G['bin'] != 0]

# add the new bins back into the dataframe
new_bins = [result_1, result_2, result_3, result_4, result_5]
G = pd.concat([G] + new_bins).reset_index(drop=True)

In [74]:
# visually check the first bin
# after the split, bin 0 should contain only the rows of result_1
G.loc[G['bin'] == 0].reset_index(drop=True)

Unnamed: 0.1,filename,Unnamed: 0,session_id,subID,age,category,condition,ref_image_name,stroke_count,svg,location,group,bin
0,/Users/brialong/Documents/GitHub/devphotodraw/...,11874,CDM_photodraw_e21555102208611,041219_1,age9,cat,P,images_photocues/cat_3.png,1,"M442.71875,290.42188l-0.52135,8.35889l-1.86892...",CDM,animals,0
1,/Users/brialong/Documents/GitHub/devphotodraw/...,5997,CDM_photodraw_e21547672389753,011619_6,age4,bird,S,images_photocues/bird_2.png,1,"M150.71875,118.42188l-13,9l-4.8338,9.00207l-3....",CDM,animals,0
2,THU_sketches_and_metadata/cat_sketch_Ipad2 _TH...,5352,Tsinghua_photodraw_production1575530375673,Ipad2 _THU6F20,6.51,cat,S,images_photocues/cat_2.png,1,"""M193.21875,440.42188l-15.15318,1.19815l-13.11...",THU,animals,0


In [75]:
## does every bin hold as many sketches as there are categories in this group?
checker = [members['filename'].nunique() for _, members in G.groupby('bin')]

C = pd.DataFrame(checker, columns=['numSketch'])

## compare bin by bin: asserting `unique() == n` on an array raises an
## ambiguous-truth-value error as soon as more than one bin size exists,
## which hides the actual problem
expected_per_bin = G["category"].nunique()
bin_sizes = C['numSketch'].unique()
assert (C['numSketch'] == expected_per_bin).all(), \
    'Expected {} sketches per bin, found bin sizes {}'.format(expected_per_bin, bin_sizes)

## only report success once the assertion has actually passed
print('Second check passed! There are {} items in each bin'.format(bin_sizes))

Second check passed! There are [3] items in each bin


In [76]:
## did we retain all our sketches?
## compares distinct filenames only, so rows duplicated across bins do not affect it
n_binned = G['filename'].nunique()
assert n_binned == whichGroup['filename'].nunique()
print('Check passed! There are {} sketches still in our binned dataset'.format(n_binned))

Check passed! There are 682 sketches still in our binned dataset


In [77]:
## currently there are 3 sketches in each bin
## now we want to group bins, so that there are 9 sketches in a 'bin_9'
## lookup table: bin ids 0..max, with every run of three consecutive ids
## sharing one bin_9 id
numbers = pd.DataFrame({'bin': list(range(0, G['bin'].max() + 1))})
numbers['bin_9'] = np.arange(len(numbers)) // 3

## attach the bin_9 id to every sketch through its bin id
regroup_df = G.merge(numbers, on='bin')

In [78]:
# number of distinct bin_9 ids, i.e. groups of three consecutive bin ids
print('How many bins were made? {}'.format(regroup_df['bin_9'].nunique()))

How many bins were made? 76


In [79]:
## visually look at things
## each row is one sketch, now carrying both its bin and its bin_9 id
regroup_df.head()

Unnamed: 0.1,filename,Unnamed: 0,session_id,subID,age,category,condition,ref_image_name,stroke_count,svg,location,group,bin,bin_9
0,/Users/brialong/Documents/GitHub/devphotodraw/...,886,CDM_photodraw_e21541799408496,110918_1,age4,bird,P,images_photocues/bird_3.png,1,"M381.71875,525.42188l2.64745,-29.2329l-3.00642...",CDM,animals,153,51
1,/Users/brialong/Documents/GitHub/devphotodraw/...,3540,CDM_photodraw_e21547060282520,O10919_1,age4,cat,S,images_photocues/cat_3.png,1,"M113.71875,415.42188l-0.54331,23.5977l-1.45669...",CDM,animals,153,51
2,THU_sketches_and_metadata/rabbit_sketch_iPad 1...,6771,Tsinghua_photodraw_production1577171554498,iPad 1_THU9M2,9.03,rabbit,S,images_photocues/rabbit_2.png,1,"""M243.21875,306.42188l-2.65361,-13.19572l-0.78...",THU,animals,153,51
3,/Users/brialong/Documents/GitHub/devphotodraw/...,4008,CDM_photodraw_e21547070395780,010919_7,age4,bird,P,images_photocues/bird_1.png,1,"M145.71875,291.42188l4.14908,8.73997l5.95342,1...",CDM,animals,177,59
4,/Users/brialong/Documents/GitHub/devphotodraw/...,7306,CDM_photodraw_e21548096119835,012119_2,age8,rabbit,S,images_photocues/rabbit_1.png,1,"M481.71875,465.42188l5.5896,-14.05739l2.39503,...",CDM,animals,177,59


In [80]:
## do we have the 9 items in each bin_9?
## count the distinct sketch files in every bin_9 and list the sizes that occur
checker = [members['filename'].nunique() for _, members in regroup_df.groupby('bin_9')]

C = pd.DataFrame(checker, columns=['numSketch'])
print('There are {} items in each bin_9'.format(C['numSketch'].unique()))

There are [9] items in each bin_9


In [81]:
# reallyRun = True
# if reallyRun:    
#     ## check last bin to see if there's 8
#     # display(regroup_df.loc[regroup_df['bin_8'] == regroup_df['bin_8'].max()])
    
#     ## grab extra 4 drawings from prior bin
#     extra_4 = regroup_df.loc[regroup_df['bin_8'] == regroup_df['bin_8'].max() - 1].head(4)
    
#     ## add extra 4 to last bin 
#     extra_4['bin_8'] = regroup_df['bin_8'].max()
# #     display(extra_4)
#     regroup_df = regroup_df.append(extra_4)
    
#     ## visually check things
#     ## check last bin to see if there's 8
#     display(regroup_df.loc[regroup_df['bin_8'] == regroup_df['bin_8'].max()])
    
#     print('added 4 extra sketches to last bin')

In [82]:
# recount the bin_9 groups; the padding cell above is commented out, so the number is unchanged
print('How many bins are there now? {}'.format(regroup_df['bin_9'].nunique()))

How many bins are there now? 76


In [83]:
# from collections import Counter
# Counter(Counter(regroup_df.bin_9.value_counts().to_dict()))

### now merge at stroke-level

In [84]:
## add the bin / bin_9 / group columns from the sketch-level table to the stroke-level data
## columns that exist in both frames keep the stroke-level version; the
## sketch-level copy gets the '_drop' suffix.
## A filename that appears in more than one bin contributes its strokes once per bin.
full_stroke_data = data_test.merge(regroup_df, on='filename', suffixes=('', '_drop'))

# drop the duplicate columns
# match the suffix exactly - a substring test would also remove any real
# column whose name merely contains 'drop'
duplicate_cols = [col for col in full_stroke_data.columns if col.endswith('_drop')]
full_stroke_data = full_stroke_data.drop(columns=duplicate_cols)

## col cleanup
# full_stroke_data['arcLength'] = full_stroke_data['arcLength'].astype('int32')
full_stroke_data['stroke_count'] = full_stroke_data['stroke_count'].astype('int32')
# full_stroke_data['currStrokeNum'] = full_stroke_data['currStrokeNum'].astype('int32')

In [85]:
## did we retain all our sketches?
## the stroke-level merge must cover exactly the sketches that were binned in G
assert full_stroke_data['filename'].nunique() == G['filename'].nunique()

In [86]:
## visually check that things make sense — this should show all sketches at the stroke-level of first bin
## `d` is only consumed by the commented-out display loop below
d = full_stroke_data.loc[full_stroke_data['bin'] == 0]
# for i, group in d.groupby('filename'):
#     display(group)

In [87]:
# final['arcLength'] = final['arcLength'].astype(str) 
# final['currStrokeNum'] = final['currStrokeNum'].astype(str) 
# store the counters and bin ids as strings
# NOTE: once bin_9 is a string, sorting or grouping on it is lexicographic
for col in ('stroke_count', 'bin', 'bin_9'):
    full_stroke_data[col] = full_stroke_data[col].astype(str)

# remove the leftover CSV index column
full_stroke_data = full_stroke_data.drop(columns=['Unnamed: 0'])

In [88]:
## copy over
## `final` is an independent copy; the metadata below is built from it
final = full_stroke_data.copy()

In [89]:
# preview the stroke-level table (one row per stroke, with bin and bin_9 as strings)
final.head()

Unnamed: 0,session_id,subID,age,category,condition,filename,ref_image_name,stroke_count,svg,location,group,bin,bin_9
0,CDM_photodraw_e21540576907849,102618_1,age4,rabbit,P,/Users/brialong/Documents/GitHub/devphotodraw/...,images_photocues/rabbit_1.png,1,"M634.71875,474.42188l-21.35678,12.64963l-22.64...",CDM,animals,145,48
1,CDM_photodraw_e21540576907849,102618_1,age4,rabbit,P,/Users/brialong/Documents/GitHub/devphotodraw/...,images_photocues/rabbit_1.png,2,"M590.71875,446.42188l-9.94288,-2.39073l-10.581...",CDM,animals,145,48
2,CDM_photodraw_e21540576907849,102618_1,age4,rabbit,P,/Users/brialong/Documents/GitHub/devphotodraw/...,images_photocues/rabbit_1.png,3,"M601.71875,339.42188l-8.21063,-3.25486l-8.9780...",CDM,animals,145,48
3,CDM_photodraw_e21540576907849,102618_1,age4,rabbit,P,/Users/brialong/Documents/GitHub/devphotodraw/...,images_photocues/rabbit_1.png,4,"M257.71875,502.42188l-15.71523,3.61524l-15.153...",CDM,animals,145,48
4,CDM_photodraw_e21540576907849,102618_1,age4,rabbit,P,/Users/brialong/Documents/GitHub/devphotodraw/...,images_photocues/rabbit_1.png,5,"M266.71875,577.42188l-13.02625,8.48312l-16.265...",CDM,animals,145,48


## add in ROIs data

In [90]:
# import the label module under the alias `rois`, reload it so that edits to
# devphotodraw_labels.py are picked up without restarting the kernel, and only
# then bind ROIS from the reloaded module (the order of these three lines matters)
import devphotodraw_labels as rois
importlib.reload(rois)
from devphotodraw_labels import ROIS

## convert ROI dictionary into ROI dataframe
R = pd.DataFrame(ROIS)

## add color for buttonGallery
## the same color value is assigned to every ROI row
R['color'] = '#fed541'

## convert so that can be JSON serialized later
R['part_num'] = R['part_num'].astype(str)

In [91]:
##IMPORTANT! Name dataframe according to group type
## objects whose ROI labels belong to each group
group_objects = {
    'animals': ['rabbit', 'cat', 'bird'],
    'small_objects': ['watch', 'cup', 'hat'],
    'big_objects': ['chair', 'tree', 'house'],
    'vehicles': ['bike', 'car', 'airplane'],
}

## whichGroup must hold exactly one group; unpacking fails loudly otherwise,
## and an unknown group raises KeyError, instead of silently leaving
## R_subset undefined or stale from an earlier run
(group_name,) = whichGroup['group'].unique()
R_subset = R.loc[R['object'].isin(group_objects[group_name])]

In [92]:
# preview the ROI labels kept for the selected group
R_subset.head()

Unnamed: 0,object,label_name,part_num,color
12,cat,Tail,1,#fed541
13,cat,Eye,2,#fed541
14,cat,Ear,3,#fed541
15,cat,Leg,4,#fed541
16,cat,Whisker,5,#fed541


### generate meta to insert into mongoDB

In [94]:
# initialize list of all batch dictionaries (one per bin_9)
Meta = []

# group on the plain column name so `name` is the bin_9 value itself
# (a one-element list key yields 1-tuples on newer pandas).
# bin_9 holds strings at this point, so batches are visited in string order.
for name, group in final.groupby('bin_9'):
    display(group)
    print('Adding {} to Meta'.format(name))
    clear_output(wait=True)
    Batch = {}   # batch-level dictionary
    Trials = []  # one entry per sketch: its strokes plus the ROI labels of its category

    for n, g in group.groupby('filename'):  # looping over sketches within a batch
        # deliberately not called `rois`: that name is the alias of the imported
        # devphotodraw_labels module, and overwriting it breaks
        # importlib.reload(rois) when the ROI cell is run again
        roi_records = R_subset[R_subset['object'] == g.category.unique()[0]].to_dict(orient='records')
        strokes = g.to_dict(orient='records')
        Trials.append({'strokes': strokes, 'rois': roi_records})  # strokes and rois for this sketch
    Batch['meta'] = Trials  # attaching trial list to batch metadata
    Batch['batch_id'] = np.unique(group['bin_9'].values)[0]
    Batch['games'] = []     # starts empty in every batch
    Meta.append(Batch)

print('Done!')

Done!


In [42]:
## check how many Meta are being inserted
## Meta holds one batch dictionary per bin_9 group
len(Meta)

76

In [95]:
## does this match the number of bins that were made?
## expected batch count = number of bin_9 ids times the number of groups present in `final`
assert len(Meta) == full_stroke_data['bin_9'].nunique()*final['group'].nunique()

In [96]:
# if full dataset PER group
# the dataset (and later the collection) is named after the group of the first row
dataset_name = 'devphotodraw_{}'.format(whichGroup['group'][0])
meta_path = '{}_meta.js'.format(dataset_name)

print('Saving out json dictionary out to file...') 
# serialize the whole batch list as one JSON document
with open(meta_path, 'w') as fout:
    fout.write(json.dumps(Meta))
print('Done!')

Saving out json dictionary out to file...
Done!


In [97]:
#reload JSON back in to the new stimulus collection
# use a context manager so the file handle is closed after reading
# (the bare open(...).read() left it open)
with open('{}_meta.js'.format(dataset_name), mode='r') as fin:
    J = json.load(fin)
print('dataset_name: {}'.format(dataset_name))
print('Length of J is: {}'.format(len(J)))

dataset_name: devphotodraw_animals
Length of J is: 76


### insert records into mongoDB
Insert the batch records into the `stimuli` database on the remote MongoDB server. This requires an SSH tunnel to the server:

Run this in terminal: ssh -fNL 27017:127.0.0.1:27017 hhuey@cogtoolslab.org

In [None]:
## remember to establish tunnel to mongodb on remote server first
#### e.g. by running at the terminal, `ssh -fNL 27017:127.0.0.1:27017 USERNAME@cogtoolslab.org`

# set vars 
auth = pd.read_csv('auth.txt', header = None) # this auth.txt file contains the password for the sketchloop user
pswd = auth.values[0][0]
user = 'sketchloop'
host = 'cogtoolslab.org' ## cogtoolslab ip address (not used for the connection: traffic goes through the local tunnel)

# have to fix this to be able to analyze from local
# pass the credentials as keyword arguments instead of pasting them into the
# URI: inside a URI, a password containing characters such as '@', ':' or '/'
# must be percent-escaped, while keyword arguments are taken literally
conn = pm.MongoClient('127.0.0.1', username=user, password=str(pswd))
db = conn['stimuli']
coll = db[dataset_name]

In [49]:
## now really insert data
## NOTE: nothing checks for existing documents, so running this cell again
## inserts every batch a second time
reallyRun = True
if reallyRun:
    n_records = len(J)
    for position, record in enumerate(J, start=1):
        print ('%d of %d uploaded ...' % (position, n_records))
        clear_output(wait=True)
        coll.insert_one(record)
print('Done!')

Done!


In [50]:
## check how many records were inserted
## this counts every document in the collection, so it exceeds len(J) if the insert cell ran more than once
coll.estimated_document_count()

76

In [51]:
## inspect one of these annotation sessions
## fetches a single document from the collection to confirm the stored structure
coll.find_one()

{'_id': ObjectId('628c2e31d5b8e10848929561'),
 'meta': [{'strokes': [{'session_id': 'CDM_photodraw_e21547240775263',
     'subID': '011119_7',
     'age': 'age6',
     'category': 'bird',
     'condition': 'P',
     'filename': '/Users/brialong/Documents/GitHub/devphotodraw/analysis/step1_get_drawings/CDM_Drawings/photodraw2/photodraw2_all_sketches_svg_copy/bird/P_bird_sketch_age6_CDM_photodraw_e21547240775263.png',
     'ref_image_name': 'images_photocues/bird_3.png',
     'stroke_count': '1',
     'svg': 'M263.71875,297.42188l-10.26556,-1.55833l-9.7741,0.53426l-19.96034,2.02407l-15,2l-11,4l-7.02413,2.01064l-9.10835,2.37378l-8.79211,2.43157l-6.07541,2.18401l-4.46207,2.52287l-1.66116,1.99206l0.12323,2.48507l3.23972,2.95868l5.93161,3.74527l6.34594,3.6433l4.48273,2.65275l28.07682,16.58636l29.92318,11.41364l7.68726,2.94969l8.98044,4.8124l8.39659,5.43135l5.9357,4.80656l2.75147,1.87733l3.24853,1.12267v-4',
     'location': 'CDM',
     'group': 'animals',
     'bin': '0',
     'bin_9': '0'},

In [93]:
# db.drop_collection('devphotodraw_animals')

{'ns': 'stimuli.devphotodraw_animals', 'nIndexesWas': 1, 'ok': 1.0}