In [1]:
import os, sys

import pymongo as pm
import numpy as np
import scipy.stats as stats
import pandas as pd
import json
import re

import matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot
import seaborn as sns
sns.set_context('talk')
sns.set_style('white')

from IPython.display import clear_output
import importlib

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")
pd.options.mode.chained_assignment = None 

In [2]:
# directory & file hierarchy
proj_dir = os.path.abspath('..')
stim_dir = os.path.join(proj_dir,'stimuli')

## load in data
CDM = Children’s Discovery Museum, San Jose <br>
THU = Tsinghua University, Beijing

In [3]:
# CDM = pd.read_csv(os.path.join(stim_dir, 'CDM_photodraw_e2_svg_output2022.csv'))
# THU = pd.read_csv(os.path.join(stim_dir, 'THU_photodraw_e2_svg_output2022.csv'))
# data = CDM.append(THU)

## only grab object category data (not tracing or familiarization data)
# data_test = data.loc[data['category'].isin(['watch', 'bike', 'chair', 'car', 'tree', 'rabbit', 
#                                             'house', 'cup', 'hat', 'cat', 'bird', 'airplane'])]

In [16]:
data_test = pd.read_csv(os.path.join(stim_dir, 'all_strokes.csv'))

In [43]:
data_test.head(3)

Unnamed: 0,session_id,unique_ids,age,category,condition,ref_image_name,stroke_count,svg,filename,location
0,Tsinghua_photodraw_production1573784627069,IPAD3_THU5M7,5.02,car,P,images_photocues/car_1.png,1,"<path xmlns=""http://www.w3.org/2000/svg"" d=""M2...",P_car_sketch_age5_IPAD3_THU5M7.png,THU
1,Tsinghua_photodraw_production1573784627069,IPAD3_THU5M7,5.02,car,P,images_photocues/car_1.png,2,"<path xmlns=""http://www.w3.org/2000/svg"" d=""M2...",P_car_sketch_age5_IPAD3_THU5M7.png,THU
2,Tsinghua_photodraw_production1573784627069,IPAD3_THU5M7,5.02,bird,P,images_photocues/bird_2.png,1,"<path xmlns=""http://www.w3.org/2000/svg"" d=""M3...",P_bird_sketch_age5_IPAD3_THU5M7.png,THU


In [40]:
## preprocessing for experimental site
data_test['location'] = np.where(data_test['unique_ids'].apply(lambda x:x.split('_')[0]) == 'photodraw', 'CDM', 'THU')

In [44]:
## cleanup SVG column
data_test['svg'] = data_test['svg'].apply(lambda x:x.split('d=')[-1])

In [45]:
## how many sketches in each dataset?
print('{} CDM sketches and {} THU sketches'.format(data_test.loc[data_test['location'] == 'CDM']['filename'].nunique(), 
                                                   data_test.loc[data_test['location'] == 'THU']['filename'].nunique()))
print('{} total sketches'.format(data_test['filename'].nunique()))

assert data_test.loc[data_test['location'] == 'CDM']['filename'].nunique() + data_test.loc[data_test['location'] == 'THU']['filename'].nunique() == data_test['filename'].nunique()

1438 CDM sketches and 1477 THU sketches
2915 total sketches


In [46]:
## first look at things at the sketch-level
df = data_test.groupby('filename').first().reset_index()

In [47]:
## how many of each category?
counts = df.groupby(['location'])['category'].value_counts()
display(counts)

location  category
CDM       chair       121
          cup         121
          hat         121
          bike        120
          bird        120
          house       120
          tree        120
          watch       120
          car         119
          cat         119
          rabbit      119
          airplane    118
THU       airplane    129
          bike        129
          bird        126
          tree        126
          chair       125
          hat         125
          cat         124
          house       121
          watch       121
          rabbit      119
          car         117
          cup         115
Name: category, dtype: int64

In [48]:
## add groups based on 4 traits: animacy, size, familiarity, and artificiality
## The code for the selection can be found here:
# https://github.com/cogtoolslab/photodraw32/blob/master/stimuli/photodraw2x2_generate_stims/select_sketchy_classes_for_photodraw32.ipynb
# create a list of our conditions
conditions = [
    ((df['category'] == 'rabbit') | (df['category'] == 'cat') | (df['category'] == 'bird')), 
    ((df['category'] == 'watch') | (df['category'] == 'cup') | (df['category'] == 'hat')), 
    ((df['category'] == 'chair') | (df['category'] == 'tree') | (df['category'] == 'house')), 
    ((df['category'] == 'bike') | (df['category'] == 'car') | (df['category'] == 'airplane')), 
]

# create a list of the values we want to assign for each condition
values = [
    'animals', 
    'small_objects', 
    'big_objects', 
    'vehicles', 
]

# create a new column and use np.select to assign values to it using our lists as arguments
df['group'] = np.select(conditions, values)

In [74]:
## how many of each category?
counts = df.groupby(['location'])['group'].value_counts()
display(counts)

counts_group = df.groupby(['group'])['group'].value_counts()
display(counts_group)

location  group        
CDM       small_objects    362
          big_objects      361
          animals          358
          vehicles         357
THU       vehicles         375
          big_objects      372
          animals          369
          small_objects    361
Name: group, dtype: int64

group          group        
animals        animals          727
big_objects    big_objects      733
small_objects  small_objects    723
vehicles       vehicles         732
Name: group, dtype: int64

In [49]:
## IMPORTANT
## which "group" are we generating metadata for?
whichGroup = df.loc[df['group'] == 'animals']
whichGroup = whichGroup.reset_index(drop=True)

## initialize empty dataframe to copy into
G = pd.DataFrame()

## copy data over
reallyRun = True
if reallyRun:
    G = whichGroup.copy()
    G = G.reset_index(drop=True)
print('Generating stims for {} dataset'.format(whichGroup['group'].unique()))

Generating stims for ['animals'] dataset


### NOTE TO HOLLY: we also want to counterbalance by site?

In [50]:
## define array params
subgroups = G['category'].unique()
l = len(subgroups)
m = min((G.groupby(['category']).size()))

## generate empty array
arr = np.zeros((m, l))


# ## define array params
# subgroups = G['location'].unique()
# l = len(subgroups)
# m = min((G.groupby(['location']).size()))

# ## generate empty array
# arr = np.zeros((m, l))

In [51]:
# shuffle sketches and put into array
for i, subgroup in enumerate(subgroups):
    perm = np.random.permutation(G.index[G['category'] == subgroup])
    arr[:, i] = perm[:m]
    
# name generated bins
G["bin"] = 0

for i in range(m):
    G.loc[arr[i,:], "bin"] = i 

In [52]:
print('How many bins were made? {}'.format(G['bin'].nunique()))

How many bins were made? 238


In [53]:
## how many sketches per bin?
checker = []
for i, bins in G.groupby('bin'):
    c = bins['filename'].nunique()
    checker.append(c)

C = pd.DataFrame(checker, columns=['numSketch'])
print('How many sketches are in each bin? {}'.format(C['numSketch'].unique()))

How many sketches are in each bin? [16  3]


##### important! split bins if necessary - if not, skip

In [54]:
## visually check the first bin
print('There are {} extra sketches in this first bin, which need to be split out'.format(len(G.loc[G['bin'] == 0])))
G.loc[G['bin'] == 0].reset_index(drop=True)

There are 16 extra sketches in this first bin, which need to be split out


Unnamed: 0,filename,session_id,unique_ids,age,category,condition,ref_image_name,stroke_count,svg,location,group,bin
0,P_bird_sketch_age6_photodraw_e21547230596884.png,,photodraw_e21547230596884,6.0,bird,P,images_photocues/bird_2.png,1,"M370.71875,369.42188l18.59346,1.11893l17.81197...",CDM,animals,0
1,P_bird_sketch_age7_IPAD1_THU7F15.png,Tsinghua_photodraw_production1575443023769,IPAD1_THU7F15,7.54,bird,P,images_photocues/bird_2.png,1,"""M320.21875,237.42188l0.1521,-2.68731l0.27254,...",THU,animals,0
2,P_bird_sketch_age8_photodraw_e21542229353364.png,,photodraw_e21542229353364,8.0,bird,P,images_photocues/bird_2.png,1,"M270.71875,246.42188l-24.07097,3.24518l-11.148...",CDM,animals,0
3,P_bird_sketch_age9_IPAD3_THU9M15.png,Tsinghua_photodraw_production1575961493402,IPAD3_THU9M15,9.18,bird,P,images_photocues/bird_3.png,1,"""M383.21875,115.42188l-4.76833,7.10418l-4.7809...",THU,animals,0
4,P_bird_sketch_age9_photodraw_e21555523952200.png,,photodraw_e21555523952200,9.0,bird,P,images_photocues/bird_1.png,1,"M105.71875,583.42188l-7.16385,4.30364l-5.83615...",CDM,animals,0
5,P_cat_sketch_age5_IPAD3_THU5F4.png,Tsinghua_photodraw_production1573781474552,IPAD3_THU5F4,5.55,cat,P,images_photocues/cat_2.png,1,"""M417.21875,357.42188l9.26042,-13.13536l6.9144...",THU,animals,0
6,P_cat_sketch_age6_photodraw_e21547238095569.png,,photodraw_e21547238095569,6.0,cat,P,images_photocues/cat_1.png,1,"M286.71875,186.42188l7.546,-0.4515l7.454,2.451...",CDM,animals,0
7,P_cat_sketch_age7_IPAD1_THU8M16.png,Tsinghua_photodraw_production1575443741291,IPAD1_THU8M16,7.53,cat,P,images_photocues/cat_1.png,1,"""M177.21875,268.42188l-11.9328,-19.01713l-10.0...",THU,animals,0
8,P_cat_sketch_age7_photodraw_e21548451831175.png,,photodraw_e21548451831175,7.0,cat,P,images_photocues/cat_2.png,1,"M395.71875,505.42188l-48,-10l-10.06728,-0.7154...",CDM,animals,0
9,S_bird_sketch_age7_IPAD1_THU7F40.png,Tsinghua_photodraw_production1576654381493,IPAD1_THU7F40,7.96,bird,S,images_photocues/bird_1.png,1,"""M164.21875,278.42188l-8,-9l-5.54885,-2.24389l...",THU,animals,0


In [55]:
import math 
num_divide_by = math.ceil(len(G.loc[G['bin'] == 0])/3)

In [56]:
## need to split the first bin that has too many sketches
need_split = G.loc[G['bin'] == 0]
need_split['filename'].nunique()

16

In [58]:
## randomly shuffle and split into 4 new bins
shuffled = need_split.sample(frac=1)
result = np.array_split(shuffled, num_divide_by)

In [60]:
## subset each new bin
result_1 = result[0]
result_2 = result[1]
result_2['bin'] = G['bin'].max() + 1
result_3 = result[2]
result_3['bin'] = G['bin'].max() + 2
result_4 = result[3]
result_4['bin'] = G['bin'].max() + 3
result_5 = result[4]
result_5['bin'] = G['bin'].max() + 4
result_6 = result[5]
result_6['bin'] = G['bin'].max() + 5
# result_7 = result[6]
# result_7['bin'] = G['bin'].max() + 6
# result_8 = result[7]
# result_8['bin'] = G['bin'].max() + 7
# result_9 = result[8]
# result_9['bin'] = G['bin'].max() + 8

display(len(result_1))
display(len(result_2))
display(len(result_3))
display(len(result_4))
display(len(result_5))
display(len(result_6))
# display(len(result_7))
# display(len(result_8))
# display(len(result_9))

3

3

3

3

2

2

In [66]:
extra_result_1 = result_2.tail(1)
extra_result_2 = result_1.tail(1)

result_6 = result_6.append(extra_result_1)
result_6['bin'] = int(result_6.iloc[0]['bin'])

result_5 = result_5.append(extra_result_2)
result_5['bin'] = int(result_5.iloc[0]['bin'])

In [68]:
## remove first bin, so that we can replace it
G = G[G.bin != 0]

# add the new bins back into the dataframe
G = pd.concat([G, result_1, 
                  result_2, 
                  result_3, 
                  result_4, 
                  result_5, 
                  result_6]).reset_index(drop=True)

In [69]:
# visually check the first bin
G.loc[G['bin'] == 0].reset_index(drop=True)

Unnamed: 0,filename,session_id,unique_ids,age,category,condition,ref_image_name,stroke_count,svg,location,group,bin
0,S_bird_sketch_age7_IPAD1_THU7F40.png,Tsinghua_photodraw_production1576654381493,IPAD1_THU7F40,7.96,bird,S,images_photocues/bird_1.png,1,"""M164.21875,278.42188l-8,-9l-5.54885,-2.24389l...",THU,animals,0
1,S_bird_sketch_age8_IPAD1_THU8F34.png,Tsinghua_photodraw_production1576481274500,IPAD1_THU8F34,8.25,bird,S,images_photocues/bird_3.png,1,"""M371.21875,309.42188l-6.88496,-10.88241l-5.11...",THU,animals,0
2,P_cat_sketch_age7_photodraw_e21548451831175.png,,photodraw_e21548451831175,7.0,cat,P,images_photocues/cat_2.png,1,"M395.71875,505.42188l-48,-10l-10.06728,-0.7154...",CDM,animals,0


In [70]:
## do we have the 4 items in each bin?
checker = []
for i, bins in G.groupby('bin'):
    c = bins['filename'].nunique()
    checker.append(c)

C = pd.DataFrame(checker, columns=['numSketch'])

print('Second check passed! There are {} items in each bin'.format(C['numSketch'].unique()))
assert C['numSketch'].unique() == G["category"].nunique()

Second check passed! There are [3] items in each bin


In [71]:
## did we retain all our sketches? 
assert G['filename'].nunique() == whichGroup['filename'].nunique()
print('Check passed! There are {} sketches still in our binned dataset'.format(G['filename'].nunique()))

Check passed! There are 727 sketches still in our binned dataset


In [75]:
## currently there are 3 sketches in each bin
## now we want to group bins, so that there are 9 sketches in a 'bin_9'
numbers = list(range(0, G['bin'].max()+1))
numbers = pd.DataFrame(numbers, columns=['bin'])
numbers['bin_9'] = np.arange(len(numbers)) // 3

regroup_df = G.merge(numbers, left_on='bin', right_on='bin')

In [76]:
print('How many bins were made? {}'.format(regroup_df['bin_9'].nunique()))

How many bins were made? 81


In [77]:
## visually look at things
regroup_df.head()

Unnamed: 0,filename,session_id,unique_ids,age,category,condition,ref_image_name,stroke_count,svg,location,group,bin,bin_9
0,P_bird_sketch_age10_IPAD2_THU10M11.png,Tsinghua_photodraw_production1575356398090,IPAD2_THU10M11,10.0,bird,P,images_photocues/bird_2.png,1,"""M270.21875,197.42188l-9.77482,-1.3484l-10.225...",THU,animals,213,71
1,P_cat_sketch_age6_IPAD3_THU7M20.png,Tsinghua_photodraw_production1576739207246,IPAD3_THU7M20,6.91,cat,P,images_photocues/cat_2.png,1,"""M243.21875,344.42188l-10.06878,-11.06119l-6.9...",THU,animals,213,71
2,S_rabbit_sketch_age4_IPAD1_THU5M4.png,Tsinghua_photodraw_production1573781508415,IPAD1_THU5M4,4.84,rabbit,S,images_photocues/rabbit_1.png,1,"""M163.21875,384.42188l-10.98135,-7.03975l-9.31...",THU,animals,213,71
3,P_bird_sketch_age4_IPAD1_THU4F10.png,Tsinghua_photodraw_production1573800407496,IPAD1_THU4F10,4.32,bird,P,images_photocues/bird_3.png,1,"""M296.21875,295.42188l3.69084,-4.30143l1.92631...",THU,animals,202,67
4,P_rabbit_sketch_age9_photodraw_e21553793354436...,,photodraw_e21553793354436,9.0,rabbit,P,images_photocues/rabbit_1.png,1,"M236.71875,313.42188l-3.42172,1.27096l-4.93799...",CDM,animals,202,67


In [78]:
## do we have the 9 items in each bin_9?
checker = []
for i, bins in regroup_df.groupby('bin_9'):
    c = bins['filename'].nunique()
    checker.append(c)

C = pd.DataFrame(checker, columns=['numSketch'])
print('There are {} items in each bin_9'.format(C['numSketch'].unique()))

There are [9] items in each bin_9


In [81]:
# reallyRun = True
# if reallyRun:    
#     ## check last bin to see if there's 8
#     # display(regroup_df.loc[regroup_df['bin_8'] == regroup_df['bin_8'].max()])
    
#     ## grab extra 4 drawings from prior bin
#     extra_4 = regroup_df.loc[regroup_df['bin_8'] == regroup_df['bin_8'].max() - 1].head(4)
    
#     ## add extra 4 to last bin 
#     extra_4['bin_8'] = regroup_df['bin_8'].max()
# #     display(extra_4)
#     regroup_df = regroup_df.append(extra_4)
    
#     ## visually check things
#     ## check last bin to see if there's 8
#     display(regroup_df.loc[regroup_df['bin_8'] == regroup_df['bin_8'].max()])
    
#     print('added 4 extra sketches to last bin')

In [79]:
print('How many bins are there now? {}'.format(regroup_df['bin_9'].nunique()))

How many bins are there now? 81


In [83]:
# from collections import Counter
# Counter(Counter(regroup_df.bin_9.value_counts().to_dict()))

### now merge at stroke-level

In [80]:
## add bin col to sketch-level animal data
full_stroke_data = data_test.merge(regroup_df, left_on='filename', right_on='filename', suffixes=('', '_drop'))

# drop the duplicate columns
full_stroke_data.drop([col for col in full_stroke_data.columns if 'drop' in col], axis=1, inplace=True)

## col cleanup
# full_stroke_data['arcLength'] = full_stroke_data['arcLength'].astype('int32')
full_stroke_data['stroke_count'] = full_stroke_data['stroke_count'].astype('int32')
# full_stroke_data['currStrokeNum'] = full_stroke_data['currStrokeNum'].astype('int32')

In [81]:
## did we retain all our sketches? 
assert full_stroke_data['filename'].nunique() == G['filename'].nunique()

In [82]:
## visually check that things make sense — this should show all sketches at the stroke-level of first bin
d = full_stroke_data.loc[full_stroke_data['bin'] == 0]
# for i, group in d.groupby('filename'):
#     display(group)

In [85]:
# final['arcLength'] = final['arcLength'].astype(str) 
# final['currStrokeNum'] = final['currStrokeNum'].astype(str) 
full_stroke_data['stroke_count'] = full_stroke_data['stroke_count'].astype(str) 
full_stroke_data['bin'] = full_stroke_data['bin'].astype(str) 
full_stroke_data['bin_9'] = full_stroke_data['bin_9'].astype(str) 

# full_stroke_data = full_stroke_data.drop(columns=['Unnamed: 0'])

In [86]:
## copy over
final = full_stroke_data.copy()

In [87]:
final.head()

Unnamed: 0,session_id,unique_ids,age,category,condition,ref_image_name,stroke_count,svg,filename,location,group,bin,bin_9
0,Tsinghua_photodraw_production1573784627069,IPAD3_THU5M7,5.02,bird,P,images_photocues/bird_2.png,1,"""M370.21875,285.42188l4.85429,-12.61304l9.6910...",P_bird_sketch_age5_IPAD3_THU5M7.png,THU,animals,169,56
1,Tsinghua_photodraw_production1573784627069,IPAD3_THU5M7,5.02,bird,P,images_photocues/bird_2.png,2,"""M307.21875,335.42188l6.31184,-3.21874l7.72554...",P_bird_sketch_age5_IPAD3_THU5M7.png,THU,animals,169,56
2,Tsinghua_photodraw_production1573784627069,IPAD3_THU5M7,5.02,bird,P,images_photocues/bird_2.png,3,"""M173.21875,306.42188l-0.58487,-3.09276l1.5385...",P_bird_sketch_age5_IPAD3_THU5M7.png,THU,animals,169,56
3,Tsinghua_photodraw_production1573784627069,IPAD3_THU5M7,5.02,bird,P,images_photocues/bird_2.png,4,"""M180.21875,349.42188l5.64603,9.73291l5.34779,...",P_bird_sketch_age5_IPAD3_THU5M7.png,THU,animals,169,56
4,Tsinghua_photodraw_production1573784627069,IPAD3_THU5M7,5.02,bird,P,images_photocues/bird_2.png,5,"""M239.21875,344.42188l2.386,3.49127l3.8246,5.5...",P_bird_sketch_age5_IPAD3_THU5M7.png,THU,animals,169,56


## add in ROIs data

In [88]:
import devphotodraw_labels as rois
importlib.reload(rois)
from devphotodraw_labels import ROIS

## convert ROI dictionary into ROI dataframe
R = pd.DataFrame(ROIS)

## add color for buttonGallery
R['color'] = '#fed541'

## convert so that can be JSON serialized later
R['part_num'] = R['part_num'].astype(str)

In [89]:
##IMPORTANT! Name dataframe according to group type
if whichGroup['group'].unique() == 'animals':
    R_subset = R.loc[R['object'].isin(['rabbit', 'cat', 'bird'])]
elif whichGroup['group'].unique() == 'small_objects':
    R_subset = R.loc[R['object'].isin(['watch', 'cup', 'hat'])]
elif whichGroup['group'].unique() == 'big_objects':
    R_subset = R.loc[R['object'].isin(['chair', 'tree', 'house'])]
elif whichGroup['group'].unique() == 'vehicles':
    R_subset = R.loc[R['object'].isin(['bike', 'car', 'airplane'])]

In [90]:
R_subset.head()

Unnamed: 0,object,label_name,part_num,color
12,cat,Tail,1,#fed541
13,cat,Eye,2,#fed541
14,cat,Ear,3,#fed541
15,cat,Leg,4,#fed541
16,cat,Whisker,5,#fed541


### generate meta to insert into mongoDB

In [91]:
#initalize list of all version dictionaries
Meta = [] 

printed=False
for name, group in final.groupby(['bin_9']):  
    display(group)
    print('Adding {} to Meta'.format(name)) 
    clear_output(wait=True)
    Batch = {} # initialize batch-level dictionary 
    Trials = [] # list of sketches, each list containing sublist of strokes & ROI information
    
    for n,g in group.groupby('filename'): # looping over sketches within a batch
        rois = R_subset[R_subset['object']==g.category.unique()[0]].to_dict(orient='records')
        strokes = g.to_dict(orient='records')
        Trials.append({'strokes': strokes, 'rois': rois}) # append list of strokes and rois to trial list
    Batch['meta'] = Trials # attaching trial list to batch metadata
    Batch['batch_id'] = np.unique(group['bin_9'].values)[0]  
    Batch['games'] = []
    Meta.append(Batch)
    
print('Done!') 

Done!


In [92]:
## check how many Meta are being inserted
len(Meta)

81

In [93]:
## does this match the number of bins that were made?
assert len(Meta) == full_stroke_data['bin_9'].nunique()*final['group'].nunique()

In [94]:
# if full dataset PER group
dataset_name = 'devphotodraw_{}'.format(whichGroup['group'][0])
print('Saving out json dictionary out to file...') 
with open('{}_meta.js'.format(dataset_name), 'w') as fout:
    json.dump(Meta, fout)
print('Done!')

Saving out json dictionary out to file...
Done!


In [95]:
#reload JSON back in to the new stimulus collection
J = json.loads(open('{}_meta.js'.format(dataset_name),mode='r').read())
print('dataset_name: {}'.format(dataset_name))
print('Length of J is: {}'.format(len(J)))

dataset_name: devphotodraw_animals
Length of J is: 81


### insert records into mongoDB
Insert records into mongodb

Run this in terminal: ssh -fNL 27017:127.0.0.1:27017 hhuey@cogtoolslab.org

In [96]:
## remember to establish tunnel to mongodb on remote server first
#### e.g. by running at the terminal, `ssh -fNL 27017:127.0.0.1:27017 USERNAME@cogtoolslab.org`

# set vars 
auth = pd.read_csv('auth.txt', header = None) # this auth.txt file contains the password for the sketchloop user
pswd = auth.values[0][0]
user = 'sketchloop'
host = 'cogtoolslab.org' ## cogtoolslab ip address

# have to fix this to be able to analyze from local
conn = pm.MongoClient('mongodb://sketchloop:' + pswd + '@127.0.0.1')
db = conn['stimuli']
coll = db[dataset_name]

In [100]:
## now really insert data
reallyRun = True
if reallyRun:
    for (i,j) in enumerate(J):
        print ('%d of %d uploaded ...' % (i+1,len(J)))
        clear_output(wait=True)
        coll.insert_one(j)
print('Done!')

Done!


In [101]:
## check how many records were inserted
coll.estimated_document_count()

81

In [98]:
## inspect one of these annotation sessions
coll.find_one()

In [97]:
db.drop_collection('devphotodraw_animals')

{'ns': 'stimuli.devphotodraw_animals', 'nIndexesWas': 1, 'ok': 1.0}