In [49]:
import os, sys

import pymongo as pm
import numpy as np
import scipy.stats as stats
import pandas as pd
import json
import re

import matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot
import seaborn as sns
sns.set_context('talk')
sns.set_style('white')

from IPython.display import clear_output
import importlib

# Silence warnings that would otherwise clutter the notebook output.
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
# These two filters match on message text ("numpy.dtype/ufunc size changed").
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")
# Disable pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.options.mode.chained_assignment = None 

In [2]:
# Directory layout: the project root is taken to be the parent of the current
# working directory, and the stimulus files are looked up in <root>/stimuli.
proj_dir = os.path.abspath(os.pardir)
stim_dir = os.path.join(proj_dir, 'stimuli')

## Load in data
Site abbreviations: <br>
CDM = Children’s Discovery Museum, San Jose <br>
THU = Tsinghua University, Beijing

In [25]:
# Read the per-site sketch CSVs from the stimuli directory and stack them into one frame.
# NOTE(review): the rendered frame shows 'Unnamed: 0' and 'Unnamed: 0.1' columns, which
# look like index columns written out on earlier saves -- confirm before dropping them.
CDM = pd.read_csv(os.path.join(stim_dir, 'CDM_photodraw_e2_svg_output2022.csv'))
THU = pd.read_csv(os.path.join(stim_dir, 'THU_photodraw_e2_svg_output2022.csv'))

# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0.
# pd.concat is the drop-in replacement: like append, it stacks THU below CDM and
# keeps each frame's original row index (so index labels repeat across sites).
data = pd.concat([CDM, THU])

In [30]:
# Preview the first rows of the combined CDM + THU frame.
data.head()

Unnamed: 0.1,Unnamed: 0,session_id,subID,age,category,condition,filename,ref_image_name,stroke_count,svg
0,0,CDM_photodraw_e21540576831945,102618_1,age4,this square,S,/Users/brialong/Documents/GitHub/devphotodraw/...,images/square.png,1,"M224.71875,503.42188l-8.04107,2.01575l-3.82151..."
1,1,CDM_photodraw_e21540576907849,102618_1,age4,this square,S,/Users/brialong/Documents/GitHub/devphotodraw/...,images/square.png,1,"M224.71875,503.42188l-8.04107,2.01575l-3.82151..."
2,2,CDM_photodraw_e21540576907849,102618_1,age4,this shape,S,/Users/brialong/Documents/GitHub/devphotodraw/...,images/shape.png,1,"M397.71875,396.42188l4,8l1.73384,0.05863l3.071..."
3,3,CDM_photodraw_e21540576907849,102618_1,age4,this shape,S,/Users/brialong/Documents/GitHub/devphotodraw/...,images/shape.png,2,"M397.71875,396.42188l4,8l1.73384,0.05863l15.26..."
4,4,CDM_photodraw_e21540576907849,102618_1,age4,rectangle,P,/Users/brialong/Documents/GitHub/devphotodraw/...,images_photocues/rectangle.png,1,"M18.71875,1.42188l-15.07726,15.62757l-6.53569,..."


In [28]:
## how many sketches in each dataset?
# Sketches are counted as distinct 'filename' values, per site and overall.
n_cdm = CDM['filename'].nunique()
n_thu = THU['filename'].nunique()
n_all = data['filename'].nunique()

print('{} CDM sketches and {} THU sketches'.format(n_cdm, n_thu))
print('{} total sketches'.format(n_all))

# Sanity check: the per-site counts add up, i.e. no filename occurs in both sites.
assert n_cdm + n_thu == n_all

1922 CDM sketches and 1749 THU sketches
3671 total sketches


In [53]:
## preprocessing
## only grab object category data (not tracing or familiarization data)
TEST_CATEGORIES = ['watch', 'bike', 'chair', 'car', 'tree', 'rabbit',
                   'house', 'cup', 'hat', 'cat', 'bird', 'airplane']

# Take an explicit copy: a new column is assigned below, and writing to a bare
# .loc slice of `data` is exactly what pandas' SettingWithCopy check flags.
# With the copy, this cell is correct without relying on that warning being disabled.
data_test = data.loc[data['category'].isin(TEST_CATEGORIES)].copy()

# The site label is the part of session_id before the first underscore;
# 'Tsinghua' is then renamed to 'THU'.
data_test['location'] = (
    data_test['session_id']
    .apply(lambda session: session.split('_')[0])
    .replace('Tsinghua', 'THU')
)

In [55]:
## how many sketches in each dataset?
# Distinct 'filename' values per site within the filtered (object-category) frame.
cdm_files = data_test.loc[data_test['location'] == 'CDM', 'filename']
thu_files = data_test.loc[data_test['location'] == 'THU', 'filename']

n_cdm_test = cdm_files.nunique()
n_thu_test = thu_files.nunique()
n_all_test = data_test['filename'].nunique()

print('{} CDM sketches and {} THU sketches'.format(n_cdm_test, n_thu_test))
print('{} total sketches'.format(n_all_test))

# Sanity check: the two site counts add up to the overall number of distinct filenames.
assert n_cdm_test + n_thu_test == n_all_test

1438 CDM sketches and 1291 THU sketches
2729 total sketches


In [63]:
## how many of each category?
# NOTE(review): value_counts tallies rows, and the saved output has more rows for a
# single category than there are unique sketches at that site (e.g. 1458 CDM 'bike'
# rows vs 1438 CDM sketches), so a sketch apparently spans several rows -- confirm
# whether per-sketch counts were intended here.
counts = (
    data_test
    .groupby('location')['category']
    .value_counts()
)
display(counts)

location  category
CDM       bike        1458
          bird        1268
          cat         1222
          watch       1086
          rabbit      1067
          car         1061
          house        994
          chair        974
          airplane     933
          tree         762
          cup          497
          hat          494
THU       bike        1303
          cat         1275
          watch       1224
          house       1166
          bird        1153
          rabbit      1087
          chair       1049
          airplane     941
          car          913
          tree         712
          hat          525
          cup          387
Name: category, dtype: int64

## Add in ROI data

In [70]:
# Reload the labels module before importing names from it, so the names below
# reflect the module's current contents.
import devphotodraw_labels as rois
importlib.reload(rois)
# NOTE(review): BIKE is not in this import list although 'bike' rows appear in the
# ROI table -- confirm that is intentional.
from devphotodraw_labels import (
    ROIS, CAT, WATCH, HOUSE, BIRD, RABBIT, CHAIR,
    AIRPLANE, CAR, TREE, HAT, CUP,
)

## ROI dictionary -> ROI dataframe, with
##   color:    the same fixed color on every row (for buttonGallery)
##   part_num: cast to str so that it can be JSON serialized later
R = (
    pd.DataFrame(ROIS)
    .assign(color='#fed541')
    .astype({'part_num': str})
)

In [71]:
# Display the ROI dataframe built from ROIS.
R

Unnamed: 0,object,label_name,part_num,color
0,bike,Wheel,1,#fed541
1,bike,Seat,2,#fed541
2,bike,Handle,3,#fed541
3,bike,Pedal,4,#fed541
4,bike,Chain,5,#fed541
...,...,...,...,...
110,cup,Handle,1,#fed541
111,cup,Body,2,#fed541
112,cup,Base,3,#fed541
113,cup,Rim,4,#fed541
