## import modules

In [28]:
import pymongo as pm
import pandas as pd
import json
from IPython.display import clear_output

## connect to db

#### (1) establish tunnel to lab server
Run this in terminal:
`alias tunnel_cogtoolslab="ssh -fNL 27017:127.0.0.1:27017 USER@cogtoolslab.org"`

#### (2) establish connection to target db and collection

In [23]:
# set vars 
auth = pd.read_csv('auth.txt', header = None) # this auth.txt file contains the password for the sketchloop user
pswd = auth.values[0][0]
user = 'sketchloop'
host = 'cogtoolslab.org' ## cocolab ip address

conn = pm.MongoClient('mongodb://sketchloop:' + pswd + '@127.0.0.1')
db = conn['gallerize']
coll = db['kiddraw']

## load in metadata as json and insert into mongo

In [25]:
## load in metadata CSV
out_path = 'parsed.csv'
x = pd.read_csv(out_path)

## convert to json
y = x.to_json(orient='records')
z = json.loads(y)

In [29]:
## inspect example of record to make sure it looks right before inserting new data
z[0]

{u'age': 5,
 u'class': u'chair',
 u'expID': u'cdmrunv3',
 u'filename': u'chair_sketch_age5_cdm_run_v31530732171052.png',
 u'sessionID': u'1530732171052.png',
 u'valid': 1}

In [32]:
## loop through list of records and insert each into collection
reallyRun = 1
if reallyRun:
    for i,_z in enumerate(z):
        coll.insert_one(_z)
        print('Inserted {} records.'.format(i))
        clear_output(wait=True)
else:
    print('Did not insert any new data.')

Inserted 753 records.


### inspect & validate collection (to make sure records look right)

In [34]:
print('There are {} records in this collection.'.format(coll.count()))

There are 754 records in this collection.


In [44]:
class_list = coll.distinct('class')
for i,this_class in enumerate(class_list):
    print('There are {} {} records in this collection.'.format(coll.find({'class':this_class}).count(),this_class))

There are 28 chair records in this collection.
There are 28 chair records in this collection.
There are 43 house records in this collection.
There are 51 phone records in this collection.
There are 28 cat records in this collection.
There are 31 airplane records in this collection.
There are 28 rabbit records in this collection.
There are 27 bird records in this collection.
There are 27 bowl records in this collection.
There are 31 train records in this collection.
There are 26 bike records in this collection.
There are 28 scissors records in this collection.
There are 50 boat records in this collection.
There are 21 bear records in this collection.
There are 28 couch records in this collection.
There are 43 tree records in this collection.
There are 39 person records in this collection.
There are 33 car records in this collection.
There are 45 dog records in this collection.
There are 44 fish records in this collection.
There are 39 cup records in this collection.
There are 25 sheep r

In [47]:
age_list = sorted(coll.distinct('age'))
for i,this_age in enumerate(age_list):
    print('There are {} records from {}-year-olds in this collection.'.format(coll.find({'age':this_age}).count(),this_age))

There are 67 records from 2-year-olds in this collection.
There are 173 records from 3-year-olds in this collection.
There are 116 records from 4-year-olds in this collection.
There are 142 records from 5-year-olds in this collection.
There are 60 records from 6-year-olds in this collection.
There are 46 records from 7-year-olds in this collection.
There are 91 records from 8-year-olds in this collection.
There are 14 records from 9-year-olds in this collection.
There are 45 records from 10-year-olds in this collection.
