In [31]:
import os
import csv
import pandas as pd
import numpy as np
from IPython.display import clear_output
import ast
import pymongo as pm
from datetime import datetime


### Load connection to database

In [65]:
# set vars 
auth = pd.read_csv('auth.txt', header = None) # this auth.txt file contains the password for the stanford-cogsci.org user
pswd = auth.values[0][0]
conn = pm.MongoClient('mongodb://stanford:' + pswd + '@127.0.0.1')

In [66]:
db = conn['devphotodraw_recognition']
coll = db['batched_12afc']

In [67]:
analysis_dir = os.getcwd()
output_dir = os.path.join(analysis_dir,'csv_out')
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

### Only prolific workers have id's that are strings (parsed from url)

In [112]:
this_version = 'batch21_production_june2021'

catch_responses = []
ids = []
catch = coll.find({'sub_id': {'$type' : "string"}, 'version': this_version, 'dataType': 'catch_trial' })
for c in catch:
   catch_responses.append(c['catch'])
   ids.append(c['sub_id'])

check = pd.DataFrame([catch_responses, ids])
check



                                        

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,Choosing what the pictures are.,identifying doodles with one of twelve choices,Guessing what a drawing is depicting,judge what the drawing were by certain subjects,guessing drawings,label drawings from the choices given,guess drawings,identify objects from drawings,Making guesses as to what a drawing might be of.,choosing what each drawing looks like
1,6095fc75ab1d101e060d808,60b85d86147b19b7f8b0c84,606a27ecc448bcda408cee1,5d1ea7e579b687001af802c,60cb71bff8a2183f0d5c659,607ba74574a49a21a181163,5b5c3b266b25590001778ea,5e9c4666d28cb0023c8d43a,5f5eb22e58c9fb7782a6e90,596c1af50e679d0001faca6


In [113]:
commments_check=[]
comments = coll.find({'sub_id': {'$type' : "string"}, 'version': this_version, 'dataType': 'comments' })
for c in comments:
   commments_check.append(c['comments'])
   ids.append(c['sub_id'])

commments_checks = pd.DataFrame([commments_check, ids])
commments_checks

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,No,No,Some drawings did not depict any of the choice...,no comments,No,it was kind of tricky,,no,,A fun survey,,,,,,,,,,
1,6095fc75ab1d101e060d808,60b85d86147b19b7f8b0c84,606a27ecc448bcda408cee1,5d1ea7e579b687001af802c,60cb71bff8a2183f0d5c659,607ba74574a49a21a181163,5b5c3b266b25590001778ea,5e9c4666d28cb0023c8d43a,5f5eb22e58c9fb7782a6e90,596c1af50e679d0001faca6,6095fc75ab1d101e060d808,60b85d86147b19b7f8b0c84,606a27ecc448bcda408cee1,5d1ea7e579b687001af802c,607ba74574a49a21a181163,60cb71bff8a2183f0d5c659,5b5c3b266b25590001778ea,5e9c4666d28cb0023c8d43a,5f5eb22e58c9fb7782a6e90,596c1af50e679d0001faca6


In [100]:
commments_check

[u'nope',
 u'no',
 u'No.',
 u'no',
 u"I'm talking crap about the drawings but I have about the same artistic capability that was shown to me here lol",
 u'',
 u'some of the drawings were really difficult to make out',
 u'So of those drawings were pretty bad...',
 u'',
 u'nice study']

In [114]:
# prolific_workers = coll.find({'sub_id': {'$type' : "string"}, 'version': 'batch1_production_june2021' }).distinct('sub_id')
prolific_workers = coll.find({'sub_id': {'$type' : "string"}, 'version': this_version }).distinct('sub_id')
np.size(prolific_workers)
prolific_workers

[u'6095fc75ab1d101e060d808',
 u'60cb71bff8a2183f0d5c659',
 u'5d1ea7e579b687001af802c',
 u'606a27ecc448bcda408cee1',
 u'60b85d86147b19b7f8b0c84',
 u'607ba74574a49a21a181163',
 u'5e9c4666d28cb0023c8d43a',
 u'596c1af50e679d0001faca6',
 u'5f5eb22e58c9fb7782a6e90',
 u'5b5c3b266b25590001778ea']

In [115]:
if (this_version == 'batch5_production_june2021'):
    del prolific_workers[0]
    
if (this_version == 'batch17_production_june2021'):
    del prolific_workers[-1]


###### Open up variables for CSV writing
# basic descriptors
subID = []; 
imageName = []
guessed_category = []
drawn_category = []
version = []

In [116]:
for w in prolific_workers:
    print w
    entries = coll.find({'$and': [{'sub_id': w, 'dataType': 'recognition_rating'}]})
    num_trials =  entries.count()
    print num_trials
    for e in entries:
        subID.append(e['sub_id'])
        imageName.append(e['imageName'])
        guessed_category.append(e['guessed_category'])
        drawn_category.append(e['imageCategory'])
        version.append(e['version'])

6095fc75ab1d101e060d808
104
60cb71bff8a2183f0d5c659
104
5d1ea7e579b687001af802c
104
606a27ecc448bcda408cee1
104
60b85d86147b19b7f8b0c84
104
607ba74574a49a21a181163
104
5e9c4666d28cb0023c8d43a
104
596c1af50e679d0001faca6
104
5f5eb22e58c9fb7782a6e90
104
5b5c3b266b25590001778ea
104


In [117]:

## write out csv 
X_out = pd.DataFrame([subID, imageName, guessed_category, drawn_category, version])
X_out = X_out.transpose()
X_out.columns = ['subID','imageName', 'guessed_category','drawn_category','version']
X_out.to_csv(os.path.join(output_dir, this_version + '.csv'))


In [41]:
# path = 'P_bird_sketch_age5_IPAD5_THU5F3.png'
# path = 'P_airplane_sketch_age4_CDM_photodraw_e21547242197593.png'
# condition = path.split('/')[-1].split('_')[4]
# if path.find('CDM')!=-1:
#     site = 'CDM'
# else:
#     site = 'THU'
# site

In [None]:
# mturk_id
# assignment_id
# trial_id
# hit_id