In [1]:
from __future__ import division

import os
import urllib, cStringIO

import pymongo as pm

import numpy as np
import scipy.stats as stats
import pandas as pd
import json
import re

from PIL import Image
import base64
import sys

import matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot
import seaborn as sns
sns.set_context('talk')
sns.set_style('white')

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")

## setup

In [2]:
# directory & file hierarchy
proj_dir = os.path.abspath('../../..')
analysis_dir = os.getcwd()
results_dir = os.path.join(proj_dir,'results')
plot_dir = os.path.join(results_dir,'plots')
csv_dir = os.path.join(results_dir,'csv')
exp_dir = os.path.abspath(os.path.join(proj_dir,'experiments'))
sketch_dir = os.path.abspath(os.path.join(proj_dir,'sketches'))

## add helpers to python path
if os.path.join(proj_dir,'analysis','python') not in sys.path:
    sys.path.append(os.path.join(proj_dir,'analysis','python'))
    
if not os.path.exists(results_dir):
    os.makedirs(results_dir)
    
if not os.path.exists(plot_dir):
    os.makedirs(plot_dir)   
    
if not os.path.exists(csv_dir):
    os.makedirs(csv_dir)       
    
# Assign variables within imported analysis helpers
import df_generation_helpers as h
if sys.version_info[0]>=3:
    from importlib import reload
reload(h)

<module 'df_generation_helpers' from '/Users/judithfan/graphical_conventions/analysis/python/df_generation_helpers.pyc'>

In [3]:
# set vars 
auth = pd.read_csv('auth.txt', header = None) # this auth.txt file contains the password for the sketchloop user
pswd = auth.values[0][0]
user = 'sketchloop'
host = 'rxdhawkins.me' ## cocolab ip address

# have to fix this to be able to analyze from local
import pymongo as pm
conn = pm.MongoClient('mongodb://sketchloop:' + pswd + '@127.0.0.1')
db = conn['3dObjects']
coll = db['graphical_conventions']

# which iteration name should we use?
iterationName1 = 'run3_size4_waiting'
iterationName2 = 'run4_generalization'

In [4]:
## list of researcher mturk worker ID's to ignore
jefan = ['A1MMCS8S8CTWKU','A1MMCS8S8CTWKV','A1MMCS8S8CTWKS']
hawkrobe = ['A1BOIDKD33QSDK']
megsano = ['A1DVQQLVZR7W6I']
researchers = jefan + hawkrobe + megsano 

In [5]:
## run 3 - get total number of stroke and clickedObj events in the collection as a whole
S1 = coll.find({ '$and': [{'iterationName':iterationName1}, {'eventType': 'stroke'}]}).sort('time')
C1 = coll.find({ '$and': [{'iterationName':iterationName1}, {'eventType': 'clickedObj'}]}).sort('time')

## run 4 - get total number of stroke and clickedObj events in the collection as a whole
S2 = coll.find({ '$and': [{'iterationName':iterationName2}, {'eventType': 'stroke'}]}).sort('time')
C2 = coll.find({ '$and': [{'iterationName':iterationName2}, {'eventType': 'clickedObj'}]}).sort('time')

print str(S1.count() + S2.count()) + ' stroke records in the database.'
print str(C1.count() + S2.count()) + ' clickedObj records in the database.' # previously 722 so 882 ideally 

15497 stroke records in the database.
9314 clickedObj records in the database.


## generate group dataframes

In [6]:
reload(h)
## get list of all candidate games
games = coll.distinct('gameid')

## get list of complete and valid games
run3_complete_games = h.get_complete_and_valid_games(games,coll,iterationName1,researchers=researchers, tolerate_undefined_worker=False)
run4_complete_games = h.get_complete_and_valid_games(games,coll,iterationName2,researchers=researchers, tolerate_undefined_worker=False)

There are 34 complete games in total.
There are 35 complete games in total.


In [14]:
reload(h)
## generate actual dataframe and get only valid games (filtering out games with low accuracy, timeouts)
D_run3 = h.generate_dataframe(coll, run3_complete_games, iterationName1, results_dir)
D_run4 = h.generate_dataframe(coll, run4_complete_games, iterationName2, results_dir)

Analyzing game 7053-d1f771d6-a55b-4681-961f-5a9ebb43cbdb | 0 of 34 
Analyzing game 3558-8fa206a2-9b8c-4d67-a4ab-0832dc0d0f25 | 1 of 34 
Analyzing game 6964-d6de9966-f3dd-45df-8d24-d5bfd5d46eb8 | 2 of 34 
Analyzing game 4853-a05bfdd7-095c-4b93-bdde-2b99fd29c7a2 | 3 of 34 
Analyzing game 5616-7114c020-0e81-482c-952b-b26fc66076a0 | 4 of 34 


KeyboardInterrupt: 

In [None]:
## filtering outliers 
D_run3_filtered = h.filter_crazies(D_run3, 'numStrokes')
D_run3_filtered = h.filter_crazies(D_run3_filtered, 'numCurvesPerSketch')
D_run4_filtered = h.filter_crazies(D_run4, 'numStrokes')
D_run4_filtered = h.filter_crazies(D_run4_filtered, 'numCurvesPerSketch')

# filter out incorrect trials 
D_run3_correct = D_run3_filtered[D_run3_filtered['outcome'] == True]
D_run4_correct = D_run4_filtered[D_run4_filtered['outcome'] == True]

# keep this dataframe and make normalized dataframe for within-subject errors 
D_run3_normalized = D_run3_correct.copy(deep = True)
D_run4_normalized = D_run4_correct.copy(deep = True)

reload(h)
D_run3_normalized = h.grand_mean_normalize(D_run3_normalized, 'numStrokes', run3_complete_games)
D_run3_normalized = h.grand_mean_normalize(D_run3_normalized, 'drawDuration', run3_complete_games)
D_run3_normalized = h.grand_mean_normalize(D_run3_normalized, 'numCurvesPerSketch', run3_complete_games)
D_run3_normalized = h.grand_mean_normalize(D_run3_normalized, 'meanPixelIntensity', run3_complete_games)

D_run4_normalized = h.grand_mean_normalize(D_run4_normalized, 'numStrokes', run4_complete_games)
D_run4_normalized = h.grand_mean_normalize(D_run4_normalized, 'drawDuration', run4_complete_games)
D_run4_normalized = h.grand_mean_normalize(D_run4_normalized, 'numCurvesPerSketch', run4_complete_games)
D_run4_normalized = h.grand_mean_normalize(D_run4_normalized, 'meanPixelIntensity', run4_complete_games)

# writing out data 

## raw, unfiltered
D_run3.to_csv(os.path.join(results_dir, 'graphical_conventions_{}_{}.csv'.format('run3', 'raw')))
D_run4.to_csv(os.path.join(results_dir, 'graphical_conventions_{}_{}.csv'.format('run4', 'raw')))

## filtered, but includes correct and incorrect trials 
D_run3_filtered.to_csv(os.path.join(results_dir, 'graphical_conventions_{}_{}.csv'.format('run3', 'filtered')))
D_run4_filtered.to_csv(os.path.join(results_dir, 'graphical_conventions_{}_{}.csv'.format('run4', 'filtered')))

## filtered, and correct trials only 
D_run3_correct.to_csv(os.path.join(results_dir,'graphical_conventions_{}_{}.csv'.format('run3', 'unnormalized')))
D_run4_correct.to_csv(os.path.join(results_dir,'graphical_conventions_{}_{}.csv'.format('run4', 'unnormalized')))

## filtered, correct trials only, and normalized within subject 
D_run3_normalized.to_csv(os.path.join(results_dir,'graphical_conventions_{}_{}.csv'.format('run3', 'normalized')))
D_run4_normalized.to_csv(os.path.join(results_dir,'graphical_conventions_{}_{}.csv'.format('run4', 'normalized')))

### load in pre-existing dataframes to get png renders to extract features

In [15]:
fpath = os.path.join(results_dir,'graphical_conventions_{}_{}.csv'.format('run3', 'unnormalized'))
D_run3_correct = pd.read_csv(fpath)

fpath = os.path.join(results_dir,'graphical_conventions_{}_{}.csv'.format('run4', 'unnormalized'))
D_run4_correct = pd.read_csv(fpath)

In [32]:
reload(h)

<module 'df_generation_helpers' from '/Users/judithfan/graphical_conventions/analysis/python/df_generation_helpers.py'>

In [None]:
h.save_sketches(D_run3_correct, sketch_dir, 'combined', 'run3')
h.save_sketches(D_run4_correct, sketch_dir, 'combined', 'run4')

saving trial 1 sketch from game: 7053-d1f771d6-a55b-4681-961f-5a9ebb43cbdb
saving trial 12 sketch from game: 7053-d1f771d6-a55b-4681-961f-5a9ebb43cbdb
saving trial 25 sketch from game: 7053-d1f771d6-a55b-4681-961f-5a9ebb43cbdb
saving trial 35 sketch from game: 7053-d1f771d6-a55b-4681-961f-5a9ebb43cbdb
saving trial 6 sketch from game: 3558-8fa206a2-9b8c-4d67-a4ab-0832dc0d0f25
saving trial 17 sketch from game: 3558-8fa206a2-9b8c-4d67-a4ab-0832dc0d0f25
saving trial 27 sketch from game: 3558-8fa206a2-9b8c-4d67-a4ab-0832dc0d0f25
saving trial 38 sketch from game: 3558-8fa206a2-9b8c-4d67-a4ab-0832dc0d0f25
saving trial 9 sketch from game: 6964-d6de9966-f3dd-45df-8d24-d5bfd5d46eb8
saving trial 19 sketch from game: 6964-d6de9966-f3dd-45df-8d24-d5bfd5d46eb8
saving trial 29 sketch from game: 6964-d6de9966-f3dd-45df-8d24-d5bfd5d46eb8
saving trial 39 sketch from game: 6964-d6de9966-f3dd-45df-8d24-d5bfd5d46eb8
saving trial 11 sketch from game: 4853-a05bfdd7-095c-4b93-bdde-2b99fd29c7a2
saving trial 22

In [23]:
D = D_run3_correct

In [26]:
D.iloc[0]

Unnamed: 0                                                                  0
gameID                              7053-d1f771d6-a55b-4681-961f-5a9ebb43cbdb
trialNum                                                                    1
condition                                                            repeated
target                                                             waiting_07
category                                                              waiting
repetition                                                                  0
phase                                                                     pre
Generalization                                                         within
drawDuration                                                            5.415
outcome                                                                  True
response                                                           waiting_07
numStrokes                                                      

In [None]:
_D = D[D['condition'] == 'repeated']
for i,_d in _D.iterrows():
    print ("saving trial {} sketch from game: {}".format(_d['trialNum'],_d['gameID']))
    g = _d['gameID']
    imgData = _d['png']
    trialNum = _d['trialNum']
    target = _d['target']
    repetition = _d['repetition']
    filestr = base64.b64decode(imgData)
    fname = 'sketch.png'
    with open(fname, "wb") as fh:
        fh.write(imgData.decode('base64'))
    im = Image.open(fname)
    #im = im.convert("RGB")
    ### saving sketches to sketch_dir 
    filepath = os.path.join('{}_{}_{}_{}_{}.png'.format(g, trialNum, target, repetition, iterationName))     
    if not os.path.exists(os.path.join(sketch_dir,dir_name)):
        os.makedirs(os.path.join(sketch_dir,dir_name))
    im.save(os.path.join(sketch_dir,dir_name,filepath))