In [None]:
from __future__ import division

import os
import urllib, cStringIO

import pymongo as pm

import matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot
import seaborn as sns
sns.set_context('poster')
sns.set_style('white')

import numpy as np
import scipy.stats as stats
import pandas as pd
import json
import re

from PIL import Image
import base64
import sys

### Setup

In [None]:
# Project layout: every path below is derived from the project root, which is
# resolved relative to the directory the notebook is run from.
proj_dir = os.path.abspath('../../..')
analysis_dir = os.getcwd()
results_dir = os.path.join(proj_dir, 'results')
plot_dir = os.path.join(results_dir, 'plots')
csv_dir = os.path.join(results_dir, 'csv')
exp_dir = os.path.abspath(os.path.join(proj_dir, 'experiments'))
sketch_dir = os.path.abspath(os.path.join(proj_dir, 'sketches'))

## make the shared helper module importable (added to sys.path only once)
helper_dir = os.path.join(proj_dir, 'analysis', 'python')
if helper_dir not in sys.path:
    sys.path.append(helper_dir)

## create the output directories on first run (parent before its children)
for _out_dir in (results_dir, plot_dir, csv_dir):
    if not os.path.exists(_out_dir):
        os.makedirs(_out_dir)

# Import the analysis helpers, then reload them so that re-running this cell
# picks up edits made to analysis_helpers since the first import.
# `reload` is a builtin on Python 2 and lives in importlib on Python 3.
import analysis_helpers as h
if sys.version_info[0] >= 3:
    from importlib import reload
reload(h)

In [None]:
# set vars
auth = pd.read_csv('auth.txt', header = None) # this auth.txt file contains the password for the sketchloop user
pswd = auth.values[0][0]
user = 'sketchloop'
host = 'rxdhawkins.me' ## cocolab ip address

# quote_plus lives in a different module on Python 2 and Python 3
try:
    from urllib.parse import quote_plus
except ImportError:
    from urllib import quote_plus

# have to fix this to be able to analyze from local
# NOTE(review): the URI below targets 127.0.0.1, not `host`; presumably the
# database is reached through a local port forward -- confirm before relying on it.
# The user name and password are percent-escaped because characters such as
# ':', '/', '@' or '%' would otherwise corrupt the URI; str() guards against
# read_csv having parsed an all-digit password as a number.
conn = pm.MongoClient('mongodb://{}:{}@127.0.0.1'.format(quote_plus(user), quote_plus(str(pswd))))
db = conn['3dObjects']
coll = db['graphical_conventions']

# which iteration name should we use?
iterationName = 'run0_bonusmeter'

In [None]:
## count the stroke and clickedObj events recorded under this iterationName
stroke_query = {'$and': [{'iterationName': iterationName}, {'eventType': 'stroke'}]}
click_query = {'$and': [{'iterationName': iterationName}, {'eventType': 'clickedObj'}]}

# S and C are time-sorted cursors over the matching records
S = coll.find(stroke_query).sort('time')
C = coll.find(click_query).sort('time')

print('{} stroke records in the database.'.format(S.count()))
print('{} clickedObj records in the database.'.format(C.count()))

### Generate group dataframe

In [None]:
## MTurk worker IDs that belong to the researchers and should be ignored
jefan = [
    'A1MMCS8S8CTWKU',
    'A1MMCS8S8CTWKV',
    'A1MMCS8S8CTWKS',
]
hawkrobe = ['A1BOIDKD33QSDK']
megsano = ['A1DVQQLVZR7W6I']

# flatten the per-person lists into one list, keeping the order above
researchers = sum([jefan, hawkrobe, megsano], [])

In [None]:
## get list of all candidate games: every distinct 'gameid' value in the collection
## NOTE(review): this query is not restricted to iterationName, so it presumably
## includes games from other iterations too -- confirm that
## get_complete_and_valid_games narrows the list as intended.
games = coll.distinct('gameid')

## get list of complete and valid games
## The researcher worker IDs are handed to the helper; the actual completeness /
## validity rules are defined in analysis_helpers, not in this notebook.
complete_games = h.get_complete_and_valid_games(games,coll,researchers=researchers)

In [None]:
# preprocessing
#
# Walk over every clickedObj event (one per trial) of each selected game and
# append one entry per trial to each of the parallel lists below. The next
# cell assembles these lists into the group dataframe.

TrialNum = []
GameID = []
Condition = []
Target = []
## TODO: add "Category" to this dataframe
Distractor1 = []
Distractor2 = []
Distractor3 = []
Outcome = []
Response = []
Repetition = []
Phase = []
numStrokes = []
drawDuration = [] # in seconds
svgStringLength = [] # sum of svg string for whole sketch
svgStringLengthPerStroke = [] # svg string length per stroke
numCurvesPerSketch = [] # number of curve segments per sketch
numCurvesPerStroke = [] # mean number of curve segments per stroke
svgStringStd = [] # std of svg string length across strokes for this sketch
png = []

# How many games to process. None means every complete game; set it to a
# small int (e.g. 2) for a quick debugging pass over the first few games.
max_games = None
_complete_games = complete_games[:max_games]

for i, g in enumerate(_complete_games):
    print('Analyzing game {} | {} of {}: '.format(g, i + 1, len(_complete_games)))

    # all clickedObj events in this game, in time order
    X = coll.find({ '$and': [{'gameid': g}, {'eventType': 'clickedObj'}]}).sort('time')

    for t in X: # for each clickedObj event
        # Fetch this trial's stroke events once and hold them in a list: a
        # cursor can only be iterated a single time, and indexing or counting
        # it issues a fresh query each time.
        strokes = list(coll.find({ '$and': [{'gameid': g}, {'eventType': 'stroke'}, {'trialNum': t['trialNum']}]}).sort('time'))
        if not strokes:
            # Nothing to measure; skip before appending anything so that the
            # parallel lists stay the same length.
            print('Skipping trial {} of game {}: no stroke records found.'.format(t['trialNum'], g))
            continue

        first_stroke = strokes[0]
        last_stroke = strokes[-1]

        ### stroke-level measures
        lastStrokeNum = float(last_stroke['currStrokeNum']) # get currStrokeNum at last stroke
        # the stroke timestamps are divided by 1000 to give seconds
        duration = (float(last_stroke['endStrokeTime']) - float(first_stroke['startStrokeTime'])) / 1000
        ls = [len(s['svgData']) for s in strokes]
        # Count occurrences of the character 'c' in each stroke's svg string.
        # NOTE(review): only lowercase 'c' is counted; if the recorded paths
        # use uppercase 'C' commands this yields 0 -- confirm against the data
        # (possibly related to the "numCurves empty" TODO item).
        num_curves = [s['svgData'].count('c') for s in strokes]
        distractors = [t['object2Name'],t['object3Name'],t['object4Name']]

        # Every value for this trial is now computed; append them together so
        # an exception above cannot leave the lists misaligned.
        numStrokes.append(lastStrokeNum)
        drawDuration.append(duration)
        svgStringLength.append(sum(ls))
        svgStringLengthPerStroke.append(sum(ls)/lastStrokeNum)
        svgStringStd.append(np.std(ls))
        numCurvesPerSketch.append(sum(num_curves))
        numCurvesPerStroke.append(sum(num_curves)/lastStrokeNum)

        ### aggregate game metadata
        TrialNum.append(t['trialNum'])
        GameID.append(t['gameid'])
        Target.append(t['intendedName'])
        Condition.append(t['condition'])
        Phase.append(t['phase'])
        Repetition.append(t['repetition'])
        Response.append(t['clickedName'])
        Outcome.append(t['correct'])
        Distractor1.append(distractors[0])
        Distractor2.append(distractors[1])
        Distractor3.append(distractors[2])
        png.append(t['pngString'])


In [None]:
## now actually make dataframe
# MAIN DATA
# Convert each per-trial list into a numpy array (the names are rebound to the
# arrays, as before).
GameID,TrialNum,Condition, Target, Repetition, Phase, drawDuration, Outcome, Response, numStrokes, svgStringLength, svgStringLengthPerStroke, svgStringStd, png = map(np.array, \
[GameID,TrialNum,Condition, Target, Repetition, Phase, drawDuration,Outcome, Response, numStrokes, svgStringLength, svgStringLengthPerStroke, svgStringStd, png])

# list(...) so that Repetition is a real list on Python 3 as well as Python 2
Repetition = list(map(int,Repetition))

# Build the frame column by column. Stacking the arrays as rows and then
# transposing would coerce every column to object dtype; passing a dict keeps
# numeric columns numeric and raises if the columns differ in length instead
# of silently padding. `columns=` fixes the column order.
# NOTE: numCurvesPerSketch / numCurvesPerStroke are computed during
# preprocessing but are not part of this dataframe (cf. TODO item 0 below).
column_names = ['gameID','trialNum','condition', 'target', 'repetition', 'phase', 'drawDuration','outcome', 'response', 'numStrokes', 'svgStringLength', 'svgStringLengthPerStroke', 'svgStringStd', 'png']
column_values = [GameID,TrialNum,Condition, Target, Repetition, Phase, drawDuration,Outcome, Response, numStrokes, svgStringLength, svgStringLengthPerStroke, svgStringStd, png]
D = pd.DataFrame(dict(zip(column_names, column_values)), columns=column_names)

## save out dataframe to be able to load in and analyze later w/o doing the above mongo querying ...
D.to_csv(os.path.join(results_dir,'graphical_conventions_group_data_{}.csv'.format(iterationName)))

### Load group data and visualize behavioral measures over time

#### TODO
0. numCurves empty 
1. look at individual targets (the actual sketches)
2. accuracy 
3. repeated vs control
4. urgent: what happened with '1180-2832f7d7-535a-4fb3-acf4-d40972f6d878' and '9114-90215f67-5c7f-467a-9664-cf43962b5daa' -- why are there multiple rows for the same trialNum?
5. add category to dataframe using dictionary in analysis_helpers
6. print function for control condition
7. replace placeholder in name of sketch rendered file with the actual category metadata
8. then we can factor out the sketch rendering functions to analysis_helpers

In [None]:
# Reload analysis_helpers just to be on the safe side: this re-executes the
# module so that edits made to it since the first import take effect without
# restarting the kernel. (On Python 3, `reload` is the importlib function
# imported in the setup cell.)
reload(h)

## set seaborn style params here
## (same 'poster' context and 'white' style as in the import cell; set again
## after the reload so the plots below use them)
sns.set_context('poster')
sns.set_style('white')

##### plot time series during repetition phase

In [None]:
### dependent variable (a column of D) to plot over time
dv = 'drawDuration'
D0 = h.plot_across_repeats(D,
                           var=dv,
                           limit=12,
                           save_plot=True,
                           plot_dir=plot_dir)

##### compare conditions in pre and post phases

In [None]:
# dependent variable (a column of D) to compare between conditions, pre vs. post
dv = 'drawDuration'
prepost_opts = dict(var=dv, limit=15, save_plot=True, plot_dir=plot_dir)
D1 = h.compare_conditions_prepost(D, **prepost_opts)

### visualize how the sketches are changing across repetitions

In [None]:
### printing out REPEATED CONDITION sketches
#
# For every game, draw that game's sketches on one grid (rows sorted by
# target, then repetition), outline each sketch in green (outcome == 1) or
# red (otherwise), and save the figure as a PDF under sketch_dir/<condition>/.

from io import BytesIO  # in-memory buffer for the decoded PNG bytes (works on Python 2 and 3)

# Index of the first game in complete_games to render. 0 renders every game;
# raise it to resume an interrupted run without redrawing earlier games.
first_game_idx = 0
_valid_gameids = complete_games[first_game_idx:]

# Subplot grid.
# NOTE(review): assumes at most 4 targets x 8 repetitions per game; plt.subplot
# raises if a game has more than n_rows * n_cols rows (e.g. duplicated
# trialNum rows, see TODO item 4) -- confirm.
n_rows, n_cols = 4, 8
textsize = 16
correct_color = (0.4,0.8,0.4)    # green border
incorrect_color = (0.9,0.2,0.2)  # red border

sns.set_style('white')

for g in _valid_gameids:
    print('Printing out sketches from game: ' + g)
    trial_types = ['repeated']
    for tt in trial_types:
        _D = D[(D.condition==tt) & (D.gameID==g)]
        if len(_D) == 0:
            # this game was not included when D was built; don't save an empty figure
            print('No {} trials for game {} in D; skipping.'.format(tt, g))
            continue
        _D = _D.sort_values(by=['target','repetition'])

        fig = plt.figure(figsize=(16,6))
        for _i, (_, _d) in enumerate(_D.iterrows(), 1):
            # decode the base64 PNG straight into memory (no temp file on disk)
            im = Image.open(BytesIO(base64.b64decode(_d['png'])))
            p = plt.subplot(n_rows, n_cols, _i)
            plt.imshow(im)

            # hide ticks and tick labels; the coloured frame is the only decoration
            p.set_xticks([])
            p.set_yticks([])
            border_color = correct_color if _d['outcome'] == 1 else incorrect_color
            for s in ['bottom','top','right','left']:
                p.spines[s].set_color(border_color)
                p.spines[s].set_linewidth(4)

            # repetition number as the title on the first row of the grid
            if (_i <= n_cols) and (tt == 'repeated'):
                plt.title('rep ' + str(_d['repetition']) ,fontsize=textsize)
            # target name as the y-label on the first column of the grid
            if (_i-1) % n_cols == 0:
                plt.ylabel(_d['target'] ,fontsize=textsize)

        out_dir = os.path.join(sketch_dir, tt)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        # 'placeholder' stands in for category metadata (see TODO item 7)
        filepath = os.path.join(out_dir, '{}_{}.pdf'.format(g,'placeholder'))
        plt.tight_layout()
        plt.savefig(filepath)
        plt.close(fig)


##### WORKING AREA: adapt below to render control condition sketches, too

In [None]:
# ### printing out CONTROL CONDITION sketches 

# #_valid_gameids = valid_gameids[:3]
# for g in valid_gameids:
#     print 'Printing out sketches from game: ' + g
# #     trial_types = np.unique(D2.trialType.values)
# #     trial_types = [i for i in list(np.unique(D2.trialType.values)) if i.split('_')[1]=='repeated']
#     trial_types = ['control']
#     for tt in trial_types:
#         _D = D2[(D2.condition=='control') & (D2.gameID==g)]
#         _D = _D.sort_values(by=['target','repetition'])
#         _i = 1
#         textsize=12
#         fig = plt.figure(figsize=(6,16))
#         for i,_d in _D.iterrows():
#             imgData = _d['png']
#             filestr = base64.b64decode(imgData)
#             fname = 'sketch.png'
#             with open(fname, "wb") as fh:
#                 fh.write(imgData.decode('base64'))
#             textsize = 16
#             # first plot the target
#             im = Image.open(fname)
#             p = plt.subplot(4,2,_i)
#             plt.imshow(im)
#             sns.set_style('white')
#             k = p.get_xaxis().set_ticklabels([])
#             k = p.get_yaxis().set_ticklabels([])
#             k = p.get_xaxis().set_ticks([])
#             k = p.get_yaxis().set_ticks([]) 
# #             plt.title(_d['repetition'])
#             outcome = _d['outcome']
#             if outcome == 1:
#                 sides = ['bottom','top','right','left']
#                 for s in sides:
#                     p.spines[s].set_color((0.4,0.8,0.4))
#                     p.spines[s].set_linewidth(4)                               
#             else:
#                 sides = ['bottom','top','right','left']
#                 for s in sides:
#                     p.spines[s].set_color((0.9,0.2,0.2))
#                     p.spines[s].set_linewidth(4)    
#             if (_i-1 < 2) & (tt in 'repeated'): 
#                 plt.title('rep ' + str(_d['repetition']) ,fontsize=textsize)
# #             if (_i < 5) & (tt in 'repeated'):
# #                 plt.title(_d['target'] ,fontsize=textsize)
#             if (_i-1)%2==0:
#                 plt.ylabel(_d['target'] ,fontsize=textsize)

#             _i  = _i + 1
#         filepath = os.path.join(sketch_dir,'control','gameID_' + _d['gameID'] + '_type_' + _d['condition'])
#         if not os.path.exists(os.path.join(sketch_dir,'control')):
#             os.makedirs(os.path.join(sketch_dir,'control'))
#         save(filepath, ext='pdf', close=True, verbose=False)
