# Notebook for Generating dataframes from multiple sessions

Pilot 3 added more checks for engagement early in the experiment.
Iteration names:
'pre-pilot3':

## 1. Read in packages and set up server connection
This first section reads in the packages needed for analysis and sets up the file paths and the connection to the MongoDB server.

In [1]:
# --- standard library, plus sys.path setup ---
import os
import sys
import urllib, io
# NOTE(review): the return value of this call is discarded.
os.getcwd()
# Extend the module search path with directories relative to the working
# directory; presumably this is what lets the project-local modules imported
# at the bottom of this cell be found -- TODO confirm.
sys.path.append("..")
sys.path.append("../utils")
sys.path.append("../analysis/utils")


# --- numerical / data-frame stack ---
import numpy as np
import scipy.stats as stats
import pandas as pd

# --- database access and small parsing helpers ---
import pymongo as pm
from collections import Counter
import json
import re
import ast

# --- image handling ---
from PIL import Image, ImageOps, ImageDraw, ImageFont 

from io import BytesIO
import base64

# --- plotting ---
import  matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
# `plt` is bound as an alias for matplotlib.pyplot.
plt = pyplot
import matplotlib as mpl
# Per matplotlib's rcParams documentation, 42 selects TrueType (Type 42) font
# embedding for PDF output.
mpl.rcParams['pdf.fonttype'] = 42

import seaborn as sns
sns.set_context('talk')
sns.set_style('darkgrid')

from IPython.display import clear_output

# Suppress DeprecationWarnings and the warnings whose message starts with
# "numpy.dtype size changed" / "numpy.ufunc size changed".
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")

# --- project-local helpers (not part of the standard library or PyPI stack
# above; presumably resolved through the sys.path entries added earlier) ---
import drawing_utils as drawing
import importlib
import scoring

  import pandas.util.testing as tm


In [2]:
## directory & file hierarchy
# Every path is derived from the current working directory: `analysis_dir` is
# the working directory itself and `proj_dir` is its parent.
proj_dir = os.path.abspath('..')
datavol_dir = os.path.join(proj_dir,'data')
analysis_dir = os.path.abspath('.')
results_dir = os.path.join(proj_dir,'results')
plot_dir = os.path.join(results_dir,'plots')
csv_dir = os.path.join(results_dir,'csv')
json_dir = os.path.join(results_dir,'json')
exp_dir = os.path.abspath(os.path.join(proj_dir,'behavioral_experiments'))
png_dir = os.path.abspath(os.path.join(datavol_dir,'png'))

## add helpers to python path
# Guarded so that re-running the cell does not append the same entry twice.
if os.path.join(proj_dir,'stimuli') not in sys.path:
    sys.path.append(os.path.join(proj_dir,'stimuli'))

## make sure every output directory exists
# exist_ok=True makes each call idempotent and avoids the check-then-create
# race of `if not os.path.exists(...)`. json_dir is created here as well so
# that all three results sub-directories defined above are usable.
os.makedirs(results_dir, exist_ok=True)
os.makedirs(plot_dir, exist_ok=True)
os.makedirs(csv_dir, exist_ok=True)
os.makedirs(json_dir, exist_ok=True)

### establish connection to mongo
The first thing you need to do is establish an ssh tunnel (aka local port forwarding) to the server, so that requests to the mongodb can be made "as if" the mongodb server were running on your local computer. Run this from the command line before you begin data analysis if you plan to fetch data from mongo:

`ssh -fNL 27017:127.0.0.1:27017 USERNAME@cogtoolslab.org`

In [3]:
# set vars
# auth.txt contains the password for the sketchloop user. dtype=str keeps an
# all-digit password as text; otherwise pandas would parse it as a number and
# the string concatenation below would raise a TypeError.
auth = pd.read_csv(os.path.join(analysis_dir,'../auth.txt'), header = None, dtype = str)
pswd = auth.values[0][0]
user = 'sketchloop'
# Remote server name. It is not used in the URI below, which targets 127.0.0.1
# (presumably the local end of the ssh tunnel -- confirm the tunnel is up).
host = 'cogtoolslab.org'

# have to fix this to be able to analyze from local
import pymongo as pm
from urllib.parse import quote_plus

# Username and password must be percent-escaped before being embedded in a
# MongoDB URI; a raw '@', ':', '/' or '%' in the password would otherwise
# corrupt the URI.
conn = pm.MongoClient('mongodb://' + quote_plus(user) + ':' + quote_plus(pswd) + '@127.0.0.1')
db = conn['compositional-abstractions']
coll = db['prior-elicitation']

# which iteration name should we use?
iterationName = 'pre-pilot'

# ## look up number of trials (including paired-practice)
# numTrials = 13

In [4]:
# coll.find() is called without a filter; its results are materialised into a
# list and loaded into a single DataFrame, then the available fields are shown.
df_all = pd.DataFrame(list(coll.find()))
print(df_all.columns)

Index(['_id', 'rt', 'url', 'trial_type', 'trial_index', 'time_elapsed',
       'internal_node_id', 'wID', 'aID', 'hitID', 'iterationName', 'stimulus',
       'button_pressed', 'test_part', 'utterance', 'target', 'responses',
       'competitor_type', 'target_type', 'pptID', 'question_order', 'workerId',
       'gameID', 'eventType', 'prolificID', 'studyID', 'sessionID'],
      dtype='object')


In [5]:
# Keep only rows that are 'survey-text' trials AND belong to the iteration
# named in `iterationName`; report the resulting shape and preview the rows.
df_responses = df_all.loc[
    (df_all['iterationName'] == iterationName)
    & (df_all['trial_type'] == 'survey-text')
]
print(df_responses.shape)
df_responses.head()

(783, 27)


Unnamed: 0,_id,rt,url,trial_type,trial_index,time_elapsed,internal_node_id,wID,aID,hitID,...,competitor_type,target_type,pptID,question_order,workerId,gameID,eventType,prolificID,studyID,sessionID
317,60a2fba16efd10604f6ae31e,146696.8,,survey-text,2,179511,0.0-2.0-0.0,,,,...,,,3220-bdd0f0ae-2706-6e5f-a9f6-14144a4f9bca,,,,,5f5668cf2db7050961fce434,60a2fa38193f79cbc028a199,60a2fae1d9fcf4eaf1d34599
320,60a2fbfd6efd10604f6ae321,18140.04,,survey-text,2,54614,0.0-2.0-0.0,,,,...,,,0070-8c047886-453d-68bf-8106-9afc1a4a4414,,,,,5dccffe0444b6e9766f054ab,60a2fa38193f79cbc028a199,60a2fbbf130c905141db63fb
323,60a2fc116efd10604f6ae324,19144.905,,survey-text,3,74264,0.0-2.0-0.1,,,,...,,,1066-f13de9bb-bdca-6566-823a-183899ed36a2,,,,,5dccffe0444b6e9766f054ab,60a2fa38193f79cbc028a199,60a2fbbf130c905141db63fb
324,60a2fc236efd10604f6ae325,18143.07,,survey-text,4,92912,0.0-2.0-0.2,,,,...,,,4639-11f3d82d-1880-6e5b-8dac-d1d1f36e4e14,,,,,5dccffe0444b6e9766f054ab,60a2fa38193f79cbc028a199,60a2fbbf130c905141db63fb
325,60a2fc2f6efd10604f6ae326,37924.0,,survey-text,2,116732,0.0-2.0-0.0,,,,...,,,6192-993c566e-4fdd-616f-b38f-403afed2f6f3,,,,,5eb9ead305efb9127be21acd,60a2fa38193f79cbc028a199,60a2fbad3837a3109a48c615


In [6]:
# Show the column labels of the filtered responses frame.
df_responses.columns

Index(['_id', 'rt', 'url', 'trial_type', 'trial_index', 'time_elapsed',
       'internal_node_id', 'wID', 'aID', 'hitID', 'iterationName', 'stimulus',
       'button_pressed', 'test_part', 'utterance', 'target', 'responses',
       'competitor_type', 'target_type', 'pptID', 'question_order', 'workerId',
       'gameID', 'eventType', 'prolificID', 'studyID', 'sessionID'],
      dtype='object')

In [7]:
# Distinct values of the 'prolificID' column in the filtered responses.
# NOTE(review): these look like participant identifiers -- consider clearing
# this output before sharing the notebook.
df_responses['prolificID'].unique()

array(['5f5668cf2db7050961fce434', '5dccffe0444b6e9766f054ab',
       '5eb9ead305efb9127be21acd', '5ffe37322e58f34798bac49a',
       '609a09db472e1855f82a61d4', '5fbdd97ff6e47c0008f6cb51',
       '606e336f6de96d1f1b7e5b2f', '6063fd50b9782885b798ae67'],
      dtype=object)

### Get count of completed trials

In [8]:
# Number of distinct '_id' values recorded per 'prolificID', largest first.
(
    df_responses
    .groupby(['prolificID'])['_id']
    .nunique()
    .sort_values(ascending=False)
)

prolificID
609a09db472e1855f82a61d4    142
606e336f6de96d1f1b7e5b2f    130
6063fd50b9782885b798ae67    130
5eb9ead305efb9127be21acd    130
5dccffe0444b6e9766f054ab    130
5ffe37322e58f34798bac49a    118
5fbdd97ff6e47c0008f6cb51      2
5f5668cf2db7050961fce434      1
Name: _id, dtype: int64

In [9]:
# Remove pandas' cap on displayed rows. This is a global option, so it stays
# in effect for every cell executed afterwards.
pd.set_option('display.max_rows', None)
# All utterances from the single participant whose prolificID is given below.
df_responses.loc[df_responses['prolificID'] == '609a09db472e1855f82a61d4', 'utterance']

522                            Upside down F in blue box 
527             Black shape that looks a bit like a snake
532     Black shape that look like stairs leading to a...
542            Black shape that looks like a snake again 
543                                           Backwards F
550                                       Stretched out S
553                                                  Tooa
557                         Explanation mark with no dot 
561                 Looks like a Viking boat from behind 
568                      Looks like the brandenburg gate 
579     Rectangle but the second chunck has been pulle...
584                                        Backwards flag
596                                      Backwards snake 
600                                           Tall snake 
605                                        Backwards flag
612                                 Headless man kneeling
621                                Person doing crunches 
625           

In [23]:
# Summary statistics (as produced by describe()) of the utterances, grouped by
# the 'target' column.
(
    df_responses
    .groupby('target')['utterance']
    .describe()
)

Unnamed: 0_level_0,count,unique,top,freq
target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
prior-stimuli/tower_0.png,6,5,rectangle,2
prior-stimuli/tower_1.png,6,6,1 rectangle and 4 squares,1
prior-stimuli/tower_10.png,6,6,Someone leading down to be sick,1
prior-stimuli/tower_100.png,6,6,2 rectangles and 1 square,1
prior-stimuli/tower_101.png,6,6,Sideways hat,1
prior-stimuli/tower_102.png,6,6,two lines touching on the sides,1
prior-stimuli/tower_103.png,7,7,line with edges coming off top and bottom,1
prior-stimuli/tower_104.png,6,6,Tetris shape,1
prior-stimuli/tower_105.png,6,6,2 rectangles and 1 square,1
prior-stimuli/tower_106.png,6,6,Directions sign,1


In [27]:
# Order the responses by 'target', then show only the target/utterance pair.
(
    df_responses
    .sort_values('target')
    .loc[:, ['target','utterance']]
)

Unnamed: 0,target,utterance
863,prior-stimuli/tower_0.png,rectangle
1119,prior-stimuli/tower_0.png,Black rectangle
340,prior-stimuli/tower_0.png,a horizontal rectangle
454,prior-stimuli/tower_0.png,garage
839,prior-stimuli/tower_0.png,it is a rectangle
476,prior-stimuli/tower_0.png,rectangle
636,prior-stimuli/tower_1.png,Bed with boxes
824,prior-stimuli/tower_1.png,seat
646,prior-stimuli/tower_1.png,squished 2
496,prior-stimuli/tower_1.png,please
