# Notebook for Generating dataframes from multiple sessions

Pilot 3 added more checks for engagement early in the experiment.
Iteration names:
'pre-pilot3':

## 1. Read in packages and set up server connection
This first section reads in the packages needed for analysis and establishes file paths and the connection to the MongoDB server.

In [7]:
import os
import sys
import urllib, io
os.getcwd()
sys.path.append("..")
sys.path.append("../utils")
sys.path.append("../analysis/utils")


import numpy as np
import scipy.stats as stats
import pandas as pd

import pymongo as pm
from collections import Counter
import json
import re
import ast

from PIL import Image, ImageOps, ImageDraw, ImageFont 

from io import BytesIO
import base64

import  matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42

import seaborn as sns
sns.set_context('talk')
sns.set_style('darkgrid')

from IPython.display import clear_output

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")

import drawing_utils as drawing
import importlib
import scoring

In [8]:
## directory & file hierarchy
# Every location is derived from the current working directory: the working
# directory is treated as the analysis folder and its parent as the project root.
proj_dir = os.path.abspath('..')
analysis_dir = os.path.abspath('.')
datavol_dir = os.path.join(proj_dir,'data')
results_dir = os.path.join(proj_dir,'results')
plot_dir = os.path.join(results_dir,'plots')
csv_dir = os.path.join(results_dir,'csv')
json_dir = os.path.join(results_dir,'json')
exp_dir = os.path.abspath(os.path.join(proj_dir,'behavioral_experiments'))
png_dir = os.path.abspath(os.path.join(datavol_dir,'png'))

## add helpers to python path
# guarded so that re-running this cell does not append the same entry twice
if os.path.join(proj_dir,'stimuli') not in sys.path:
    sys.path.append(os.path.join(proj_dir,'stimuli'))

## make sure the output folders exist
# exist_ok=True makes creation idempotent and avoids the check-then-create race
# of a separate os.path.exists() test.
# NOTE: json_dir is defined above but is not created here.
for _out_dir in (results_dir, plot_dir, csv_dir):
    os.makedirs(_out_dir, exist_ok=True)

### establish connection to mongo
The first thing you need to do is establish an ssh tunnel (aka local port forwarding) to the server, so that requests to the mongodb can be made "as if" the mongodb server were running on your local computer. Run this from the command line before you begin data analysis if you plan to fetch data from mongo:

`ssh -fNL 27017:127.0.0.1:27017 USERNAME@cogtoolslab.org`

In [9]:
# set vars
from urllib.parse import quote_plus

# auth.txt contains the password for the sketchloop user.
# dtype=str keeps the value as text: with dtype inference an all-digit password
# is parsed as a number and the string concatenation below raises TypeError.
# NOTE(review): read_csv still splits on commas, so a password containing ','
# would be cut down to its first field -- confirm this cannot happen.
auth = pd.read_csv(os.path.join(analysis_dir,'../auth.txt'), header = None, dtype = str)
pswd = auth.values[0][0]
user = 'sketchloop'
host = 'cogtoolslab.org'  # not used below: the client connects to 127.0.0.1

# have to fix this to be able to analyze from local
# Username and password are percent-escaped so that reserved characters
# (e.g. '@', ':', '/', '%') in the password cannot corrupt the connection URI.
conn = pm.MongoClient('mongodb://' + quote_plus(user) + ':' + quote_plus(pswd) + '@127.0.0.1')
db = conn['compositional-abstractions']
coll = db['prior-elicitation']

# which iteration name should we use?
iterationName = 'testing'

# ## look up number of trials (including paired-practice)
# numTrials = 13

In [17]:
# fetch every document in the collection and load them into a single dataframe
df_all = pd.DataFrame(list(coll.find()))

df_all

Unnamed: 0,_id,rt,url,trial_type,trial_index,time_elapsed,internal_node_id,wID,aID,hitID,iterationName,stimulus,button_pressed,test_part,utterance,target,responses,competitor_type,target_type,pptID
0,6075e19882839152199b04e7,7466.35,consent/consent.html,external-html,0,7639,0.0-0.0,,,,testing,,,,,,,,,
1,6075e19a82839152199b04e8,1718.875,,html-button-response,1,9364,0.0-1.0,,,,testing,"<p>In this experiment, you will see one or mor...",0.0,setup,,,,,,
2,6075e19c82839152199b04e9,788.225,,html-button-response,2,11155,0.0-2.0,,,,testing,"<p>In this block, you will see one picture on ...",0.0,block-setup,,,,,,
3,6079e46b82839152199b04ea,46822.66,consent/consent.html,external-html,0,46862,0.0-0.0,,,,testing,,,,,,,,,
4,6079e6cd82839152199b04eb,20985.52,consent/consent.html,external-html,0,21030,0.0-0.0,,,,testing,,,,,,,,,
5,6079e6ce82839152199b04ec,1375.96,,html-button-response,1,22417,0.0-1.0,,,,testing,"<p>In this experiment, you will see one or mor...",0.0,setup,,,,,,
6,6079e6d082839152199b04ed,1136.02,,html-button-response,2,24561,0.0-2.0,,,,testing,"<p>In this block, you will see one picture on ...",0.0,block-setup,,,,,,
7,6079e8fb82839152199b04ee,219657.505,consent/consent.html,external-html,0,219698,0.0-0.0,,,,testing,,,,,,,,,
8,6079e8fd82839152199b04ef,1919.17,,html-button-response,1,221623,0.0-1.0,,,,testing,"<p>In this experiment, you will see one or mor...",0.0,setup,,,,,,
9,6079e8ff82839152199b04f0,1041.995,,html-button-response,2,223673,0.0-2.0,,,,testing,"<p>In this block, you will see one picture on ...",0.0,block-setup,,,,,,


In [19]:
# keep only the rows whose trial_type is 'survey-text'
df_responses = df_all.loc[df_all['trial_type'].eq('survey-text')]
df_responses.head()

Unnamed: 0,_id,rt,url,trial_type,trial_index,time_elapsed,internal_node_id,wID,aID,hitID,iterationName,stimulus,button_pressed,test_part,utterance,target,responses,competitor_type,target_type,pptID
25,6079f19682839152199b0500,2483.82,,survey-text,3,11357,0.0-3.0-0.0,,,,testing,,,trial,daf,tower_102.png,"{""Q0"":""daf""}",isolated,tangram,
26,6079f19a82839152199b0501,3218.195,,survey-text,4,15085,0.0-3.0-0.1,,,,testing,,,trial,asdf,tower_74.png,"{""Q0"":""asdf""}",isolated,tangram,
27,6079f19e82839152199b0502,3431.665,,survey-text,5,19024,0.0-3.0-0.2,,,,testing,,,trial,asdf,tower_124.png,"{""Q0"":""asdf""}",isolated,tangram,
31,6079f2b382839152199b0506,2861.785,,survey-text,3,17417,0.0-3.0-0.0,,,,testing,,,trial,fdsg,tower_30.png,"{""Q0"":""fdsg""}",isolated,tangram,
32,6079f2b682839152199b0507,2842.645,,survey-text,4,20768,0.0-3.0-0.1,,,,testing,,,trial,boop,tower_87.png,"{""Q0"":""boop""}",isolated,tangram,
