In [1]:
import pymongo as pm
import os
import sys
import numpy as np
import pandas as pd
import json
from IPython.display import clear_output
import itertools
from itertools import groupby

## plotting
import  matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42

import seaborn as sns
sns.set_context('talk')
sns.set_style('darkgrid')

## connect to db

#### (1) establish tunnel to lab server: 
- Run this in terminal: `ssh -fNL 27017:127.0.0.1:27017 USER@cogtoolslab.org`
OR

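- Do this once: add the following alias to your `.bashrc` (or `.zshrc`), then run `source .bashrc`; afterwards, run `tunnel_cogtoolslab` in the terminal whenever you need the tunnel: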
`alias tunnel_cogtoolslab="ssh -fNL 27017:127.0.0.1:27017 USER@cogtoolslab.org"`

#### (2) establish connection to target db and collection

In [2]:
# set vars 
## auth.txt contains the password for the sketchloop user.
## dtype = str keeps the password verbatim: without it, pandas would parse an all-digit
## password as a number (dropping leading zeros and breaking the string concatenation below).
auth = pd.read_csv('auth.txt', header = None, dtype = str)
pswd = auth.values[0][0]
user = 'sketchloop'
host = 'cogtoolslab.org' ## experiment server ip address (reached via the ssh tunnel, so not used directly below)

## connect through the local end of the ssh tunnel (127.0.0.1), authenticating as `user`
## NOTE(review): pymongo expects reserved characters in the password to be percent-escaped
## inside a URI -- confirm the stored password contains none.
conn = pm.MongoClient('mongodb://' + user + ':' + pswd + '@127.0.0.1')
db = conn['stimuli']
coll = db['block-construction-silhouette']

# raw stimulus data
path_to_interesting_structures = 'interesting_structures'


In [None]:
# print list of database names 
# then list of collection names within stimuli db
try:
    print('dbnames:')
    print(conn.list_database_names())
    print('colnames:')
    print(conn['stimuli'].list_collection_names())
except (AttributeError, TypeError):
    ## Fallback for older pymongo releases that only provide the legacy *_names() methods.
    ## Only "method is missing" errors are caught here (pymongo surfaces a missing method as
    ## a TypeError when the attribute is called), so genuine connection / authentication
    ## failures propagate instead of being masked by a second failing call.
    print('dbnames:')
    print(conn.database_names())
    print('colnames:')
    print(conn['stimuli'].collection_names())

#### Setting up the stimulus dictionary
- In the design of silhouette-study-1, there are two conditions: ['physical', 'mental'], which will be manipulated within-participant.
- At the beginning of each experimental session, all of the trials to construct a single session are fetched from the database. 
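- Each trial dictionary needs to contain the following attributes: 
    - `targetBlocks`: List of blocks generated elsewhere in this `stimuli/` dir
    - `targetName`: Name of the target structure (the stimulus filename without its extension)
    - `trialNum`: Position of the trial within the session
    - `condition`: Either 'mental' or 'physical'
    - `versionInd`: Index of the condition sequence that this trial list was built from
- The trial list for each version is bundled into a session-level dictionary under the `meta` key, alongside:
    - `games`: Empty games list that will be populated by `getstims` inside `store.js` as games fetch this type of game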

#### helper functions

In [None]:
def flatten(x):
    '''
    given a list of lists x, return one flat list containing the items
    of every sublist, in their original order
    '''
    flat = []
    for sublist in x:
        flat.extend(sublist)
    return flat

def get_uniq_orders(X):
    '''
    given a list of sequences X, return the unique sequences as a list of lists,
    in order of first appearance

    dict.fromkeys is used instead of set() so that the output order is deterministic:
    iteration order of a set of string tuples can differ between kernel sessions,
    which would make any index assigned to these sequences irreproducible.
    '''
    return [list(x) for x in dict.fromkeys(tuple(x) for x in X)]

def get_longest_run(a):
    '''
    given a list a, return the length of the longest consecutive streak
    of identical values; returns 0 if a is empty
    '''
    ## groupby yields one group per run of equal adjacent values
    run_lengths = [sum(1 for _ in run) for _, run in groupby(a)]
    ## guard against empty input, where max() would raise ValueError
    return max(run_lengths) if run_lengths else 0


def get_all_good_seqs(file_list, conditions = ['a','b'], miniblock_size = 4, num_iters = 5000, streak_thresh = 3):
    
    '''
    Randomly sample candidate condition sequences and return the unique ones that are balanced.

    Each candidate is built by concatenating miniblocks, where a miniblock is one of the
    unique orderings of `conditions` tiled to length `miniblock_size`. A candidate is kept if:
      (1) the mean position of conditions[0] equals the mean position of conditions[1], and
      (2) its longest run of a single condition is shorter than `streak_thresh`.

    file_list = list of uniq stims (only its length is used here)
    conditions = list of condition labels; the first two are the ones whose positions are compared
    miniblock_size = length of each miniblock; must divide len(file_list) and be a multiple of len(conditions)
    num_iters = number of candidate sequences to sample (candidate k is drawn with random seed k)
    streak_thresh = sequences whose longest run is >= this value are discarded

    returns: list of sequences, each a list of condition labels of length len(file_list)
    '''    
    num_stims = len(file_list)
    assert len(conditions) >= 2, 'need at least two conditions to compare'
    assert num_stims%miniblock_size==0
    assert miniblock_size%len(conditions)==0, 'miniblock_size must be a multiple of the number of conditions'
    if num_stims == 0:
        return [] ## nothing to sequence

    miniblock_template = np.tile(conditions, miniblock_size//len(conditions)) ## e.g., ['mental','physical','mental','physical']
    ## permutations() treats repeated labels as distinct, so de-duplicate (order-preserving)
    uniq_orders = list(dict.fromkeys(itertools.permutations(miniblock_template)))
    num_miniblocks = num_stims//miniblock_size

    ## randomly sample miniblocks, concatenate, then measure center of mass of each condition
    com1 = [] ## center of mass of condition 1
    com2 = [] ## center of mass of condition 2
    cond_seq = []
    for thisIter in range(num_iters):
        ## seed with the iteration index so that the sampled candidates are reproducible
        b = np.random.RandomState(thisIter).choice(np.arange(len(uniq_orders)),size=num_miniblocks,replace=True)
        _cond_seq = flatten([uniq_orders[_b] for _b in b])
        cond_seq.append(_cond_seq)
        _cond_arr = np.array(_cond_seq)
        com1.append(np.where(_cond_arr==conditions[0])[0].mean())
        com2.append(np.where(_cond_arr==conditions[1])[0].mean())
    com1, com2 = map(np.array,[com1,com2])

    ## CRITERION 1: mean position of each condition equated
    ## get unique orderings from randomly sampled sequences where center of mass is equal between conditions
    not_diff_inds = np.where(com1-com2==0)[0]
    x = get_uniq_orders([cond_seq[_i] for _i in not_diff_inds])

    ## CRITERION 2: longest streak must be shorter than streak_thresh
    ## (e.g., with streak_thresh = 3, no condition appears 3 or more times in a row)
    good_seqs = [_seq for _seq in x if get_longest_run(_seq) < streak_thresh]

    return good_seqs

#### build list of trial lists

In [None]:
## load in list of structures
## sorted so that trial order is reproducible (os.listdir returns entries in arbitrary order);
## hidden entries (e.g., .DS_Store, .ipynb_checkpoints) are skipped because they are not stimulus files
file_list = sorted(f for f in os.listdir(path_to_interesting_structures) if not f.startswith('.'))
print('There are {} interesting structures to load.'.format(len(file_list)))

## get list of good sequences
good_seqs = get_all_good_seqs(file_list, conditions = ['mental','physical'], miniblock_size = 4)
print('There are {} "good" sequences that satisfy our criteria.'.format(len(good_seqs)))

## read every structure once up front: the targets are the same in every version,
## only the condition assigned to each trial differs
targets = []
for f in file_list:
    stim_list = pd.read_json(os.path.join(path_to_interesting_structures,f),orient='records') ## stim list
    this_targetBlocks = list(stim_list['blocks'].values)
    this_targetName = f.split('.')[0] ## filename up to the first '.'
    targets.append((this_targetName, this_targetBlocks))

## loop through list of good sequences, and build list of versions, each containing a trial list 
Meta = [] ## initialize list of all trial lists
for version_ind, seq in enumerate(good_seqs):    
    trial_list = []
    for i, (this_targetName, this_targetBlocks) in enumerate(targets):
        this_trial = {'targetBlocks':this_targetBlocks,
                      'targetName': this_targetName, 
                      'trialNum' : i,
                      'condition': seq[i], ## condition at position i of this version's sequence
                      'versionInd': version_ind}
        trial_list.append(this_trial)
    
    ## trial_list is already a list of records, so it can be stored directly
    stimList = trial_list
    
    ## bundle all of the stims into stimDict, under the 'meta' key, and also add 'games' list 
    ## (to keep track of which games) and numTrials
    stimDict = {}
    stimDict['meta'] = stimList
    stimDict['games'] = []
    stimDict['numTrials'] = len(stimList)
    stimDict['experimentName'] = 'silhouette-testing1' ## way of keeping track of different stimulus sets
    stimDict['versionInd'] = version_ind
    ## append stimdict to Meta
    Meta.append(stimDict) 

print('Created Metadata.')

## save out Meta as json
MetaDF = pd.DataFrame(Meta)
MetaDF.to_json('stimDict.json',orient='records')

## Load metadata back in as a list of dictionaries
## (json is imported in the first cell; the context manager makes sure the file handle is closed)
with open('stimDict.json',mode='r') as stim_file:
    J = json.load(stim_file)

In [None]:
## Insert each version of the stimulus dictionary (one record per version) into the mongo collection.
## reallyRun is a safety switch: leave it at 0 to skip the insertion, set it to 1 to actually write to the db.
reallyRun = 0
if not reallyRun:
    print('Did not insert any new data.')
else:
    for j in J:
        ## every well-formed record carries its trial list under the top-level 'meta' key
        if 'meta' not in j:
            print('Oops, the stimDict is missing the "meta" key.')
            continue
        coll.insert_one(j)
        print('Inserted {} version of stimDict.'.format(j['versionInd']))
        clear_output(wait=True) ## replace the previous progress message rather than piling up output

### inspect and validate collection

In [None]:
## report how many records the collection holds (estimated count, as reported by the server)
num_records = coll.estimated_document_count()
print('There are {} records in this collection.'.format(num_records))

In [None]:
## Now let's look at one of the records
## (no filter is passed, so this simply fetches a single record; being the last expression
## in the cell, the result is displayed as the cell output)
coll.find_one()

In [None]:
# TODO: run sanity checks to make sure that every target structure appears exactly once per session