In [2]:
from __future__ import division

import numpy as np
import os, sys
from PIL import Image
import pandas as pd
import json
import pickle

from matplotlib import pylab, mlab, pyplot
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.path import Path
import matplotlib.patches as patches
%matplotlib inline

from IPython.core.pylabtools import figsize, getfigs

import seaborn as sns
from sklearn.model_selection import StratifiedKFold

import random

from scipy.stats import norm
from IPython.display import clear_output

import copy
import importlib


### Add Paths

## root paths
curr_dir = os.getcwd()
proj_dir = os.path.abspath(os.path.join(curr_dir, os.pardir)) ## use relative paths

## add helpers to python path (only once, so re-running this cell does not grow sys.path)
import sys
stimuli_dir = os.path.join(proj_dir, 'stimuli')
if stimuli_dir not in sys.path:
    sys.path.append(stimuli_dir)

## custom helper modules
import blockworld_helpers as utils

  import pandas.util.testing as tm


In [3]:
# setup: block shapes, colour palettes and world size
block_dims = [(2, 1), (1, 2)]              # passed to utils.worldify as block_dims
block_colors = ['#B32F0A', '#0A2FB3']      # passed to utils.worldify as block_colors
black = ['#000000'] * 2  # used to display silhouettes
grey = ['#333333'] * 2
world_width = world_height = 8
# integer midpoint of the world's width (3 for a width of 8)
world_center = (world_width - 1) // 2


In [4]:
# Load the table of tower silhouettes from a pickle; the path is relative to the
# notebook's working directory. Later cells read its 'tower_number_str', 'group'
# and 'stim' columns and add 'family_<i>' columns to it.
# NOTE(review): unpickling can execute arbitrary code -- only load this file from a trusted source.
silhouette_superset = pd.read_pickle("./tower_4_block_unique_silhouettes/tower_4_block_unique_silhouettes.pkl")

Goal:
10 annotations in total for each stimulus.
Each participant should annotate a small number of towers (say, fewer than 40).
We should be able to check whether different contexts change the annotations.


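Partition towers into 5 sets of 26 with roughly equal numbers of tall / wide / neither.
Collect annotations from 5 participants for each set.
Find 2 such partitions.
(Note: the partitioning code below uses n_splits = 10, i.e. 10 sets of 13 towers per partition, rather than the 5 sets of 26 described here.)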

In [5]:
# Keep just the tower id and its category, then count towers per category.
group_columns = ['tower_number_str', 'group']
groups = silhouette_superset.loc[:, group_columns]
groups.loc[:, 'group'].value_counts()

neither    94
wide       18
tall       18
Name: group, dtype: int64

We have 18 wide and 18 tall towers.
So each set should have 3 or 4 wides and 3 or 4 tall.

In [6]:
# neither = list(groups.loc[groups['group'] == 'neither','tower_number_str'])
# wide = list(groups.loc[groups['group'] == 'wide','tower_number_str'])
# tall = list(groups.loc[groups['group'] == 'tall','tower_number_str'])

In [74]:
# Build `n_families` independent stratified partitions of the towers.
# Each partition ("family") divides the towers into `n_splits` folds that
# preserve the proportions of the 'group' labels; the fold number of every
# tower is stored in a new column 'family_<i>'.
n_families = 2
n_splits = 10
X = silhouette_superset['tower_number_str']
y = silhouette_superset['group']

for i in range(0, n_families):

    # set up partitioning
    skf = StratifiedKFold(n_splits=n_splits,
                          random_state=i,
                          shuffle=True) # tie random state to family number so each family is reproducible

    # apply partitioning and save to df
    family_col = 'family_' + str(i)
    for split_num, (train_index, test_index) in enumerate(skf.split(X, y)):
        # skf.split yields *positional* indices, whereas .loc is label-based:
        # translate positions to index labels so the assignment is also correct
        # when the frame does not carry a default 0..n-1 index.
        test_labels = silhouette_superset.index[test_index]
        silhouette_superset.loc[test_labels, family_col] = split_num

# silhouette_superset

In [75]:
# verify: draw every tower of one split and print its group label.
# Disabled by default; change `if False` to `if True` to run it.
if False:
    family = 1
    split = 0

    # Select the rows of this split once and renumber them 0..k-1 so they line
    # up with the enumerate() counter below. (The previous code passed `split`
    # as the `level` argument of reset_index, which only worked for split == 0.)
    split_rows = (silhouette_superset
                  .groupby('family_'+str(family))
                  .get_group(split)
                  .reset_index(drop=True))

    tower_worlds = [utils.worldify(w,
                                   block_dims = block_dims,
                                   block_colors = block_colors)
                    for w in split_rows['stim']]

    for i, w in enumerate(tower_worlds):
        fig = utils.draw_tower(w)
        print(split_rows.loc[i,'group'])

## To run multiple versions, upload the same metadata to separate collections, and update stimColName in the configs accordingly

In [106]:
# Name of the experiment; it is used below both as the MongoDB collection name
# (db[experiment_name]) and as the 'experimentName' field of every record.
# Swap the commented line in to target the categorization variant instead.
experiment_name = 'ca_prior_elicitation_4_block_unique_silhouettes_procedural'
# experiment_name = 'ca_prior_elicitation_4_block_unique_silhouettes_categorization'
# Stored as the 'versionInd' field of every uploaded record.
versionInd = 0

In [107]:
# connect to mongo

import pymongo as pm
from urllib.parse import quote_plus

# set vars
auth = pd.read_csv('auth.txt', header = None) # this auth.txt file contains the password for the sketchloop user
pswd = auth.values[0][0]
user = 'sketchloop'
# NOTE(review): `host` is not used below -- the client connects to 127.0.0.1,
# presumably through an SSH tunnel to the experiment server; confirm.
host = 'cogtoolslab.org' ## experiment server ip address

# Credentials embedded in a MongoDB URI must be percent-escaped, otherwise a
# password containing characters such as '@', ':' or '/' produces an invalid URI.
# str() guards against read_csv having parsed an all-digit password as a number.
mongo_uri = 'mongodb://' + quote_plus(user) + ':' + quote_plus(str(pswd)) + '@127.0.0.1'
conn = pm.MongoClient(mongo_uri)
db = conn['stimuli']
coll = db[experiment_name]

In [108]:
# convert to lists of stimulus names
# Build one metadata record per (family, split) pair; each record lists the
# towers of that split together with their group labels.

metadata = []

for f in range(0, n_families):
    # group once per family instead of once per stimulus
    family_groups = silhouette_superset.groupby('family_'+str(f))
    for s in range(0,n_splits):
        split_rows = family_groups.get_group(s)
        stim_numbers = list(split_rows['tower_number_str'])
        metadata.append(
            {
                'partitionFamily': f,
                'splitNumber': s,
                'stimNumbers': stim_numbers,
                'ntrials': len(stim_numbers),
                # tower number -> group label, paired row by row
                'stimGroups': dict(zip(stim_numbers, split_rows['group'])),
                'numGames': 0,
                'games': [],
                'experimentType': 'prior_elicitation',
                'experimentName': experiment_name,
                's3_bucket': 'lax-tower-4-block-unique-silhouettes-json',
                'versionInd': versionInd
            })

metadata

[{'partitionFamily': 0,
  'splitNumber': 0,
  'stimNumbers': ['000',
   '011',
   '025',
   '029',
   '034',
   '041',
   '049',
   '060',
   '065',
   '068',
   '081',
   '100',
   '113'],
  'ntrials': 13,
  'stimGroups': {'000': 'neither',
   '011': 'wide',
   '025': 'wide',
   '029': 'neither',
   '034': 'neither',
   '041': 'neither',
   '049': 'neither',
   '060': 'neither',
   '065': 'neither',
   '068': 'neither',
   '081': 'neither',
   '100': 'tall',
   '113': 'neither'},
  'numGames': 0,
  'games': [],
  'experimentType': 'prior_elicitation',
  'experimentName': 'ca_prior_elicitation_4_block_unique_silhouettes_procedural',
  's3_bucket': 'lax-tower-4-block-unique-silhouettes-json',
  'versionInd': 0},
 {'partitionFamily': 0,
  'splitNumber': 1,
  'stimNumbers': ['007',
   '009',
   '019',
   '023',
   '053',
   '061',
   '062',
   '072',
   '076',
   '084',
   '086',
   '106',
   '115'],
  'ntrials': 13,
  'stimGroups': {'007': 'neither',
   '009': 'neither',
   '019': 'wide'

In [109]:
# Sanity check: display the collection name that the drop/insert cells below operate on.
experiment_name

'ca_prior_elicitation_4_block_unique_silhouettes_procedural'

In [110]:
# DESTRUCTIVE: removes the whole `experiment_name` collection from the database
# so the records can be uploaded afresh. Set the flag to False to keep existing data.
really_run = True

if really_run:
    db.drop_collection(experiment_name)

In [111]:
## now, iterate through each version and insert into mongo
## loop through list of records and insert each into collection
reallyRun = 1
if reallyRun:
    for record in metadata:
        # NOTE: insert_one adds an '_id' key to `record` in place, so running
        # this cell a second time without rebuilding `metadata` (or dropping the
        # collection first) raises a duplicate-key error instead of duplicating data.
        coll.insert_one(record)
        print('Inserted version {} of stimDict.'.format(record['versionInd']))
        # wait=True replaces the previous message, so only the last one stays visible
        clear_output(wait=True)

else:
    print('Did not insert any new data.')

Inserted version 0 of stimDict.


In [112]:
# Spot-check the upload: show a single record from the collection.
coll.find_one()

{'_id': ObjectId('610b055a8ad19cd87c7e42d7'),
 'partitionFamily': 0,
 'splitNumber': 0,
 'stimNumbers': ['000',
  '011',
  '025',
  '029',
  '034',
  '041',
  '049',
  '060',
  '065',
  '068',
  '081',
  '100',
  '113'],
 'ntrials': 13,
 'stimGroups': {'000': 'neither',
  '011': 'wide',
  '025': 'wide',
  '029': 'neither',
  '034': 'neither',
  '041': 'neither',
  '049': 'neither',
  '060': 'neither',
  '065': 'neither',
  '068': 'neither',
  '081': 'neither',
  '100': 'tall',
  '113': 'neither'},
 'numGames': 0,
 'games': [],
 'experimentType': 'prior_elicitation',
 'experimentName': 'ca_prior_elicitation_4_block_unique_silhouettes_procedural',
 's3_bucket': 'lax-tower-4-block-unique-silhouettes-json',
 'versionInd': 0}

In [113]:
# Number of records in the collection (expected: n_families * n_splits).
# count_documents counts server-side instead of downloading every record.
coll.count_documents({})

20

In [114]:
# Display every record in the collection (long output).
list(coll.find())

[{'_id': ObjectId('610b055a8ad19cd87c7e42d7'),
  'partitionFamily': 0,
  'splitNumber': 0,
  'stimNumbers': ['000',
   '011',
   '025',
   '029',
   '034',
   '041',
   '049',
   '060',
   '065',
   '068',
   '081',
   '100',
   '113'],
  'ntrials': 13,
  'stimGroups': {'000': 'neither',
   '011': 'wide',
   '025': 'wide',
   '029': 'neither',
   '034': 'neither',
   '041': 'neither',
   '049': 'neither',
   '060': 'neither',
   '065': 'neither',
   '068': 'neither',
   '081': 'neither',
   '100': 'tall',
   '113': 'neither'},
  'numGames': 0,
  'games': [],
  'experimentType': 'prior_elicitation',
  'experimentName': 'ca_prior_elicitation_4_block_unique_silhouettes_procedural',
  's3_bucket': 'lax-tower-4-block-unique-silhouettes-json',
  'versionInd': 0},
 {'_id': ObjectId('610b055a8ad19cd87c7e42d8'),
  'partitionFamily': 0,
  'splitNumber': 1,
  'stimNumbers': ['007',
   '009',
   '019',
   '023',
   '053',
   '061',
   '062',
   '072',
   '076',
   '084',
   '086',
   '106',
   '11

## Test which versions have been run

In [94]:
# Choose which experiment's collection to inspect; swap the commented line in
# to look at the categorization variant instead.
experiment_name = 'ca_prior_elicitation_4_block_unique_silhouettes_procedural'
# experiment_name = 'ca_prior_elicitation_4_block_unique_silhouettes_categorization'

In [95]:
# connect to mongo
# Re-point `coll` at the chosen experiment's collection. This relies on `db`
# having been created by the connection cell earlier in the notebook.
coll = db[experiment_name]

In [96]:
# Display every record of the selected collection (long output); each record
# carries 'numGames' and 'games' fields alongside the stimulus lists.
list(coll.find())

[{'_id': ObjectId('6109c8458ad19cd87c7e4298'),
  'partitionFamily': 0,
  'splitNumber': 0,
  'stimNumbers': ['000',
   '011',
   '025',
   '029',
   '034',
   '041',
   '049',
   '060',
   '065',
   '068',
   '081',
   '100',
   '113'],
  'ntrials': 13,
  'stimGroups': {'000': 'neither',
   '011': 'wide',
   '025': 'wide',
   '029': 'neither',
   '034': 'neither',
   '041': 'neither',
   '049': 'neither',
   '060': 'neither',
   '065': 'neither',
   '068': 'neither',
   '081': 'neither',
   '100': 'tall',
   '113': 'neither'},
  'numGames': 0,
  'games': [],
  'experimentType': 'prior_elicitation',
  'experimentName': 'ca_prior_elicitation_4_block_unique_silhouettes_procedural',
  's3_bucket': 'lax-tower-4-block-unique-silhouettes-json',
  'versionInd': 0},
 {'_id': ObjectId('6109c8458ad19cd87c7e4299'),
  'partitionFamily': 0,
  'splitNumber': 1,
  'stimNumbers': ['007',
   '009',
   '019',
   '023',
   '053',
   '061',
   '062',
   '072',
   '076',
   '084',
   '086',
   '106',
   '11