In [1]:
from __future__ import division

import numpy as np
import os, sys
from PIL import Image
import pandas as pd
import json
import pickle

from matplotlib import pylab, mlab, pyplot
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.path import Path
import matplotlib.patches as patches
%matplotlib inline

from IPython.core.pylabtools import figsize, getfigs

import seaborn as sns
from sklearn.model_selection import StratifiedKFold

import random

from scipy.stats import norm
from IPython.display import clear_output

import copy
import importlib


### Add Paths

## root paths (resolved relative to the notebook's working directory)
curr_dir = os.getcwd()
proj_dir = os.path.abspath(os.path.join(curr_dir, os.pardir)) ## use relative paths

## add helpers to python path (only once, so re-running the cell is harmless)
import sys
stimuli_dir = os.path.join(proj_dir, 'stimuli')
if stimuli_dir not in sys.path:
    sys.path.append(stimuli_dir)

## import utils from git submodule
## custom helper modules
sys.path.append("./block_utils/")
import blockworld_utils as utils
import domino_settings as dominoes

  import pandas.util.testing as tm


In [2]:
# setup: copy block/world configuration out of the domino settings module
block_dims, block_colors = dominoes.block_dims, dominoes.block_colors
world_width, world_height = dominoes.world_width, dominoes.world_height
world_center = dominoes.world_center

# two-entry single-colour palettes
black = ['#000000'] * 2 # used to display silhouettes
grey = ['#333333'] * 2

In [3]:
# Load the pickled stimulus table; later cells read its 'tower_number_str', 'group' and 'stim' columns.
# NOTE(review): unpickling can execute arbitrary code — only load this file from a trusted source.
silhouette_superset = pd.read_pickle("./tower_4_block_unique_silhouettes/tower_4_block_unique_silhouettes.pkl")

Goal:
10 annotations in total for each stimulus.
Each participant should annotate a small number of towers (say less than 40).
We should be able to check if different contexts change the annotations.


Partition towers into 5 sets of 26 with roughly equal numbers of tall / wide / neither.
Collect annotations from 5 participants for each set.
Find 2 such partitions.

In [4]:
# tower id alongside its silhouette category, used to check class balance
group_columns = ['tower_number_str', 'group']
groups = silhouette_superset.loc[:, group_columns]
groups['group'].value_counts()

neither    94
tall       18
wide       18
Name: group, dtype: int64

We have 18 wide and 18 tall towers.
So each set should have 3 or 4 wides and 3 or 4 tall.

In [6]:
# neither = list(groups.loc[groups['group'] == 'neither','tower_number_str'])
# wide = list(groups.loc[groups['group'] == 'wide','tower_number_str'])
# tall = list(groups.loc[groups['group'] == 'tall','tower_number_str'])

In [5]:
# Partition the towers into `n_splits` stratified splits, once per partition family.
# Each family uses a different random seed, so the families are distinct partitions
# that each keep the tall/wide/neither proportions roughly equal across splits.
n_families = 2
n_splits = 10
X = silhouette_superset['tower_number_str']
y = silhouette_superset['group']

for i in range(0, n_families):

    # set up partitioning
    skf = StratifiedKFold(n_splits=n_splits,
                          random_state=i,
                          shuffle=True) # tie random state to group num

    # skf.split yields POSITIONAL row indices, so collect the split number of
    # every row in a positional array instead of feeding those positions to the
    # label-based .loc (which mis-assigns rows when the index is not 0..n-1).
    # Float/NaN initialisation keeps the column dtype the same as before.
    split_of_row = np.full(len(silhouette_superset), np.nan)
    for split_num, (train_index, test_index) in enumerate(skf.split(X, y)):
        split_of_row[test_index] = split_num

    # save to df: one 'family_<i>' column holding each tower's split number
    silhouette_superset['family_'+str(i)] = split_of_row
#         print(groups.loc[test_index])

# silhouette_superset

In [8]:
# verify (disabled by default): draw every tower of one split and print its group
if False:
    family = 1
    split = 0

    # Look the split up once. reset_index(drop=True) relabels rows 0..n-1 so that
    # they line up with enumerate() below; the previous reset_index(split) passed
    # the split number as the `level` argument and only worked for split == 0.
    split_towers = silhouette_superset.groupby('family_'+str(family)).get_group(split).reset_index(drop=True)

    tower_worlds = [utils.worldify(w,
                                   block_dims = block_dims,
                                   block_colors = block_colors)
                        for w in split_towers['stim']]

    for i, w in enumerate(tower_worlds):
        fig = utils.draw_tower(w)
        print(split_towers.loc[i,'group'])

## To run multiple versions, upload the same metadata to separate collections, and update stimColName in the configs accordingly

In [110]:
# Experiment to operate on; this string is also the name of the Mongo collection
# that is read, dropped and written further down (db[experiment_name]).
experiment_name = 'ca_prior_elicitation_4_block_unique_silhouettes_procedural'
# experiment_name = 'ca_prior_elicitation_4_block_unique_silhouettes_categorization'
# Copied into every metadata record under the 'versionInd' key.
versionInd = 0

In [92]:
# connect to mongo

import pymongo as pm
from urllib.parse import quote_plus

# set vars
# dtype=str stops pandas from coercing a numeric-looking password into a number
auth = pd.read_csv('../../auth.txt', header = None, dtype = str) # this auth.txt file contains the password for the sketchloop user
pswd = str(auth.values[0][0])
user = 'sketchloop'
host = 'cogtoolslab.org' ## experiment server ip address

# Credentials embedded in a MongoDB URI must be percent-escaped, otherwise a
# password containing characters such as '@', ':' or '/' yields an invalid URI.
# NOTE(review): the URI targets 127.0.0.1, not `host` — presumably the server is
# reached through a local tunnel/port-forward; confirm before changing it.
conn = pm.MongoClient('mongodb://' + quote_plus(user) + ':' + quote_plus(pswd) + '@127.0.0.1')
db = conn['stimuli']
coll = db[experiment_name]

In [93]:
# convert to lists of stimulus names
# Builds one metadata record per (partition family, split): the towers in the
# split, their group labels, and bookkeeping fields with initial values.

metadata = []

for f in range(0, n_families):
    # group once per family instead of re-grouping the frame for every stimulus
    family_splits = silhouette_superset.groupby('family_'+str(f))

    for s in range(0,n_splits):
        split_towers = family_splits.get_group(s)
        stim_numbers = list(split_towers['tower_number_str'])

        metadata.append(
            {
                'partitionFamily': f,
                'splitNumber': s,
                'stimNumbers': stim_numbers,
                'ntrials': len(stim_numbers),
                # tower number -> group label, paired row by row within the split
                'stimGroups': dict(zip(stim_numbers, split_towers['group'])),
                'numGames': 0,
                'games': [],
                'experimentType': 'prior_elicitation',
                'experimentName': experiment_name,
                's3_bucket': 'lax-tower-4-block-unique-silhouettes-json',
                'versionInd': versionInd
            })

In [94]:
# Clear metadata collection
# Safety switch: nothing is dropped unless this is flipped to True by hand.
really_run = False

if really_run:
    db.drop_collection(experiment_name)

In [95]:
## upload every metadata record to the collection, one insert per record
## (guarded by a manual switch so that re-running the notebook does not re-insert)
reallyRun = False
if not reallyRun:
    print('Did not insert any new data.')
else:
    for record in metadata:
        coll.insert_one(record)
        print('Inserted version {} of stimDict.'.format(record['versionInd']))
        clear_output(wait=True)

Did not insert any new data.


In [96]:
# Fetch and display every record currently stored in the collection.
list(coll.find())

[{'_id': ObjectId('611bfd71767fe421dbdca4ac'),
  'partitionFamily': 0,
  'splitNumber': 0,
  'stimNumbers': ['000',
   '011',
   '025',
   '029',
   '034',
   '041',
   '049',
   '060',
   '065',
   '068',
   '081',
   '100',
   '113'],
  'ntrials': 13,
  'stimGroups': {'000': 'neither',
   '011': 'wide',
   '025': 'wide',
   '029': 'neither',
   '034': 'neither',
   '041': 'neither',
   '049': 'neither',
   '060': 'neither',
   '065': 'neither',
   '068': 'neither',
   '081': 'neither',
   '100': 'tall',
   '113': 'neither'},
  'numGames': 6,
  'games': ['4636-ee3023be-6db3-47a7-ace2-6107dfd7ca29',
   '0888-24a4244a-6fd2-46d8-b8d1-ad1cbe116ccd',
   '9052-4a613e87-dd33-445c-98bb-19af91a6f1bd',
   '5486-c491d5b0-7311-473e-ac05-9893b3abf381',
   '5989-fe1433c8-7f3b-4a49-8613-d2421a22a335',
   '0147-b0891954-08ff-46d8-81eb-b8feded612ea'],
  'experimentType': 'prior_elicitation',
  'experimentName': 'ca_prior_elicitation_4_block_unique_silhouettes_procedural',
  's3_bucket': 'lax-tower-4-b

## Test which versions have been run

This grabs the dataframe created by the data generator notebook, to see which records need to be run more times.

It wipes the metadata from mongo, and replaces it with individual records for each additional partition that needs to be run.

In [125]:
# Experiment variant whose results are loaded; it is spliced into the results CSV filename.
version = 'procedural'
# version = 'categorization'

In [126]:
# WARNING: THIS JUST GRABS A LOCAL CSV. ENSURE YOU HAVE CREATED THIS WITH DATA GENERATOR NOTEBOOK

# When this switch is off, df_trial is not (re)loaded by this cell.
run_results_through_data_generator = True

if run_results_through_data_generator:
    results_csv_directory = "../../results/csv"
    # print("Possible CSV results files to load:")
    # print(os.listdir(results_csv_directory))

    # the file name is keyed on the experiment variant chosen in `version`
    result_csv = 'lax-tower-4-block-unique-silhouettes-' + version +'-pilot_3.csv'
    result_csv_path = os.path.join(results_csv_directory, result_csv)

    df_trial = pd.read_csv(result_csv_path)

In [127]:
# Completed runs per (family, split): non-null 'datatype' rows divided by 13.
# NOTE(review): 13 is presumably the number of trials per run (each split holds 13 towers) — confirm.
rows_per_run = 13
complete_counts = (
    df_trial
    .groupby(['partitionFamily','splitNumber'])['datatype']
    .count()
    .div(rows_per_run)
    .reset_index()
)
complete_counts

Unnamed: 0,partitionFamily,splitNumber,datatype
0,0.0,0.0,6.0
1,0.0,1.0,5.0
2,0.0,2.0,5.0
3,0.0,3.0,5.0
4,0.0,4.0,5.0
5,0.0,5.0,5.0
6,0.0,6.0,5.0
7,0.0,7.0,5.0
8,0.0,8.0,5.0
9,0.0,9.0,5.0


In [128]:
# add a record in metadata for each additional time it needs to be run
# For every (family, split) with fewer than `n_expected` completed runs, append
# one top-up record per missing run to `extra_metadata`.

n_expected = 5

extra_metadata = []

for f in range(0, n_families):
    # group once per family instead of re-grouping the frame for every stimulus
    family_splits = silhouette_superset.groupby('family_'+str(f))

    for s in range(0,n_splits):

        # Completed-run count for this (family, split). A pair that is absent
        # from complete_counts has never been run, so it counts as zero instead
        # of raising KeyError on the lookup.
        completed_rows = complete_counts.loc[
            (complete_counts.partitionFamily == f) & (complete_counts.splitNumber == s),
            'datatype']
        n_completed = float(completed_rows.iloc[0]) if len(completed_rows) else 0.0

        if n_completed >= n_expected:
            continue

        split_towers = family_splits.get_group(s)
        stim_numbers = list(split_towers['tower_number_str'])
        # tower number -> group label, paired row by row within the split
        stim_groups = dict(zip(stim_numbers, split_towers['group']))

        # run_index counts up from the completed total; it is deliberately not
        # named `i`, which the old comprehension reused as its own loop variable
        run_index = n_completed

        while run_index < n_expected:

            run_index = run_index + 1

            extra_metadata.append(
                {
                    'partitionFamily': f,
                    'splitNumber': s,
                    'stimNumbers': list(stim_numbers),   # fresh copy per record
                    'ntrials': len(stim_numbers),
                    'stimGroups': dict(stim_groups),     # fresh copy per record
                    'numGames': 0,
                    'games': [],
                    'experimentType': 'prior_elicitation',
                    'experimentName': experiment_name,
                    's3_bucket': 'lax-tower-4-block-unique-silhouettes-json',
                    'versionInd': versionInd,
                    'extra_metadata_index': run_index
                })

In [129]:
# report how many top-up records were generated
n_extra_records = len(extra_metadata)
print(str(n_extra_records) + ' extra records to upload')

0 extra records to upload


In [115]:
# Delete metadata from db
# WARNING: the switch below is left enabled, so running this cell drops the whole
# `experiment_name` collection from the database.
really_run = True

if really_run:
    db.drop_collection(experiment_name)

In [117]:
## upload every top-up record to the collection, one insert per record
## NOTE: the switch is left enabled, so running this cell writes to the database
reallyRun = True
if not reallyRun:
    print('Did not insert any new data.')
else:
    for record in extra_metadata:
        coll.insert_one(record)
        print('Inserted version {} of stimDict.'.format(record['versionInd']))
        clear_output(wait=True)

Inserted version 0 of stimDict.


In [118]:
# Fetch and display every record currently stored in the collection.
list(coll.find())

[{'_id': ObjectId('61241d4480612a35577a218c'),
  'partitionFamily': 0,
  'splitNumber': 1,
  'stimNumbers': ['007',
   '009',
   '019',
   '023',
   '053',
   '061',
   '062',
   '072',
   '076',
   '084',
   '086',
   '106',
   '115'],
  'ntrials': 13,
  'stimGroups': {'007': 'neither',
   '009': 'neither',
   '019': 'wide',
   '023': 'wide',
   '053': 'neither',
   '061': 'neither',
   '062': 'neither',
   '072': 'neither',
   '076': 'neither',
   '084': 'neither',
   '086': 'neither',
   '106': 'neither',
   '115': 'tall'},
  'numGames': 0,
  'games': [],
  'experimentType': 'prior_elicitation',
  'experimentName': 'procedural',
  's3_bucket': 'lax-tower-4-block-unique-silhouettes-json',
  'versionInd': 0,
  'extra_metadata_index': 5.0},
 {'_id': ObjectId('61241d4480612a35577a218d'),
  'partitionFamily': 0,
  'splitNumber': 9,
  'stimNumbers': ['006',
   '022',
   '031',
   '032',
   '047',
   '048',
   '066',
   '089',
   '090',
   '094',
   '117',
   '121',
   '128'],
  'ntrials':

In [None]:
# Display the experiment name currently held in the kernel.
experiment_name

In [97]:
# metadata_pre_top_up = pd.DataFrame(coll.find())

# metadata_pre_top_up.to_csv('./metadata/first_batch_procedural.csv')

In [64]:
# Display the experiment name currently held in the kernel.
experiment_name

'ca_prior_elicitation_4_block_unique_silhouettes_categorization'