# Getting Sketch Partitions for Rating Study

## Fetch the sketch paths from the sketches folder

In [None]:
import os
import json
import pandas as pd
import numpy as np
from IPython.display import clear_output


In [None]:
# Resolve the project locations relative to the notebook's working directory.
proj_dir = os.path.abspath('../..')
exp_name = 'classify_iternum'
exp_dir = os.path.join(proj_dir,exp_name)
sketch_dir = os.path.abspath(os.path.join(proj_dir,'sketches'))

# os.listdir returns entries in arbitrary (filesystem-dependent) order. Sort them so
# the dataframe row order -- and with it the seeded sampling done when the partitions
# are assembled -- is the same on every machine.
full_stim_paths = sorted(os.listdir(sketch_dir)) # list out all the sketches in that directory
# os.listdir yields bare file names, so a direct comparison is enough to skip the
# '.DS_Store' entry.
sketches = [fname for fname in full_stim_paths if fname != '.DS_Store']

stimListDir = os.path.abspath('../../experiments/classify_iternum/stimList')

# exist_ok=True replaces the check-then-create pattern and is safe on re-runs.
os.makedirs(stimListDir, exist_ok=True)

## Assemble a dataframe from all the sketches

In [None]:
# Build one metadata row per sketch file and assemble the dataframe in a single step.
# (Appending row by row with .loc re-allocates the frame on every insert.)
sketch_columns = ["orig_gameid","orig_animal","orig_cardinality","orig_trial","orig_cond","orig_version","sketch_url"]

sketch_rows = []
for filename in sketches: # for every sketch
    # file names are underscore-delimited:
    #   gameID _ animal _ cardinality _ trialnum _ condition _ stim_version(+ file extension)
    parts = filename.split('_')
    game_id, animal, trial_num, condition, version_file = parts[0], parts[1], parts[3], parts[4], parts[5]

    # the cardinality recorded in the table and in the URL is the file-name value plus one
    # (presumably the file names are zero-based -- confirm against the sketch export)
    cardinality = str(int(parts[2]) + 1)

    # the URL keeps the last field as-is (including the extension) ...
    stimurl = "https://iternum-sketches.s3.amazonaws.com/" + '_'.join(
        [game_id, animal, cardinality, trial_num, condition, version_file])

    # ... while the orig_version column keeps only the text before the first '.'
    version = version_file.split('.')[0]

    sketch_rows.append([game_id, animal, cardinality, trial_num, condition, version, stimurl])

sketch_info = pd.DataFrame(sketch_rows, columns = sketch_columns)

sketch_info

In [None]:
# Spot-check: display the URL that was built for the first sketch in the table
sketch_info['sketch_url'].iloc[0]

## Assemble the partitions

In [None]:
# Split the sketches into partitions. Each pass over the games draws at most one
# sketch per game; the resulting trial list is stored twice, once for raters who
# classify number and once for raters who classify shape.
bag = sketch_info    # the bag of sketches to sample; drawn rows are dropped, so sampling is without replacement

games = bag.orig_gameid.unique() # unique list of all the games
paradigms = [] # list of paradigm dicts; each holds the sketches to be rated by one rater

# fields copied from the sampled dataframe row into each trial dict
trial_fields = ["orig_gameid","orig_animal","orig_cardinality","orig_trial","orig_cond","orig_version","sketch_url"]

batch = 0
while len(bag) > 0: # sample from the bag without replacement

    # initialize the two paradigms that share this partition's sketches
    number_paradigm = {'versionID':batch,        # which partition is it (set of sketches) ?
                       'classify_condition':'number',  # what feature will mturk recognizers be asked to classify?
                       'games':[],               # empty list to be filled with classification games as they happen
                       'meta':[]}                # the whole [unordered] trial list goes in this 'meta' structure
    shape_paradigm = {'versionID':batch+1,        # which partition is it (set of sketches) ?
                       'classify_condition':'shape',  # what feature will mturk recognizers be asked to classify?
                       'games':[],               # empty list to be filled with classification games as they happen
                       'meta':[]}                # the whole [unordered] trial list goes in this 'meta' structure

    for game in games: # we want each rater to see [no more than] one sketch from each game
        candidates = bag[bag['orig_gameid']==game]
        if candidates.empty:
            # this game has no sketches left (games may contribute unequal numbers of
            # sketches); sampling from an empty frame would raise, so skip it
            continue

        # the fixed seed keeps the draw deterministic for a given row order
        row = candidates.sample(n=1,replace=False,random_state=333) # sample a sketch from the game
        bag = bag.drop(index = row.index) # remove it from the bag

        picked = row.iloc[0]
        trial = {field: picked[field] for field in trial_fields} # 1 game per trial

        number_paradigm['meta'].append(trial)
        # store an independent copy so the two paradigms never alias the same dict
        shape_paradigm['meta'].append(dict(trial))

    # when a partition is assembled, put both versions into the list:
    paradigms.append(number_paradigm) # first the version for people classifying number
    paradigms.append(shape_paradigm) # then the same trial list with the classification goal set to shape

    batch += 2

num_partitions = len(paradigms)
print('We have {} unique partitions.'.format(num_partitions)) # Should be 32*2=64 paradigms of 61 sketches; each rater sees one per game, requiring 64 raters

# print(paradigms[1]['meta'][3]['sketch_url'])    # print one of the urls
    

In [None]:
# for i in paradigms:
#     print(i['versionID'], i['classify_condition'])

## Put that datastructure into Mongo

In [None]:
# set vars
# auth.txt contains the password for the sketchloop user; dtype=str keeps an
# all-digit password from being parsed as a number (which would break the
# string formatting below)
auth = pd.read_csv('auth.txt', header = None, dtype = str)
pswd = auth.values[0][0]
user = 'sketchloop'
host = 'cogtoolslab.org'

import pymongo as pm
import socket
from urllib.parse import quote_plus

# Credentials embedded in a MongoDB URI must be percent-escaped, otherwise a
# password containing characters such as '@', ':', '/' or '%' yields an invalid URI.
# NOTE(review): the client connects to 127.0.0.1:27017 rather than `host` --
# presumably through an SSH tunnel to that server; confirm.
conn = pm.MongoClient('mongodb://{}:{}@127.0.0.1:27017'.format(quote_plus(user), quote_plus(pswd)))
db = conn['stimuli']
coll = db['iternum_classification']


In [None]:
## now really insert data
# Safety switch: leave False unless the paradigms should actually be written to the collection.
reallyRun = False
if reallyRun:
    total = len(paradigms)
    for position, paradigm in enumerate(paradigms, start=1):
        print ('%d of %d uploaded ...' % (position, total))
        clear_output(wait=True) # the progress line is replaced by the next output
        coll.insert_one(paradigm)
    # only report completion when an upload actually happened
    print('Done!')
else:
    print('Upload skipped: set reallyRun = True to insert the paradigms.')


In [None]:
# coll.find_one()

## No longer relevant: Convert dictionary of dictionaries to an array of JSON objects

In [None]:
# this_paradigm = paradigms[0]
# this_paradigm.iloc[0]['URL']
# this_paradigm.to_json(orient='records')

## Objective: to save out a stimList.js that contains a dictionary of dictionaries
## stimList = { {'versionID': 0, 'meta':{...}} , }
## "versionID" refers to the specific partition ID
## "meta" refers to the metadata corresponding to that partition, e.g., paradigms[0]

# filename = "stimList.js"
# pathname = os.path.join(stimListDir,filename)
# with open(pathname, 'w') as the_file:
#     the_file.write(str(paradigms))

In [None]:
# the following block of code is no longer relevant:

# for i in range(len(paradigms)):
#     paradigm = paradigms[i]
#     paradigm.to_csv(batchdir + '/batch_{}.csv'.format(str(i+1)))
    
    # had it saving to JSON earlier, but I don't think it worked well
#     js = paradigm.to_json()
#     with open(batchdir + '/batch_{}.csv'.format(str(i+1)), 'w') as outfile:
#         json.dump(js, outfile)
    
    