# Getting Sketch Partitions for Rating Study

## Fetch the sketch paths from the sketches folder

In [44]:
import os
import json
import pandas as pd
import numpy as np

In [108]:
# --- Paths -----------------------------------------------------------------
# All locations are resolved relative to the project root, two levels above
# the notebook's working directory.
proj_dir = os.path.abspath('../..')
exp_name = 'classify_iternum'
# NOTE(review): exp_dir points at <proj_dir>/classify_iternum, whereas the
# stimList output below lives under <proj_dir>/experiments/classify_iternum.
# exp_dir is not used in the cells visible here; confirm which is intended.
exp_dir = os.path.join(proj_dir, exp_name)
sketch_dir = os.path.abspath(os.path.join(proj_dir, 'sketches'))

# --- Sketch filenames --------------------------------------------------------
# os.listdir() returns entries in arbitrary, filesystem-dependent order. The
# partitioning step further down samples by position with a fixed seed, so the
# listing is sorted here to make the resulting partitions reproducible.
full_stim_paths = sorted(os.listdir(sketch_dir))
# Drop the macOS Finder metadata file; os.listdir() yields bare names, so a
# direct comparison is sufficient.
sketches = [fname for fname in full_stim_paths if fname != '.DS_Store']

# --- Output directory for the stimulus list ---------------------------------
stimListDir = os.path.join(proj_dir, 'experiments', exp_name, 'stimList')

# exist_ok=True makes this cell safe to re-run and avoids the check-then-create race.
os.makedirs(stimListDir, exist_ok=True)

## Assemble a dataframe from all the sketches

In [47]:
# Build one metadata record per sketch and construct the dataframe in a single
# call; appending row-by-row with .loc re-allocates the frame on every
# iteration and scales quadratically with the number of sketches.
#
# Filenames are split on '_' and the fields are read by position:
#   name[0] gameID | name[1] animal | name[2] cardinality | name[3] trial number
#   name[4] condition | name[7] stimulus version (with the file extension attached)
# name[5] and name[6] are not used here.
sketch_columns = ["GameId", "Animal", "Cardinality", "Trial", "Cond", "Version", "URL"]
sketch_records = []

for sketch in sketches:  # for every sketch filename
    name = sketch.split('_')  # split up its metadata

    # The cardinality stored in the filename is incremented by one
    # (presumably it is zero-indexed on disk -- TODO confirm).
    cardinality = str(int(name[2]) + 1)

    # URL of the sketch in the S3 bucket; name[7] keeps its extension here.
    stimurl = ("https://iternum-sketches.s3.amazonaws.com/"
               + '_'.join([name[0], name[1], cardinality, name[3], name[4], name[7]]))

    # The Version column drops the file extension from name[7].
    version = name[7].split('.')[0]

    sketch_records.append([name[0], name[1], cardinality, name[3], name[4], version, stimurl])

# Every value is kept as a string, as before.
sketch_info = pd.DataFrame(sketch_records, columns=sketch_columns)

sketch_info

Unnamed: 0,GameId,Animal,Cardinality,Trial,Cond,Version,URL
0,0074-988d4ee1-5766-47b5-bcbb-49a720aee30d,bear,1,22,number,057,https://iternum-sketches.s3.amazonaws.com/0074...
1,0074-988d4ee1-5766-47b5-bcbb-49a720aee30d,bear,2,31,number,051,https://iternum-sketches.s3.amazonaws.com/0074...
2,0074-988d4ee1-5766-47b5-bcbb-49a720aee30d,bear,3,25,number,062,https://iternum-sketches.s3.amazonaws.com/0074...
3,0074-988d4ee1-5766-47b5-bcbb-49a720aee30d,bear,4,14,number,009,https://iternum-sketches.s3.amazonaws.com/0074...
4,0074-988d4ee1-5766-47b5-bcbb-49a720aee30d,bear,5,6,number,073,https://iternum-sketches.s3.amazonaws.com/0074...
...,...,...,...,...,...,...,...
1947,9628-d7914c9d-2ee9-4706-8d26-93a3fa8008e3,rabbit,4,5,number,092,https://iternum-sketches.s3.amazonaws.com/9628...
1948,9628-d7914c9d-2ee9-4706-8d26-93a3fa8008e3,rabbit,5,16,number,059,https://iternum-sketches.s3.amazonaws.com/9628...
1949,9628-d7914c9d-2ee9-4706-8d26-93a3fa8008e3,rabbit,6,30,number,047,https://iternum-sketches.s3.amazonaws.com/9628...
1950,9628-d7914c9d-2ee9-4706-8d26-93a3fa8008e3,rabbit,7,21,number,081,https://iternum-sketches.s3.amazonaws.com/9628...


In [48]:
# Sanity check: display the URL built for the first sketch in the table.
sketch_info['URL'].iloc[0]

'https://iternum-sketches.s3.amazonaws.com/0074-988d4ee1-5766-47b5-bcbb-49a720aee30d_bear_1_22_number_057.png'

## Assemble the partitions

In [94]:
# Split the sketches into partitions ("paradigms"), one per rater, such that a
# rater sees no more than one sketch from each game. Sketches are drawn without
# replacement until none are left.
#
# Result: paradigms[batch][trial] -> metadata dict for one sketch, where both
# batch and trial are 1-based integers.

bag = sketch_info    # sketches still to be assigned; .drop() below returns a new frame, so sketch_info itself is not modified

games = bag.GameId.unique() # unique list of all the games
paradigm_fields = ["GameId", "Animal", "Cardinality", "Trial", "Cond", "Version", "URL"]
paradigms = {} # batch number -> paradigm dictionary

batch = 1
while len(bag) > 0: # sample from the bag without replacement

    paradigm = {} # trial number -> sketch metadata for this rater

    for game in games:
        game_sketches = bag[bag['GameId'] == game]

        # A game with fewer sketches than the others runs out before the bag is
        # empty. Sampling from an empty selection raises ValueError, so skip
        # the game; this partition then simply has one trial fewer.
        if game_sketches.empty:
            continue

        # Draw one sketch from this game (fixed seed for reproducibility) ...
        row = game_sketches.sample(n=1, replace=False, random_state=333)
        bag = bag.drop(index=row.index) # ... and remove it from the bag

        # Trials are numbered consecutively from 1. When no game has been
        # skipped this equals the game's position in `games` plus one.
        sketch = row.iloc[0]
        paradigm[len(paradigm) + 1] = {field: sketch[field] for field in paradigm_fields}

    # Every remaining sketch belongs to some game in `games`, so each pass
    # removes at least one sketch and the while-loop terminates.
    paradigms[batch] = paradigm
    batch += 1

num_partitions = len(paradigms)
# Expectation noted by the original author: 32 paradigms of 61 sketches, i.e. 32 raters.
print('We have {} unique partitions.'.format(num_partitions))

# print(paradigms[1].iloc[3,6])    # print one of the urls
    

We have 32 unique partitions.


## Convert dictionary of dictionaries to an array of JSON objects

In [71]:
# this_paradigm = paradigms[0]
# this_paradigm.iloc[0]['URL']
# this_paradigm.to_json(orient='records')

In [None]:
## Objective: to save out a stimList.js that contains a dictionary of dictionaries
## stimList = { {'versionID': 0, 'meta':{...}} , }
## "versionID" refers to the specific partition ID
## "meta" refers to the metadata corresponding to that partition, e.g., paradigms[0]

In [109]:
# Save the partitions to <stimListDir>/stimList.js.
filename = "stimList.js"
pathname = os.path.join(stimListDir, filename)
with open(pathname, 'w') as the_file:
    # The original line `str(paradigms)s` was a syntax error. str() of a dict
    # also produces a Python repr rather than JSON, so serialize with the json
    # module instead. Note that JSON object keys are always strings, so the
    # integer batch/trial keys are written as "1", "2", ...
    json.dump(paradigms, the_file)

In [None]:
# the following block of code is no longer relevant:

# for i in range(len(paradigms)):
#     paradigm = paradigms[i]
#     paradigm.to_csv(batchdir + '/batch_{}.csv'.format(str(i+1)))
    
    # had it saving to JSON files earlier, but I don't think it worked well
#     js = paradigm.to_json()
#     with open(batchdir + '/batch_{}.csv'.format(str(i+1)), 'w') as outfile:
#         json.dump(js, outfile)
    
    