# Getting Sketch Partitions for Rating Study

## Fetch the sketch paths from the `../sketches` folder

In [1]:
import os
import json
import pandas as pd
import numpy as np


In [2]:
# Directory whose entries are treated as the sketches to partition.
sketchpath = os.path.abspath('../sketches')
# os.listdir() returns entries in arbitrary, platform-dependent order; sorting keeps
# the row order of everything built from this list stable across runs and machines.
sketches = sorted(os.listdir(sketchpath))

# Output directory for the batch files.
batchdir = os.path.abspath('batches')

# exist_ok=True creates the directory when it is missing and is a no-op otherwise,
# avoiding the check-then-create race of a separate os.path.exists() test.
os.makedirs(batchdir, exist_ok=True)

## Assemble a dataframe from all the sketches

In [3]:
# Build one metadata row per sketch filename.
# A filename is split on '•'; fields 0, 2, 3 and 4 are read directly, and field 4 is
# further split on '_' into animal, cardinality and version (with the file extension).
sketch_columns = ["GameId","Animal","Cardinality","Trial","Cond","Version","URL"]

# Collect the rows in a plain list and build the DataFrame once at the end: growing a
# DataFrame one row at a time re-allocates it on every insertion.
sketch_records = []
for filename in sketches: # for every sketch
    name = filename.split('•') # split up its metadata
    stimID = name[4].split('_') # ... by multiple delimiters
    # The second field of the URL is rebuilt from the animal and (cardinality - 1);
    # every other field is copied from the filename unchanged.
    stimurl = "https://iternum-sketches.s3.amazonaws.com/" + name[0] + '•' + stimID[0] + '_' + str(int(stimID[1])-1) + '•' + name[2] + '•' + name[3] + '•' + name[4]
    # All values are kept as strings (e.g. Version keeps its leading zeros).
    sketch_records.append([name[0], stimID[0], stimID[1], name[2], name[3], stimID[2].split('.')[0], stimurl])

sketch_info = pd.DataFrame(sketch_records, columns=sketch_columns)

sketch_info

Unnamed: 0,GameId,Animal,Cardinality,Trial,Cond,Version,URL
0,0074-988d4ee1-5766-47b5-bcbb-49a720aee30d,bear,1,22,number,057,https://iternum-sketches.s3.amazonaws.com/0074...
1,0074-988d4ee1-5766-47b5-bcbb-49a720aee30d,bear,2,31,number,051,https://iternum-sketches.s3.amazonaws.com/0074...
2,0074-988d4ee1-5766-47b5-bcbb-49a720aee30d,bear,3,25,number,062,https://iternum-sketches.s3.amazonaws.com/0074...
3,0074-988d4ee1-5766-47b5-bcbb-49a720aee30d,bear,4,14,number,009,https://iternum-sketches.s3.amazonaws.com/0074...
4,0074-988d4ee1-5766-47b5-bcbb-49a720aee30d,bear,5,6,number,073,https://iternum-sketches.s3.amazonaws.com/0074...
...,...,...,...,...,...,...,...
1915,9628-d7914c9d-2ee9-4706-8d26-93a3fa8008e3,rabbit,4,5,number,092,https://iternum-sketches.s3.amazonaws.com/9628...
1916,9628-d7914c9d-2ee9-4706-8d26-93a3fa8008e3,rabbit,5,16,number,059,https://iternum-sketches.s3.amazonaws.com/9628...
1917,9628-d7914c9d-2ee9-4706-8d26-93a3fa8008e3,rabbit,6,30,number,047,https://iternum-sketches.s3.amazonaws.com/9628...
1918,9628-d7914c9d-2ee9-4706-8d26-93a3fa8008e3,rabbit,7,21,number,081,https://iternum-sketches.s3.amazonaws.com/9628...


## Assemble the partitions

In [4]:
# Partition the sketches into per-rater batches ("paradigms"): each batch takes at most
# one randomly chosen sketch from every game, and every sketch ends up in exactly one batch.

# Fixed seed so that the same partitions are produced on every Restart & Run All.
PARTITION_SEED = 0
rng = np.random.RandomState(PARTITION_SEED)

bag = sketch_info    # the bag of sketches to sample, because sampling without replacement
                     # (drop() below returns a new frame, so sketch_info itself is never modified)

games = bag.GameId.unique() # we want this to be a unique list of all the games
paradigms = [] # this will be a list of dataframes, each dataframe containing the sketches to be rated by a rater

batch = 1
while len(bag) > 0: # sample from the bag without replacement
    picks = [] # the single-row frames chosen for this rater

    for game in games: # we want each rater to see [no more than] one sketch from each game
        candidates = bag[bag['GameId'] == game]
        if candidates.empty:
            # This game has no sketches left (games can differ in size); sampling from an
            # empty frame would raise, so this rater simply gets no sketch from it.
            continue
        row = candidates.sample(n=1, replace=False, random_state=rng) # sample a sketch at random from the game
        bag = bag.drop(index = row.index) # remove it from the bag
        picks.append(row) # and tack on that sketch to this rater's paradigm

    if not picks:
        # Nothing could be drawn although the bag is not empty (e.g. rows whose GameId
        # matches no entry of `games`); stop instead of looping forever.
        break

    # DataFrame.append was removed in pandas 2.0; concatenate the picked rows once instead.
    paradigm = pd.concat(picks)
    paradigms.append(paradigm) # when a paradigm is assembled, put it into the list
    batch += 1

# Every sketch must have been assigned to exactly one paradigm.
assert sum(len(p) for p in paradigms) == len(sketch_info), "some sketches were not assigned to a batch"

print(len(paradigms[5])) # Should be 32 paradigms of 60 sketches; each rater sees one per game, requiring 32 raters


# print(paradigms[1].iloc[3,6])    # print one of the urls
    

60


## And save each partition out to its own JSON file (one file per batch)

In [6]:
# Write every partition to its own file in batchdir: batch_1.txt, batch_2.txt, ...
# NOTE(review): DataFrame.to_json() already returns a JSON string, so json.dump()
# encodes it a second time and each file holds a quoted, escaped JSON string rather
# than a JSON object. Left as-is because the consumer of these files is not visible
# here — confirm whether it expects the double encoding.
for batch_number, partition in enumerate(paradigms, start=1):
    batch_file = batchdir + '/batch_{}.txt'.format(str(batch_number))
    with open(batch_file, 'w') as outfile:
        json.dump(partition.to_json(), outfile)
    
    