# Getting Sketch Partitions for Rating Study

## Fetch the sketch paths from the `sketches` folder

In [None]:
import os
import json
import pandas as pd
import numpy as np

In [None]:
# Resolve the project layout relative to this notebook's working directory.
proj_dir = os.path.abspath('..')
exp_name = 'rate_iternum'
exp_dir = os.path.join(proj_dir,exp_name)
sketch_dir = os.path.abspath(os.path.join(proj_dir,'sketches'))

# List every sketch file in that directory.
# os.listdir returns entries in arbitrary order and includes hidden entries
# (e.g. '.DS_Store'), so skip dotfiles and sort to get a reproducible listing
# that only contains names the filename parser can be expected to handle.
sketches = sorted(
    entry for entry in os.listdir(sketch_dir) if not entry.startswith('.')
)

# batchdir = os.path.abspath('../experiments/rate_iternum/batches')

# if not os.path.exists(batchdir):
#     os.makedirs(batchdir)

## Assemble a dataframe from all the sketches

In [None]:
# Build one metadata row per sketch filename, then create the dataframe once
# (growing a DataFrame row by row with .loc re-allocates on every insert).
sketch_rows = []

for fname in sketches: # for every sketch
    # Split off only the first four '_'-separated fields so that the fifth
    # field keeps its own underscores. The previous unbounded split('_') left
    # name[4] without any '_', so stimID had a single element and stimID[1]
    # always raised IndexError.
    # NOTE(review): this assumes filenames look like
    #   <game>_<unused>_<trial>_<cond>_<animal>_<cardinality>_<version>.<ext>
    # -- confirm against the actual files in the sketches folder.
    name = fname.split('_', 4)
    stimID = name[4].split('_') if len(name) == 5 else []
    if len(stimID) < 3:
        # Fail with the offending filename instead of a bare IndexError.
        raise ValueError('Unexpected sketch filename: {!r}'.format(fname))

    # The URL uses the cardinality minus one in its second field.
    stimurl = ("https://iternum-sketches.s3.amazonaws.com/" + name[0] + '+'
               + stimID[0] + '_' + str(int(stimID[1])-1) + '+'
               + name[2] + '+' + name[3] + '+' + name[4])

    # All values are kept as strings, in the same column order as below.
    sketch_rows.append([name[0],stimID[0],stimID[1],name[2],name[3],stimID[2].split('.')[0],stimurl])

sketch_info = pd.DataFrame(sketch_rows, columns = ["GameId","Animal","Cardinality","Trial","Cond","Version","URL"])

sketch_info

In [None]:
# Show the URL assembled for the first sketch as a quick sanity check
sketch_info['URL'].iloc[0]

## Assemble the partitions

In [None]:
# Partition the sketches into per-rater sets ("paradigms").
# Each paradigm draws at most one sketch from each game, sampling without
# replacement until every sketch has been assigned to exactly one paradigm.

bag = sketch_info    # the bag of sketches still to be assigned; drop() returns a new frame, so sketch_info is not modified

games = bag.GameId.unique() # unique list of all the games
paradigms = [] # list of dataframes, each holding the sketches to be rated by one rater

batch = 1
while len(bag) > 0: # keep building paradigms until the bag is empty
    picks = [] # one single-row frame per game sampled for this paradigm

    for game in games: # each rater sees [no more than] one sketch from each game
        candidates = bag[bag['GameId']==game]
        if candidates.empty:
            # This game has no sketches left; sampling from an empty frame
            # would raise, so this paradigm simply gets nothing from it.
            continue
        row = candidates.sample(n=1,replace=False) # sample a sketch at random from the game
        bag = bag.drop(index = row.index) # remove it from the bag
        picks.append(row)

    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    paradigm = pd.concat(picks).reset_index(drop=True)
    paradigms.append(paradigm) # when a paradigm is assembled, put it into the list
    batch += 1

# Count the paradigms themselves; len(paradigms[0]) would be the number of
# sketches inside the first paradigm rather than the number of partitions.
num_partitions = len(paradigms)
print('We have {} unique partitions.'.format(num_partitions)) # original author's expectation: 32 paradigms of 60 sketches, i.e. 32 raters

# print(paradigms[1].iloc[3,6])    # print one of the urls
    

## Convert list of dataframes to an array of JSON objects

In [None]:
# Pull out the first partition from the paradigms list to inspect it
this_paradigm = paradigms[0]

In [None]:
# Show the URL of the first sketch in this partition
this_paradigm['URL'].iloc[0]

In [None]:
#this_paradigm.to_json(orient='records')

In [None]:
## Objective: save out a stimList.js that contains a dictionary of dictionaries
## stimList = {{'versionID': 0, 'meta':{...}}}
## "versionID" refers to the specific partition ID
## "meta" refers to the metadata corresponding to that partition, e.g., paradigms[0]

In [None]:
# for i in range(len(paradigms)):
#     paradigm = paradigms[i]
#     paradigm.to_csv(batchdir + '/batch_{}.csv'.format(str(i+1)))
    
    # had it saving to JSON earlier, but I don't think it worked well
#     js = paradigm.to_json()
#     with open(batchdir + '/batch_{}.csv'.format(str(i+1)), 'w') as outfile:
#         json.dump(js, outfile)
    
    