### VisDialPool 0.9 from VisDial 0.9
The following script reads the validation split of the VisDial 0.9 dataset and constructs a new dataset, which we henceforth call VisDialPool 0.9. The new dataset partitions the images of the input dataset into fixed-size pools of (in this case) randomly picked images, meant to be used in the process of fine-tuning the VisDial bots. A variety of other pools can and will be constructed as well, ranging from pools of easy-to-distinguish images to pools of highly similar images.

In [None]:
import json
import numpy as np
## Parse the VisDial 0.9 validation JSON into the in-memory dict `data`
val_json_path = "../../Data/VisDial0.9/visdial_0.9_val.json"
with open(val_json_path, "r") as val_file:
    data = json.load(val_file)

In [26]:
# Total number of dialogs in the loaded JSON (one image ID is read from each
# dialog further down).
num_dialogs = len(data['data']['dialogs'])
# Call-style print matches the other cells of this notebook and produces the
# same output on Python 2 while also being valid Python 3.
print(num_dialogs)

40504


In [27]:
# COCO image ID of every dialog, in the order the dialogs appear in the JSON.
# Iterating over the dialogs directly (instead of indexing with a counter named
# `iter`) avoids shadowing the builtin `iter` for the rest of the session.
coco_img_ids = [dialog['image_id'] for dialog in data['data']['dialogs']]

In [28]:
# Sanity check: one image ID should have been collected per dialog.
# Call-style print matches the other cells and is valid on Python 2 and 3.
print(len(coco_img_ids))

40504


In [29]:
# Shuffle the list of image IDs in place; the list object itself is reordered,
# nothing is returned.
# NOTE(review): no seed is set in the cells shown here, so presumably every run
# produces a different ordering (and therefore different pools) -- confirm
# whether reproducibility is required.
np.random.shuffle(coco_img_ids) # Randomly shuffle the COCO image IDs

In [30]:
pool_size = 100 # The size of each pool

# Number of complete random pools. Floor division keeps this an integer on both
# Python 2 and Python 3; a plain `/` would yield a float on Python 3 and break
# the loops that use num_pools as a range bound.
num_pools = len(coco_img_ids) // pool_size

# The last len(coco_img_ids) % pool_size shuffled IDs cannot fill a whole pool.
# [trim_start, trim_end) is the index range of those leftover IDs; they (and
# their dialogs) need to be trimmed off.
trim_end = len(coco_img_ids)
trim_start = trim_end - trim_end % pool_size

print(trim_start)
print(trim_end)


40500
40504


In [31]:
# Leftover image IDs at the tail of the shuffled list that belong to no pool
trim_img_ids = list(coco_img_ids[trim_start:trim_end])

pools = {}      # pool ID -> list of the image IDs in that pool
img_pools = {}  # image ID -> ID of the pool that contains it

# Consecutive, non-overlapping slices of the shuffled ID list form the pools
for pool_id in xrange(num_pools):
    first = pool_id * pool_size
    members = list(coco_img_ids[first:first + pool_size])
    for member in members:
        img_pools[member] = pool_id
    pools[pool_id] = members


In [32]:
# Report, one per line: number of pools, number of pooled images, and the
# leftover image IDs that were not assigned to any pool
for summary in (len(pools), len(img_pools), trim_img_ids):
    print(summary)

405
40500
[132578, 49445, 396404, 376959]


In [33]:
# Keep every pool in one global list, ordered by pool ID, so that a dialog can
# refer to its pool simply by list index
ordered_pools = []
for pool_id in xrange(num_pools):
    ordered_pools.append(pools[pool_id])
data['data']['pools'] = ordered_pools

In [35]:
# Tag every dialog with the ID of the pool its image belongs to, and collect
# the list positions of the dialogs whose image belongs to no pool.
# The dialogs are iterated directly with enumerate(): this avoids a counter
# named `iter` (which would shadow the builtin) and does not rely on
# num_dialogs still matching the current length of the dialog list.
del_indices = []
for dialog_index, dialog in enumerate(data['data']['dialogs']):
    img_id = dialog['image_id']
    if img_id in img_pools:
        dialog['pool_id'] = img_pools[img_id]
        continue
    # An un-pooled image is only expected if it is one of the trimmed leftovers
    if img_id not in trim_img_ids:
        print("INVESTIGATE! Something is seriously wrong with the dataset! Img ID = " + str(img_id))
    # No pool exists with this image, need to throw this dialog from the JSON
    del_indices.append(dialog_index)

print(del_indices)

[1985, 12049, 18338, 27456]


In [36]:
## Delete dialogs corresponding to images that are no longer to be considered
# Delete from the highest index down: removing an element shifts every later
# element one position to the left, so deleting in ascending order would make
# all but the first deletion hit the wrong dialog (a pooled one), while the
# intended un-pooled dialogs would stay in the data.
for index in sorted(del_indices, reverse=True):
    del data['data']['dialogs'][index]

In [37]:
## Attach extra metadata describing how the pools were built
data['type'] = "Val_RandomShufflePool"
data['data']['pool_size'] = pool_size

## Serialise the augmented dataset next to the input file
out_json_path = "../../Data/VisDial0.9/visdialpool_0.9_val_randomshuffle.json"
with open(out_json_path, "w") as out_file:
    json.dump(data, out_file)