In [51]:
import os
import torch
import numpy as np
import json
import collections

In [22]:
# NOTE(review): this path ends in ".npy" yet the file is parsed as JSON text below —
# confirm it points at the intended annotations file.
original_vqa_link_train = "/private/home/sash/.cache/torch/mmf/data/datasets/vqa2/defaults/annotations/imdb_train2014_len_coco_50_pc.npy"

with open(original_vqa_link_train) as f:
    # Only the annotation records are kept (443757 per the original author's note).
    original_vqas = json.load(f)["annotations"]

In [71]:
# Bucket every annotation under the value of its "answer_type" field.
question_types_dict = collections.defaultdict(list)

for original_vqa in original_vqas:
    question_types_dict[original_vqa["answer_type"]].append(original_vqa)

In [74]:
# Lay the questions out contiguously by answer type: "other", then "yes/no", then "number".
sorted_vqa_questions = [
    *question_types_dict["other"],
    *question_types_dict["yes/no"],
    *question_types_dict['number'],
]
len(sorted_vqa_questions)

443757

In [115]:
original_vqa_question_link_train = "/private/home/sash/mmf/mmf/v2_OpenEnded_mscoco_train2014_questions.json"
with open(original_vqa_question_link_train) as f:
    # Only the list stored under "questions" is needed.
    original_vqa_questions = json.load(f)["questions"]

# Lookup table: question_id -> question text.
question_id_to_question_text = {
    question["question_id"]: question["question"]
    for question in original_vqa_questions
}

In [58]:
def stratify_sampling(x, n_samples, stratify):
    """Draw a stratified sample, without replacement, along the first dimension.

    Parameters
    ----------
    x: np.ndarray or torch.Tensor
        Array to sample from. Samples from the first dimension.

    n_samples: int
        Number of samples to draw.

    stratify: tuple of int
        Size of each subgroup. Subgroups are taken to be contiguous along the
        first dimension of `x`, in the order given. The sum of all the sizes
        needs to be equal to `x.shape[0]`.

    Returns
    -------
    np.ndarray or torch.Tensor
        The selected rows of `x`. Rows drawn per subgroup come first (in
        subgroup order); rows added to make up for rounding come last. No row
        index is selected more than once.

    Raises
    ------
    AssertionError
        If the subgroup sizes do not sum to `x.shape[0]`.
    ValueError
        Propagated from `np.random.choice` when more rows are requested than
        are available to draw from.
    """
    n_total = x.shape[0]
    assert sum(stratify) == n_total

    # Proportional allocation, rounded down; any shortfall is topped up below.
    n_strat_samples = [int(i * n_samples / n_total) for i in stratify]
    # cum_n_samples[i]:cum_n_samples[i + 1] is the index range of subgroup i.
    cum_n_samples = np.cumsum([0] + list(stratify))
    sampled_idcs = [
        np.random.choice(
            np.arange(cum_n_samples[i], cum_n_samples[i + 1]),
            replace=False,
            size=n_strat_sample,
        )
        for i, n_strat_sample in enumerate(n_strat_samples)
    ]

    # Rounding down can leave us short of n_samples.
    delta_n_samples = n_samples - sum(n_strat_samples)
    if delta_n_samples > 0:
        # Top up only from rows that were not picked yet, so that the result
        # never contains the same row twice.
        already_taken = np.zeros(n_total, dtype=bool)
        already_taken[np.concatenate(sampled_idcs)] = True
        sampled_idcs.append(
            np.random.choice(
                np.flatnonzero(~already_taken),
                replace=False,
                size=delta_n_samples,
            )
        )

    return x[np.concatenate(sampled_idcs), ...]

In [110]:
# NOTE(review): no random seed is set in the visible cells, so a re-run selects different questions.
samples = np.arange(len(sorted_vqa_questions))
n_samples = 30

# Stratum sizes, in the same order the questions were concatenated.
other_len = len(question_types_dict["other"])
yesno_len = len(question_types_dict["yes/no"])
number_len = len(question_types_dict['number'])
stratify = [other_len, yesno_len, number_len]
output_indexes = stratify_sampling(samples, n_samples, stratify)

# Count how many of the drawn indices fall into each stratum's index range.
num_selected_types = collections.defaultdict(int)
yesno_end = other_len + yesno_len
for output_index in output_indexes:
    if output_index < other_len:
        selected_type = "other"
    elif output_index < yesno_end:
        selected_type = "yes/no"
    else:
        selected_type = "number"
    num_selected_types[selected_type] += 1

print(f"Total Selected: {n_samples}")
print(f"Other Selected: {num_selected_types['other']}/{other_len}")
print(f"YesNo Selected: {num_selected_types['yes/no']}/{yesno_len}")
print(f"Numbe Selected: {num_selected_types['number']}/{number_len}")

# (question_id, image_id) pair for every drawn question.
selected_vqa_questions = []
for output_index in output_indexes:
    selected_question = sorted_vqa_questions[output_index]
    selected_vqa_questions.append(
        (selected_question["question_id"], selected_question["image_id"])
    )

Total Selected: 30
Other Selected: 15/219269
YesNo Selected: 12/166882
Numbe Selected: 3/57606


In [124]:
# Show each selected question with its text and image id, followed by two blank lines.
for question_id, image_id in selected_vqa_questions:
    print(f"question_id: {question_id}")
    question_text = question_id_to_question_text[question_id]
    print(f"text: {question_text}")
    print(f"image_id: {image_id}")
    print("\n")

question_id: 395456001
text: What are the spots on the floor?
image_id: 395456


question_id: 283809004
text: Which remote is the biggest?
image_id: 283809


question_id: 555586014
text: What color is the man's tie?
image_id: 555586


question_id: 109816002
text: What is the guy in black holding in his hand?
image_id: 109816


question_id: 474601003
text: What gender is the birthday person?
image_id: 474601


question_id: 360441000
text: What street sign is at the bottom?
image_id: 360441


question_id: 480890001
text: Where is the man staring?
image_id: 480890


question_id: 524866016
text: What brand is this phone?
image_id: 524866


question_id: 88527004
text: What type of tie is he wearing?
image_id: 88527


question_id: 462512002
text: Which way is the convertible turning?
image_id: 462512


question_id: 576809001
text: Where are the orange stripes?
image_id: 576809


question_id: 333848003
text: What breed is the dog?
image_id: 333848


question_id: 180098000
text: Is the bridge 

In [2]:
# To see sizes to predict cost:
import json

In [3]:
vqa_val_qs_path = "/private/home/sash/vqa2/v2_OpenEnded_mscoco_val2014_questions.json"
vqa_test_qs_path = "/private/home/sash/vqa2/v2_OpenEnded_mscoco_test2015_questions.json"
vqa_test_dev_qs_path = "/private/home/sash/vqa2/v2_OpenEnded_mscoco_test-dev2015_questions.json"


def _read_questions_json(path):
    """Open `path` and return its parsed JSON content."""
    with open(path) as f:
        return json.load(f)


vqa_val_qs = _read_questions_json(vqa_val_qs_path)
vqa_test_qs = _read_questions_json(vqa_test_qs_path)
vqa_test_dev_qs = _read_questions_json(vqa_test_dev_qs_path)

In [4]:
# Number of questions in each split.
for split_label, split_qs in (
    ("val", vqa_val_qs),
    ("test", vqa_test_qs),
    ("test-dev", vqa_test_dev_qs),
):
    print(f"{split_label} len: {len(split_qs['questions'])}")

val len: 214354
test len: 447793
test-dev len: 107394


In [15]:
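# Overlap between test and test-dev in terms of question_id: the set difference below has 447793 - 107394 = 340399 elements, which suggests every test-dev question_id also appears in test (i.e. test-dev is a subset of test, not disjoint from it).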


In [17]:
# NOTE(review): `test_qs_set` / `test_dev_qs_set` are not assigned in any cell visible here —
# presumably built in a removed cell; confirm before re-running.
# Number of elements of test_qs_set that are not in test_dev_qs_set.
len(test_qs_set - test_dev_qs_set)

340399

In [18]:
# "test len" minus "test-dev len", using the counts printed earlier in this notebook.
447_793 - 107_394

340399

In [8]:
# Check image height and image width
image_info_test2015_file = "/private/home/sash/dyna/image_info_test2015/image_info_test2015.json"
person_val2014_file = "/private/home/sash/dyna/annotations_trainval2014/person_keypoints_val2014.json"
person_train_file = "/private/home/sash/dyna/annotations_trainval2014/person_keypoints_train2014.json"

# Each file is read fully and then decoded from JSON.
with open(image_info_test2015_file) as f:
    image_info_test2015 = json.loads(f.read())

with open(person_val2014_file) as f:
    person_val2014 = json.loads(f.read())

with open(person_train_file) as f:
    person_train = json.loads(f.read())

In [13]:
# Keep only the per-image records stored under "images" in each loaded file.
# The isinstance guard makes this cell safe to re-run: a name that already
# holds the extracted list is left untouched instead of failing on list["images"].
if isinstance(image_info_test2015, dict):
    image_info_test2015 = image_info_test2015["images"]
if isinstance(person_val2014, dict):
    person_val2014 = person_val2014["images"]
if isinstance(person_train, dict):
    person_train = person_train["images"]

In [14]:
# Number of image records in the test, val and train lists, in that order.
tuple(len(images) for images in (image_info_test2015, person_val2014, person_train))

(81434, 40504, 82783)

In [15]:
# Distinct "coco_url" values per split (despite the names, these sets hold URLs, not ids).
image_ids_from_image_info_test2015 = {image["coco_url"] for image in image_info_test2015}
image_ids_from_person_val2014 = {image["coco_url"] for image in person_val2014}
image_ids_from_person_train = {image["coco_url"] for image in person_train}

In [40]:
# All image records in one list: val first, then train, then test.
total_images_from_coco = [*person_val2014, *person_train, *image_info_test2015]
def data_from_image(image):
    """Return a new dict holding only the "coco_url", "height" and "width" entries of `image`."""
    kept_fields = ("coco_url", "height", "width")
    return {field: image[field] for field in kept_fields}


# Reduce every image record of each split to its url/height/width triple.
person_val2014_images = [data_from_image(image) for image in person_val2014]

person_train_images = [data_from_image(image) for image in person_train]

image_info_test2015_images = [data_from_image(image) for image in image_info_test2015]

In [43]:
# Write one JSON file per split, each wrapping the image list under the split's name.
for out_path, split_key, split_images in (
    ("train.json", "train", person_train_images),
    ("val.json", "val", person_val2014_images),
    ("test.json", "test", image_info_test2015_images),
):
    with open(out_path, "w") as f:
        json.dump({split_key: split_images}, f)

In [30]:
# NOTE(review): `max_aspect_ratio` is not assigned in any cell visible here — presumably computed in a removed cell; confirm.
max_aspect_ratio

4.102564102564102

In [35]:
# NOTE(review): `min_width` and `height` are not assigned in any cell visible here — presumably computed in a removed cell; confirm.
min_width, height

(59, 72)

In [36]:
# NOTE(review): `min_height` and `width` are not assigned in any cell visible here — presumably computed in a removed cell; confirm.
min_height, width

(51, 72)

In [38]:
# NOTE(review): `min_height_url` and `min_width_url` are not assigned in any cell visible here — presumably computed in a removed cell; confirm.
min_height_url, min_width_url

('http://images.cocodataset.org/train2014/COCO_train2014_000000187714.jpg',
 'http://images.cocodataset.org/train2014/COCO_train2014_000000363747.jpg')