In [1]:
from pycocotools.coco import COCO
import numpy as np
import skimage.io as io
import matplotlib.pyplot as plt
%matplotlib inline
import pathlib
import os
import random
import json

In [2]:
os.chdir("..")
from vqa import VQA

In [3]:
#Provide paths to the images
train_imgs_path = "/home/bartek/ETH/CS4NLP/project/train2014"
val_imgs_path = "/home/bartek/ETH/CS4NLP/project/val2014"
random.seed(2137)
pathlib.Path.cwd()

PosixPath('/home/bartek/ETH/CS4NLP/project/free-form-VQA/code')

## Training dataset

In [4]:
#Provide path to the local COCO API folder
dataDir = "/home/bartek/ETH/CS4NLP/project/coco/PythonAPI"
dataType='train2014'

In [5]:
annFile = '{}/annotations/captions_{}.json'.format(dataDir,dataType)
coco_caps=COCO(annFile)
img_ids = coco_caps.getImgIds()

loading annotations into memory...
Done (t=0.56s)
creating index...
index created!


In [6]:
coco_caps.loadImgs(img_ids[np.random.randint(0,len(img_ids))])[0]

{'license': 3,
 'file_name': 'COCO_train2014_000000347331.jpg',
 'coco_url': 'http://images.cocodataset.org/train2014/COCO_train2014_000000347331.jpg',
 'height': 426,
 'width': 640,
 'date_captured': '2013-11-20 21:26:32',
 'flickr_url': 'http://farm9.staticflickr.com/8475/8384601041_6917c9cbfa_z.jpg',
 'id': 347331}

In [7]:
image_ids_subsample = random.sample(img_ids,int(0.05*len(img_ids)))
len(image_ids_subsample)

4139

In [8]:
i=0
annIds = coco_caps.getAnnIds(imgIds=image_ids_subsample[i])
print(img_ids[0])
anns = coco_caps.loadAnns(annIds)
coco_caps.showAnns(anns)

57870
a sheep and two babies in the grass

A is sheep grazing and her lambs are resting.
A mother animal and her babies eating grass.
A sheep grazing next to her baby sheep in a green grass covered field.
Three lambs, one adult and two smalls ones on a green pasture.


In [9]:
vqa = VQA(annotation_file= "../data/v2_mscoco_train2014_annotations.json", question_file="../data/v2_OpenEnded_mscoco_train2014_questions.json")
vqa_val = VQA(annotation_file= "../data/v2_mscoco_val2014_annotations.json", question_file="../data/v2_OpenEnded_mscoco_val2014_questions.json")

q_ids_train = vqa.getImgIds()
q_ids_val = vqa_val.getImgIds()
q_ids_all = q_ids_train+q_ids_val

loading VQA annotations and questions into memory...
0:00:05.957736
creating index...
index created!
loading VQA annotations and questions into memory...
0:00:03.257980
creating index...
index created!


In [10]:
max(q_ids_all)

581929

In [11]:
questions_set = ["What can be seen in this image?", "What is in this image?", "What this image depicts?"]



q_id = max(q_ids_all)

annotations = []
questions = []

for i in range(len(image_ids_subsample)):
    question_idx = random.randint(0,2) 
    annIds = coco_caps.getAnnIds(imgIds=image_ids_subsample[i])
    anns = coco_caps.loadAnns(annIds)

    capt = [an["caption"] for an in anns]
    annot = {"question_type": "describe image","answers": capt,"image_id": image_ids_subsample[i],
             "answer_type": "caption", "question_id": q_id}
    annotations.append(annot)
    q_item = {"question_id" : q_id, "image_id": image_ids_subsample[i], 
              "question" : questions_set[question_idx]}
    questions.append(q_item)
    q_id+=1
    
next_qid = q_id+1

# annotations

In [12]:
with open("../data/caption_data/annotations_training.json", "w+") as f:
    json.dump({"annotations": annotations}, f )
    
with open("../data/caption_data/questions_training.json", "w+") as q:
    json.dump({"questions": questions}, q)


## Validation dataset

In [13]:
dataDir = "/home/bartek/ETH/CS4NLP/project/coco/PythonAPI"
dataType='val2014'

In [14]:
annFile = '{}/annotations/captions_{}.json'.format(dataDir,dataType)
coco_caps=COCO(annFile)
img_ids = coco_caps.getImgIds()
image_ids_subsample = random.sample(img_ids,int(0.05*len(img_ids)))
len(image_ids_subsample)

loading annotations into memory...
Done (t=0.26s)
creating index...
index created!


2025

In [15]:
question_ids = [299334,299335, 299336]
questions_set = ["What can be seen in this image?", "What is in this image?", "What this image depicts?"]

assert question_ids not in q_ids_all

annotations = []
questions = []

for i in range(len(image_ids_subsample)):
    question_idx = random.randint(0,2) 
    annIds = coco_caps.getAnnIds(imgIds=image_ids_subsample[i])
    anns = coco_caps.loadAnns(annIds)

    capt = [an["caption"] for an in anns]
    annot = {"question_type": "describe image","answers": capt,"image_id": image_ids_subsample[i],
             "answer_type": "caption", "question_id": next_qid}
    annotations.append(annot)
    q_item = {"question_id" : next_qid, "image_id": image_ids_subsample[i], 
              "question" : questions_set[question_idx]}
    questions.append(q_item)
    next_qid+=1

In [16]:
with open("../data/caption_data/annotations_validation.json", "w+") as f:
    json.dump({"annotations": annotations}, f )
    
with open("../data/caption_data/questions_validation.json", "w+") as q:
    json.dump({"questions": questions}, q)
