Skip to content
This repository has been archived by the owner on Nov 3, 2023. It is now read-only.

Add VQA V2.0 and Visual Dialog V0.9. #54

Merged
merged 9 commits into from
May 12, 2017
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions parlai/tasks/visdial/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# of patent rights can be found in the PATENTS file in the same directory.

from parlai.core.dialog_teacher import DialogTeacher
from .build import build
from .build import build, buildImage

from PIL import Image
import json
Expand All @@ -14,6 +14,7 @@

def _path(opt):
build(opt)
buildImage(opt)
dt = opt['datatype'].split(':')[0]

if dt == 'train':
Expand All @@ -28,7 +29,7 @@ def _path(opt):
data_path = os.path.join(opt['datapath'], 'VisDial-v0.9',
'visdial_0.9_' + suffix + '.json')

image_path = os.path.join(opt['download_path'], img_suffix)
image_path = os.path.join(opt['datapath'], 'COCO-IMG', img_suffix)

return data_path, image_path

Expand All @@ -50,7 +51,7 @@ class DefaultTeacher(DialogTeacher):
def __init__(self, opt, shared=None):

self.datatype = opt['datatype']
data_path, image_path = _path(opt)
data_path, self.image_path = _path(opt)
opt['datafile'] = data_path
self.id = 'visdial'

Expand All @@ -66,8 +67,10 @@ def setup_data(self, path):

for dialog in self.visdial['data']['dialogs']:
# for each dialog
image_id = dialog['dialog']
image_id = dialog['image_id']
caption = dialog['caption']
img_path = self.image_path + '%012d.jpg' % (image_id)

episode_done = False
for i, qa in enumerate(dialog['dialog']):
if i == len(dialog['dialog']):
Expand All @@ -80,4 +83,4 @@ def setup_data(self, path):
answer_options.append(self.answers[ans_id])
#answer_options = qa['answer_options']
gt_index = qa['gt_index']
yield (question, answer, 'None', answer_options), True
yield (question, answer, 'None', answer_options, img_path), True
27 changes: 27 additions & 0 deletions parlai/tasks/visdial/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,33 @@
import os


def buildImage(opt):
    """Download and unpack the COCO-2014 images into ``<datapath>/COCO-IMG``.

    The directory is stamped via ``build_data.mark_done`` so the (multi-GB)
    download only runs once per data directory, even when several tasks
    (VisDial, VQA) share the same images.
    """
    dpath = os.path.join(opt['datapath'], 'COCO-IMG')

    if not build_data.built(dpath):
        print('[building image data: ' + dpath + ']')
        # Start from a clean directory so an interrupted previous build
        # cannot leave stale partial archives behind.
        build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download and extract each COCO-2014 image split.
        url = 'http://msvocds.blob.core.windows.net/coco2014/'
        for fname in ('train2014.zip', 'val2014.zip', 'test2014.zip'):
            build_data.download(dpath, url + fname)
            build_data.untar(dpath, fname, False)

        # Mark the data as built so later calls are a no-op.
        build_data.mark_done(dpath)


def build(opt):
dpath = os.path.join(opt['datapath'], 'VisDial-v0.9')

Expand Down
77 changes: 61 additions & 16 deletions parlai/tasks/vqa_coco2014_v2/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
import json
import random
import os
import pdb

def _path(opt):
build(opt)
buildImage(opt)
dt = opt['datatype'].split(':')[0]

if dt == 'train':
Expand All @@ -35,45 +37,74 @@ def _path(opt):
annotation_path = os.path.join(opt['datapath'], 'VQA-COCO2014-v2',
annotation_suffix + '_annotations.json')

image_path = os.path.join(opt['download_path'], img_suffix)
image_path = os.path.join(opt['datapath'], 'COCO-IMG', img_suffix)

return data_path, annotation_path, image_path


def _image_loader(path):
def _image_loader(opt, path):
"""
Loads the appropriate image from the image_id and returns PIL Image format.
"""
return Image.open(path).convert('RGB')
if not opt.get('no_images', False):
return Image.open(path).convert('RGB')
else:
return None


class OeTeacher(Teacher):
"""
    VQA v2.0 Open-Ended teacher, which loads the json vqa data and implements
    its own `act` method for interacting with the student agent.
"""
def __init__(self, opt, shared=None):
    """Set up VQA v2.0 open-ended data, re-using json loaded by other copies."""
    super().__init__(opt)
    self.datatype = opt['datatype']
    data_path, annotation_path, self.image_path = _path(opt)

    if shared and 'ques' in shared:
        # Re-use the (large) parsed json already loaded by another instance.
        self.ques = shared['ques']
        if 'annotation' in shared:
            self.annotation = shared['annotation']
    else:
        self._setup_data(data_path, annotation_path)

    # for ordered data in batch mode (especially, for validation and
    # testing), each teacher in the batch gets a start index and a step
    # size so they all process disparate sets of the data
    self.step_size = opt.get('batchsize', 1)
    self.data_offset = opt.get('batchindex', 0)

    self.reset()

def __len__(self):
    # Number of question examples; set by _setup_data() from the loaded json.
    return self.len

# return state/action dict based upon passed state
def reset(self):
    # Reset the dialog so that it is at the start of the epoch,
    # and all metrics are reset.
    super().reset()
    self.lastY = None
    # Start one step before our batch offset so the first act() increment
    # lands exactly on this teacher's first assigned example.
    self.episode_idx = self.data_offset - self.step_size

def observe(self, observation):
    """Process observation for metrics.

    If labels were sent out on the last act(), score the student's reply
    against them, then clear them so they are only counted once.
    """
    if self.lastY is not None:
        # Return value of metrics.update is not needed here; the metrics
        # object accumulates state internally (unused `loss` local removed).
        self.metrics.update(observation, self.lastY)
        self.lastY = None
    return observation

def act(self):
if self.datatype == 'train':
self.episode_idx = random.randrange(self.len)
else:
self.episode_idx = (self.episode_idx + 1) % self.len
# always showing the same index now.

qa = self.ques['questions'][self.episode_idx]
question = qa['question']
image_id = qa['image_id']
# question_id = qa['question_id']

if self.datatype != 'test':
anno = self.annotation['annotations'][self.episode_idx]
Expand All @@ -83,14 +114,29 @@ def act(self):

img_path = self.image_path + '%012d.jpg' % (image_id)

return {
'image': _image_loader(img_path),
action = {
'image': _image_loader(self.opt, img_path),
'text': question,
'labels': answers,
'episode_done': True
}

def _setup_data(self, data_path, annotation_path, image_path):
if not self.datatype.startswith('test'):
anno = self.annotation['annotations'][self.episode_idx]
self.lastY = [ans['answer'] for ans in anno['answers']]

if self.datatype.startswith('train'):
action['labels'] = self.lastY

return action

def share(self):
    """Return the state shared with other instances of this teacher,
    including the loaded question (and, when present, annotation) json."""
    shared = super().share()
    shared['ques'] = self.ques
    # `annotation` is only set when annotations were loaded (see __init__ /
    # _setup_data), so share it only if present.
    if hasattr(self, 'annotation'):
        shared['annotation'] = self.annotation
    return shared

def _setup_data(self, data_path, annotation_path):
print('loading: ' + data_path)
with open(data_path) as data_file:
self.ques = json.load(data_file)
Expand All @@ -100,7 +146,6 @@ def _setup_data(self, data_path, annotation_path, image_path):
with open(annotation_path) as data_file:
self.annotation = json.load(data_file)

self.image_path = image_path
self.len = len(self.ques['questions'])

class DefaultTeacher(OeTeacher):
Expand Down
36 changes: 21 additions & 15 deletions parlai/tasks/vqa_coco2014_v2/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,30 @@
import os


def buildImage(dpath):
print('[building image data: ' + dpath + ']')
# download the image data.
fname1 = 'train2014.zip'
fname2 = 'val2014.zip'
fname3 = 'test2014.zip'
def buildImage(opt):
    """Download and unpack the COCO-2014 images into ``<datapath>/COCO-IMG``.

    The directory is stamped via ``build_data.mark_done`` so the (multi-GB)
    download only runs once per data directory, even when several tasks
    (VQA, VisDial) share the same images.
    """
    dpath = os.path.join(opt['datapath'], 'COCO-IMG')

    if not build_data.built(dpath):
        print('[building image data: ' + dpath + ']')
        # Start from a clean directory so an interrupted previous build
        # cannot leave stale partial archives behind.
        build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download and extract each COCO-2014 image split.
        url = 'http://msvocds.blob.core.windows.net/coco2014/'
        for fname in ('train2014.zip', 'val2014.zip', 'test2014.zip'):
            build_data.download(dpath, url + fname)
            build_data.untar(dpath, fname, False)

        # Mark the data as built so later calls are a no-op.
        build_data.mark_done(dpath)



Expand Down Expand Up @@ -58,7 +66,5 @@ def build(opt):
build_data.untar(dpath, fname4)
build_data.untar(dpath, fname5)

# buildImage(dpath)

# Mark the data as built.
build_data.mark_done(dpath)