Skip to content
This repository has been archived by the owner on Nov 3, 2023. It is now read-only.

Add VQA V2.0 and Visual Dialog V0.9. #54

Merged
merged 9 commits into from
May 12, 2017
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions parlai/tasks/visdial/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# of patent rights can be found in the PATENTS file in the same directory.

from parlai.core.dialog_teacher import DialogTeacher
from .build import build
from .build import build, buildImage

from PIL import Image
import json
Expand All @@ -14,6 +14,7 @@

def _path(opt):
build(opt)
buildImage(opt)
dt = opt['datatype'].split(':')[0]

if dt == 'train':
Expand All @@ -28,7 +29,7 @@ def _path(opt):
data_path = os.path.join(opt['datapath'], 'VisDial-v0.9',
'visdial_0.9_' + suffix + '.json')

image_path = os.path.join(opt['download_path'], img_suffix)
image_path = os.path.join(opt['datapath'], 'COCO-IMG', img_suffix)

return data_path, image_path

Expand All @@ -50,7 +51,7 @@ class DefaultTeacher(DialogTeacher):
def __init__(self, opt, shared=None):

self.datatype = opt['datatype']
data_path, image_path = _path(opt)
data_path, self.image_path = _path(opt)
opt['datafile'] = data_path
self.id = 'visdial'

Expand All @@ -66,8 +67,10 @@ def setup_data(self, path):

for dialog in self.visdial['data']['dialogs']:
# for each dialog
image_id = dialog['dialog']
image_id = dialog['image_id']
caption = dialog['caption']
img_path = self.image_path + '%012d.jpg' % (image_id)

episode_done = False
for i, qa in enumerate(dialog['dialog']):
if i == len(dialog['dialog']):
Expand All @@ -80,4 +83,4 @@ def setup_data(self, path):
answer_options.append(self.answers[ans_id])
#answer_options = qa['answer_options']
gt_index = qa['gt_index']
yield (question, answer, 'None', answer_options), True
yield (question, answer, 'None', answer_options, img_path), True
27 changes: 27 additions & 0 deletions parlai/tasks/visdial/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,33 @@
import os


def buildImage(opt):
    """Download and unpack the COCO-2014 images into ``<datapath>/COCO-IMG``.

    The directory is stamped via ``build_data.mark_done`` so the (multi-GB)
    download only runs once per data directory, even when several tasks
    (VisDial, VQA) share the same images.
    """
    dpath = os.path.join(opt['datapath'], 'COCO-IMG')

    if not build_data.built(dpath):
        print('[building image data: ' + dpath + ']')
        # Start from a clean directory so an interrupted previous build
        # cannot leave stale partial archives behind.
        build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download and extract each COCO-2014 image split.
        url = 'http://msvocds.blob.core.windows.net/coco2014/'
        for fname in ('train2014.zip', 'val2014.zip', 'test2014.zip'):
            build_data.download(dpath, url + fname)
            build_data.untar(dpath, fname, False)

        # Mark the data as built so later calls are a no-op.
        build_data.mark_done(dpath)


def build(opt):
dpath = os.path.join(opt['datapath'], 'VisDial-v0.9')

Expand Down
77 changes: 61 additions & 16 deletions parlai/tasks/vqa_coco2014_v2/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
import json
import random
import os
import pdb

def _path(opt):
build(opt)
buildImage(opt)
dt = opt['datatype'].split(':')[0]

if dt == 'train':
Expand All @@ -35,45 +37,74 @@ def _path(opt):
annotation_path = os.path.join(opt['datapath'], 'VQA-COCO2014-v2',
annotation_suffix + '_annotations.json')

image_path = os.path.join(opt['download_path'], img_suffix)
image_path = os.path.join(opt['datapath'], 'COCO-IMG', img_suffix)

return data_path, annotation_path, image_path


def _image_loader(path):
def _image_loader(opt, path):
"""
Loads the appropriate image from the image_id and returns PIL Image format.
"""
return Image.open(path).convert('RGB')
if not opt.get('no_images', False):
return Image.open(path).convert('RGB')
else:
return None


class OeTeacher(Teacher):
"""
    VQA v2.0 Open-Ended teacher, which loads the json vqa data and implements
    its own `act` method for interacting with the student agent.
"""
def __init__(self, opt, shared=None):
    """Set up VQA v2.0 open-ended data, re-using json loaded by other copies."""
    super().__init__(opt)
    self.datatype = opt['datatype']
    data_path, annotation_path, self.image_path = _path(opt)

    if shared and 'ques' in shared:
        # Re-use the (large) parsed json already loaded by another instance.
        self.ques = shared['ques']
        if 'annotation' in shared:
            self.annotation = shared['annotation']
    else:
        self._setup_data(data_path, annotation_path)

    # for ordered data in batch mode (especially, for validation and
    # testing), each teacher in the batch gets a start index and a step
    # size so they all process disparate sets of the data
    self.step_size = opt.get('batchsize', 1)
    self.data_offset = opt.get('batchindex', 0)

    self.reset()

def __len__(self):
    # Number of question examples; set by _setup_data() from the loaded json.
    return self.len

# return state/action dict based upon passed state
def reset(self):
    # Reset the dialog so that it is at the start of the epoch,
    # and all metrics are reset.
    super().reset()
    self.lastY = None
    # Start one step before our batch offset so the first act() increment
    # lands exactly on this teacher's first assigned example.
    self.episode_idx = self.data_offset - self.step_size

def observe(self, observation):
    """Process observation for metrics.

    If labels were sent out on the last act(), score the student's reply
    against them, then clear them so they are only counted once.
    """
    if self.lastY is not None:
        # Return value of metrics.update is not needed here; the metrics
        # object accumulates state internally (unused `loss` local removed).
        self.metrics.update(observation, self.lastY)
        self.lastY = None
    return observation

def act(self):
if self.datatype == 'train':
self.episode_idx = random.randrange(self.len)
else:
self.episode_idx = (self.episode_idx + 1) % self.len
# always showing the same index now.

qa = self.ques['questions'][self.episode_idx]
question = qa['question']
image_id = qa['image_id']
# question_id = qa['question_id']

if self.datatype != 'test':
anno = self.annotation['annotations'][self.episode_idx]
Expand All @@ -83,14 +114,29 @@ def act(self):

img_path = self.image_path + '%012d.jpg' % (image_id)

return {
'image': _image_loader(img_path),
action = {
'image': _image_loader(self.opt, img_path),
'text': question,
'labels': answers,
'episode_done': True
}

def _setup_data(self, data_path, annotation_path, image_path):
if not self.datatype.startswith('test'):
anno = self.annotation['annotations'][self.episode_idx]
self.lastY = [ans['answer'] for ans in anno['answers']]

if self.datatype.startswith('train'):
action['labels'] = self.lastY

return action

def share(self):
    """Return the state shared with other instances of this teacher,
    including the loaded question (and, when present, annotation) json."""
    shared = super().share()
    shared['ques'] = self.ques
    # `annotation` is only set when annotations were loaded (see __init__ /
    # _setup_data), so share it only if present.
    if hasattr(self, 'annotation'):
        shared['annotation'] = self.annotation
    return shared

def _setup_data(self, data_path, annotation_path):
print('loading: ' + data_path)
with open(data_path) as data_file:
self.ques = json.load(data_file)
Expand All @@ -100,7 +146,6 @@ def _setup_data(self, data_path, annotation_path, image_path):
with open(annotation_path) as data_file:
self.annotation = json.load(data_file)

self.image_path = image_path
self.len = len(self.ques['questions'])

class DefaultTeacher(OeTeacher):
Expand Down
36 changes: 21 additions & 15 deletions parlai/tasks/vqa_coco2014_v2/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,30 @@
import os


def buildImage(dpath):
print('[building image data: ' + dpath + ']')
# download the image data.
fname1 = 'train2014.zip'
fname2 = 'val2014.zip'
fname3 = 'test2014.zip'
def buildImage(opt):
    """Download and unpack the COCO-2014 images into ``<datapath>/COCO-IMG``.

    The directory is stamped via ``build_data.mark_done`` so the (multi-GB)
    download only runs once per data directory, even when several tasks
    (VQA, VisDial) share the same images.
    """
    dpath = os.path.join(opt['datapath'], 'COCO-IMG')

    if not build_data.built(dpath):
        print('[building image data: ' + dpath + ']')
        # Start from a clean directory so an interrupted previous build
        # cannot leave stale partial archives behind.
        build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download and extract each COCO-2014 image split.
        url = 'http://msvocds.blob.core.windows.net/coco2014/'
        for fname in ('train2014.zip', 'val2014.zip', 'test2014.zip'):
            build_data.download(dpath, url + fname)
            build_data.untar(dpath, fname, False)

        # Mark the data as built so later calls are a no-op.
        build_data.mark_done(dpath)



Expand Down Expand Up @@ -58,7 +66,5 @@ def build(opt):
build_data.untar(dpath, fname4)
build_data.untar(dpath, fname5)

# buildImage(dpath)

# Mark the data as built.
build_data.mark_done(dpath)