In [1]:
 cd /data/p300488/lang2prog

/data/p300488/lang2prog


# Exploring CLEVR questions dataset

In [2]:
import os
root = '/data/p300488/lang2prog'
clevr_path = '/data/p300488/datasets/clevr/CLEVR_v1.0'
train_questions_path = os.path.join(clevr_path, 'questions/CLEVR_train_questions.json')
val_questions_path = os.path.join(clevr_path, 'questions/CLEVR_val_questions.json')
test_questions_path = os.path.join(clevr_path, 'questions/CLEVR_test_questions.json')

Read GQA questions dataset. Size of training set?

In [3]:
import json

ds = json.load(open(train_questions_path))['questions']
print(len(ds))

699989


See the structure of a sample, it contains a program annotation for the question.

In [4]:
from pprint import pprint
pprint(ds[0])

{'answer': 'yes',
 'image_filename': 'CLEVR_train_000000.png',
 'image_index': 0,
 'program': [{'function': 'scene', 'inputs': [], 'value_inputs': []},
             {'function': 'filter_size',
              'inputs': [0],
              'value_inputs': ['large']},
             {'function': 'filter_color',
              'inputs': [1],
              'value_inputs': ['green']},
             {'function': 'count', 'inputs': [2], 'value_inputs': []},
             {'function': 'scene', 'inputs': [], 'value_inputs': []},
             {'function': 'filter_size',
              'inputs': [4],
              'value_inputs': ['large']},
             {'function': 'filter_color',
              'inputs': [5],
              'value_inputs': ['purple']},
             {'function': 'filter_material',
              'inputs': [6],
              'value_inputs': ['metal']},
             {'function': 'filter_shape',
              'inputs': [7],
              'value_inputs': ['cube']},
             {'function': 'c

Let'see all the different reasoning primitives and their related concept values

In [5]:
all_primitives = set()
for sample in ds:
    for node in sample['program']:
        _fn = node['function']
        _side_input =  '[' + node['value_inputs'][0] + ']' if node['value_inputs'] else ''
        all_primitives.add(_fn + _side_input)

pprint(all_primitives)

{'count',
 'equal_color',
 'equal_integer',
 'equal_material',
 'equal_shape',
 'equal_size',
 'exist',
 'filter_color[blue]',
 'filter_color[brown]',
 'filter_color[cyan]',
 'filter_color[gray]',
 'filter_color[green]',
 'filter_color[purple]',
 'filter_color[red]',
 'filter_color[yellow]',
 'filter_material[metal]',
 'filter_material[rubber]',
 'filter_shape[cube]',
 'filter_shape[cylinder]',
 'filter_shape[sphere]',
 'filter_size[large]',
 'filter_size[small]',
 'greater_than',
 'intersect',
 'less_than',
 'query_color',
 'query_material',
 'query_shape',
 'query_size',
 'relate[behind]',
 'relate[front]',
 'relate[left]',
 'relate[right]',
 'same_color',
 'same_material',
 'same_shape',
 'same_size',
 'scene',
 'union',
 'unique'}


In this formalism, the primitives are both concept-aware (``filter_color, filter_size`` etc.), as well as vocabulary-aware (``filter_color[red], filter_color[blue]``, etc ). Let's create a version which decouples specific concept values from the primitives (*vocabulary-agnostic*):

In [19]:
vocab_agnostic_primitives = set()
concept_agnostic_primitives = set()
for fn in all_primitives:
    f = fn.split('[')[0]
    vocab_agnostic_primitives.add(f)
    if len(f.split('_')) > 1:
        f = f.split('_')[0] if f.split('_')[1] not in ['than', 'integer'] else f
    concept_agnostic_primitives.add(f)

pprint(vocab_agnostic_primitives)

{'count',
 'equal_color',
 'equal_integer',
 'equal_material',
 'equal_shape',
 'equal_size',
 'exist',
 'filter_color',
 'filter_material',
 'filter_shape',
 'filter_size',
 'greater_than',
 'intersect',
 'less_than',
 'query_color',
 'query_material',
 'query_shape',
 'query_size',
 'relate',
 'same_color',
 'same_material',
 'same_shape',
 'same_size',
 'scene',
 'union',
 'unique'}


And the most general formalism, without concept-awareness (*concept_agnostic*)\:

In [20]:
pprint(concept_agnostic_primitives)

{'count',
 'equal',
 'equal_integer',
 'exist',
 'filter',
 'greater_than',
 'intersect',
 'less_than',
 'query',
 'relate',
 'same',
 'scene',
 'union',
 'unique'}


Let's give some context on different primitive types:

   - **Operational** : ``scene``: Initializes a set of objects given RGB image, ``unique``: {n} -> n
   
   - **Logical**: ``union/intersection``: union / intersection of two sets (outputs of two reasoning branches),
   
   - **Enumeration**: ``exist``: is a set non-empty?, ``count``: size of set, ``less_than/greater_than/equal_integer``: compares two integers
   
   - **Visual**: ``filter``: isolate object set based on attribute value, ``query``: ask for an attribute value, ``same``: object set which has same attribute value as given, ``equal``: whether two objects have equal attribute value
   
   - **Spatial**: ``relate``: object set which has certain spatial relation to given object

## Building Language-to-Program datasets

Let's convert the program annotations to a universal format, which also works for GQA dataset

In [6]:
from typing import *
from dataclasses import dataclass, field


@dataclass
class ProgramNode:
    step: int
    function: str
    value_input: Optional[str]
    inputs: Sequence[int]
    _outputs: Optional[Sequence[int]] = field(default=None)

    def __repr__(self):
        return f"({str(self.step)}): {self.function}{'['+self.value_input+']' if self.value_input is not None else ''}({','.join(list(map(str, self.inputs))) if self.inputs else ''})"
    

In [7]:
def formalize_program_annots(ds):
    programs = []
    for sample in ds:
        _nodes = []
        for i, node in enumerate(sample['program']):
            _value = None if not node['value_inputs'] else node['value_inputs'][0]
            _nodes.append(ProgramNode(step=i,
                                     function=node['function'],
                                     inputs=node['inputs'],
                                     value_input=_value,
            ))
        programs.append(_nodes)
    return programs


# do for training set
train_progs = formalize_program_annots(ds)

Let's see some processed program annotations:

In [8]:
pprint(train_progs[:5])

[[(0): scene(),
  (1): filter_size[large](0),
  (2): filter_color[green](1),
  (3): count(2),
  (4): scene(),
  (5): filter_size[large](4),
  (6): filter_color[purple](5),
  (7): filter_material[metal](6),
  (8): filter_shape[cube](7),
  (9): count(8),
  (10): greater_than(3,9)],
 [(0): scene(),
  (1): filter_size[small](0),
  (2): filter_color[cyan](1),
  (3): filter_material[rubber](2),
  (4): unique(3),
  (5): same_shape(4),
  (6): count(5)],
 [(0): scene(),
  (1): filter_size[large](0),
  (2): filter_shape[sphere](1),
  (3): unique(2),
  (4): scene(),
  (5): filter_size[large](4),
  (6): filter_material[rubber](5),
  (7): filter_shape[cube](6),
  (8): unique(7),
  (9): query_color(3),
  (10): query_color(8),
  (11): equal_color(9,10)],
 [(0): scene(),
  (1): filter_size[large](0),
  (2): filter_color[brown](1),
  (3): filter_shape[sphere](2),
  (4): unique(3),
  (5): relate[left](4),
  (6): scene(),
  (7): filter_color[brown](6),
  (8): filter_shape[cylinder](7),
  (9): unique(8),


Repeat for val and test datasets as well and save them under ``checkpoints`` folder

In [9]:
save_dir = os.path.join(root, "checkpoints/clevr_programs")
if not os.path.isdir(save_dir):
    os.mkdir(save_dir)

with open(os.path.join(save_dir, 'CLEVR_train_programs.json'), 'w') as f:
    json.dump([[p.__dict__ for p in ps] for ps in train_progs], f)
    
_ds = json.load(open(val_questions_path))['questions']
_progs = formalize_program_annots(_ds)
with open(os.path.join(save_dir, 'CLEVR_val_programs.json'), 'w') as f:
    json.dump([[p.__dict__ for p in ps] for ps in _progs], f)

_ds = json.load(open(test_questions_path))['questions']
_progs = formalize_program_annots(_ds)
with open(os.path.join(save_dir, 'CLEVR_test_programs.json'), 'w') as f:
    json.dump([[p.__dict__ for p in ps] for ps in _progs], f)

del _ds, _progs

KeyError: 'program'