In [1]:
import numpy as np
import os
import random
import nltk
from tqdm import tqdm
import json
import re

In [2]:
# Switch the working directory to the parent folder, then import the project-local VQA helper.
# NOTE(review): os.chdir('..') is not idempotent -- re-running this cell moves up one more
# level each time, which also changes what the relative data paths used later resolve to.
os.chdir('..')
from vqa import VQA

In [3]:
# Seed both the stdlib and the NumPy global random generators with the same fixed value.
SEED = 2137
random.seed(SEED)
np.random.seed(SEED)

## VQA tool

In [4]:
# Build one VQA helper for the train2014 files and one for the val2014 files.
vqa = VQA(annotation_file= "../data/v2_mscoco_train2014_annotations.json",
          question_file="../data/v2_OpenEnded_mscoco_train2014_questions.json")
vqa_val = VQA(annotation_file= "../data/v2_mscoco_val2014_annotations.json",
              question_file="../data/v2_OpenEnded_mscoco_val2014_questions.json")

# NOTE(review): the variables are named q_ids_* but are filled from getImgIds() --
# confirm whether image ids or question ids are intended here.
q_ids_train = vqa.getImgIds()
q_ids_val = vqa_val.getImgIds()
# Train ids followed by val ids (assumes getImgIds() returns lists -- TODO confirm).
q_ids_all = q_ids_train+q_ids_val

loading VQA annotations and questions into memory...
0:00:06.270641
creating index...
index created!
loading VQA annotations and questions into memory...
0:00:02.244533
creating index...
index created!


In [5]:
def sentence2words(sentence, map_person=True):
    """Split a sentence into normalised lower-case words.

    The text is cut on the characters , . space ! " ? / _ ( ) and empty pieces are dropped.
    Number words, ordinals, 'both'/'neither' and 'inside'/'outside' are replaced by their
    short forms. When `map_person` is true, first- and second-person pronouns are swapped.
    A question word followed by "'s" (e.g. "what's") is expanded to the question word + 'is'.

    Args:
        sentence: the text to tokenise.
        map_person: whether to swap 'you'/'i', 'your'/'my', etc.

    Returns:
        A list of words.
    """
    normalisations = {'zero': '0', 'one': '1', 'two': '2', 'three': '3', 'four': '4', 'five': '5',
                      'six': '6', 'seven': '7', 'eight': '8', 'nine': '9', 'ten': '10',
                      'both': '2', 'neither': '0',
                      'first': '1', 'second': '2', 'third': '3', 'fourth': '4', 'fifth': '5',
                      'sixth': '6', 'seventh': '7', 'eighth': '8', 'ninth': '9', 'tenth': '10',
                      'inside': 'in', 'outside': 'out'}
    person_swaps = {'you': 'i', 'your': 'my', 'yours': 'mine', 'i': 'you', 'me': 'you',
                    'my': 'your', 'mine': 'yours'}
    interrogatives = ('what', 'where', 'why', 'how', 'when', 'who', 'whose', 'whom')

    tokens = []
    for piece in re.split('[,. !"?/_()]', sentence):
        if not piece:
            continue
        token = piece.lower()
        token = normalisations.get(token, token)
        if map_person:
            token = person_swaps.get(token, token)
        # Expand contractions such as "what's" into two words.
        if token.endswith("'s") and token[:-2] in interrogatives:
            tokens.extend([token[:-2], 'is'])
        else:
            tokens.append(token)
    return tokens

In [6]:
def preprocess_qa(q, a, question_type):
    """Tokenise a question/answer pair and strip the question-type prefix from the question.

    The number of words produced by sentence2words(question_type) is dropped from the start
    of the tokenised question.

    Returns:
        A (question_words, answer_words) tuple of word lists.
    """
    prefix_length = len(sentence2words(question_type))
    question_words = sentence2words(q)
    answer_words = sentence2words(a)
    return question_words[prefix_length:], answer_words

In [7]:
def find_word_index(words, word):
    """Return the index of the first element of `words` equal to `word`, or -1 if absent."""
    return next((position for position, candidate in enumerate(words) if candidate == word), -1)

In [8]:
def find_or(words):
    """Return the index of the first 'or' in `words` (via find_word_index), or -1."""
    or_index = find_word_index(words, 'or')
    return or_index

In [9]:
def positional_matching(option, answer, is_first_option):
    """Locate the answer inside one option of an "or" question.

    Answer words are tried in order; for each, the option words are scanned left to right.
    An option word matches when it equals the answer word, or when both are longer than one
    character and agree on the answer word's stem (the answer word with a trailing
    's'/'es', 'y', 'ing' or 'ish' removed). After the first match, the following option and
    answer words are compared pairwise and every further equal pair counts as one more match.

    Args:
        option: list of words of one option.
        answer: list of answer words.
        is_first_option: when false, the returned position is advanced by one for every
            additional matched pair (so it points at the end of the match).

    Returns:
        (position, number_of_matches); (-1, 0) when nothing matches.
    """
    def stem_length(word):
        # Number of leading characters kept once a common suffix is removed.
        size = len(word)
        if size > 1 and word[-1] == 's':
            return size - 2 if size > 2 and word[-2] == 'e' else size - 1
        if size > 1 and word[-1] == 'y':
            return size - 1
        if size > 3 and word[-3:] in ('ing', 'ish'):
            return size - 3
        return size

    for answer_idx, answer_word in enumerate(answer):
        limit = stem_length(answer_word)
        stem = answer_word[:limit]
        for option_idx, option_word in enumerate(option):
            width = len(option_word)
            is_match = option_word == answer_word or (
                limit > 1 and width > 1 and option_word[:limit] == stem[:width])
            if not is_match:
                continue
            position = option_idx
            matches = 1
            # Count how far the words after the hit keep agreeing.
            following = zip(option[option_idx + 1:], answer[answer_idx + 1:])
            for option_next, answer_next in following:
                if option_next != answer_next:
                    break
                matches += 1
                if not is_first_option:
                    position += 1
            return position, matches
    return -1, 0

In [10]:
def remove_duplicates(sentence):
    """Collapse runs of consecutive equal words into a single word.

    Only adjacent repetitions are removed; a word that reappears later is kept.
    An empty input yields an empty list (it previously raised IndexError).

    Args:
        sentence: iterable of words.

    Returns:
        A new list; the input is not modified.
    """
    deduplicated = []
    for word in sentence:
        # Compare against the last kept word, which is what the previous word was.
        if not deduplicated or word != deduplicated[-1]:
            deduplicated.append(word)
    return deduplicated

In [11]:
def list2str(answer):
    """Render a word list as a sentence string.

    Consecutive duplicate words are removed, the first word is capitalised, the words are
    joined with spaces, a full stop is appended, and whitespace that precedes a punctuation
    mark followed by whitespace or the end of the string is removed.
    """
    words = remove_duplicates(answer)
    words[0] = words[0].capitalize()
    sentence = ' '.join(words) + '.'
    return re.sub(r'\s([?.,!"()\'](?:\s|$))', r'\1', sentence)

In [12]:
def find_number(words, direction):
    """Return how many steps into `words[::direction]` the first numeric word is, or -1.

    With direction 1 this is the index from the start; with direction -1 it is the distance
    from the end.
    """
    ordered = words[::direction]
    return next((offset for offset, word in enumerate(ordered) if word.isnumeric()), -1)

In [13]:
def get_tag_position(q, only_verbs=False):
    """Find the index in `q` at which a verb should be inserted, based on NLTK POS tags.

    Tag families are tried in the order 'VB', 'RB', 'JJ'; the index of the first word whose
    tag starts with the family is returned. For 'VB', if the next word is tagged 'VB', 'VBP'
    or 'VBZ', the index of that next word is returned instead. With `only_verbs` set, the
    search stops with -1 as soon as the 'VB' family has failed. Otherwise, if none of the
    three families is present, the index just after the first 'NN*'/'PR*' word is returned.

    Returns:
        The insertion index, or -1 when no suitable tag is found.
    """
    tags = [tagged[1] for tagged in nltk.pos_tag(q)]
    last_index = len(tags) - 1
    verb_followers = {'VB', 'VBP', 'VBZ'}
    for family in ('VB', 'RB', 'JJ'):  # e.g. 'is' can be before 'going' (VB), 'well' (RB), 'rich' (JJ)
        if only_verbs and family == 'RB':
            return -1
        for index, tag in enumerate(tags):
            if tag[:2] != family:
                continue
            if family == 'VB' and index < last_index and tags[index + 1] in verb_followers:
                return index + 1
            return index
    for index, tag in enumerate(tags):
        if tag[:2] in ('NN', 'PR'):  # Like 'is the man !!! on the street?'
            return index + 1
    return -1

def insert_verb_after_proper_tag(q, a, verb):
    """Return `a` followed by `q` with the `verb` words inserted at get_tag_position(q).

    When no position is found (-1), the verb words are placed between `a` and `q`.
    All three arguments are word lists.
    """
    split_at = get_tag_position(q)
    if split_at == -1:
        return a + verb + q
    head, tail = q[:split_at], q[split_at:]
    return a + head + verb + tail

In [14]:
def split_or(q, a, q_type, a_type, verb='', or_position=-1):
    """Turn an "<option 1> or <option 2>" question into a statement with only the chosen option.

    The question words are split at 'or'. Both sides are searched for the answer with
    positional_matching; the side that matches (or matches better) is kept, the other side
    is dropped, and `verb` is inserted into the words that precede the first option.

    Args:
        q: list of question words. It is not modified.
        a: list of answer words.
        q_type: question type; when it equals 'how many' the chosen option itself is left
            out of the result (and `q` is returned as is if neither option matches).
        a_type: answer type; 'number' enables the number-alignment heuristics.
        verb: verb to insert, either a string (tokenised with sentence2words) or a word list.
        or_position: index of 'or' in `q`, or -1 to look it up with find_or.

    Returns:
        A list of words, or None (after printing an error) when `q` contains no 'or'.
    """
    # Words that are kept together with the option they sit next to.
    PREPOZITIONS = ['a', 'an', 'the', 'not']
    # Threshold (in words) below which a number found in the unmatched option may move
    # the estimated option boundary.
    OFFSETLIMIT = 4
    is_numeric_answer = a_type == 'number'
    if not isinstance(verb, list):
        verb = sentence2words(verb)

    if or_position == -1:
        or_position = find_or(q)
        if or_position == -1:
            print('Error. No "or"')
            return None

    option1 = q[:or_position]
    option2 = q[or_position+1:]
    position_option1, nr_matches1 = positional_matching(option1, a, True)
    position_option2, nr_matches2 = positional_matching(option2, a, False)

    # Pull a preceding article/'not' into option 1, push past one at the end of option 2.
    if position_option1 > 1 and option1[position_option1-1] in PREPOZITIONS:
        position_option1 -= 1
    if position_option2 != -1 and position_option2 < len(option2) - 1 and option2[position_option2] in PREPOZITIONS:
        position_option2 += 1

    if position_option1 == -1:
        if position_option2 == -1:
            # sentence2words maps 'both' to '2' and 'neither' to '0'.
            if a[0] == '2': # Both options are correct
                # Build a new list so the caller's question words are not modified in place.
                q = q[:or_position] + ['and'] + q[or_position+1:]
                return insert_verb_after_proper_tag(q, [], verb)
            if a[0] == '0': # Both options are incorrect
                q = q[:or_position] + ['nor'] + q[or_position+1:]
                verb = verb + ['not']
                return insert_verb_after_proper_tag(q, [], verb)
            if q_type == 'how many':
                return q
            # The answer is in neither option: strip each option in turn (using the word
            # adjacent to 'or' as a stand-in answer) and keep the shorter result.
            possible_option1 = [q[or_position - 1]]
            possible_option2 = [q[or_position + 1]]
            # 'how many' will be used as q_type to remove the options
            verb = verb + a
            possible_answer1 = split_or(q, possible_option1, 'how many', a_type, verb, or_position)
            possible_answer2 = split_or(q, possible_option2, 'how many', a_type, verb, or_position)
            answer = possible_answer2 if len(possible_answer2) < len(possible_answer1) else possible_answer1
            return answer
        else:
            # Only option 2 contains the answer.
            choose_first_option = False
            length_option2 = position_option2 + 1
            # Estimate where option 1 starts by giving it the same length as option 2.
            position_option1 = max(len(option1) - length_option2, 0)
            # Numeric answer with words before the match in option 2: align on the last
            # number in option 1 and on the words shared in front of both numbers.
            if is_numeric_answer and position_option2 > 0:
                number_distance = find_number(option1, -1)
                if number_distance != -1:
                    position_number = len(option1) - 1 - number_distance
                    if position_option1 <= OFFSETLIMIT:
                        position_option1 = min(position_number, position_option1)
                    if position_number != 0:
                        nr_w_before = min(position_number, position_option2)
                        orig_position_number = position_number
                        for w1, w2 in zip(option1[position_number-1::-1][:nr_w_before],
                                          option2[position_option2-1::-1][:nr_w_before]):
                            if w1 != w2:
                                break
                            position_number -= 1
                        if position_number != orig_position_number:
                            position_option1 = min(position_option1, position_number)
            # Numeric answer with words after the match in option 2: extend option 2 over
            # the words that also follow the number in option 1.
            if is_numeric_answer and position_option2 < len(option2) - 1:
                number_distance = find_number(option1, -1)
                if number_distance != -1:
                    position_number = len(option1) - 1 - number_distance
                    if position_option1 <= OFFSETLIMIT:
                        position_option1 = min(position_number, position_option1)
                    nr_w_after = min(len(option1) - 1 - position_number, len(option2) - 1 - position_option2)
                    for w1, w2 in zip(option1[position_number+1:position_number+1+nr_w_after],
                                      option2[position_option2+1:position_option2+1+nr_w_after]):
                        if w1 != w2:
                            break
                        position_option2 += 1
    else:
        if position_option2 == -1:
            # Only option 1 contains the answer.
            choose_first_option = True
            length_option1 = len(option1) - position_option1
            # Estimate where option 2 ends by giving it the same length as option 1.
            position_option2 = max(length_option1 - 1, 0)
            # Numeric answer with words before the match in option 1: align on the first
            # number in option 2 and on the words shared after both numbers.
            if is_numeric_answer and position_option1 > 0:
                number_distance = find_number(option2, 1)
                if number_distance != -1:
                    position_number = number_distance
                    if len(option2) - 1 - position_option2 <= OFFSETLIMIT:
                        position_option2 = max(position_number, position_option2)
                    # Words available after the match in option 1 and after the number in
                    # option 2. (The option 2 term previously used position_option1, an index
                    # into option 1, which could cut this comparison short.)
                    nr_w_after = min(len(option1) - 1 - position_option1, len(option2) - 1 - position_number)
                    orig_position_number = position_number
                    for w1, w2 in zip(option1[position_option1+1:position_option1+1+nr_w_after],
                                      option2[position_number+1:position_number+1+nr_w_after]):
                        if w1 != w2:
                            break
                        position_number += 1
                    if position_number != orig_position_number:
                        position_option2 = max(position_option2, position_number)
            # Numeric answer with words after the match in option 1: either move the end of
            # option 2 to its number, or pull shared leading words into option 1.
            if is_numeric_answer and position_option1 < len(option1) - 1:
                number_distance = find_number(option2, 1)
                if number_distance != -1:
                    position_number = number_distance
                    if len(option2) - 1 - position_option2 <= OFFSETLIMIT:
                        position_option2 = max(position_number, position_option2)
                    else:
                        if position_number != 0:
                            nr_w_before = min(position_option1, position_number)
                            for w1, w2 in zip(option1[position_option1-1::-1][:nr_w_before],
                                              option2[position_number-1::-1][:nr_w_before]):
                                if w1 != w2:
                                    break
                                position_option1 -= 1
        else:
            # Both options contain the answer: prefer the longer match.
            if nr_matches1 != nr_matches2:
                choose_first_option = nr_matches1 > nr_matches2
            else: # Choose the option with less words (Occam's razor)
                length_option2 = position_option2 + 1
                length_option1 = len(option1) - position_option1
                choose_first_option = length_option1 < length_option2

    # Re-apply the article/'not' adjustment to the (possibly moved) boundaries.
    if position_option1 > 1 and option1[position_option1-1] in PREPOZITIONS:
        position_option1 -= 1
    if position_option2 != -1 and position_option2 < len(option2) - 1 and option2[position_option2] in PREPOZITIONS:
        position_option2 += 1

    # Split the question into: words before option 1 | option 1 | option 2 | words after option 2.
    before_option1 = option1[:position_option1]
    option1 = option1[position_option1:]
    after_option2 = option2[position_option2+1:]
    option2 = option2[:position_option2+1]

    correct_option = option1 if choose_first_option else option2
    correct_option = [] if q_type == 'how many' else correct_option

    # Insert the verb at a verb position in the leading words, or append it to them.
    tag_position = get_tag_position(before_option1, only_verbs=True)
    if tag_position != -1:
        before_option1 = before_option1[:tag_position] + verb + before_option1[tag_position:]
    else:
        before_option1 = before_option1 + verb

    return before_option1 + correct_option + after_option2

In [15]:
# Auxiliary / modal verbs recognised as the leading verb of a question.
ALL_VERBS = {
    'is', 'are', 'has', 'have', 'do', 'does', 'will', 'can',
    'shall', 'was', 'were', 'had', 'did', 'would', 'could', 'should',
}

def is_proper_word_after_how(word):
    """Tell whether `word` is one of the quantity/frequency words accepted after 'how'.

    Accepts the listed words themselves and, for words of at least four characters, the
    listed words with an 'ly' suffix (e.g. 'regularly').
    """
    WORDS_AFTER_HOW = {'many', 'much', 'more', 'often', 'seldom', 'frequent', 'regular'}
    if word in WORDS_AFTER_HOW:
        return True
    has_ly_suffix = len(word) >= 4 and word[-2:] == 'ly'
    return has_ly_suffix and word[:-2] in WORDS_AFTER_HOW
    
def default_answer(q, a, qtype, atype, verb='', predefined_answer=[]):
    """Generic sentence builder used by most question-type handlers.

    Steps, in order:
      1. If `q` has an 'or' that is neither its first nor its last word, delegate to split_or.
      2. If `q` has 'how' followed by a word accepted by is_proper_word_after_how and at
         least one more word, delegate to number_how_many with the words after that pair.
      3. If `predefined_answer` is non-empty and `q` has no verb position
         (get_tag_position with only_verbs=True), return `predefined_answer`.
      4. Otherwise insert `verb` into `q` and prefix the answer words (the answer is left
         out when qtype is 'why').

    Returns:
        A list of words (or whatever split_or returns in case 1).
    """
    or_position = find_or(q)
    if 0 < or_position < len(q) - 1:
        return split_or(q, a, qtype, atype, verb, or_position=or_position)

    how_index = find_word_index(q, 'how')
    if how_index != -1 and how_index + 3 <= len(q) and is_proper_word_after_how(q[how_index + 1]):
        return number_how_many(q[how_index+2:], a, qtype, atype)

    if predefined_answer and get_tag_position(q, only_verbs=True) == -1:
        return predefined_answer

    answer_words = a if qtype != 'why' else []
    verb_words = [verb] if verb else []
    return insert_verb_after_proper_tag(q, answer_words, verb_words)

# Questions like "Is this man who I know ..." may be problematic because "know" is not the verb we are looking for
def yes_no_default_answer(q, a, qtype, atype, verb=''):
    """Build the sentence for a yes/no question: answer, a comma, then `q` with the verb inserted.

    For the answer ['yes'] the verb is inserted as is, except that verbs starting with 'do'
    are dropped. For any other answer the verb is followed by 'not'.
    """
    if a != ['yes']:
        verb_words = [verb, 'not']
    elif verb[:2] == 'do':
        verb_words = []
    else:
        verb_words = [verb]
    return insert_verb_after_proper_tag(q, a + [','], verb_words)

def try_other_function(q, a, qtype, atype, possible_function_name, default_func=default_answer):
    """Dispatch to a more specific handler named `possible_function_name`, if one exists.

    The first word of `q` is removed and treated as a candidate verb. For numeric answers,
    'how many'-style questions are routed to number_how_many first. Otherwise the handler
    named `possible_function_name` is called with the shortened question; when no such
    handler exists, `default_func` is used (with the removed word as verb if it is in
    ALL_VERBS, or with the word put back when `qtype` is empty).

    Args:
        q: list of question words; must contain at least one word.
        a: list of answer words.
        qtype: question type string.
        atype: answer type string.
        possible_function_name: name of the handler to try.
        default_func: fallback with the same signature as default_answer.

    Returns:
        The word list produced by whichever handler was used.
    """
    possible_verb = q[0]
    q = q[1:]

    if atype == 'number':
        if is_proper_word_after_how(possible_verb):
            return number_how_many(q, a, qtype, atype)
        # `q` can be empty here (one-word question remainder), so check before indexing;
        # previously this raised IndexError before the length check below was reached.
        if q and is_proper_word_after_how(q[0]):
            if len(q) <= 1:
                return a
            return number_how_many(q[1:], a, qtype, atype)

    try:
        # NOTE(security): the evaluated expression is assembled from question text. The
        # tokeniser removes parentheses, dots, spaces and underscores from words, but this
        # is still eval on external data -- consider a globals()/dict lookup instead.
        return eval(possible_function_name + '(q, a, qtype, atype)')
    except NameError:
        # No handler with that name (a NameError raised inside a handler also lands here).
        if possible_verb in ALL_VERBS:
            return default_func(q, a, qtype, atype, verb = possible_verb)
        if qtype == '':
            q = [possible_verb] + q
        return default_func(q, a, qtype, atype)
    except SyntaxError:
        # The word does not form a valid identifier (e.g. it contains an apostrophe).
        if qtype == '':
            q = [possible_verb] + q
        return default_func(q, a, qtype, atype)

def number_are_the(q, a, qtype, atype):
    """Delegate to default_answer with 'are' as the verb."""
    return default_answer(q, a, qtype, atype, verb='are')

def number_are_these(q, a, qtype, atype):
    """Prepend 'these' to the question words and delegate to number_are."""
    question = ['these'] + q
    return number_are(question, a, qtype, atype)

def number_are_there_any(q, a, qtype, atype):
    """Handled exactly like number_are_there."""
    answer = number_are_there(q, a, qtype, atype)
    return answer

def number_are_they(q, a, qtype, atype):
    """Prefix the default answer with 'They are'."""
    rest = default_answer(q, a, qtype, atype)
    return ['They', 'are'] + rest

def number_are(q, a, qtype, atype):
    """Delegate to default_answer with 'are' as the verb."""
    return default_answer(q, a, qtype, atype, verb='are')

def number_are_there(q, a, qtype, atype):
    """Prefix the default answer with 'There are'."""
    rest = default_answer(q, a, qtype, atype)
    return ['There', 'are'] + rest

def number_can_you(q, a, qtype, atype):
    """Prefix 'i can' to the default answer; the fallback is question, comma, answer."""
    fallback = q + [','] + a
    rest = default_answer(q, a, qtype, atype, predefined_answer=fallback)
    return ['i', 'can'] + rest

def number_do(q, a, qtype, atype):
    """Delegate to default_answer without a verb."""
    answer = default_answer(q, a, qtype, atype)
    return answer

def number_do_you(q, a, qtype, atype):
    """Prepend 'i' to the question words and delegate to default_answer."""
    question = ['i'] + q
    return default_answer(question, a, qtype, atype)
 
def number_does_the(q, a, qtype, atype):
    """Prepend 'the' to the question words and delegate to default_answer."""
    question = ['the'] + q
    return default_answer(question, a, qtype, atype)

def number_does_this(q, a, qtype, atype):
    """Prepend 'this' to the question words and delegate to default_answer."""
    question = ['this'] + q
    return default_answer(question, a, qtype, atype)

def number_has(q, a, qtype, atype):
    """Delegate to default_answer with 'has' as the verb."""
    return default_answer(q, a, qtype, atype, verb='has')

def number_how(q, a, qtype, atype):
    """Route 'how <many/much/...>' questions to number_how_many, everything else to default_answer."""
    if not is_proper_word_after_how(q[0]):
        return default_answer(q, a, qtype, atype)
    # Drop the quantity word itself before building the answer.
    return number_how_many(q[1:], a, qtype, atype)

def number_how_many(q, a, qtype, atype):
    """Return the answer words followed by the result of default_answer."""
    return a + default_answer(q, a, qtype, atype)

def number_how_many_people_are(q, a, qtype, atype):
    """Prepend 'people are' to the question words and delegate to number_how_many."""
    question = ['people', 'are'] + q
    return number_how_many(question, a, qtype, atype)

def number_how_many_people_are_in(q, a, qtype, atype):
    """Prepend 'people are in' to the question words and delegate to number_how_many."""
    question = ['people', 'are', 'in'] + q
    return number_how_many(question, a, qtype, atype)

def number_was(q, a, qtype, atype):
    """Delegate to default_answer with 'was' as the verb."""
    return default_answer(q, a, qtype, atype, verb='was')

def number_is(q, a, qtype, atype):
    """Delegate to default_answer with 'is' as the verb."""
    return default_answer(q, a, qtype, atype, verb='is')

def number_is_he(q, a, qtype, atype):
    """Prepend 'he' to the question words and delegate to number_is."""
    question = ['he'] + q
    return number_is(question, a, qtype, atype)

def number_is_it(q, a, qtype, atype):
    """Prepend 'it' to the question words and delegate to number_is."""
    question = ['it'] + q
    return number_is(question, a, qtype, atype)

def number_is_the(q, a, qtype, atype):
    """Prepend 'the' to the question words and delegate to number_is."""
    question = ['the'] + q
    return number_is(question, a, qtype, atype)

def number_is_that(q, a, qtype, atype):
    """Prepend 'that' to the question words and delegate to number_is."""
    question = ['that'] + q
    return number_is(question, a, qtype, atype)

def number_is_that_a(q, a, qtype, atype):
    """Prepend 'that a' to the question words and delegate to number_is."""
    question = ['that', 'a'] + q
    return number_is(question, a, qtype, atype)

def number_is_the_man(q, a, qtype, atype):
    """Prepend 'the man' to the question words and delegate to number_is."""
    question = ['the', 'man'] + q
    return number_is(question, a, qtype, atype)

def number_is_the_person(q, a, qtype, atype):
    """Prepend 'the person' to the question words and delegate to number_is.

    This definition was previously also named number_is_the_man, which silently replaced
    the real number_is_the_man handler (making it insert 'person') and left no
    number_is_the_person handler for name-based dispatch to find.
    """
    return number_is(['the', 'person'] + q, a, qtype, atype)

def number_is_the_woman(q, a, qtype, atype):
    """Prepend 'the woman' to the question words and delegate to number_is."""
    question = ['the', 'woman'] + q
    return number_is(question, a, qtype, atype)

def number_is_there(q, a, qtype, atype):
    """Prefix the default answer with 'There is'."""
    rest = default_answer(q, a, qtype, atype)
    return ['There', 'is'] + rest

def number_is_this(q, a, qtype, atype):
    """Prepend 'this' to the question words and delegate to number_is."""
    question = ['this'] + q
    return number_is(question, a, qtype, atype)

def number_is_this_person(q, a, qtype, atype):
    """Prepend 'this person' to the question words and delegate to number_is."""
    question = ['this', 'person'] + q
    return number_is(question, a, qtype, atype)

def number_is_this_a(q, a, qtype, atype):
    """Prepend 'this a' to the question words and delegate to number_is."""
    question = ['this', 'a'] + q
    return number_is(question, a, qtype, atype)

def number_none_of_the_above(q, a, qtype, atype):
    """Try the handler named 'number_<first word of q>' via try_other_function."""
    return try_other_function(q, a, qtype, atype, 'number_' + q[0])

def number_what(q, a, qtype, atype):
    """Try the handler named 'number_what_<first word of q>' via try_other_function."""
    return try_other_function(q, a, qtype, atype, 'number_what_' + q[0])

def number_what_animal_is(q, a, qtype, atype):
    """Delegate to default_answer with 'is' as the verb."""
    return default_answer(q, a, qtype, atype, verb='is')

def number_what_sport_is(q, a, qtype, atype):
    """Delegate to default_answer with 'is' as the verb."""
    return default_answer(q, a, qtype, atype, verb='is')

def number_what_are(q, a, qtype, atype):
    """Delegate to default_answer with verb 'are'; the fallback is question + 'are' + answer."""
    fallback = q + ['are'] + a
    return default_answer(q, a, qtype, atype, verb='are', predefined_answer=fallback)

def number_what_are_the(q, a, qtype, atype):
    """Prepend 'the' to the question words and delegate to number_what_are."""
    question = ['the'] + q
    return number_what_are(question, a, qtype, atype)

def number_what_brand(q, a, qtype, atype):
    """Delegate to default_answer; the fallback is the answer followed by `q` from its first verb on.

    The fallback slice starts at the earliest word of `q` that is in ALL_VERBS. The earlier
    implementation iterated over the ALL_VERBS set, so with several verbs in the question
    the chosen one depended on set iteration order. When `q` contains none of the verbs,
    the start index stays -1 (as before), i.e. only the last word of `q` is kept.
    """
    end_index = next((index for index, word in enumerate(q) if word in ALL_VERBS), -1)
    return default_answer(q, a, qtype, atype, predefined_answer = a + q[end_index:])

def number_what_does_the(q, a, qtype, atype):
    """Delegate to default_answer; the fallback is 'The' + question + answer."""
    fallback = ['The'] + q + a
    return default_answer(q, a, qtype, atype, predefined_answer=fallback)

def number_what_is(q, a, qtype, atype):
    """Delegate to default_answer with verb 'is'; the fallback is answer + 'is' + question."""
    fallback = a + ['is'] + q
    return default_answer(q, a, qtype, atype, verb='is', predefined_answer=fallback)

def number_what_is_on_the(q, a, qtype, atype):
    """Prepend 'on the' to the question words and delegate to number_what_is."""
    question = ['on', 'the'] + q
    return number_what_is(question, a, qtype, atype)

def number_what_is_in_the(q, a, qtype, atype):
    """Prepend 'in the' to the question words and delegate to number_what_is."""
    question = ['in', 'the'] + q
    return number_what_is(question, a, qtype, atype)

def number_what_is_the(q, a, qtype, atype):
    """Prepend 'the' to the question words and delegate to number_what_is."""
    question = ['the'] + q
    return number_what_is(question, a, qtype, atype)

def number_what_is_the_man(q, a, qtype, atype):
    """Prepend 'the man' to the question words and delegate to number_what_is."""
    question = ['the', 'man'] + q
    return number_what_is(question, a, qtype, atype)

def number_what_color(q, a, qtype, atype):
    """Delegate to default_answer; the fallback is the answer followed by `q` from its first verb on.

    The fallback slice starts at the earliest word of `q` that is in ALL_VERBS. The earlier
    implementation iterated over the ALL_VERBS set, so with several verbs in the question
    the chosen one depended on set iteration order. When `q` contains none of the verbs,
    the start index stays -1 (as before), i.e. only the last word of `q` is kept.
    """
    end_index = next((index for index, word in enumerate(q) if word in ALL_VERBS), -1)
    return default_answer(q, a, qtype, atype, predefined_answer = a + q[end_index:])

def number_what_color_is_the(q, a, qtype, atype):
    """Handled exactly like number_what_is_the."""
    answer = number_what_is_the(q, a, qtype, atype)
    return answer

def number_what_is_the_name(q, a, qtype, atype):
    """Prepend 'the name' to the question words and delegate to number_what_is."""
    question = ['the', 'name'] + q
    return number_what_is(question, a, qtype, atype)

def number_what_is_this(q, a, qtype, atype):
    """Prepend 'this' to the question words and delegate to number_what_is."""
    question = ['this'] + q
    return number_what_is(question, a, qtype, atype)

def number_what_kind_of(q, a, qtype, atype):
    """Delegate to default_answer without a verb."""
    answer = default_answer(q, a, qtype, atype)
    return answer

def number_what_number_is(q, a, qtype, atype):
    """Delegate to default_answer with verb 'is'; the fallback is question + 'is number' + answer."""
    fallback = q + ['is', 'number'] + a
    return default_answer(q, a, qtype, atype, verb='is', predefined_answer=fallback)

def number_what_time(q, a, qtype, atype):
    """Answer 'It is <answer>' when `q` ends with 'is it' or mentions 'clock'; otherwise use default_answer with verb 'is'."""
    mentions_clock = find_word_index(q, 'clock') != -1
    ends_with_is_it = len(q) >= 2 and q[-2] == 'is' and q[-1] == 'it'
    if ends_with_is_it or mentions_clock:
        return ['It', 'is'] + a
    return default_answer(q, a, qtype, atype, verb='is')

def number_what_type_of(q, a, qtype, atype):
    """Delegate to default_answer without a verb."""
    answer = default_answer(q, a, qtype, atype)
    return answer

def number_where_is_the(q, a, qtype, atype):
    """Prepend 'the' to `q`; a trailing 'from' gives '<q> is from <answer>', otherwise default_answer with verb 'is'."""
    question = ['the'] + q
    if question[-1] != 'from':
        fallback = question + ['is'] + a
        return default_answer(question, a, qtype, atype, verb='is', predefined_answer=fallback)
    return question[:-1] + ['is', 'from'] + a

def number_where(q, a, qtype, atype):
    """Try the handler named 'number_where_<first word of q>' via try_other_function."""
    return try_other_function(q, a, qtype, atype, 'number_where_' + q[0])

def number_where_are(q, a, qtype, atype):
    """Delegate to default_answer with verb 'are'; the fallback is question + 'are' + answer."""
    fallback = q + ['are'] + a
    return default_answer(q, a, qtype, atype, verb='are', predefined_answer=fallback)

def number_where_are_the(q, a, qtype, atype):
    """Prepend 'the' to `q`; a trailing 'from' gives '<q> are from <answer>', otherwise default_answer with verb 'are'."""
    question = ['the'] + q
    if question[-1] != 'from':
        fallback = question + ['are'] + a
        return default_answer(question, a, qtype, atype, verb='are', predefined_answer=fallback)
    return question[:-1] + ['are', 'from'] + a

def number_which(q, a, qtype, atype):
    """Delegate to default_answer without a verb."""
    answer = default_answer(q, a, qtype, atype)
    return answer

def number_who_is(q, a, qtype, atype):
    """Delegate to default_answer with 'is' as the verb."""
    return default_answer(q, a, qtype, atype, verb='is')

def number_why(q, a, qtype, atype):
    """Delegate to default_answer; the fallback is 'Because' + answer."""
    fallback = ['Because'] + a
    return default_answer(q, a, qtype, atype, predefined_answer=fallback)

def number_why_is(q, a, qtype, atype):
    """Delegate to default_answer with 'is' as the verb."""
    return default_answer(q, a, qtype, atype, verb='is')

def number_why_is_the(q, a, qtype, atype):
    """Prepend 'the' to the question words and delegate to number_why_is."""
    question = ['the'] + q
    return number_why_is(question, a, qtype, atype)
    
def other_are(q, a, qtype, atype):
    """Delegate to default_answer with verb 'are'; the fallback is question + 'are' + answer."""
    fallback = q + ['are'] + a
    return default_answer(q, a, qtype, atype, verb='are', predefined_answer=fallback)

def other_are_the(q, a, qtype, atype):
    """Prepend 'the' to the question words and delegate to other_are."""
    question = ['the'] + q
    return other_are(question, a, qtype, atype)

def other_are_there(q, a, qtype, atype):
    """Prepend 'there' to the question words and delegate to other_are."""
    question = ['there'] + q
    return other_are(question, a, qtype, atype)
    
def other_are_these(q, a, qtype, atype):
    """Prepend 'these' to the question words and delegate to other_are."""
    question = ['these'] + q
    return other_are(question, a, qtype, atype)

def other_are_they(q, a, qtype, atype):
    """Prepend 'they' to the question words and delegate to other_are."""
    question = ['they'] + q
    return other_are(question, a, qtype, atype)

def other_can_you(q, a, qtype, atype):
    """Prefix 'I can' to the default answer; the fallback is question + answer."""
    fallback = q + a
    rest = default_answer(q, a, qtype, atype, predefined_answer=fallback)
    return ['I', 'can'] + rest

def other_could(q, a, qtype, atype):
    """Delegate to default_answer without a verb."""
    answer = default_answer(q, a, qtype, atype)
    return answer

def other_do(q, a, qtype, atype):
    """Delegate to default_answer without a verb."""
    answer = default_answer(q, a, qtype, atype)
    return answer

def other_do_you(q, a, qtype, atype):
    """Prepend 'i' to the question words and delegate to other_do."""
    question = ['i'] + q
    return other_do(question, a, qtype, atype)

def other_does_the(q, a, qtype, atype):
    """Prepend 'the' to the question words and delegate to other_do."""
    question = ['the'] + q
    return other_do(question, a, qtype, atype)
    
def other_does_this(q, a, qtype, atype):
    """Prepend 'this' to the question words and delegate to other_do."""
    question = ['this'] + q
    return other_do(question, a, qtype, atype)

def other_has(q, a, qtype, atype):
    """Delegate to default_answer with verb 'has'; the fallback is question + 'has' + answer."""
    fallback = q + ['has'] + a
    return default_answer(q, a, qtype, atype, verb='has', predefined_answer=fallback)

def other_how(q, a, qtype, atype):
    """Try the handler named 'other_how_<first word of q>' via try_other_function."""
    return try_other_function(q, a, qtype, atype, 'other_how_' + q[0])

def other_how_many(q, a, qtype, atype):
    """Handled exactly like number_how_many."""
    answer = number_how_many(q, a, qtype, atype)
    return answer

def other_how_many_people_are(q, a, qtype, atype):
    """Prepend 'people are' to the question words and delegate to other_how_many."""
    question = ['people', 'are'] + q
    return other_how_many(question, a, qtype, atype)

def other_how_many_people_are_in(q, a, qtype, atype):
    """Prepend 'people are in' to the question words and delegate to other_how_many."""
    question = ['people', 'are', 'in'] + q
    return other_how_many(question, a, qtype, atype)

def other_is(q, a, qtype, atype):
    """Delegate to default_answer with verb 'is'; the fallback is question + 'is' + answer."""
    fallback = q + ['is'] + a
    return default_answer(q, a, qtype, atype, verb='is', predefined_answer=fallback)

def other_is_he(q, a, qtype, atype):
    """Prepend 'he' to the question words and delegate to other_is."""
    question = ['he'] + q
    return other_is(question, a, qtype, atype)

def other_is_it(q, a, qtype, atype):
    """Prepend 'it' to the question words and delegate to other_is."""
    question = ['it'] + q
    return other_is(question, a, qtype, atype)

def other_is_that_a(q, a, qtype, atype):
    """Prepend 'that a' to the question words and delegate to other_is."""
    question = ['that', 'a'] + q
    return other_is(question, a, qtype, atype)

def other_is_the(q, a, qtype, atype):
    """Prepend 'the' to the question words and delegate to other_is."""
    question = ['the'] + q
    return other_is(question, a, qtype, atype)

def other_is_the_man(q, a, qtype, atype):
    """Prepend 'the man' to the question words and delegate to other_is."""
    question = ['the', 'man'] + q
    return other_is(question, a, qtype, atype)

def other_is_the_person(q, a, qtype, atype):
    """Prepend 'the person' to the question words and delegate to other_is."""
    question = ['the', 'person'] + q
    return other_is(question, a, qtype, atype)

def other_is_the_woman(q, a, qtype, atype):
    """Prepend 'the woman' to the question words and delegate to other_is."""
    question = ['the', 'woman'] + q
    return other_is(question, a, qtype, atype)

def other_is_there(q, a, qtype, atype):
    """Prepend 'there' to the question words and delegate to other_is."""
    question = ['there'] + q
    return other_is(question, a, qtype, atype)

def other_is_there_a(q, a, qtype, atype):
    """Prepend 'there a' to the question words and delegate to other_is."""
    question = ['there', 'a'] + q
    return other_is(question, a, qtype, atype)

def other_is_this(q, a, qtype, atype):
    """Prepend 'this' to the question words and delegate to other_is."""
    question = ['this'] + q
    return other_is(question, a, qtype, atype)

def other_is_this_a(q, a, qtype, atype):
    """Prepend 'this a' to the question words and delegate to other_is."""
    question = ['this', 'a'] + q
    return other_is(question, a, qtype, atype)

def other_is_this_an(q, a, qtype, atype):
    """Prepend 'this an' to the question words and delegate to other_is."""
    question = ['this', 'an'] + q
    return other_is(question, a, qtype, atype)

def other_is_this_person(q, a, qtype, atype):
    """Prepend 'this person' to the question words and delegate to other_is."""
    question = ['this', 'person'] + q
    return other_is(question, a, qtype, atype)

def other_none_of_the_above(q, a, qtype, atype):
    """Try the handler named 'other_<first word of q>' via try_other_function."""
    return try_other_function(q, a, qtype, atype, 'other_' + q[0])

def other_was(q, a, qtype, atype):
    """Delegate to default_answer with verb 'was'; the fallback is question + 'was' + answer."""
    fallback = q + ['was'] + a
    return default_answer(q, a, qtype, atype, verb='was', predefined_answer=fallback)

def other_what(q, a, qtype, atype):
    """Try the handler named 'other_what_<first word of q>' via try_other_function."""
    return try_other_function(q, a, qtype, atype, 'other_what_' + q[0])

def other_what_animal_is(q, a, qtype, atype):
    """Delegate to default_answer with verb 'is'; the fallback is question + 'is' + answer."""
    fallback = q + ['is'] + a
    return default_answer(q, a, qtype, atype, verb='is', predefined_answer=fallback)

def other_what_are(q, a, qtype, atype):
    """Delegate to default_answer with verb 'are'; the fallback is question + 'are' + answer."""
    fallback = q + ['are'] + a
    return default_answer(q, a, qtype, atype, verb='are', predefined_answer=fallback)

def other_what_are_the(q, a, qtype, atype):
    """Prepend 'the' to the question words and delegate to other_what_are."""
    question = ['the'] + q
    return other_what_are(question, a, qtype, atype)

def other_what_brand(q, a, qtype, atype):
    """Delegate to default_answer without a verb."""
    answer = default_answer(q, a, qtype, atype)
    return answer

def other_what_color(q, a, qtype, atype):
    """Delegate to default_answer without a verb."""
    answer = default_answer(q, a, qtype, atype)
    return answer

def other_what_color_are(q, a, qtype, atype):
    """Delegate to default_answer with verb 'are'; the fallback is question + 'are' + answer."""
    fallback = q + ['are'] + a
    return default_answer(q, a, qtype, atype, verb='are', predefined_answer=fallback)

def other_what_color_are_the(q, a, qtype, atype):
    """Prepend 'the' to the question words and delegate to other_what_color_are."""
    question = ['the'] + q
    return other_what_color_are(question, a, qtype, atype)

def other_what_sport_is(q, a, qtype, atype):
    # NOTE(review): `other_what_sport_is` is defined a second time further down in this
    # file (with predefined_answer = q + ['sport', 'is'] + a). That later definition
    # replaces this one, so this body is not the one in effect once both have executed.
    # Confirm which fallback is intended and remove the other definition.
    return default_answer(q, a, qtype, atype, verb = 'is', predefined_answer = q + ['is'] + a)

def other_what_color_is(q, a, qtype, atype):
    """Delegate to default_answer with verb 'is'; the fallback is question + 'is' + answer."""
    fallback = q + ['is'] + a
    return default_answer(q, a, qtype, atype, verb='is', predefined_answer=fallback)

def other_what_color_is_the(q, a, qtype, atype):
    """Prepend 'the' to the question words and delegate to other_what_color_is."""
    question = ['the'] + q
    return other_what_color_is(question, a, qtype, atype)

def other_what_does_the(q, a, qtype, atype):
    """Prepend 'the' to the question words and delegate to default_answer."""
    question = ['the'] + q
    return default_answer(question, a, qtype, atype)

def other_what_is(q, a, qtype, atype):
    """Delegate to default_answer with verb 'is'; the fallback is question + 'is' + answer."""
    fallback = q + ['is'] + a
    return default_answer(q, a, qtype, atype, verb='is', predefined_answer=fallback)

def other_what_is_in_the(q, a, qtype, atype):
    """Prepend 'in the' to the question words and delegate to other_what_is."""
    question = ['in', 'the'] + q
    return other_what_is(question, a, qtype, atype)

def other_what_is_on_the(q, a, qtype, atype):
    """Prepend 'on the' to the question words and delegate to other_what_is."""
    question = ['on', 'the'] + q
    return other_what_is(question, a, qtype, atype)

def other_what_is_the(q, a, qtype, atype):
    """Prepend 'the' to the question words and delegate to other_what_is."""
    question = ['the'] + q
    return other_what_is(question, a, qtype, atype)

def other_what_is_the_color_of_the(q, a, qtype, atype):
    """Prepend 'the color of the' to the question words and delegate to other_what_is."""
    question = ['the', 'color', 'of', 'the'] + q
    return other_what_is(question, a, qtype, atype)

def other_what_is_the_man(q, a, qtype, atype):
    """Prepend 'the man' to the question words and delegate to other_what_is."""
    question = ['the', 'man'] + q
    return other_what_is(question, a, qtype, atype)

def other_what_is_the_name(q, a, qtype, atype):
    """Prepend 'the name' to the question words and delegate to other_what_is."""
    question = ['the', 'name'] + q
    return other_what_is(question, a, qtype, atype)

def other_what_is_the_person(q, a, qtype, atype):
    """Prepend 'the person' to the question words and delegate to other_what_is."""
    question = ['the', 'person'] + q
    return other_what_is(question, a, qtype, atype)

def other_what_is_the_woman(q, a, qtype, atype):
    """Prepend 'the woman' to the question words and delegate to other_what_is."""
    question = ['the', 'woman'] + q
    return other_what_is(question, a, qtype, atype)

def other_what_is_this(q, a, qtype, atype):
    """Prepend 'this' to the question words and delegate to other_what_is."""
    question = ['this'] + q
    return other_what_is(question, a, qtype, atype)

def other_what_kind_of(q, a, qtype, atype):
    """Delegate to default_answer without a verb."""
    answer = default_answer(q, a, qtype, atype)
    return answer

def other_what_number_is(q, a, qtype, atype):
    """Delegate to default_answer with verb 'is'; the fallback is question + 'number is' + answer."""
    fallback = q + ['number', 'is'] + a
    return default_answer(q, a, qtype, atype, verb='is', predefined_answer=fallback)

def other_what_room_is(q, a, qtype, atype):
    """Delegate to default_answer with verb 'is'; the fallback is question + 'room is' + answer."""
    fallback = q + ['room', 'is'] + a
    return default_answer(q, a, qtype, atype, verb='is', predefined_answer=fallback)

def other_what_sport_is(q, a, qtype, atype):
    """Delegate to default_answer with verb 'is'; the fallback is question + 'sport is' + answer."""
    fallback = q + ['sport', 'is'] + a
    return default_answer(q, a, qtype, atype, verb='is', predefined_answer=fallback)

def other_what_time(q, a, qtype, atype):
    """Handled exactly like other_what."""
    answer = other_what(q, a, qtype, atype)
    return answer

def other_what_type_of(q, a, qtype, atype):
    """Handled exactly like other_what_kind_of."""
    answer = other_what_kind_of(q, a, qtype, atype)
    return answer

def other_where_is(q, a, qtype, atype):
    """Delegate to default_answer with verb 'is'; the fallback is question + 'is' + answer."""
    fallback = q + ['is'] + a
    return default_answer(q, a, qtype, atype, verb='is', predefined_answer=fallback)

def other_where_is_the(q, a, qtype, atype):
    """Prepend 'the' to the question words and delegate to other_where_is."""
    question = ['the'] + q
    return other_where_is(question, a, qtype, atype)

def other_where_are(q, a, qtype, atype):
    """Delegate to default_answer with verb 'are'; the fallback is question + 'are' + answer."""
    fallback = q + ['are'] + a
    return default_answer(q, a, qtype, atype, verb='are', predefined_answer=fallback)

def other_where_are_the(q, a, qtype, atype):
    """Prepend 'the' to the question words and delegate to other_where_are."""
    question = ['the'] + q
    return other_where_are(question, a, qtype, atype)

def other_which(q, a, qtype, atype):
    """Use the first word of `q` as the verb when it is in ALL_VERBS; otherwise plain default_answer."""
    leading_word = q[0]
    if leading_word not in ALL_VERBS:
        return default_answer(q, a, qtype, atype)
    return default_answer(q[1:], a, qtype, atype, verb=leading_word)

def other_who_is(q, a, qtype, atype):
    """Delegate to default_answer with 'is' as the verb."""
    return default_answer(q, a, qtype, atype, verb='is')

def other_why(q, a, qtype, atype):
    """Use the first word of `q` as the verb when it is in ALL_VERBS; otherwise fall back to 'Because' + answer + question."""
    leading_word = q[0]
    if leading_word not in ALL_VERBS:
        fallback = ['Because'] + a + q
        return default_answer(q, a, qtype, atype, predefined_answer=fallback)
    return default_answer(q[1:], a, qtype, atype, verb=leading_word)

def other_why_is(q, a, qtype, atype):
    """Put 'is' back in front of the question words and delegate to other_why.

    other_why takes a leading verb from ALL_VERBS off the question and passes it to
    default_answer as the verb. This matches other_why_is_the (which also routes through
    other_why) and number_why_is (verb='is'); previously 'is' was handed to default_answer
    as an ordinary question word and no verb was supplied.
    """
    return other_why(['is'] + q, a, qtype, atype)

def other_why_is_the(q, a, qtype, atype):
    """Prepend 'is the' to the question words and delegate to other_why."""
    question = ['is', 'the'] + q
    return other_why(question, a, qtype, atype)

def yes_no_are(q, a, qtype, atype):
    """Delegate to yes_no_default_answer with 'are' as the verb."""
    return yes_no_default_answer(q, a, qtype, atype, verb='are')

def yes_no_are_the(q, a, qtype, atype):
    """Prepend 'the' to the question words and delegate to yes_no_are."""
    question = ['the'] + q
    return yes_no_are(question, a, qtype, atype)

def yes_no_are_there(q, a, qtype, atype):
    """Build '<answer> , there are [not] <question>'; 'not' is added only for the answer ['no']."""
    sentence = a + [',', 'there', 'are']
    if a == ['no']:
        sentence = sentence + ['not']
    return sentence + q

def yes_no_are_there_any(q, a, qtype, atype):
    """Build '<answer> , there are no <question>' for the answer ['no'], otherwise '... there are some <question>'."""
    if a == ['no']:
        quantifier = ['no']
    else:
        quantifier = ['some']
    return a + [',', 'there', 'are'] + quantifier + q

def yes_no_are_these(q, a, qtype, atype):
    """Prepend 'these' to the question words and delegate to yes_no_are."""
    question = ['these'] + q
    return yes_no_are(question, a, qtype, atype)

def yes_no_are_they(q, a, qtype, atype):
    """Prepend 'they' to the question words and delegate to yes_no_are."""
    question = ['they'] + q
    return yes_no_are(question, a, qtype, atype)

def yes_no_can(q, a, qtype, atype):
    """Yes/no rule for "can" questions.

    Delegates to ``yes_no_default_answer`` with 'can' as the trailing argument
    (presumably the auxiliary verb -- confirm against that helper).
    """
    aux = 'can'
    return yes_no_default_answer(q, a, qtype, atype, aux)

def yes_no_can_you(q, a, qtype, atype):
    """Yes/no rule for "can you" questions: prepend 'i' to ``q`` and delegate to ``yes_no_can``.

    The subject is 'i' rather than 'you' -- presumably mirroring the you->i
    person swap that ``sentence2words`` applies.
    """
    with_subject = ['i'] + q
    return yes_no_can(with_subject, a, qtype, atype)

def yes_no_could(q, a, qtype, atype):
    """Yes/no rule for "could" questions.

    Delegates to ``yes_no_default_answer`` with 'could' as the trailing argument
    (presumably the auxiliary verb -- confirm against that helper).
    """
    aux = 'could'
    return yes_no_default_answer(q, a, qtype, atype, aux)

def yes_no_do(q, a, qtype, atype):
    """Yes/no rule for "do" questions.

    Delegates to ``yes_no_default_answer`` with 'do' as the trailing argument
    (presumably the auxiliary verb -- confirm against that helper).
    """
    aux = 'do'
    return yes_no_default_answer(q, a, qtype, atype, aux)

def yes_no_do_you(q, a, qtype, atype):
    """Yes/no rule for "do you" questions: prepend 'i' to ``q`` and delegate to ``yes_no_do``.

    The subject is 'i' rather than 'you' -- presumably mirroring the you->i
    person swap that ``sentence2words`` applies.
    """
    with_subject = ['i'] + q
    return yes_no_do(with_subject, a, qtype, atype)

def yes_no_does(q, a, qtype, atype):
    """Yes/no rule for "does" questions.

    Delegates to ``yes_no_default_answer`` with 'does' as the trailing argument
    (presumably the auxiliary verb -- confirm against that helper).
    """
    aux = 'does'
    return yes_no_default_answer(q, a, qtype, atype, aux)

def yes_no_does_the(q, a, qtype, atype):
    """Yes/no rule for "does the" questions: prepend 'the' to ``q`` and delegate to ``yes_no_does``."""
    with_article = ['the'] + q
    return yes_no_does(with_article, a, qtype, atype)

def yes_no_does_this(q, a, qtype, atype):
    """Yes/no rule for "does this" questions: prepend 'this' to ``q`` and delegate to ``yes_no_does``."""
    with_subject = ['this'] + q
    return yes_no_does(with_subject, a, qtype, atype)

def yes_no_has(q, a, qtype, atype):
    """Yes/no rule for "has" questions.

    Delegates to ``yes_no_default_answer`` with 'has' as the trailing argument
    (presumably the auxiliary verb -- confirm against that helper).
    """
    aux = 'has'
    return yes_no_default_answer(q, a, qtype, atype, aux)

def yes_no_is(q, a, qtype, atype):
    """Yes/no rule for "is" questions.

    Delegates to ``yes_no_default_answer`` with 'is' as the trailing argument
    (presumably the auxiliary verb -- confirm against that helper).
    """
    aux = 'is'
    return yes_no_default_answer(q, a, qtype, atype, aux)

def yes_no_is_he(q, a, qtype, atype):
    """Yes/no rule for "is he" questions: prepend 'he' to ``q`` and delegate to ``yes_no_is``."""
    with_subject = ['he'] + q
    return yes_no_is(with_subject, a, qtype, atype)

def yes_no_is_it(q, a, qtype, atype):
    """Yes/no rule for "is it" questions: prepend 'it' to ``q`` and delegate to ``yes_no_is``."""
    with_subject = ['it'] + q
    return yes_no_is(with_subject, a, qtype, atype)

def yes_no_is_that_a(q, a, qtype, atype):
    """Yes/no rule for "is that a" questions: prepend 'that', 'a' to ``q`` and delegate to ``yes_no_is``."""
    with_subject = ['that', 'a'] + q
    return yes_no_is(with_subject, a, qtype, atype)

def yes_no_is_the(q, a, qtype, atype):
    """Yes/no rule for "is the" questions: prepend 'the' to ``q`` and delegate to ``yes_no_is``."""
    with_article = ['the'] + q
    return yes_no_is(with_article, a, qtype, atype)

def yes_no_is_the_name(q, a, qtype, atype):
    """Yes/no rule for "is the name" questions: prepend 'the', 'name' to ``q`` and delegate to ``yes_no_is``."""
    with_subject = ['the', 'name'] + q
    return yes_no_is(with_subject, a, qtype, atype)

def yes_no_is_the_man(q, a, qtype, atype):
    """Yes/no rule for "is the man" questions: prepend 'the', 'man' to ``q`` and delegate to ``yes_no_is``."""
    with_subject = ['the', 'man'] + q
    return yes_no_is(with_subject, a, qtype, atype)

def yes_no_is_the_person(q, a, qtype, atype):
    """Yes/no rule for "is the person" questions: prepend 'the', 'person' to ``q`` and delegate to ``yes_no_is``."""
    with_subject = ['the', 'person'] + q
    return yes_no_is(with_subject, a, qtype, atype)

def yes_no_is_the_woman(q, a, qtype, atype):
    """Yes/no rule for "is the woman" questions: prepend 'the', 'woman' to ``q`` and delegate to ``yes_no_is``."""
    with_subject = ['the', 'woman'] + q
    return yes_no_is(with_subject, a, qtype, atype)

def yes_no_is_there(q, a, qtype, atype):
    """Yes/no rule for "is there" questions.

    Returns ``a + [',', 'there', 'is'] + q``, with 'not' inserted before the
    question words when the answer is exactly ``['no']``. ``qtype`` and
    ``atype`` are not used.
    """
    words = a + [',', 'there', 'is']
    if a == ['no']:
        words.append('not')
    return words + q

def yes_no_is_there_a(q, a, qtype, atype):
    """Yes/no rule for "is there a" questions: prepend 'a' to ``q`` and delegate to ``yes_no_is_there``."""
    with_article = ['a'] + q
    return yes_no_is_there(with_article, a, qtype, atype)

def yes_no_is_this(q, a, qtype, atype):
    """Yes/no rule for "is this" questions: prepend 'this' to ``q`` and delegate to ``yes_no_is``."""
    with_subject = ['this'] + q
    return yes_no_is(with_subject, a, qtype, atype)

def yes_no_is_this_a(q, a, qtype, atype):
    """Yes/no rule for "is this a" questions.

    Returns ``a + [',', 'this', 'is'] + ['a'] + q``, with 'not' inserted before
    the article when the answer is exactly ``['no']``. ``qtype`` and ``atype``
    are not used.
    """
    words = a + [',', 'this', 'is']
    if a == ['no']:
        words.append('not')
    words.append('a')
    return words + q

def yes_no_is_this_an(q, a, qtype, atype):
    """Yes/no rule for "is this an" questions.

    Returns ``a + [',', 'this', 'is'] + ['an'] + q``, with 'not' inserted before
    the article when the answer is exactly ``['no']``. ``qtype`` and ``atype``
    are not used.
    """
    words = a + [',', 'this', 'is']
    if a == ['no']:
        words.append('not')
    words.append('an')
    return words + q

def yes_no_is_this_person(q, a, qtype, atype):
    """Yes/no rule for "is this person" questions: prepend 'this', 'person' to ``q`` and delegate to ``yes_no_is``."""
    with_subject = ['this', 'person'] + q
    return yes_no_is(with_subject, a, qtype, atype)

def yes_no_none_of_the_above(q, a, qtype, atype):
    """Yes/no rule for questions whose type is "none of the above".

    Builds the candidate rule name ``'yes_no_' + q[0]`` and passes it, with
    ``yes_no_default_answer``, to ``try_other_function``.
    Raises ``IndexError`` when ``q`` is empty.
    """
    candidate = ''.join(('yes_no_', q[0]))
    return try_other_function(q, a, qtype, atype, candidate, yes_no_default_answer)

def yes_no_was(q, a, qtype, atype):
    """Yes/no rule for "was" questions.

    Delegates to ``yes_no_default_answer`` with 'was' as the trailing argument
    (presumably the auxiliary verb -- confirm against that helper).
    """
    aux = 'was'
    return yes_no_default_answer(q, a, qtype, atype, aux)

def yes_no_which(q, a, qtype, atype):
    """Yes/no rule for "which" questions.

    Builds the candidate rule name ``'yes_no_' + q[0]`` and passes it, with
    ``yes_no_default_answer``, to ``try_other_function``.
    Raises ``IndexError`` when ``q`` is empty.
    """
    candidate = ''.join(('yes_no_', q[0]))
    return try_other_function(q, a, qtype, atype, candidate, yes_no_default_answer)

def yes_no_who(q, a, qtype, atype):
    """Yes/no rule for "who" questions.

    Builds the candidate rule name ``'yes_no_' + q[0]`` and passes it, with
    ``yes_no_default_answer``, to ``try_other_function``.
    Raises ``IndexError`` when ``q`` is empty.
    """
    candidate = ''.join(('yes_no_', q[0]))
    return try_other_function(q, a, qtype, atype, candidate, yes_no_default_answer)

def yes_no_who_is(q, a, qtype, atype):
    """Yes/no rule for the "who is" prefix: forward every argument unchanged to ``yes_no_is``."""
    long_answer = yes_no_is(q, a, qtype, atype)
    return long_answer

def yes_no_where(q, a, qtype, atype):
    """Yes/no rule for "where" questions.

    Builds the candidate rule name ``'yes_no_' + q[0]`` and passes it, with
    ``yes_no_default_answer``, to ``try_other_function``.
    Raises ``IndexError`` when ``q`` is empty.
    """
    candidate = ''.join(('yes_no_', q[0]))
    return try_other_function(q, a, qtype, atype, candidate, yes_no_default_answer)

def yes_no_where_are(q, a, qtype, atype):
    """Yes/no rule for the "where are" prefix: forward every argument unchanged to ``yes_no_are``."""
    long_answer = yes_no_are(q, a, qtype, atype)
    return long_answer

def yes_no_where_are_the(q, a, qtype, atype):
    """Yes/no rule for the "where are the" prefix: forward every argument unchanged to ``yes_no_are_the``."""
    long_answer = yes_no_are_the(q, a, qtype, atype)
    return long_answer

def yes_no_where_is(q, a, qtype, atype):
    """Yes/no rule for the "where is" prefix: forward every argument unchanged to ``yes_no_is``."""
    long_answer = yes_no_is(q, a, qtype, atype)
    return long_answer

def yes_no_where_is_the(q, a, qtype, atype):
    """Yes/no rule for the "where is the" prefix: forward every argument unchanged to ``yes_no_is_the``."""
    long_answer = yes_no_is_the(q, a, qtype, atype)
    return long_answer

def yes_no_what(q, a, qtype, atype):
    """Yes/no rule for "what" questions.

    Builds the candidate rule name ``'yes_no_' + q[0]`` and passes it, with
    ``yes_no_default_answer``, to ``try_other_function``.
    Raises ``IndexError`` when ``q`` is empty.
    """
    candidate = ''.join(('yes_no_', q[0]))
    return try_other_function(q, a, qtype, atype, candidate, yes_no_default_answer)

def yes_no_what_kind_of(q, a, qtype, atype):
    """Yes/no rule for "what kind of" questions.

    Builds the candidate rule name ``'yes_no_what_kind_of_' + q[0]`` and passes
    it, with ``yes_no_default_answer``, to ``try_other_function``.
    Raises ``IndexError`` when ``q`` is empty.
    """
    candidate = ''.join(('yes_no_what_kind_of_', q[0]))
    return try_other_function(q, a, qtype, atype, candidate, yes_no_default_answer)

def yes_no_what_does_the(q, a, qtype, atype):
    """Yes/no rule for the "what does the" prefix: forward every argument unchanged to ``yes_no_does_the``."""
    long_answer = yes_no_does_the(q, a, qtype, atype)
    return long_answer

def yes_no_what_is(q, a, qtype, atype):
    """Yes/no rule for the "what is" prefix: forward every argument unchanged to ``yes_no_is``."""
    long_answer = yes_no_is(q, a, qtype, atype)
    return long_answer

def yes_no_what_is_the(q, a, qtype, atype):
    """Yes/no rule for the "what is the" prefix: forward every argument unchanged to ``yes_no_is_the``."""
    long_answer = yes_no_is_the(q, a, qtype, atype)
    return long_answer

def yes_no_what_is_this(q, a, qtype, atype):
    """Yes/no rule for the "what is this" prefix: forward every argument unchanged to ``yes_no_is_this``."""
    long_answer = yes_no_is_this(q, a, qtype, atype)
    return long_answer

def yes_no_what_is_the_name(q, a, qtype, atype):
    """Yes/no rule for the "what is the name" prefix: forward every argument unchanged to ``yes_no_is_the_name``."""
    long_answer = yes_no_is_the_name(q, a, qtype, atype)
    return long_answer

def yes_no_what_is_the_person(q, a, qtype, atype):
    """Yes/no rule for the "what is the person" prefix: forward every argument unchanged to ``yes_no_is_the_person``."""
    long_answer = yes_no_is_the_person(q, a, qtype, atype)
    return long_answer

def yes_no_what_is_the_man(q, a, qtype, atype):
    """Yes/no rule for the "what is the man" prefix: forward every argument unchanged to ``yes_no_is_the_man``."""
    long_answer = yes_no_is_the_man(q, a, qtype, atype)
    return long_answer

def yes_no_what_color_is_the(q, a, qtype, atype):
    """Yes/no rule for the "what color is the" prefix: forward every argument unchanged to ``yes_no_what_is_the``."""
    long_answer = yes_no_what_is_the(q, a, qtype, atype)
    return long_answer

def yes_no_what_are(q, a, qtype, atype):
    """Yes/no rule for the "what are" prefix: forward every argument unchanged to ``yes_no_are``."""
    long_answer = yes_no_are(q, a, qtype, atype)
    return long_answer

def yes_no_what_are_the(q, a, qtype, atype):
    """Yes/no rule for the "what are the" prefix: forward every argument unchanged to ``yes_no_are_the``."""
    long_answer = yes_no_are_the(q, a, qtype, atype)
    return long_answer

def yes_no_why(q, a, qtype, atype):
    """Yes/no rule for "why" questions.

    Builds the candidate rule name ``'yes_no_' + q[0]`` and passes it, with
    ``yes_no_default_answer``, to ``try_other_function``.
    Raises ``IndexError`` when ``q`` is empty.
    """
    candidate = ''.join(('yes_no_', q[0]))
    return try_other_function(q, a, qtype, atype, candidate, yes_no_default_answer)

def yes_no_why_is(q, a, qtype, atype):
    """Yes/no rule for "why is" questions.

    Delegates to ``yes_no_default_answer`` with 'is' as the trailing argument
    (presumably the auxiliary verb -- confirm against that helper).
    """
    aux = 'is'
    return yes_no_default_answer(q, a, qtype, atype, aux)

def yes_no_why_is_the(q, a, qtype, atype):
    """Yes/no rule for "why is the" questions: prepend 'the' to ``q`` and delegate to ``yes_no_why_is``."""
    with_article = ['the'] + q
    return yes_no_why_is(with_article, a, qtype, atype)

def yes_no_would(q, a, qtype, atype):
    """Yes/no rule for "would" questions.

    Delegates to ``yes_no_default_answer`` with 'would' as the trailing argument
    (presumably the auxiliary verb -- confirm against that helper).
    """
    aux = 'would'
    return yes_no_default_answer(q, a, qtype, atype, aux)

def yes_no_will(q, a, qtype, atype):
    """Yes/no rule for "will" questions.

    Delegates to ``yes_no_default_answer`` with 'will' as the trailing argument
    (presumably the auxiliary verb -- confirm against that helper).
    """
    aux = 'will'
    return yes_no_default_answer(q, a, qtype, atype, aux)

def yes_no_how(q, a, qtype, atype):
    """Yes/no rule for "how" questions.

    Builds the candidate rule name ``'yes_no_' + q[0]`` and passes it, with
    ``yes_no_default_answer``, to ``try_other_function``.
    Raises ``IndexError`` when ``q`` is empty.
    """
    candidate = ''.join(('yes_no_', q[0]))
    return try_other_function(q, a, qtype, atype, candidate, yes_no_default_answer)

In [16]:
def create_long_answer(q, a, qtype, atype):
    """Expand a question and its short answer into a long-form answer.

    The rule function is selected by name: the (possibly overridden) answer
    type and the question type are tokenised with ``sentence2words`` and joined
    with underscores, e.g. 'yes/no' + 'is the' -> ``yes_no_is_the``.

    Args:
        q: question text (str).
        a: short answer text (str).
        qtype: annotated question type; 'none of the above' is blanked before
            the question is preprocessed.
        atype: annotated answer type; replaced by 'yes/no' when ``a`` is
            'yes' or 'no', and by 'number' when ``a`` is numeric.

    Returns:
        Whatever the selected rule returns (the rules visible in this notebook
        return lists of word tokens).

    Raises:
        NameError: if no callable rule with the derived name is defined.
    """
    # The answer text takes precedence over the annotated answer type.
    if a == 'yes' or a == 'no':
        atype = 'yes/no'
    if a.isnumeric():
        atype = 'number'
    func = '_'.join(sentence2words(atype + ' ' + qtype, map_person=False))
    qtype = '' if qtype == 'none of the above' else qtype
    q, a = preprocess_qa(q, a, qtype)
    # Resolve the rule by name instead of eval(): the name is built from dataset
    # fields, so it must never be executed as arbitrary Python source.
    handler = globals().get(func)
    if not callable(handler):
        raise NameError("name '{}' is not defined".format(func))
    return handler(q, a, qtype, atype)

## Training Dataset

In [17]:
# Build the training split: one long-answer annotation record and one question
# record per original VQA annotation (two separate passes, each with a progress bar).
vqa_anns = vqa.dataset["annotations"]

training_annotations = [
    dict(
        question_type=ann["question_type"],
        answers=list2str(
            create_long_answer(
                vqa.qqa[ann["question_id"]]["question"],
                ann["multiple_choice_answer"],
                ann["question_type"],
                ann["answer_type"],
            )
        ),
        image_id=ann["image_id"],
        answer_type=ann["answer_type"],
        question_id=ann["question_id"],
    )
    for ann in tqdm(vqa_anns)
]

training_questions = [
    dict(
        question_id=ann["question_id"],
        image_id=ann["image_id"],
        question=vqa.qqa[ann["question_id"]]["question"],
    )
    for ann in tqdm(vqa_anns)
]

100%|█████████████████████████████████| 443757/443757 [01:51<00:00, 3976.59it/s]
100%|██████████████████████████████| 443757/443757 [00:00<00:00, 1481397.69it/s]


In [18]:
# Spot-check one generated training annotation record.
training_annotations[10]

{'question_type': 'is the',
 'answers': 'Yes, the sky is blue.',
 'image_id': 393221,
 'answer_type': 'yes/no',
 'question_id': 393221000}

In [19]:
# Spot-check the question record at the same index.
training_questions[10]

{'question_id': 393221000, 'image_id': 393221, 'question': 'Is the sky blue?'}

## Validation Dataset

In [20]:
# Build the validation split: one long-answer annotation record and one question
# record per original VQA annotation (two separate passes, each with a progress bar).
vqa_val_anns = vqa_val.dataset["annotations"]

val_annotations = [
    dict(
        question_type=ann["question_type"],
        answers=list2str(
            create_long_answer(
                vqa_val.qqa[ann["question_id"]]["question"],
                ann["multiple_choice_answer"],
                ann["question_type"],
                ann["answer_type"],
            )
        ),
        image_id=ann["image_id"],
        answer_type=ann["answer_type"],
        question_id=ann["question_id"],
    )
    for ann in tqdm(vqa_val_anns)
]

val_questions = [
    dict(
        question_id=ann["question_id"],
        image_id=ann["image_id"],
        question=vqa_val.qqa[ann["question_id"]]["question"],
    )
    for ann in tqdm(vqa_val_anns)
]

100%|█████████████████████████████████| 214354/214354 [00:52<00:00, 4072.77it/s]
100%|██████████████████████████████| 214354/214354 [00:00<00:00, 1424709.60it/s]


In [24]:
def write_json(file_name, data):
    """Serialise ``data`` to ``file_name`` as UTF-8 JSON with 4-space indentation.

    Non-ASCII characters are written as-is (``ensure_ascii=False``). The parent
    directory is created when missing, so the export also works on a fresh
    checkout where the output folder does not exist yet.

    Args:
        file_name: destination path.
        data: any JSON-serialisable object.
    """
    parent_dir = os.path.dirname(file_name)
    if parent_dir:
        # open(..., 'w') does not create intermediate directories.
        os.makedirs(parent_dir, exist_ok=True)
    with open(file_name, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

In [26]:
# Export both splits (annotations and questions) as JSON files under ../data/new_data/.
write_json(file_name='../data/new_data/training_annotations.json',
           data=training_annotations)
write_json(file_name='../data/new_data/training_questions.json',
           data=training_questions)
write_json(file_name='../data/new_data/val_annotations.json',
           data=val_annotations)
write_json(file_name='../data/new_data/val_questions.json',
           data=val_questions)