In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision as tv
import torchvision.datasets as dset
import torchvision.transforms as T
from torchvision import tv_tensors  # we'll describe this a bit later, bear with us

import torchvision.datasets as datasets
from pathlib import Path

from torchview import draw_graph
from pathlib import Path

import constants
import dataset
import util
import json
import pandas as pd
import models 
from models import VQANet
from resumable_dataset import ResumableDataset

import matplotlib.pyplot as plt
import numpy as np
import time
import gc
from datetime import datetime

from transformers import AutoTokenizer
import traceback
import json

USE_GPU = True
dtype = torch.float32 # We will be using float throughout this tutorial.

# Choose the compute device. Accelerators (CUDA first, then Apple MPS) are only
# considered when USE_GPU is set, so flipping the flag reliably forces the CPU.
# `device` is always a torch.device instance, whichever branch is taken.
if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
elif USE_GPU and torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')


print('using device:', device)
    
import json


using device: mps


In [65]:
def read_answers_to_json(answers_file_path):
    """Parse the JSON file at `answers_file_path` and return its 'annotation' entry.

    Raises KeyError if the parsed document has no 'annotation' key.
    """
    # NOTE(review): the key read here is the singular 'annotation'; confirm this
    # matches the answer files being loaded.
    with open(answers_file_path, 'r') as handle:
        payload = json.load(handle)
    return payload['annotation']
    
def read_answers_to_pd(answers_file_path):
    """Load the answer records from `answers_file_path` and flatten them into a DataFrame."""
    annotation_records = read_answers_to_json(answers_file_path)
    return pd.json_normalize(annotation_records)
    
def read_questions_to_json(question_file_path):
    """Parse the JSON file at `question_file_path` and return its 'questions' entry.

    The path is echoed to stdout once the file has been opened.
    Raises KeyError if the parsed document has no 'questions' key.
    """
    with open(question_file_path, 'r') as handle:
        print (question_file_path)
        payload = json.load(handle)
    return payload['questions']
    
def read_questions_to_pd(question_file_path):
    """Load the question records from `question_file_path` and flatten them into a DataFrame."""
    question_records = read_questions_to_json(question_file_path)
    return pd.json_normalize(question_records)


def create_fake_answers(questions_pd, out_file_name = None, answer = "test"):
    """Build one placeholder answer record per question.

    Args:
        questions_pd: DataFrame with a 'question_id' column.
        out_file_name: optional file name; when given, the records are also
            written as JSON under `constants.TEST_OUTPUT`.
        answer: placeholder answer stored in every record (defaults to "test").

    Returns:
        A list of ``{"answer": ..., "question_id": ...}`` dicts, in the row
        order of `questions_pd`.
    """
    # Series.tolist() yields native Python scalars, so the records stay
    # JSON-serialisable even when the frame holds only integer columns (where
    # iterrows() would hand back numpy.int64), and it avoids building a Series
    # per row.
    result = [
        {"answer": answer, "question_id": qid}
        for qid in questions_pd['question_id'].tolist()
    ]

    if out_file_name is not None:
        with open(constants.TEST_OUTPUT.joinpath(out_file_name), 'w') as f:
            json.dump(result, f)
    return result

def merge_answers(original_answers_json, answers_to_override_json):
    """Combine two answer lists, letting the override list win on shared question ids.

    Both arguments are iterables of dicts with 'question_id' and 'answer' keys.
    The result lists every override question first (in first-seen order, with
    the last answer given for a repeated id), followed by the original
    questions that were not overridden.
    """
    # Overrides go in first; a repeated id keeps its position but takes the latest answer.
    answer_by_qid = {
        entry['question_id']: entry['answer'] for entry in answers_to_override_json
    }

    # Originals only fill the gaps the overrides left.
    for entry in original_answers_json:
        answer_by_qid.setdefault(entry['question_id'], entry['answer'])

    return [
        {'answer': answer, 'question_id': qid}
        for qid, answer in answer_by_qid.items()
    ]

In [16]:
# Load the test-standard question file into a DataFrame and dump it for inspection.
test_std = read_questions_to_pd(constants.VQA_OPEN_ENDED_QUESTION_STANDARD_TEST)
print(test_std)
#create_fake_answers(test_std, "fake_test_std.txt")

/Users/xiangyuliu/sources/scpd/cs231n/project/data/vqa/v2_OpenEnded_mscoco_test2017_questions.json
        image_id                                    question  question_id
0         262144      Is the ball flying towards the batter?    262144000
1         262144                         What sport is this?    262144001
2         262144                       Can you see the ball?    262144002
3         262144               Is the pitcher wearing a hat?    262144003
4         262144             Will he catch the ball in time?    262144004
...          ...                                         ...          ...
447788    262142     Is the tennis game being played inside?    262142001
447789    262142                         What sport is this?    262142002
447790    218453  What shape is the sign for the pizza shop?    218453000
447791    218453                     What does the sign say?    218453001
447792    218453                        What is on the sign?    218453002

[447793 rows

[{'answer': 'test', 'question_id': 262144000},
 {'answer': 'test', 'question_id': 262144001},
 {'answer': 'test', 'question_id': 262144002},
 {'answer': 'test', 'question_id': 262144003},
 {'answer': 'test', 'question_id': 262144004},
 {'answer': 'test', 'question_id': 262144005},
 {'answer': 'test', 'question_id': 1000},
 {'answer': 'test', 'question_id': 1001},
 {'answer': 'test', 'question_id': 1002},
 {'answer': 'test', 'question_id': 524292000},
 {'answer': 'test', 'question_id': 524292001},
 {'answer': 'test', 'question_id': 524292002},
 {'answer': 'test', 'question_id': 524292003},
 {'answer': 'test', 'question_id': 524292004},
 {'answer': 'test', 'question_id': 131079000},
 {'answer': 'test', 'question_id': 131079001},
 {'answer': 'test', 'question_id': 131079002},
 {'answer': 'test', 'question_id': 131083000},
 {'answer': 'test', 'question_id': 131083001},
 {'answer': 'test', 'question_id': 131083002},
 {'answer': 'test', 'question_id': 131083003},
 {'answer': 'test', 'questio

In [9]:
# Load the test-dev question file into a DataFrame and dump it for inspection.
test_dev = read_questions_to_pd(constants.VQA_OPEN_ENDED_QUESTION_DEV_TEST)
print(test_dev)


/Users/xiangyuliu/sources/scpd/cs231n/project/data/vqa/v2_OpenEnded_mscoco_test-dev2017_questions.json
        image_id                                           question  \
0         262144  What credit card company is on the banner in t...   
1         262144                      Is the pitcher wearing a hat?   
2         262144             Is the ball flying towards the batter?   
3         524289                     Are the horses playing a game?   
4         524289           What is the color of water in the image?   
...          ...                                                ...   
107389    406773           What is the giraffe resting its head on?   
107390    444850       Why is the woman standing next to the truck?   
107391    554649                              Is this a police van?   
107392    372707                Who is wearing their hat backwards?   
107393     92983              What is the featured fabric material?   

        question_id  
0         262144005  


In [68]:
dev_answers

{'0': 'none',
 '1': 'red',
 '2': 'wood',
 '3': 'tennis',
 '4': '2',
 '5': 'tennis',
 '6': 'yes',
 '7': 'wood',
 '8': 'tennis',
 '9': 'pizza',
 '10': 'white',
 '11': '2',
 '12': 'giraffe',
 '13': 'yes',
 '14': 'yes',
 '15': 'white',
 '16': 'yes',
 '17': 'yes',
 '18': 'white',
 '19': 'giraffe',
 '20': 'water',
 '21': 'standing',
 '22': 'giraffe',
 '23': 'white',
 '24': 'banana',
 '25': 'banana',
 '26': 'red',
 '27': 'train',
 '28': 'yes',
 '29': 'grass',
 '30': 'giraffe',
 '31': 'yes',
 '32': 'yes',
 '33': 'yes',
 '34': 'bear',
 '35': 'yes',
 '36': 'yes',
 '37': 'man',
 '38': 'nothing',
 '39': 'yes',
 '40': 'yes',
 '41': '2',
 '42': 'down',
 '43': '2',
 '44': 'yes',
 '45': 'dog',
 '46': 'yes',
 '47': 'laptop',
 '48': '2',
 '49': 'apple',
 '50': '2',
 '51': 'apple',
 '52': 'yes',
 '53': 'umbrella',
 '54': 'none',
 '55': '2',
 '56': 'giraffe',
 '57': '2',
 '58': 'yes',
 '59': 'on wall',
 '60': 'open',
 '61': 'yes',
 '62': 'stop',
 '63': 'yes',
 '64': '12 : 30',
 '65': 'yes',
 '66': 'stop',

In [69]:
fake_answers

[{'answer': 'test', 'question_id': 262144000},
 {'answer': 'test', 'question_id': 262144001},
 {'answer': 'test', 'question_id': 262144002},
 {'answer': 'test', 'question_id': 262144003},
 {'answer': 'test', 'question_id': 262144004},
 {'answer': 'test', 'question_id': 262144005},
 {'answer': 'test', 'question_id': 1000},
 {'answer': 'test', 'question_id': 1001},
 {'answer': 'test', 'question_id': 1002},
 {'answer': 'test', 'question_id': 524292000},
 {'answer': 'test', 'question_id': 524292001},
 {'answer': 'test', 'question_id': 524292002},
 {'answer': 'test', 'question_id': 524292003},
 {'answer': 'test', 'question_id': 524292004},
 {'answer': 'test', 'question_id': 131079000},
 {'answer': 'test', 'question_id': 131079001},
 {'answer': 'test', 'question_id': 131079002},
 {'answer': 'test', 'question_id': 131083000},
 {'answer': 'test', 'question_id': 131083001},
 {'answer': 'test', 'question_id': 131083002},
 {'answer': 'test', 'question_id': 131083003},
 {'answer': 'test', 'questio

In [73]:
# Read the prediction file for the test-dev questions into `dev_answers`.
# The parsed object is keyed by column name and then by row label (as a string),
# e.g. dev_answers['question_id']['0'].
with open(constants.TEST_OUTPUT.joinpath("test_None_model_shuffle_train-vqa_no_caption_Jun03_02-34-26.txt"), 'r') as f:
    dev_answers = json.load(f)

{'question_id': {'0': 1002,
  '1': 1001,
  '2': 1000,
  '3': 16000,
  '4': 16004,
  '5': 16002,
  '6': 57001,
  '7': 57005,
  '8': 57002,
  '9': 69001,
  '10': 69002,
  '11': 69000,
  '12': 80002,
  '13': 80001,
  '14': 80000,
  '15': 90007,
  '16': 90006,
  '17': 90004,
  '18': 90010,
  '19': 90003,
  '20': 90001,
  '21': 90005,
  '22': 90011,
  '23': 106001,
  '24': 106002,
  '25': 106000,
  '26': 108000,
  '27': 108001,
  '28': 108002,
  '29': 128011,
  '30': 128010,
  '31': 128002,
  '32': 128001,
  '33': 128004,
  '34': 155000,
  '35': 155002,
  '36': 155001,
  '37': 171006,
  '38': 171012,
  '39': 171005,
  '40': 171014,
  '41': 178002,
  '42': 178004,
  '43': 178000,
  '44': 180003,
  '45': 180000,
  '46': 180002,
  '47': 188000,
  '48': 188002,
  '49': 188001,
  '50': 188004,
  '51': 188003,
  '52': 202003,
  '53': 202002,
  '54': 202000,
  '55': 202001,
  '56': 229002,
  '57': 229000,
  '58': 229003,
  '59': 275002,
  '60': 275001,
  '61': 275000,
  '62': 276002,
  '63': 27600

In [84]:
# Convert the column-oriented predictions ({'question_id': {row: qid},
# 'answer': {row: answer}}) into a list of per-question records.
d_qids = dev_answers['question_id']
d_ans = dev_answers['answer']

print(len(d_qids))
print(d_qids["0"])
# Pair each row's question id with the answer stored under the same row label.
# The id must come from the row being visited (`qid`); the previous code
# appended `d_qid`, a name this cell never assigns.
result = [
    {'answer': d_ans[idx], 'question_id': qid}
    for idx, qid in d_qids.items()
]

107394
1002


In [86]:
len(result)

107394

In [88]:
# Placeholder answers for every test-standard question ...
fake_answers = create_fake_answers(test_std)

#with open(constants.TEST_OUTPUT.joinpath("test_None_model_shuffle_train-vqa_no_caption_Jun03_02-34-26.txt"), 'r') as f:
#    dev_answers = json.load(f)

# ... overridden by the model's predictions (`result`) wherever one exists.
merged_dev_answers = merge_answers(fake_answers, result)

In [89]:
len(merged_dev_answers)

447793

In [90]:
# Write the merged answer list (predictions plus placeholders) as JSON under TEST_OUTPUT.
# NOTE(review): the bare len(...) below is not the last expression of the cell, so it displays nothing.
len(merged_dev_answers)
outfile_name = 'test_None_model_shuffle_train-vqa_no_caption_Jun03_02-34-26_all_test_std_questions.txt'


with open(constants.TEST_OUTPUT.joinpath(outfile_name), 'w+') as f:
    json.dump(merged_dev_answers, f)

In [11]:
import json

def store_as_json(file_name):
    """Re-save a prediction dump from TEST_OUTPUT as plain JSON in '<file_name>.txt'.

    The input may be double-encoded (a JSON document whose top-level value is
    itself a JSON string) or already plain JSON; both are accepted.

    Args:
        file_name: name of the input file, relative to `constants.TEST_OUTPUT`.
    """
    with open(constants.TEST_OUTPUT.joinpath(file_name), 'r') as f:
        data = json.load(f)
    # Only decode a second time when the first pass produced a string;
    # json.loads raises TypeError when handed an already-decoded list or dict.
    if isinstance(data, str):
        data = json.loads(data)
    with open(constants.TEST_OUTPUT.joinpath(file_name + '.txt'), 'w+') as f2:
        json.dump(data, f2)

#store_as_json("test_None_model_shuffle_train-vqa_with_caption_Jun02_17-27-12-epoch-0-batch-1949")

In [36]:
# Load the raw test-dev question document; len() here counts its top-level keys,
# not the number of questions (those live under questions['questions']).
with open(constants.VQA_OPEN_ENDED_QUESTION_DEV_TEST, 'r') as f:
    print (constants.VQA_OPEN_ENDED_QUESTION_DEV_TEST)
    questions = json.load(f)
    print(len(questions))


/Users/xiangyuliu/sources/scpd/cs231n/project/data/vqa/v2_OpenEnded_mscoco_test-dev2017_questions.json
6


In [37]:
print(len(questions['questions']))
print(questions['questions'][0])


107394
{'image_id': 262144, 'question': 'What credit card company is on the banner in the background?', 'question_id': 262144005}


In [59]:
# Load the raw prediction dump (file without the '.txt' suffix) and report how many
# entries it holds.
with open(constants.TEST_OUTPUT.joinpath("test_None_model_shuffle_train-vqa_no_caption_Jun03_02-34-26"), 'r') as f:
    answers = json.load(f)

    print(len(answers))


190056


In [60]:
# Flatten the predictions and the test-dev questions into DataFrames for comparison.
answers_pd = pd.json_normalize(answers)
questions_pd= pd.json_normalize(questions['questions'])

In [61]:
# Compare total vs. distinct question ids on both sides; a gap between the two
# numbers for answers_pd means some questions were answered more than once.
print(len(questions_pd['question_id']))
print(len(questions_pd['question_id'].unique()))

print(len(answers_pd['question_id']))
print(len(answers_pd['question_id'].unique()))


107394
107394
190056
107394


In [43]:
# Questions that have no prediction: keep rows whose question_id never appears
# in the answers frame.
answered_qids = answers_pd['question_id'].tolist()
print("qis len", len(answered_qids))
missing = questions_pd[~questions_pd['question_id'].isin(answered_qids)]
print(missing)

qis len 142727
        image_id                                          question  \
86653     167101                                Is this dangerous?   
86655     526051  Was the elephant born with face color like that?   
86661      27059                           Is the man's nose huge?   
86670     225858                  Are there magnets on the fridge?   
86672      78904            What airline is named on the building?   
...          ...                                               ...   
107370    220918   What is on top of the mountain in the distance?   
107371    436737                         Are the boys moving fast?   
107374    412561                             What color is the bu?   
107378    199033                     What letter in on his helmet?   
107379    392054                  What happened to the man's kite?   

        question_id  
86653     167101001  
86655     526051003  
86661      27059004  
86670     225858000  
86672      78904000  
...         

In [58]:
# Reverse check: predictions whose question_id is not in the question set.
# NOTE: this rebinds `missing`, replacing the frame of unanswered questions.
questions_qids = questions_pd['question_id'].tolist()
print("question qis len", len(questions_qids))
missing = answers_pd[~answers_pd['question_id'].isin(questions_qids)]
print(missing)

question qis len 107394
Empty DataFrame
Columns: [question_id, answer]
Index: []


In [62]:
# Keep one prediction per question_id and write the result to the '.txt' file.
# to_json is called without `orient`, so pandas uses its DataFrame default
# (column-oriented: {column: {row_label: value}}) rather than a list of records.
unique_answers_pd = answers_pd.drop_duplicates(subset=['question_id'])
unique_answers_pd.to_json(path_or_buf=constants.TEST_OUTPUT.joinpath(
    'test_None_model_shuffle_train-vqa_no_caption_Jun03_02-34-26.txt'))

In [63]:
with open(constants.TEST_OUTPUT.joinpath("test_None_model_shuffle_train-vqa_no_caption_Jun03_02-34-26.txt"), 'r') as f:
    answers = json.load(f)



In [64]:
questions_pd[questions_pd['question_id'] == 433181000]

Unnamed: 0,image_id,question,question_id
147,433181,Is the grass green?,433181000


In [32]:
# Load the training-set answer annotations into a DataFrame and dump it for inspection.
ans_train_pd = read_answers_to_pd(constants.VQA_OPEN_ENDED_ANSWER_TRAIN)

print(ans_train_pd)

            question_type    multiple_choice_answer  \
0            what is this                       net   
1                    what                   pitcher   
2       what color is the                    orange   
3                 is this                       yes   
4       what color is the                     white   
...                   ...                       ...   
443752  what color is the                     black   
443753         is there a                        no   
443754  what color is the                     black   
443755                why  one is easier to type on   
443756          is that a                       yes   

                                                  answers  image_id  \
0       [{'answer': 'net', 'answer_confidence': 'maybe...    458752   
1       [{'answer': 'pitcher', 'answer_confidence': 'y...    458752   
2       [{'answer': 'orange', 'answer_confidence': 'ye...    458752   
3       [{'answer': 'yes', 'answer_confidence': 'yes',.

In [33]:
ans_train_pd['answer_type'].unique()

array(['other', 'yes/no', 'number'], dtype=object)

In [34]:
# Split the training answers by answer_type and report the size of each subset.
yn = ans_train_pd[ans_train_pd['answer_type'] == 'yes/no']
other = ans_train_pd[ans_train_pd['answer_type'] == 'other']
number = ans_train_pd[ans_train_pd['answer_type'] == 'number']

print("ans_train_pd", len(ans_train_pd))
print("yn", len(yn))
print("other", len(other))
print("number", len(number))


ans_train_pd 443757
yn 166882
other 219269
number 57606


In [31]:
train = dataset.Coco()



Downloading split 'train' to '/Users/xiangyuliu/sources/fiftyone_dataset_zoo/coco-2017/train' if necessary
Found annotations at '/Users/xiangyuliu/sources/fiftyone_dataset_zoo/coco-2017/raw/instances_train2017.json'
Images already downloaded
Existing download of split 'train' is sufficient
Loading existing dataset 'coco-2017-train'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use
<imagedata.ImageData object at 0x35ce58e30>


In [33]:

first = train.__getitem__(1)
print(first.annotations)

{'captions': ['A giraffe eating food from the top of the tree.', 'A giraffe standing up nearby a tree ', 'A giraffe mother with its baby in the forest.', 'Two giraffes standing in a tree filled area.', 'A giraffe standing next to a forest filled with trees.'], 'qa': ['[QUESTION] what is in front of the giraffes? [ANSWER] tree [END]', '[QUESTION] what do these giraffes have in common? [ANSWER] eating [END]', '[QUESTION] could this photo be from a zoo? [ANSWER] yes [END]', '[QUESTION] are the animals eating? [ANSWER] yes [END]', '[QUESTION] where is the giraffe? [ANSWER] near tree [END]', '[QUESTION] is there a zebra? [ANSWER] no [END]', '[QUESTION] what is the giraffe standing behind? [ANSWER] tree [END]', '[QUESTION] is the giraffe eating the tree? [ANSWER] yes [END]', '[QUESTION] are both giraffes standing? [ANSWER] no [END]', '[QUESTION] are they at a zoo? [ANSWER] yes [END]', '[QUESTION] what is on the ground next to the giraffe on the right? [ANSWER] log [END]', '[QUESTION] are som

In [126]:
start = 0
def sample(json_object, length):
    """Print ``question_id->answer`` for `length` consecutive records.

    Reading begins at the module-level ``start`` offset, which must be defined
    before this function is called.
    """
    for offset in range(length):
        record = json_object[start + offset]
        print(f"{record['question_id']}->{record['answer']}")

def get_merged_dataframe(str_file):
    """Join a prediction dump with the validation answers and questions.

    Reads `str_file` from `constants.TEST_OUTPUT`, renames its 'answer' column
    to 'predicted_answer', and inner-joins it with `val.answers` (on
    question_id) and `val.questions` (on question_id and image_id). Relies on
    the module-level `val` object being defined.

    Args:
        str_file: prediction file name, relative to `constants.TEST_OUTPUT`.
            The file may hold plain JSON or double-encoded JSON (a JSON string
            that itself contains the JSON document).

    Returns:
        DataFrame with one row per predicted question found in both `val` frames.
    """
    with open(constants.TEST_OUTPUT.joinpath(str_file), 'r') as f:
        data = json.load(f)
    # Decode a second time only when the first pass produced a string;
    # json.loads raises TypeError on an already-decoded list.
    if isinstance(data, str):
        data = json.loads(data)
    print("load json",len(data))

    prediction = pd.json_normalize(data).rename(columns={"answer": "predicted_answer"})
    qids = prediction['question_id'].tolist()

    # Narrow both validation frames to the predicted questions before joining.
    real_answers_pd = val.answers
    filtered_real_answers = real_answers_pd[real_answers_pd['question_id'].isin(qids)]
    filtered_questions = val.questions[val.questions['question_id'].isin(qids)]

    result = pd.merge(filtered_real_answers, prediction, on="question_id")
    result = pd.merge(result, filtered_questions, on=["question_id", "image_id"])
    return result
    
def accuracy_df(df):
    """Print exact-match accuracy and return the rows where the prediction is wrong.

    Args:
        df: DataFrame with 'predicted_answer' and 'multiple_choice_answer' columns.

    Returns:
        The subset of `df` whose predicted answer differs from the reference
        answer (plain string inequality, so formatting differences count).
    """
    total = len(df)
    print("total questions:", total)
    diffs = df[df['predicted_answer'] != df['multiple_choice_answer']]
    print("diff answers:", len(diffs))
    # With no rows there is nothing to score; report nan instead of dividing by zero.
    accuracy = (1.- len(diffs)/total) * 100. if total else float('nan')
    print("accuracy:", accuracy, "%")
    print(diffs)
    return diffs

In [127]:
# Score the first validation prediction dump.
# accuracy_df already prints the mismatching rows, so the final print repeats them.
json_str = 'val_model_train-vqa_with_caption_Jun02_17-27-12-epoch-0-batch-1949'

diff_first_64 = get_merged_dataframe(json_str)
diff = accuracy_df(diff_first_64)
print(diff)


load json 364
total questions: 364
diff answers: 0
accuracy: 100.0 %
Empty DataFrame
Columns: [image_id, multiple_choice_answer, question_id, predicted_answer, question]
Index: []
Empty DataFrame
Columns: [image_id, multiple_choice_answer, question_id, predicted_answer, question]
Index: []


In [128]:
# `start` is the module-level offset that sample() reads.
start = 360
print("val len:", len(val.questions))
diff_first_320 = get_merged_dataframe('val_first_50_model_train-vqa_with_caption_Jun02_17-27-12-epoch-0-batch-1949')
# NOTE: despite its name, `accuracy` receives the DataFrame of mismatching rows
# returned by accuracy_df, not a number.
accuracy = accuracy_df(diff_first_320)

val len: 214354
load json 8303
total questions: 8303
diff answers: 72
accuracy: 99.1328435505239 %
      image_id multiple_choice_answer  question_id predicted_answer  \
268     131431                   6:56    131431004           6 : 56   
270     131431                   6:55    131431006           6 : 55   
271     131431                   6:55    131431007           6 : 55   
530     135890                  10:25    135890004          10 : 25   
844       7888                   6:55      7888000           6 : 55   
...        ...                    ...          ...              ...   
7773    120584                   7:22    120584001           7 : 22   
7776    120584                   7:23    120584004           7 : 23   
7916    124442             06/07/2006    124442003   06 / 07 / 2006   
7984    125572                  10:45    125572002          10 : 45   
8130    130465                   3:55    130465003           3 : 55   

                                     question  


In [129]:
# Score the 'val_200' with-caption prediction dump.
# NOTE: `result` is rebound to a file name here, and `diff` first holds the merged
# frame and then the mismatching rows.
result = 'val_200_model_shuffle_train-vqa_with_caption_Jun02_17-27-12-epoch-0-batch-1949'
diff = get_merged_dataframe(result)
diff = accuracy_df(diff)
print(diff)

load json 26333
total questions: 26333
diff answers: 187
accuracy: 99.28986442866365 %
       image_id multiple_choice_answer  question_id predicted_answer  \
490      526197                    .25    526197006             . 25   
504      526256                   1:10    526256000           1 : 10   
542      131431                   6:56    131431004           6 : 56   
544      131431                   6:55    131431006           6 : 55   
545      131431                   6:55    131431007           6 : 55   
...         ...                    ...          ...              ...   
26081    391648                   9:30    391648006           9 : 30   
26128    522940                  4-way    522940005          4 - way   
26188    392228                   5:36    392228000           5 : 36   
26229    392722             07.07.2013    392722001     07. 07. 2013   
26305    393014                   5.00    393014001            5. 00   

                                           quest

In [125]:
# Score the 'val_200' no-caption prediction dump (same steps as the with-caption run).
result = 'val_200_model_shuffle_train-vqa_no_caption_Jun03_02-34-26'
diff = get_merged_dataframe(result)
diff = accuracy_df(diff)
print(diff)

total questions: 26333
diff answers: 187
accuracy: 99.28986442866365 %
       image_id multiple_choice_answer  question_id predicted_answer  \
490      526197                    .25    526197006             . 25   
504      526256                   1:10    526256000           1 : 10   
542      131431                   6:56    131431004           6 : 56   
544      131431                   6:55    131431006           6 : 55   
545      131431                   6:55    131431007           6 : 55   
...         ...                    ...          ...              ...   
26081    391648                   9:30    391648006           9 : 30   
26128    522940                  4-way    522940005          4 - way   
26188    392228                   5:36    392228000           5 : 36   
26229    392722             07.07.2013    392722001     07. 07. 2013   
26305    393014                   5.00    393014001            5. 00   

                                           question  
490       

In [135]:
# Score the 'val_1000' no-caption prediction dump (same steps as the runs above).
result = 'val_1000_model_shuffle_train-vqa_no_caption_Jun03_02-34-26'
diff = get_merged_dataframe(result)
diff = accuracy_df(diff)
print(diff)

load json 26333
total questions: 26333
diff answers: 187
accuracy: 99.28986442866365 %
       image_id multiple_choice_answer  question_id predicted_answer  \
490      526197                    .25    526197006             . 25   
504      526256                   1:10    526256000           1 : 10   
542      131431                   6:56    131431004           6 : 56   
544      131431                   6:55    131431006           6 : 55   
545      131431                   6:55    131431007           6 : 55   
...         ...                    ...          ...              ...   
26081    391648                   9:30    391648006           9 : 30   
26128    522940                  4-way    522940005          4 - way   
26188    392228                   5:36    392228000           5 : 36   
26229    392722             07.07.2013    392722001     07. 07. 2013   
26305    393014                   5.00    393014001            5. 00   

                                           quest

In [134]:
print(diff.to_string())

       image_id           multiple_choice_answer  question_id                    predicted_answer                                                question
490      526197                              .25    526197006                                . 25                                How much are the donuts?
504      526256                             1:10    526256000                              1 : 10                          What time does the clock read?
542      131431                             6:56    131431004                              6 : 56                                        What time is it?
544      131431                             6:55    131431006                              6 : 55                           What time does the clock say?
545      131431                             6:55    131431007                              6 : 55                          What time does the clock read?
618      395801                             6:00    395801007               

In [4]:
print(data)

[{"question_id": 139000, "answer": "talking"}, {"question_id": 139001, "answer": "1"}, {"question_id": 139002, "answer": "brown"}, {"question_id": 139003, "answer": "pink"}, {"question_id": 285000, "answer": "yes"}, {"question_id": 285001, "answer": "yes"}, {"question_id": 285002, "answer": "yes"}, {"question_id": 632000, "answer": "picture"}, {"question_id": 632001, "answer": "bedroom"}, {"question_id": 632002, "answer": "no"}, {"question_id": 632003, "answer": "no"}, {"question_id": 632004, "answer": "0"}, {"question_id": 632005, "answer": "reading"}, {"question_id": 632006, "answer": "yes"}, {"question_id": 632007, "answer": "none"}, {"question_id": 632008, "answer": "no"}, {"question_id": 632009, "answer": "books"}, {"question_id": 632010, "answer": "white"}, {"question_id": 632011, "answer": "bed"}, {"question_id": 632012, "answer": "corner"}, {"question_id": 632013, "answer": "no"}, {"question_id": 724000, "answer": "yes"}, {"question_id": 724001, "answer": "yes"}, {"question_id"

In [80]:
# Score the 'val_first_50' no-caption prediction dump.
# NOTE: `accuracy_no_caption` receives the mismatching rows returned by accuracy_df.
no_caption = "val_first_50_model_train-vqa_no_caption_Jun03_02-34-26"
diff_first_320_no_caption = get_merged_dataframe(no_caption)
accuracy_no_caption = accuracy_df(diff_first_320_no_caption)

accuracy: 1.0%


In [136]:
# Load the pretrained BERT (uncased) tokenizer.
tokenizer  = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
# Add the Q and A token as special token
# (add_special_tokens returns the number of tokens added, which the cell displays).
tokenizer.add_special_tokens(constants.QA_TOKEN_DICT)




4

In [142]:
# Round-trip a few answers through the tokenizer to show how encode + decode
# changes them: punctuation comes back surrounded by spaces (".25" -> ". 25"),
# which is the pattern seen in the mismatching predictions above.
for s in [".25", "6:56", "keep elephants out/in"]:
    tokens = tokenizer(s)
    decoded = tokenizer.batch_decode([tokens["input_ids"]], skip_special_tokens = True)
    print(s)
    print(decoded[0])

.25
. 25
6:56
6 : 56
keep elephants out/in
keep elephants out / in


In [None]:
# with open(constants.CAPTION_TRAIN, 'r') as f:
#     data = json.load(f)
#     print(data.keys())
#     print(data["annotations"][0])

# with open(constants.VQA_OPEN_ENDED_QUESTION_TRAIN, 'r') as f:
#     data = json.load(f)
#     print(data.keys())
#     print(data["questions"][0])

# with open(constants.VQA_OPEN_ENDED_ANSWER_TRAIN, 'r') as f:
#     data = json.load(f)
#     print(data.keys())
#     print(data["annotations"][0])
    
# with open(constants.CAPTION_VAL, 'r') as f:
#     data = json.load(f)
#     print(data.keys())

# with open(constants.VQA_OPEN_ENDED_QUESTION_VAL, 'r') as f:
#     data = json.load(f)
#     print(data.keys())

# with open(constants.VQA_OPEN_ENDED_ANSWER_VAL, 'r') as f:
#     data = json.load(f)
#     print(data.keys())

#dataset.load(constants.VQA_OPEN_ENDED_QUESTION_TRAIN, ['image_id', 'id', 'caption'])

In [None]:
train = dataset.Coco()
#val = dataset.Coco("validation")
#test = dataset.Coco("test")

In [None]:
print(len(train))
print(len(train.captions))

In [None]:
train.dataset.values

In [None]:
train.dataset.values("ground_truth")

In [None]:
# Disabled debugging aid: inspects a single item of the training dataset.
# NOTE(review): `show` is not defined or imported anywhere in the visible notebook;
# confirm where it comes from before re-enabling this block.
if False: # debug
    img = train.__getitem__(1)
    print(img)
    print(img.image_id)
    print(img.image_path)

    print(">>>>")
    print(img.captions())

    print(">>>>")
    print(img.qa())
    print("shape", img.image_tensor().shape)

    show([img.image_tensor()])

#plt.imshow(  img.image_tensor().permute(1, 2, 0)  )


In [None]:
tokenizer  = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
# Add the Q and A token as special token
tokenizer.add_special_tokens(constants.QA_TOKEN_DICT)