In [2]:
import os

Data folder structure
--

In [3]:
# Root folder of the VQA v2 / COCO 2014 files, relative to this notebook.
dataDir ='../../data2'
# List its contents to check which annotation/question files and image folders are present.
os.listdir(dataDir)

['v2_mscoco_val2014_complementary_pairs.json',
 'v2_Questions_Val_mscoco.zip',
 'v2_mscoco_val2014_annotations.json',
 'v2_OpenEnded_mscoco_val2014_questions.json',
 'v2_mscoco_train2014_annotations.json',
 'annotations',
 'train2014',
 'v2_OpenEnded_mscoco_train2014_questions.json',
 'v2_Questions_Train_mscoco.zip',
 'v2_mscoco_train2014_complementary_pairs.json',
 'val2014']

Notes on data folder
--
- `annotations`: not required, only for image captioning
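- `train2014` and `val2014`: image folders; files are named `COCO_<subtype>_<12-digit zero-padded image id>.jpg`, e.g. `COCO_val2014_000000059710.jpg`
- `v2_Questions_Train_mscoco.zip` and `v2_Questions_Val_mscoco.zip`: unzip these to obtain the corresponding `v2_OpenEnded_mscoco_*_questions.json` files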

In [4]:
from vqaTools.vqa import VQA

# Settings that select which VQA split to load; the file paths below are built from them.
dataDir ='../../data2'
versionType ='v2_' # filename prefix of the VQA v2.0 files (see the folder listing); this should be '' when using VQA v1.0 dataset
taskType ='OpenEnded' # 'OpenEnded' only for v2.0. 'OpenEnded' or 'MultipleChoice' for v1.0
dataType ='mscoco'  # 'mscoco' only for v1.0. 'mscoco' for real and 'abstract_v002' for abstract for v1.0.
dataSubType ='train2014'
# Resolves to <dataDir>/v2_mscoco_train2014_annotations.json
annFile ='{}/{}{}_{}_annotations.json'.format(dataDir, versionType, dataType, dataSubType)
# Resolves to <dataDir>/v2_OpenEnded_mscoco_train2014_questions.json
quesFile ='{}/{}{}_{}_{}_questions.json'.format(dataDir, versionType, taskType, dataType, dataSubType)
# Image folder for the chosen split (note the trailing slash).
imgDir = '{}/{}/'.format(dataDir, dataSubType)


In [8]:
# initialize VQA api for QA annotations
# (loads the annotation and question files into memory and builds an index;
#  this took ~7 s in the recorded run below)
vqa = VQA(annFile, quesFile)

loading VQA annotations and questions into memory...
0:00:07.453698
creating index...
index created!


Questions
--

### Structure
```
{
"info" : info,
"task_type" : str,
"data_type": str,
"data_subtype": str,
"questions" : [question],
"license" : license
}

info {
"year" : int,
"version" : str,
"description" : str,
"contributor" : str,
"url" : str,
"date_created" : datetime
}

license{
"name" : str,
"url" : str
}

question{
"question_id" : int,
"image_id" : int,
"question" : str
}
```

### Accessing keys

In [36]:
# Top-level keys of the loaded questions file (iterating a dict yields its keys).
list(vqa.questions)

['info', 'task_type', 'data_type', 'license', 'data_subtype', 'questions']

### `question` dict 

In [37]:
# Peek at the first three question records. Slice the list directly instead of
# copying the whole (very large) question list with list(...) first.
vqa.questions['questions'][:3]

[{'image_id': 458752,
  'question': 'What is this photo taken looking through?',
  'question_id': 458752000},
 {'image_id': 458752,
  'question': 'What position is this man playing?',
  'question_id': 458752001},
 {'image_id': 458752,
  'question': 'What color is the players shirt?',
  'question_id': 458752002}]

#### Accessing items in `question` dict by `question_id`

In [41]:
# `vqa.qqa` is indexed by question_id and returns the matching question record.
vqa.qqa[458752000]

{'image_id': 458752,
 'question': 'What is this photo taken looking through?',
 'question_id': 458752000}

Annotation
--

### Structure

```
{
"info" : info,
"data_type": str,
"data_subtype": str,
"annotations" : [annotation],
"license" : license
}

info {
"year" : int,
"version" : str,
"description" : str,
"contributor" : str,
"url" : str,
"date_created" : datetime
}

license{
"name" : str,
"url" : str
}

annotation{
"question_id" : int,
"image_id" : int,
"question_type" : str,
"answer_type" : str,
"answers" : [answer],
"multiple_choice_answer" : str
}

answer{
"answer_id" : int,
"answer" : str,
"answer_confidence": str
}
```

### Accessing keys

In [38]:
# Top-level keys of the loaded annotations file (iterating a dict yields its keys).
list(vqa.dataset)

['info', 'license', 'data_subtype', 'annotations', 'data_type']

### `annotation` dict 

In [40]:
# Peek at the first two annotation records. Slice the list directly instead of
# copying the whole (very large) annotation list with list(...) first.
vqa.dataset['annotations'][:2]

[{'question_type': 'what is this',
  'multiple_choice_answer': 'net',
  'answers': [{'answer': 'net', 'answer_confidence': 'maybe', 'answer_id': 1},
   {'answer': 'net', 'answer_confidence': 'yes', 'answer_id': 2},
   {'answer': 'net', 'answer_confidence': 'yes', 'answer_id': 3},
   {'answer': 'netting', 'answer_confidence': 'yes', 'answer_id': 4},
   {'answer': 'net', 'answer_confidence': 'yes', 'answer_id': 5},
   {'answer': 'net', 'answer_confidence': 'yes', 'answer_id': 6},
   {'answer': 'mesh', 'answer_confidence': 'maybe', 'answer_id': 7},
   {'answer': 'net', 'answer_confidence': 'yes', 'answer_id': 8},
   {'answer': 'net', 'answer_confidence': 'yes', 'answer_id': 9},
   {'answer': 'net', 'answer_confidence': 'yes', 'answer_id': 10}],
  'image_id': 458752,
  'answer_type': 'other',
  'question_id': 458752000},
 {'question_type': 'what',
  'multiple_choice_answer': 'pitcher',
  'answers': [{'answer': 'pitcher',
    'answer_confidence': 'yes',
    'answer_id': 1},
   {'answer': 'c

#### Accessing items in `annotation` dict by `question_id` 

In [43]:
# `vqa.qa` is indexed by question_id and returns the matching annotation record
# (question/answer types plus the collected answers).
vqa.qa[458752000]

{'question_type': 'what is this',
 'multiple_choice_answer': 'net',
 'answers': [{'answer': 'net', 'answer_confidence': 'maybe', 'answer_id': 1},
  {'answer': 'net', 'answer_confidence': 'yes', 'answer_id': 2},
  {'answer': 'net', 'answer_confidence': 'yes', 'answer_id': 3},
  {'answer': 'netting', 'answer_confidence': 'yes', 'answer_id': 4},
  {'answer': 'net', 'answer_confidence': 'yes', 'answer_id': 5},
  {'answer': 'net', 'answer_confidence': 'yes', 'answer_id': 6},
  {'answer': 'mesh', 'answer_confidence': 'maybe', 'answer_id': 7},
  {'answer': 'net', 'answer_confidence': 'yes', 'answer_id': 8},
  {'answer': 'net', 'answer_confidence': 'yes', 'answer_id': 9},
  {'answer': 'net', 'answer_confidence': 'yes', 'answer_id': 10}],
 'image_id': 458752,
 'answer_type': 'other',
 'question_id': 458752000}

In [23]:
# Fetch several annotation records at once by passing a list of question_ids.
vqa.loadQA([458752000, 458752001, 458752002])

[{'question_type': 'what is this',
  'multiple_choice_answer': 'net',
  'answers': [{'answer': 'net', 'answer_confidence': 'maybe', 'answer_id': 1},
   {'answer': 'net', 'answer_confidence': 'yes', 'answer_id': 2},
   {'answer': 'net', 'answer_confidence': 'yes', 'answer_id': 3},
   {'answer': 'netting', 'answer_confidence': 'yes', 'answer_id': 4},
   {'answer': 'net', 'answer_confidence': 'yes', 'answer_id': 5},
   {'answer': 'net', 'answer_confidence': 'yes', 'answer_id': 6},
   {'answer': 'mesh', 'answer_confidence': 'maybe', 'answer_id': 7},
   {'answer': 'net', 'answer_confidence': 'yes', 'answer_id': 8},
   {'answer': 'net', 'answer_confidence': 'yes', 'answer_id': 9},
   {'answer': 'net', 'answer_confidence': 'yes', 'answer_id': 10}],
  'image_id': 458752,
  'answer_type': 'other',
  'question_id': 458752000},
 {'question_type': 'what',
  'multiple_choice_answer': 'pitcher',
  'answers': [{'answer': 'pitcher',
    'answer_confidence': 'yes',
    'answer_id': 1},
   {'answer': 'c

In [44]:
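# Disabled example: pick a random 'how many' question, show it, and build its image filename.
# NOTE(review): uncommenting this requires `import random`, which none of the cells shown here import.
# annIds = vqa.getQuesIds(quesTypes='how many');   
# anns = vqa.loadQA(annIds)
# anns
# randomAnn = random.choice(anns)
# vqa.showQA([randomAnn])
# imgId = randomAnn['image_id']
# imgFilename = 'COCO_' + dataSubType + '_'+ str(imgId).zfill(12) + '.jpg'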

In [4]:
import torch
from torch.utils.data import Dataset, DataLoader

In [None]:
class COCODataset(Dataset):
    """Skeleton ``torch.utils.data.Dataset`` over a COCO image folder and its annotations.

    Only the constructor is functional so far: it records the two directory
    paths. Item access and length are not written yet and raise
    ``NotImplementedError`` so that accidental use fails loudly instead of
    silently yielding ``None``.
    """

    def __init__(self, imgdir, anndir):
        """
        Args:
            imgdir: image directory
            anndir: COCO annotations directory
        """
        self.imgdir = imgdir
        self.anndir = anndir

    def __getitem__(self, index):
        """
        Intended to return an (image, caption) pair for ``index``.

        Raises:
            NotImplementedError: always, until loading is implemented.
        """
        # TODO: load the image from self.imgdir and its caption from self.anndir.
        raise NotImplementedError("COCODataset.__getitem__ is not implemented yet")

    def __len__(self):
        """
        Intended to return the number of samples in the dataset.

        Raises:
            NotImplementedError: always, until the annotation index is implemented.
        """
        # TODO: return the number of (image, caption) pairs once annotations are indexed.
        raise NotImplementedError("COCODataset.__len__ is not implemented yet")

In [7]:
# Sanity check: print the path of the Python interpreter this kernel runs on,
# to confirm the expected environment is active.
import sys
print(sys.executable)

/home/ubuntu/anaconda3/envs/AI_Proj/bin/python
