# **Setup**
Install dependencies, clone the repo, and check the PyTorch version and GPU.

In [None]:
!pip install --upgrade gensim==4.0.0b0
!pip install opencv-python

Collecting gensim==4.0.0b0
[?25l  Downloading https://files.pythonhosted.org/packages/16/4a/c529159de5a417eb2f574941ccd9f937a47cafffaf1a3e485c6e2a8a4153/gensim-4.0.0b0-cp36-cp36m-manylinux1_x86_64.whl (24.0MB)
[K     |████████████████████████████████| 24.0MB 133kB/s 
Installing collected packages: gensim
  Found existing installation: gensim 3.6.0
    Uninstalling gensim-3.6.0:
      Successfully uninstalled gensim-3.6.0
Successfully installed gensim-4.0.0b0


In [None]:
!git clone https://github.com/glad-gradient/visual_qa.git

Cloning into 'visual_qa'...
remote: Enumerating objects: 181, done.[K
remote: Counting objects: 100% (181/181), done.[K
remote: Compressing objects: 100% (116/116), done.[K
remote: Total 181 (delta 105), reused 132 (delta 59), pack-reused 0[K
Receiving objects: 100% (181/181), 27.76 KiB | 5.55 MiB/s, done.
Resolving deltas: 100% (105/105), done.


In [None]:
import sys
sys.path.append('visual_qa/')

from IPython.display import clear_output

import json
from glob import glob
import logging
import os
import time
import matplotlib.pyplot as plt
from multiprocessing import cpu_count

import numpy as np
from gensim import downloader
import torch
import torchvision
import cv2
import nltk
nltk.download('popular')
from nltk.tokenize import word_tokenize
from google.colab import drive


from visual_qa.utils.build_vocabs import Vocabulary, AnswerVocabulary
from visual_qa.utils.data_preparation import DataGenerator
from visual_qa.model import ImageEncoder, QuestionEncoder, VisualQAModel
from visual_qa.utils.misc import configs
from visual_qa.utils.enums import Modes

# Wipe the noisy import/download output before printing the summary line.
clear_output()

# Report the properties of CUDA device 0 when a GPU is visible, otherwise the literal 'CPU'.
if torch.cuda.is_available():
    device_info = torch.cuda.get_device_properties(0)
else:
    device_info = 'CPU'

print('Setup complete. Using torch %s %s' % (torch.__version__, device_info))

Setup complete. Using torch 1.7.0+cu101 _CudaDeviceProperties(name='Tesla P100-PCIE-16GB', major=6, minor=0, total_memory=16280MB, multi_processor_count=56)


In [None]:
# mount Google Drive
drive.mount('/content/gdrive')

!ln -s "/content/gdrive/My Drive" "/content/mydrive"

Mounted at /content/gdrive


In [None]:
# Local (ephemeral) locations: downloaded dataset and the config file shipped in the cloned repo.
DATA_DIR = '/content/data'
CONFIG_FILE = '/content/visual_qa/configs.json'

# Project root reached through the /content/mydrive symlink; checkpoints go in its `models` folder.
ROOT_DIR_PATH = "/content/mydrive/Projects/VQA"
CHECKPOINT_DIR = '{}/{}'.format(ROOT_DIR_PATH, 'models')

print('***** Model checkpoint directory: {} *****'.format(CHECKPOINT_DIR))

***** Model checkpoint directory: /content/mydrive/Projects/VQA/models *****


In [None]:
! mkdir logs
! mkdir data
! mkdir data/images
! mkdir data/questions
! mkdir data/answers

# **Download data**

In [None]:
def download_vqa(data_dir, load_train=True, load_valid=True, load_test=False):
    """Download and extract the COCO images and the VQA v2 questions/annotations.

    Each archive is fetched with ``wget`` and unpacked with ``unzip`` into
    ``{data_dir}/images``, ``{data_dir}/questions`` or ``{data_dir}/answers``.
    The archive is deleted from that same directory once it has been extracted.

    Args:
        data_dir: Root directory that receives the three sub-directories.
        load_train: Fetch the train split (images, questions, annotations).
        load_valid: Fetch the validation split (images, questions, annotations).
        load_test: Fetch the test split (images and questions only; no
            annotation archive is requested for it).

    Raises:
        subprocess.CalledProcessError: If ``wget`` or ``unzip`` exits with a
            non-zero status, so a failed download is not silently ignored.
    """
    # Imported locally so the function does not depend on the notebook's import cell.
    import subprocess

    coco_url = 'http://images.cocodataset.org/zips'
    vqa_url = 'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa'

    # (sub-directory, [(enabled, archive URL), ...]); order is images, questions,
    # annotations, and train/valid/test within each group.
    plan = [
        ('images', [
            (load_train, f'{coco_url}/train2014.zip'),
            (load_valid, f'{coco_url}/val2014.zip'),
            (load_test, f'{coco_url}/test2015.zip'),
        ]),
        ('questions', [
            (load_train, f'{vqa_url}/v2_Questions_Train_mscoco.zip'),
            (load_valid, f'{vqa_url}/v2_Questions_Val_mscoco.zip'),
            (load_test, f'{vqa_url}/v2_Questions_Test_mscoco.zip'),
        ]),
        ('answers', [
            (load_train, f'{vqa_url}/v2_Annotations_Train_mscoco.zip'),
            (load_valid, f'{vqa_url}/v2_Annotations_Val_mscoco.zip'),
        ]),
    ]

    for subdir, archives in plan:
        target_dir = f'{data_dir}/{subdir}'
        for enabled, url in archives:
            if not enabled:
                continue

            archive_name = url.rsplit('/', 1)[-1]
            archive_path = f'{target_dir}/{archive_name}'

            # Argument lists bypass the shell, so paths containing spaces are safe;
            # check=True turns a failed download/extraction into an exception.
            subprocess.run(['wget', url, '-P', target_dir], check=True)
            subprocess.run(['unzip', archive_path, '-d', target_dir], check=True)

            # Remove the archive from where it was actually downloaded
            # (derived from data_dir, not a hard-coded /content/data path).
            if os.path.exists(archive_path):
                os.remove(archive_path)

In [None]:
# Fetch the train and validation splits into DATA_DIR; the test split is skipped.
download_vqa(
    data_dir=DATA_DIR,
    load_train=True,
    load_valid=True,
    load_test=False,
)

# **Training**

In [None]:
# Launch visual_qa/train.py as a shell command. IPython substitutes $CHECKPOINT_DIR,
# $DATA_DIR and $CONFIG_FILE with the notebook's Python variables before the shell runs it.
# NOTE(review): --log_dir is hard-coded to /content/logs instead of coming from a variable;
# it presumably matches the relative `logs` directory created earlier only when the
# working directory is /content — confirm.
!python visual_qa/train.py \
--checkpoint_dir=$CHECKPOINT_DIR \
--log_dir='/content/logs' \
--image_dir=$DATA_DIR/images \
--question_dir=$DATA_DIR/questions \
--answer_dir=$DATA_DIR/answers \
--config_file=$CONFIG_FILE \
--batch_size=256 \
--num_epochs=10 \
--verbose_step=300

INFO:gensim.models.keyedvectors:loading projection weights from /root/gensim-data/word2vec-google-news-300/word2vec-google-news-300.gz
tcmalloc: large alloc 3600007168 bytes == 0x7f1e000 @  0x7f6acacc6001 0x7f6ac76d34ff 0x7f6ac7723ab8 0x7f6ac7727bb7 0x7f6ac77c6003 0x50a4a5 0x50cc96 0x507be4 0x508ec2 0x594a01 0x549e8f 0x5515c1 0x5a9dac 0x50a433 0x50cc96 0x507be4 0x509900 0x50a2fd 0x50cc96 0x507be4 0x509900 0x50a2fd 0x50cc96 0x5095c8 0x50a2fd 0x50beb4 0x507be4 0x509900 0x50a2fd 0x50beb4 0x5095c8
INFO:gensim.models.keyedvectors:loaded (3000000, 300) matrix from /root/gensim-data/word2vec-google-news-300/word2vec-google-news-300.gz
INFO:Main:Vocabularies building...
INFO:Main:Vocabularies have been built.
INFO:Main:Train dataset has been created.
INFO:Main:Validation dataset has been created.
INFO:Main:VisualQA model has been created.
INFO:Trainer:Trainer is ready. Device is cuda
INFO:Trainer:Step 0, loss: 6.91543, time: 5.60653
INFO:Trainer:Step 300, loss: 4.15059, time: 355.72078
INFO:Tr

In [None]:
# Optional cleanup: uncomment the next line to delete the cloned repository.
# ! rm -rf visual_qa