In [1]:
from configs.default import _C as config
from configs.default import update_config

from datasets import coco_parse
from datasets import flickr8k_parse

from keras import Model 
from keras.applications import VGG16
from tensorflow.python.client import device_lib

from models import image_preprocessing, transfer_models

import numpy as np
import os
import time

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


# Image encoding

Currently, the image encoder is built on the VGG16 architecture pre-trained on the ImageNet dataset.

The features were obtained from the "fc2" layer - last fully-connected layer before the predictions layer.

The generated features for the training, validation and test datasets are saved as NumPy arrays in .npy files.

### COCO dataset

In [2]:
# Path to the YAML file holding the experiment settings used for this run.
config_file = "./configs/baseline.yaml"
# Apply the settings from config_file to the imported default config object.
# NOTE(review): the return value is ignored, so this presumably updates
# `config` in place - confirm in configs.default.
update_config(config, config_file)

In [3]:
if config.DATASET == 'Coco':
    # Feature files carry an "_attn" suffix when attention features are requested.
    attn_suffix = "_attn" if config.ATTENTION else ""
    features_file_train = "vgg16_coco_train{}.npy".format(attn_suffix)
    features_file_val = "vgg16_coco_val{}.npy".format(attn_suffix)
    features_file_test = "vgg16_coco_test{}.npy".format(attn_suffix)

    # Validation split (train=False): parse the annotations, then collect all
    # captions per image (presumably keyed by image file name - confirm in coco_parse).
    val_filenames_with_captions = coco_parse.get_image_filename_with_caption(
        config.ANNOTATIONS_PATH,
        config.IMG_PATH,
        train=False,
    )
    val_filenames_with_all_captions = coco_parse.get_image_with_all_captions(
        val_filenames_with_captions
    )

    # Training split (train=True): same two-step parsing.
    train_filenames_with_captions = coco_parse.get_image_filename_with_caption(
        config.ANNOTATIONS_PATH,
        config.IMG_PATH,
        train=True,
    )
    train_filenames_with_all_captions = coco_parse.get_image_with_all_captions(
        train_filenames_with_captions
    )

    # NOTE(review): this branch never defines test_filenames_with_all_captions,
    # which the test-encoding cell further down reads - confirm how the COCO
    # test split is supposed to be produced.

### Flickr8k dataset

In [4]:
if config.DATASET == 'Flickr8k':
    # Caption file and the three split lists all live in the annotations directory.
    annotations_dir = config.ANNOTATIONS_PATH
    captions_file = os.path.join(annotations_dir, "Flickr8k.token.txt")
    train_txt_path = os.path.join(annotations_dir, "Flickr_8k.trainImages.txt")
    dev_txt_path = os.path.join(annotations_dir, "Flickr_8k.devImages.txt")
    test_txt_path = os.path.join(annotations_dir, "Flickr_8k.testImages.txt")

    # Feature files carry an "_attn" suffix when attention features are requested.
    attn_suffix = "_attn" if config.ATTENTION else ""
    features_file_train = "vgg16_flickr8k_train{}.npy".format(attn_suffix)
    features_file_val = "vgg16_flickr8k_val{}.npy".format(attn_suffix)
    features_file_test = "vgg16_flickr8k_test{}.npy".format(attn_suffix)

    # Parse the captions for every image once, then build each split from its list file.
    filenames_with_all_captions = flickr8k_parse.generate_filenames_with_all_captions(
        captions_file, config.IMG_PATH)
    train_filenames_with_all_captions = flickr8k_parse.generate_set(
        train_txt_path, filenames_with_all_captions, config.IMG_PATH)
    val_filenames_with_all_captions = flickr8k_parse.generate_set(
        dev_txt_path, filenames_with_all_captions, config.IMG_PATH)
    test_filenames_with_all_captions = flickr8k_parse.generate_set(
        test_txt_path, filenames_with_all_captions, config.IMG_PATH)

In [5]:
# Report how many images each split contains (validation first, then training).
for split_label, split_items in (
    ('validation', val_filenames_with_all_captions),
    ('training', train_filenames_with_all_captions),
):
    print('Number of images in {} dataset: {}'.format(split_label, len(split_items)))

Number of images in validation dataset: 1000
Number of images in training dataset: 6000


In [6]:
### encode features for validation images
# perf_counter() is a monotonic, high-resolution clock, so the measured
# duration cannot be distorted by system clock adjustments (unlike time.time()).
start = time.perf_counter()
val_transfer_values = transfer_models.use_pretrained_model_for_images(
    val_filenames_with_all_captions,
    config.ATTENTION,
    batch_size=config.ENCODER.BATCH_SIZE,
)
time_val = time.perf_counter() - start  # elapsed seconds

Instructions for updating:
Colocations handled automatically by placer.
0% of images processed
5% of images processed
10% of images processed
15% of images processed
20% of images processed
25% of images processed
30% of images processed
35% of images processed
40% of images processed
45% of images processed
50% of images processed
55% of images processed
60% of images processed
65% of images processed
70% of images processed
75% of images processed
80% of images processed
85% of images processed
90% of images processed
95% of images processed
100% of images processed


In [7]:
# time_val is in seconds; report it in minutes with one decimal place.
val_minutes = time_val / 60
print('Validation dataset encoding took {:.1f} minutes'.format(val_minutes))

Validation dataset encoding took 2.1 minutes


In [8]:
### save features for validation images
# Written under ./cnn_features/ using the file name chosen for the active dataset.
transfer_models.save_features(
    val_transfer_values,
    './cnn_features/',
    features_file_val,
)

Array was saved to ./cnn_features/vgg16_flickr8k_val_attn.npy


In [9]:
# Sanity check: reload the saved validation features and confirm they round-trip.
# The path is derived from features_file_val so the check follows the configured
# dataset / attention setting instead of a hard-coded Flickr8k file name.
val_features_reloaded = np.load(os.path.join('./cnn_features/', features_file_val))
# array_equal also compares shapes, so a broadcastable shape mismatch cannot pass.
np.array_equal(val_features_reloaded, val_transfer_values)

True

In [10]:
### encode features for training images
# perf_counter() is a monotonic, high-resolution clock, so the measured
# duration cannot be distorted by system clock adjustments (unlike time.time()).
start = time.perf_counter()
train_transfer_values = transfer_models.use_pretrained_model_for_images(
    train_filenames_with_all_captions,
    config.ATTENTION,
    batch_size=config.ENCODER.BATCH_SIZE,
)
time_train = time.perf_counter() - start  # elapsed seconds

Exception ignored in: <bound method BaseSession._Callable.__del__ of <tensorflow.python.client.session.BaseSession._Callable object at 0x0000028EC089EB38>>
Traceback (most recent call last):
  File "D:\Anaconda\lib\site-packages\tensorflow\python\client\session.py", line 1455, in __del__
    self._session._session, self._handle, status)
  File "D:\Anaconda\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 528, in __exit__
    c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.CancelledError: Session has been closed.


0% of images processed
5% of images processed
10% of images processed
15% of images processed
20% of images processed
25% of images processed
30% of images processed
35% of images processed
40% of images processed
45% of images processed
50% of images processed
55% of images processed
60% of images processed
65% of images processed
70% of images processed
75% of images processed
80% of images processed
85% of images processed
90% of images processed
95% of images processed


In [11]:
# time_train is in seconds; report it in minutes with one decimal place.
train_minutes = time_train / 60
print('Training dataset encoding took {:.1f} minutes'.format(train_minutes))

Training dataset encoding took 8.1 minutes


In [12]:
# Save the training features under ./cnn_features/ using the file name chosen
# for the active dataset.
transfer_models.save_features(
    train_transfer_values,
    './cnn_features/',
    features_file_train,
)

Array was saved to ./cnn_features/vgg16_flickr8k_train_attn.npy


In [13]:
# Sanity check: reload the saved training features and confirm they round-trip.
# The path is derived from features_file_train so the check follows the configured
# dataset / attention setting instead of a hard-coded Flickr8k file name.
train_features_reloaded = np.load(os.path.join('./cnn_features/', features_file_train))
# array_equal also compares shapes, so a broadcastable shape mismatch cannot pass.
np.array_equal(train_features_reloaded, train_transfer_values)

True

In [14]:
### encode features for test images
# The duration is stored in time_test so the training duration (time_train)
# is not overwritten by this cell.
# perf_counter() is a monotonic, high-resolution clock, so the measured
# duration cannot be distorted by system clock adjustments (unlike time.time()).
start = time.perf_counter()
test_transfer_values = transfer_models.use_pretrained_model_for_images(
    test_filenames_with_all_captions,
    config.ATTENTION,
    batch_size=config.ENCODER.BATCH_SIZE,
)
time_test = time.perf_counter() - start  # elapsed seconds

0% of images processed
5% of images processed
10% of images processed
15% of images processed
20% of images processed
25% of images processed
30% of images processed
35% of images processed
40% of images processed
45% of images processed
50% of images processed
55% of images processed
60% of images processed
65% of images processed
70% of images processed
75% of images processed
80% of images processed
85% of images processed
90% of images processed
95% of images processed
100% of images processed


In [15]:
# Save the test features under ./cnn_features/ using the file name chosen
# for the active dataset.
transfer_models.save_features(
    test_transfer_values,
    './cnn_features/',
    features_file_test,
)

Array was saved to ./cnn_features/vgg16_flickr8k_test_attn.npy
