In [1]:
from datasets import flickr8k_parse
from keras import backend as K
from keras import Model
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras.layers import Input, Dense, LSTM, add, Embedding, GRU, Dropout, Multiply, Dot, Lambda, BatchNormalization, \
    RepeatVector, concatenate
from keras.optimizers import RMSprop
from keras.backend.tensorflow_backend import set_session

import batch_generator
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import path_generation
import tensorflow as tf
import text_processing
import time

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# List the compute devices TensorFlow can see, to check that the GPU is visible before training.
from tensorflow.python.client import device_lib

visible_devices = device_lib.list_local_devices()
visible_devices

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 5614745094127694968, name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 1462032793
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 17093751452724026229
 physical_device_desc: "device: 0, name: GeForce 840M, pci bus id: 0000:07:00.0, compute capability: 5.0"]

# Decoder

### Captions encoding

Before building the decoder, the captions have to be encoded numerically (tokenised into word ids / one-hot vectors) so that they can later be fed to the embedding layer.

### COCO dataset

In [3]:
# captions_path = 'D:/coco/annotations/'
# images_path = 'D:/coco/images/'

# # parse JSON file with captions to get paths to images with captions
# val_filenames_with_captions = coco_parse.get_image_filename_with_caption(captions_path, images_path, 
#                                                                      train=False)
# val_filenames_with_all_captions = coco_parse.get_image_with_all_captions(val_filenames_with_captions)

# train_filenames_with_captions = coco_parse.get_image_filename_with_caption(captions_path, images_path, 
#                                                                      train=True)
# train_filenames_with_all_captions = coco_parse.get_image_with_all_captions(train_filenames_with_captions)

# ### Extract captions
# train_captions = coco_parse.make_list_of_captions(train_filenames_with_all_captions)
# val_captions = coco_parse.make_list_of_captions(val_filenames_with_all_captions)

### Flickr8k dataset

In [4]:
# Dataset locations. Every annotation file is derived from `annotations_path`, so moving the
# dataset only requires editing the two directory variables below.
images_path = 'D:/Flickr8k/images/'
annotations_path = 'D:/Flickr8k/annotations/'
captions_file = os.path.join(annotations_path, 'Flickr8k.token.txt')
train_txt_path = os.path.join(annotations_path, 'Flickr_8k.trainImages.txt')
dev_txt_path = os.path.join(annotations_path, 'Flickr_8k.devImages.txt')
test_txt_path = os.path.join(annotations_path, 'Flickr_8k.testImages.txt')

# Parse the token file once; the result is then narrowed to each official split.
# (presumably a mapping from image file to all of its captions - confirm in flickr8k_parse)
filenames_with_all_captions = flickr8k_parse.generate_filenames_with_all_captions(captions_file, images_path)

train_filenames_with_all_captions = flickr8k_parse.generate_set(train_txt_path, filenames_with_all_captions, images_path)
val_filenames_with_all_captions = flickr8k_parse.generate_set(dev_txt_path, filenames_with_all_captions, images_path)
test_filenames_with_all_captions = flickr8k_parse.generate_set(test_txt_path, filenames_with_all_captions, images_path)

# Only the train and validation captions are extracted here; the test split is kept as parsed.
train_captions = flickr8k_parse.make_list_of_captions(train_filenames_with_all_captions)
val_captions = flickr8k_parse.make_list_of_captions(val_filenames_with_all_captions)

In [5]:
### Preprocess captions
# The helper's return value is not used, so it presumably normalises the lists in place.
# Validation captions are handled first, then the training ones.
for caption_set in (val_captions, train_captions):
    text_processing.preprocess_captions(caption_set)

In [6]:
### Add markers of captions' starts and ends
# Training captions are handled first, then the validation ones (return values are not used).
for caption_set in (train_captions, val_captions):
    text_processing.add_start_and_end_to_captions(caption_set)

In [7]:
### Create vocabulary from the training captions
# Only training captions feed the vocabulary; validation captions are tokenised with it later.
train_vocab = text_processing.Vocabulary()
training_words = (
    word
    for caption_list in train_captions   # one list of captions per entry
    for caption in caption_list          # one caption string
    for word in caption.split()          # whitespace-delimited words
)
for word in training_words:
    train_vocab.add_word(word)

In [8]:
# Make sure the vocabulary folder exists before saving
# (presumably save_vocabulary writes its two pickle files there - confirm in text_processing).
vocabulary_dir = './vocabulary'
if not os.path.exists(vocabulary_dir):
    os.mkdir(vocabulary_dir)
train_vocab.save_vocabulary('word_to_id.pickle', 'id_to_word.pickle')

In [9]:
# Both splits are tokenised with the *training* vocabulary.
train_captions_tokens, val_captions_tokens = (
    text_processing.tokenise_captions(caption_set, train_vocab)
    for caption_set in (train_captions, val_captions)
)

In [10]:
# Sanity check: token-id sequences of the first training entry (one list per caption).
train_captions_tokens[0]

[[1, 2, 3, 4, 5, 6, 7, 2, 8, 4, 9, 10, 11, 12],
 [1, 3, 4, 13, 14, 4, 15, 11, 12],
 [1, 16, 17, 18, 19, 20, 21, 10, 22, 23, 12],
 [1, 16, 17, 24, 25, 9, 10, 11, 12],
 [1, 16, 17, 6, 15, 2, 26, 27, 28, 29, 30, 12]]

In [11]:
# Sanity check: the same entry as text, to compare against the token ids shown above.
train_captions[0]

['<sos> a black dog is running after a white dog in the snow <eos>',
 '<sos> black dog chasing brown dog through snow <eos>',
 '<sos> two dogs chase each other across the snowy ground <eos>',
 '<sos> two dogs play together in the snow <eos>',
 '<sos> two dogs running through a low lying body of water <eos>']

### Decoder NN

### GRU / LSTM decoder configuration

In [14]:
# --- Data / training schedule ---
dataset = 'flickr8k'
batch_size = 32
epochs = 20
steps_per_epoch = len(train_captions) // batch_size  # whole batches only
max_len = 30                                          # caption length used by the LSTM inputs

# --- Network size ---
initial_state_size = 512
embedding_out_size = 512
number_of_layers = 2

# --- Architecture switches ---
batch_norm = True
dropout = True
gru = False            # False selects the LSTM decoders
attn = True            # True selects the attention LSTM (when gru is False)
attn_type = 'bahdanau'

# --- Output locations, derived from the switches above ---
path_gen = path_generation.PathGenerator(gru, dataset, number_of_layers, batch_size, batch_norm, dropout, attn, attn_type)
path_checkpoint = path_gen.get_weights_path()
model_path = path_gen.get_model_path()
callbacks_path = path_gen.get_callbacks_path()

In [15]:
# Show the path that the training-history CSV is written to at the end of the notebook.
print(callbacks_path)

./callbacks/VGG16_LSTM_flickr8k_2l_32b_bn_dr_attn_bahdanau.csv


In [16]:
# Pick the pre-extracted VGG16 feature files that match the attention switch, then load them.
if attn:
    train_features_file = './cnn_features/vgg16_flickr8k_train_attn.npy'
    val_features_file = './cnn_features/vgg16_flickr8k_val_attn.npy'
else:
    train_features_file = './cnn_features/vgg16_flickr8k_train.npy'
    val_features_file = './cnn_features/vgg16_flickr8k_val.npy'

transfer_values = np.load(train_features_file)
val_transfer_values = np.load(val_features_file)

In [17]:
# For attention the feature maps are flattened into a sequence of locations:
# (samples, h, w, channels) -> (samples, h*w, channels).
# Sample and channel counts are read from the arrays instead of being hard-coded, so the
# cell works for any split size / feature extractor and stays idempotent on re-run.
if attn:
    print(transfer_values.shape)
    transfer_values = transfer_values.reshape(
        transfer_values.shape[0], -1, transfer_values.shape[-1])
    val_transfer_values = val_transfer_values.reshape(
        val_transfer_values.shape[0], -1, val_transfer_values.shape[-1])
    print(transfer_values.shape)

(6000, 14, 14, 512)
(6000, 196, 512)


In [18]:
# Initialise the required layers as global variables which are later used by the
# connect_transfer_values_* functions.
# NOTE(review): unnamed layers are auto-named by Keras in creation order, so reordering
# these statements may change the layer names stored in saved models - confirm before moving.
# Encoder input part
# Flat feature vector input used by the non-attention decoders.
encoder_input = Input(shape=(4096,), name='encoder_input')
if attn:
    # Input for the attention decoder, shaped like one sample of `transfer_values`.
    # It reuses the layer name 'encoder_input'; only one of the two inputs ends up in a given model.
    encoder_input_attn = Input(shape=(transfer_values.shape[1],transfer_values.shape[2]), name='encoder_input')
# Projects the image features down to the RNN state size.
encoder_reduction = Dense(initial_state_size, activation='relu', name='encoder_reduction')
bn1 = BatchNormalization()
# Repeats the reduced image vector max_len times (used by connect_transfer_values_lstm).
repeat = RepeatVector(max_len)

### Decoder input and embedding
# Variable-length token input (GRU decoder) and fixed-length token input (LSTM decoders);
# both carry the layer name 'decoder_input' and are never used in the same model.
decoder_input = Input(shape=(None,), name='decoder_input')
decoder_input_lstm = Input(shape=(max_len,), name='decoder_input')
# mask_zero=True makes the embedding treat token id 0 as padding.
embedding = Embedding(input_dim=train_vocab.number_of_words, output_dim=embedding_out_size, mask_zero=True, name='embedding')
drop1 = Dropout(0.5)
### GRU1 / LSTM1 and the attention LSTMs
gru1 = GRU(initial_state_size, name='GRU1', return_sequences=True)
# NOTE(review): these two Inputs are not referenced by the functions visible in this notebook;
# connect_transfer_values_lstm_attention builds its own local s0/c0.
s0 = Input(shape=(initial_state_size,), name='s0')
c0 = Input(shape=(initial_state_size,), name='c0')
# Attention LSTM, called once per time step; return_state=True exposes its hidden and cell state.
lstm_att = LSTM(initial_state_size, return_state=True)
# Optional second layer applied to the stacked outputs of the attention LSTM.
lstm_att2 = LSTM(initial_state_size, return_sequences=True)
lstm1 = LSTM(initial_state_size, name='LSTM1', return_sequences=True)
bn2 = BatchNormalization()
### GRU2 / LSTM2
gru2 = GRU(initial_state_size, name='GRU2', return_sequences=True)
lstm2 = LSTM(initial_state_size, name='LSTM2', return_sequences=True)
bn3 = BatchNormalization()
### GRU3 / LSTM3
gru3 = GRU(initial_state_size, name='GRU3', return_sequences=True)
lstm3 = LSTM(initial_state_size, name='LSTM3', return_sequences=True)
bn4 = BatchNormalization()

# Final projection: softmax over the vocabulary.
decoder_dense = Dense(train_vocab.number_of_words, activation='softmax', name='decoder_output')

In [19]:
def connect_transfer_values_gru(transfer_values):
    """
    Build the GRU decoder graph on top of the image features.

    The reduced (and, if `batch_norm` is set, batch-normalised) image features are used as
    the initial state of every GRU layer; the embedded tokens of the global `decoder_input`
    are the sequence input. One, two or three GRU layers are stacked depending on the
    global `number_of_layers`.

    :param transfer_values: Keras tensor holding the image features.
    :return: output tensor of `decoder_dense` (softmax over the vocabulary).
    """
    # Image branch -> initial recurrent state
    state = encoder_reduction(transfer_values)
    if batch_norm:
        state = bn1(state)

    # Caption branch -> embedded word sequence
    sequence = embedding(decoder_input)
    if dropout:
        sequence = drop1(sequence)

    # The first GRU is always present; deeper ones depend on number_of_layers.
    stack = [(gru1, bn2)]
    if number_of_layers >= 2:
        stack.append((gru2, bn3))
    if number_of_layers == 3:
        stack.append((gru3, bn4))

    for recurrent, norm in stack:
        sequence = recurrent(sequence, initial_state=state)
        if batch_norm:
            sequence = norm(sequence)

    # Project the recurrent outputs onto the vocabulary.
    return decoder_dense(sequence)

def connect_transfer_values_lstm(transfer_values, max_len=40):
    """
    Build the (non-attention) LSTM decoder graph on top of the image features.

    The reduced image features are repeated along the time axis by the global `repeat`
    layer and concatenated with the embedded tokens of the global `decoder_input_lstm`;
    the result is the input of the LSTM stack.

    :param transfer_values: Keras tensor holding the image features.
    :param max_len: not used by the body - the sequence length is fixed by the global
        `repeat` and `decoder_input_lstm` layers. Kept for interface compatibility.
    :return: output tensor of `decoder_dense` (softmax over the vocabulary).
    """
    # Image branch, repeated once per time step
    image_sequence = encoder_reduction(transfer_values)
    if batch_norm:
        image_sequence = bn1(image_sequence)
    image_sequence = repeat(image_sequence)

    # Caption branch
    words = embedding(decoder_input_lstm)
    if dropout:
        words = drop1(words)

    # First LSTM consumes [image, word] at every step; shapes are printed for debugging.
    merged = concatenate([image_sequence, words])
    print(merged.shape)
    hidden = lstm1(merged)
    print(hidden.shape)
    if batch_norm:
        hidden = bn2(hidden)

    # Optional deeper layers
    deeper = []
    if number_of_layers >= 2:
        deeper.append((lstm2, bn3))
    if number_of_layers == 3:
        deeper.append((lstm3, bn4))
    for recurrent, norm in deeper:
        hidden = recurrent(hidden)
        if batch_norm:
            hidden = norm(hidden)

    return decoder_dense(hidden)

In [20]:
# Dense layers shared by every attention step (created once so their weights are reused).
# NOTE(review): these layers are unnamed, so Keras names them by creation order - keep this
# order if previously saved models must stay loadable.
# Projects the previous hidden state (used in bahdanau_attention).
densor_s = Dense(initial_state_size)
# Projects the image features (used in bahdanau_attention).
densor_feat = Dense(initial_state_size)
# Sigmoid gate computed from the previous state; it has initial_state_size units, so despite
# the name it gates the context element-wise rather than with a single scalar.
gating_scalar_func = Dense(initial_state_size, activation='sigmoid')
# One-unit layer producing the per-location score that is softmaxed into attention weights.
densor2 = Dense(1)

In [21]:
def bahdanau_attention(a, s_prev, i):
    """
    Additive (Bahdanau) attention step.

    Scores every image location against the previous hidden state, turns the scores into
    softmax weights over the locations and returns the weighted sum of the image features,
    multiplied by a sigmoid gate computed from the previous state.

    :param a: image features tensor; the debug output of this notebook shows (?, 196, 512).
    :param s_prev: previous hidden state tensor; shown as (?, 512).
    :param i: time-step index, used only to name the weights layer 'weights_<i>'.
    :return: gated context tensor; shown as (?, 1, 512).
    """
    separator = '------------------------'
    print(separator)
    print('Attention')
    print('img features', a.shape)
    print('prev state', s_prev.shape)

    projected_features = densor_feat(a)
    print('a_dense', projected_features.shape)

    # Insert a length-1 axis so the state can be added to every image location.
    state_step = Lambda(lambda x: K.expand_dims(x, 1))(s_prev)
    projected_state = densor_s(state_step)
    print('s_dense', projected_state.shape)

    energies = add([projected_features, projected_state])
    print('summary', energies.shape)
    activated = Lambda(lambda x: K.tanh(x))(energies)
    print('first_dense', activated.shape)

    # One score per location, normalised over the location axis.
    scores = densor2(activated)
    alphas = Lambda(lambda x: K.softmax(x, axis=1), name='weights_{}'.format(i))(scores)
    print('weights', alphas.shape)

    # Weighted sum of the features, then the state-dependent gate.
    weighted_sum = Dot(axes=1)([alphas, a])
    gate = gating_scalar_func(state_step)
    gated_context = Multiply()([weighted_sum, gate])
    print('context', gated_context.shape)
    print(separator)
    return gated_context


def scaled_dot_product_attention(a, s_prev, i, initital_state_size):
    """
    Scaled dot-product attention step.

    Takes the dot product of every image location with the previous hidden state, divides
    it by sqrt(state size), passes it through `densor2`, softmaxes over the locations and
    returns the weighted sum of the image features.

    :param a: image features tensor (locations on axis 1, channels on axis 2).
    :param s_prev: previous hidden state tensor; its size must equal the channel size of `a`.
    :param i: time-step index, used only to name the weights layer 'weights_<i>'.
    :param initital_state_size: size of the hidden state, used for the 1/sqrt(d) scaling.
        (The misspelt name is kept so existing callers keep working.)
    :return: context tensor (weighted sum of `a` over its locations).
    """
    # The scale used to be hard-coded as sqrt(512) regardless of the argument.
    # It is passed through `arguments` (as a plain float) so the Lambda has no closure.
    scale = float(np.sqrt(initital_state_size))

    print('------------------------')
    print('Attention')
    print('img features', a.shape)
    print('prev state', s_prev.shape)
    # Insert a length-1 axis so the state can be dotted with every image location.
    s_prev = Lambda(lambda x: K.expand_dims(x, 1))(s_prev)
    dot_prod = Dot(axes=2)([a, s_prev])
    print('dot prod', dot_prod.shape)
    scaled_dot_prod = Lambda(lambda x, scale: x / scale, arguments={'scale': scale})(dot_prod)
    # Report the scaled tensor (the original printed the unscaled one a second time).
    print('scaled dot prod', scaled_dot_prod.shape)
    weights = densor2(scaled_dot_prod)
    weights = Lambda(lambda x: K.softmax(x, axis=1), name='weights_{}'.format(i))(weights)
    print('weights', weights.shape)
    context = Dot(axes=1)([weights, a])
    print('context', context.shape)
    print('------------------------')
    return context

In [22]:
def _mean_pooled_initial_state(features, state_size):
    """Average the image features over axis 1 and project them to an initial LSTM state."""
    pooled = Lambda(lambda x: K.mean(x, axis=1))(features)
    projected = Dense(state_size, activation='relu')(pooled)
    return BatchNormalization()(projected)


def connect_transfer_values_lstm_attention(features, max_len=30, initial_state_size=512, attn='bahdanau'):
    """
    Connects the transfer values to words and passes them to an LSTM with attention.

    The LSTM is unrolled manually: at every time step a context vector is computed from the
    image features and the previous hidden state, concatenated with the embedding of the
    current word and fed to the shared `lstm_att` layer. The per-step hidden states are
    stacked into a sequence, optionally passed through a second LSTM and projected onto the
    vocabulary.

    :param features: image features tensor (locations on axis 1).
    :param max_len: number of unrolled time steps; must not exceed the length of the global
        `decoder_input_lstm`, because step t slices X[:, t].
    :param initial_state_size: size of the initial hidden / cell state.
    :param attn: 'bahdanau' or 'scaled_dot'.
    :return: output tensor of `decoder_dense` (softmax over the vocabulary per time step).
    :raises ValueError: if `attn` names an unknown attention mechanism.
    """
    print('Initial features shape', features.shape)
    X = decoder_input_lstm
    X = embedding(X)
    print('word-embedding', X.shape)
    if dropout:
        X = drop1(X)

    # Hidden and cell state start from two separately learned projections of the mean
    # image feature (layers are created in the same order as before: s0 first, then c0).
    print('Initial states')
    s0 = _mean_pooled_initial_state(features, initial_state_size)
    s = s0
    print('s initial', s.shape)
    c0 = _mean_pooled_initial_state(features, initial_state_size)
    c = c0
    print('c initial', c.shape)

    lstm_att_out = []
    for i in range(max_len):
        print('------------------------')
        print('LSTM iteration {}'.format(i))
        if attn == 'bahdanau':
            context = bahdanau_attention(features, s, i)
        elif attn == 'scaled_dot':
            context = scaled_dot_product_attention(features, s, i, initial_state_size)
        else:
            raise ValueError('No such attention mechanism')
        print('context', context.shape)
        # Slice out the embedding of word t, keeping a length-1 time axis; `t` is passed via
        # `arguments` so each Lambda is bound to its own step index.
        tmp_X = Lambda(lambda x, t: K.expand_dims(x[:, t], axis=1), arguments={'t': i}, output_shape=lambda s: (s[0], 1, s[2]))(X)
        print('current word vector', tmp_X.shape)
        concat = concatenate([context, tmp_X])
        print('lstm input: context-word concat', concat.shape)
        # With return_state=True the layer returns (output, hidden state, cell state); the
        # output of a single step equals the hidden state, so the middle value is dropped.
        s, _, c = lstm_att(concat, initial_state=[s, c])
        print('hidden state', s.shape)
        lstm_att_out.append(s)

    out = Lambda(lambda x: K.stack(x, axis=1))(lstm_att_out)
    # Report the stacked LSTM outputs (the original printed the embedding `X` here).
    print('final lstm output shape', out.shape)
    if batch_norm:
        out = bn2(out)

    # NOTE(review): only a second layer exists for the attention decoder, so
    # number_of_layers == 3 builds a single layer - confirm this is intended.
    if number_of_layers == 2:
        out = lstm_att2(out, initial_state=[s0, c0])
        # bn3 belongs to the second layer, as in the GRU/LSTM builders. Applying it
        # unconditionally stacked two BatchNorms back to back when the layer was skipped.
        if batch_norm:
            out = bn3(out)
    decoder_output = decoder_dense(out)
    print('output', decoder_output.shape)
    return decoder_output

In [23]:
# Both generators share the same settings. The non-GRU decoders additionally request the
# generator's `gru=False` mode, while the GRU decoder relies on the generator's default.
generator_options = dict(number_of_words=train_vocab.number_of_words, batch_size=batch_size)
if not gru:
    generator_options['gru'] = False

generator = batch_generator.generate_batch(transfer_values, train_captions_tokens, **generator_options)
val_generator = batch_generator.generate_batch(val_transfer_values, val_captions_tokens, **generator_options)

In [24]:
# RMSprop with an initial learning rate of 1e-3 and a small learning-rate decay (1e-6);
# ReduceLROnPlateau (defined further down) lowers the rate further during training.
optimizer = RMSprop(lr=1e-3, decay=1e-6)

Instructions for updating:
Colocations handled automatically by placer.


In [25]:
# Build the decoder graph that matches the configuration switches:
# GRU -> plain GRU decoder, otherwise LSTM with or without attention.
if gru:
    decoder_output = connect_transfer_values_gru(encoder_input)
elif attn:
    decoder_output = connect_transfer_values_lstm_attention(encoder_input_attn, attn=attn_type)
else:
    decoder_output = connect_transfer_values_lstm(encoder_input)

Initial features shape (?, 196, 512)
word-embedding (?, 30, 512)
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Initial states
s initial (?, 512)
c initial (?, 512)
------------------------
LSTM iteration 0
------------------------
Attention
img features (?, 196, 512)
prev state (?, 512)
a_dense (?, 196, 512)
s_dense (?, 1, 512)
summary (?, 196, 512)
first_dense (?, 196, 512)
weights (?, 196, 1)
context (?, 1, 512)
------------------------
context (?, 1, 512)
current word vector (?, 1, 512)
lstm input: context-word concat (?, 1, 1024)
hidden state (?, 512)
------------------------
LSTM iteration 1
------------------------
Attention
img features (?, 196, 512)
prev state (?, 512)
a_dense (?, 196, 512)
s_dense (?, 1, 512)
summary (?, 196, 512)
first_dense (?, 196, 512)
weights (?, 196, 1)
context (?, 1, 512)
------------------------
context (?, 1, 512)
current word vector (?, 1, 512)
lstm input: context-word concat (?, 1,

In [26]:
# Pair the image input and the token input that belong to the selected decoder variant,
# then wrap the graph in a single Model.
if gru:
    model_inputs = [encoder_input, decoder_input]
elif attn:
    model_inputs = [encoder_input_attn, decoder_input_lstm]
else:
    model_inputs = [encoder_input, decoder_input_lstm]

decoder_model = Model(inputs=model_inputs, outputs=[decoder_output])

In [27]:
# Categorical cross-entropy against the softmax output of `decoder_output`
# (assumes the generators yield one-hot encoded target words - confirm in batch_generator).
decoder_model.compile(optimizer=optimizer,
                      loss='categorical_crossentropy')

In [28]:
# Print the layer-by-layer summary; the attention model is unrolled over max_len steps,
# so the listing is long.
decoder_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_input (InputLayer)      (None, 196, 512)     0                                            
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 512)          0           encoder_input[0][0]              
__________________________________________________________________________________________________
dense_5 (Dense)                 (None, 512)          262656      lambda_1[0][0]                   
__________________________________________________________________________________________________
batch_normalization_5 (BatchNor (None, 512)          2048        dense_5[0][0]                    
__________________________________________________________________________________________________
lambda_3 (

__________________________________________________________________________________________________
batch_normalization_6 (BatchNor (None, 512)          2048        dense_6[0][0]                    
__________________________________________________________________________________________________
lstm_1 (LSTM)                   [(None, 512), (None, 3147776     concatenate_1[0][0]              
                                                                 batch_normalization_5[0][0]      
                                                                 batch_normalization_6[0][0]      
                                                                 concatenate_2[0][0]              
                                                                 lstm_1[0][0]                     
                                                                 lstm_1[0][2]                     
                                                                 concatenate_3[0][0]              
          

                                                                 encoder_input[0][0]              
__________________________________________________________________________________________________
multiply_5 (Multiply)           (None, 1, 512)       0           dot_5[0][0]                      
                                                                 dense_3[4][0]                    
__________________________________________________________________________________________________
lambda_17 (Lambda)              (None, 1, 512)       0           dropout_1[0][0]                  
__________________________________________________________________________________________________
concatenate_5 (Concatenate)     (None, 1, 1024)      0           multiply_5[0][0]                 
                                                                 lambda_17[0][0]                  
__________________________________________________________________________________________________
lambda_18 

__________________________________________________________________________________________________
lambda_41 (Lambda)              (None, 1, 512)       0           dropout_1[0][0]                  
__________________________________________________________________________________________________
concatenate_13 (Concatenate)    (None, 1, 1024)      0           multiply_13[0][0]                
                                                                 lambda_41[0][0]                  
__________________________________________________________________________________________________
lambda_42 (Lambda)              (None, 1, 512)       0           lstm_1[12][0]                    
__________________________________________________________________________________________________
add_14 (Add)                    (None, 196, 512)     0           dense_2[13][0]                   
                                                                 dense_1[13][0]                   
__________

                                                                 lambda_65[0][0]                  
__________________________________________________________________________________________________
lambda_66 (Lambda)              (None, 1, 512)       0           lstm_1[20][0]                    
__________________________________________________________________________________________________
add_22 (Add)                    (None, 196, 512)     0           dense_2[21][0]                   
                                                                 dense_1[21][0]                   
__________________________________________________________________________________________________
lambda_67 (Lambda)              (None, 196, 512)     0           add_22[0][0]                     
__________________________________________________________________________________________________
weights_21 (Lambda)             (None, 196, 1)       0           dense_4[21][0]                   
__________

add_30 (Add)                    (None, 196, 512)     0           dense_2[29][0]                   
                                                                 dense_1[29][0]                   
__________________________________________________________________________________________________
lambda_91 (Lambda)              (None, 196, 512)     0           add_30[0][0]                     
__________________________________________________________________________________________________
weights_29 (Lambda)             (None, 196, 1)       0           dense_4[29][0]                   
__________________________________________________________________________________________________
dot_30 (Dot)                    (None, 1, 512)       0           weights_29[0][0]                 
                                                                 encoder_input[0][0]              
__________________________________________________________________________________________________
multiply_3

In [29]:
# Serialise the model architecture to JSON, re-indented so the file is human-readable.
model_json = decoder_model.to_json()
# exist_ok=True makes the cell re-runnable without a bare `except:` that would also hide
# real failures (e.g. permission errors) behind a "folder already exists" message.
os.makedirs('./models', exist_ok=True)
with open(model_path, "w") as json_file:
    json.dump(json.loads(model_json), json_file, indent=4)

  '. They will not be included '
  '. They will not be included '
  '. They will not be included '
  '. They will not be included '
  '. They will not be included '
  '. They will not be included '
  '. They will not be included '
  '. They will not be included '
  '. They will not be included '
  '. They will not be included '
  '. They will not be included '
  '. They will not be included '
  '. They will not be included '
  '. They will not be included '
  '. They will not be included '
  '. They will not be included '
  '. They will not be included '
  '. They will not be included '
  '. They will not be included '
  '. They will not be included '


  '. They will not be included '
  '. They will not be included '
  '. They will not be included '
  '. They will not be included '
  '. They will not be included '
  '. They will not be included '
  '. They will not be included '
  '. They will not be included '
  '. They will not be included '
  '. They will not be included '
  '. They will not be included '


The folder already exists


### Checkpoints

During the training process, it is a good idea to save the weights periodically.

In [30]:
# exist_ok=True makes the cell re-runnable without a bare `except:` that would also hide
# real failures (e.g. permission errors) behind a "folder already exists" message.
os.makedirs('./weights/', exist_ok=True)

# Keep only the weights of the best epoch so far (ModelCheckpoint monitors val_loss by default).
checkpoints = ModelCheckpoint(path_checkpoint, verbose=1, save_weights_only=True, save_best_only=True)
# Halve the learning rate when val_loss has not improved for 2 epochs, down to 1e-5 at most.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                              patience=2, verbose=1, min_lr=0.00001)

The folder already exists


In [31]:
# Let TensorFlow allocate GPU memory on demand instead of reserving all of it up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
set_session(tf.Session(config=config))

# Training settings, collected in one place.
# Validation uses only 5 batches per epoch, so val_loss is an estimate on a small sample.
training_options = dict(
    generator=generator,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    callbacks=[checkpoints, reduce_lr],
    validation_data=val_generator,
    validation_steps=5,
)

# Train and measure the wall-clock time. Despite its name, `callbacks` holds the object
# returned by fit_generator, whose `.history` is read by the cells below.
start = time.time()
callbacks = decoder_model.fit_generator(**training_options)
time_train = time.time() - start

Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Epoch 1/20

Epoch 00001: val_loss improved from inf to 11.56038, saving model to ./weights/VGG16_LSTM_flickr8k_2l_32b_bn_dr_attn_bahdanau.hdf5
Epoch 2/20

Epoch 00002: val_loss improved from 11.56038 to 10.80313, saving model to ./weights/VGG16_LSTM_flickr8k_2l_32b_bn_dr_attn_bahdanau.hdf5
Epoch 3/20



Epoch 00003: val_loss improved from 10.80313 to 1.44194, saving model to ./weights/VGG16_LSTM_flickr8k_2l_32b_bn_dr_attn_bahdanau.hdf5
Epoch 4/20

Epoch 00004: val_loss did not improve from 1.44194
Epoch 5/20



Epoch 00005: val_loss did not improve from 1.44194

Epoch 00005: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 6/20

Epoch 00006: val_loss improved from 1.44194 to 1.34808, saving model to ./weights/VGG16_LSTM_flickr8k_2l_32b_bn_dr_attn_bahdanau.hdf5
Epoch 7/20



Epoch 00007: val_loss improved from 1.34808 to 1.33426, saving model to ./weights/VGG16_LSTM_flickr8k_2l_32b_bn_dr_attn_bahdanau.hdf5
Epoch 8/20

Epoch 00008: val_loss did not improve from 1.33426
Epoch 9/20



Epoch 00009: val_loss did not improve from 1.33426

Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 10/20

Epoch 00010: val_loss improved from 1.33426 to 1.32666, saving model to ./weights/VGG16_LSTM_flickr8k_2l_32b_bn_dr_attn_bahdanau.hdf5
Epoch 11/20



Epoch 00011: val_loss did not improve from 1.32666
Epoch 12/20

Epoch 00012: val_loss did not improve from 1.32666

Epoch 00012: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 13/20



Epoch 00013: val_loss did not improve from 1.32666
Epoch 14/20

Epoch 00014: val_loss did not improve from 1.32666

Epoch 00014: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 15/20



Epoch 00015: val_loss improved from 1.32666 to 1.26773, saving model to ./weights/VGG16_LSTM_flickr8k_2l_32b_bn_dr_attn_bahdanau.hdf5
Epoch 16/20

Epoch 00016: val_loss improved from 1.26773 to 1.24836, saving model to ./weights/VGG16_LSTM_flickr8k_2l_32b_bn_dr_attn_bahdanau.hdf5
Epoch 17/20



Epoch 00017: val_loss did not improve from 1.24836
Epoch 18/20

Epoch 00018: val_loss did not improve from 1.24836

Epoch 00018: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 19/20



Epoch 00019: val_loss did not improve from 1.24836
Epoch 20/20

Epoch 00020: val_loss did not improve from 1.24836

Epoch 00020: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.


In [32]:
# Report the wall-clock training time measured around fit_generator.
print("Time for training: {} seconds".format(time_train))

Time for training: 6076.838664531708 seconds


In [33]:
# Make sure the folder for the training-history CSV exists.
callbacks_dir = './callbacks'
if not os.path.exists(callbacks_dir):
    os.mkdir(callbacks_dir)
# Names of the metrics recorded during training.
columns = callbacks.history.keys()

In [34]:
# Store the per-epoch training history as CSV, one column per recorded metric.
callback_df = pd.DataFrame(callbacks.history)
# `index` is documented as a bool; False states the intent (no row-index column)
# instead of relying on None being falsy.
callback_df.to_csv(callbacks_path, index=False)