In [12]:
from keras import backend as K
from keras.models import Model, Sequential, load_model
from keras.layers import Input, Dense, LSTM, Embedding, Dropout, GRU
from keras.layers.merge import add
from tensorflow.keras.utils import to_categorical
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.image import load_img, img_to_array
from keras.applications.vgg19 import VGG19, preprocess_input
from keras.callbacks import ModelCheckpoint

from nltk.translate.bleu_score import corpus_bleu, sentence_bleu

import numpy as np
import glob
import h5py
import string
import pickle

from os import listdir
from os.path import join, isdir, isfile, exists

In [13]:
# Hyperparameters of the captioning model built in this cell.
n_lstm_units = 256
# Length of the image feature vector fed to the image branch.
# Deliberately not named `VGG19`: that name is the class imported from
# keras.applications.vgg19, and rebinding it to an int would make the class
# unusable for the rest of the session.
n_image_features = 4096
M = 36  # number of token ids in one caption input
n_vocabs = 7277  # embedding input size and width of the softmax output
n_embedding = 512
n_embeddeing = n_embedding  # legacy misspelled name, kept for cells that still read it

# input1 and input2 feed the encoder.
# Image branch: dropout, then project the feature vector down to n_lstm_units.
# `shape` must be a tuple; `(x)` without the trailing comma is just an int.
input1 = Input(shape=(n_image_features,))
dropout1 = Dropout(0.5)(input1)
fc1 = Dense(n_lstm_units, activation='relu')(dropout1)

# Caption branch: embed the token ids (id 0 is masked), dropout, then an LSTM
# that reduces the sequence to a single n_lstm_units vector.
input2 = Input(shape=(M,))
# The reference paper specifies an embedding vector size of 512.
embedded_layer1 = Embedding(n_vocabs, n_embedding, mask_zero=True)(input2)
dropout2 = Dropout(0.5)(embedded_layer1)
lstm1 = LSTM(n_lstm_units)(dropout2)

# Decoder: element-wise sum of both branches, one hidden layer, then a
# softmax over the vocabulary.
fc2 = add([fc1, lstm1])
fc3 = Dense(n_lstm_units, activation='relu')(fc2)
outputs = Dense(n_vocabs, activation='softmax')(fc3)

# Inputs are the image features and the caption token ids; the output is the
# softmax distribution over the vocabulary.
model = Model(inputs=[input1, input2], outputs=outputs)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_10 (InputLayer)           [(None, 36)]         0                                            
__________________________________________________________________________________________________
input_9 (InputLayer)            [(None, 4096)]       0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 36, 512)      3725824     input_10[0][0]                   
__________________________________________________________________________________________________
dropout_6 (Dropout)             (None, 4096)         0           input_9[0][0]                    
____________________________________________________________________________________________

In [16]:
# Load a previously saved model from its .h5 file and print its layer summary.
# NOTE(review): the file name suggests an epoch-1 checkpoint with acc 0.3872 — confirm.
checkpoint_path = '/content/drive/MyDrive/Project/im2txt/transfer.model.ep001.acc0.3872.h5'
a = load_model(checkpoint_path)
a.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 36)]         0                                            
__________________________________________________________________________________________________
input_1 (InputLayer)            [(None, 4096)]       0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 36, 512)      3725824     input_2[0][0]                    
__________________________________________________________________________________________________
dropout_1 (Dropout)             (None, 4096)         0           input_1[0][0]                    
____________________________________________________________________________________________