In [1]:
from numpy import array
from pickle import load
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.utils import plot_model
from keras.models import Model
from keras.layers import *
from keras.callbacks import ModelCheckpoint
import pydot
import numpy as np
from keras.models import load_model
from numpy import argmax
from nltk.translate.bleu_score import corpus_bleu
from keras.utils import CustomObjectScope

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


Import utils and evaluate code

In [None]:
from utils import *
from evaluate import *

# Import the ResNet-152, C3D and semantic features for training and testing the model

In [4]:
# Input files shared by the training and validation splits.
DESCRIPTIONS_FILE = 'data/descriptions_processed.txt'
C2D_FEATURES_FILE = 'data/msvd_resnet152_features.pkl'
C3D_FEATURES_FILE = 'data/msvd_c3d_features.pkl'
SEMANTIC_FEATURES_FILE = 'data/msvd_semantic_features.pkl'

# ---- Training split ----
# IDs of the videos that make up the training portion of the "cook" class.
class_file = 'data/msvd_classes/cook_train_ID.txt'
train = load_set(class_file)
print('Number of training videos: %d' % len(train))

# Captions restricted to the training IDs.
train_descriptions = load_descriptions(DESCRIPTIONS_FILE, train)
print('Descriptions: train=%d' % len(train_descriptions))

# C2D (ResNet-152) features.
train_c2d_features = load_video_features(C2D_FEATURES_FILE, train)
print('C2D: train=%d' % len(train_c2d_features))

# C3D features.
train_c3d_features = load_video_features(C3D_FEATURES_FILE, train)
print('C3D: train=%d' % len(train_c3d_features))

# Semantic features.
train_semantic_features = load_video_features(SEMANTIC_FEATURES_FILE, train)
print('Semantic: train=%d' % len(train_semantic_features))

# ---- Validation split ----
# Note: the IDs come from the validation file, but the variables are named
# `test` / `test_*`.
filename = 'data/msvd_classes/cook_val_ID.txt'
test = load_set(filename)
print('Dataset: %d' % len(test))

# Captions restricted to the validation IDs.
test_descriptions = load_descriptions(DESCRIPTIONS_FILE, test)
print('Descriptions: test=%d' % len(test_descriptions))

# C2D (ResNet-152) features.
test_c2d_features = load_video_features(C2D_FEATURES_FILE, test)
print('C2D: test=%d' % len(test_c2d_features))

# C3D features.
test_c3d_features = load_video_features(C3D_FEATURES_FILE, test)
print('C3D: test=%d' % len(test_c3d_features))

# Semantic features.
test_semantic_features = load_video_features(SEMANTIC_FEATURES_FILE, test)
print('Semantic: test=%d' % len(test_semantic_features))


Dataset: 337
Descriptions: train=337
C2D: train=337
C3D: train=337
Semantic: train=337
Descriptions: train=100
C2D: train=100
C3D: train=100
Semantic: train=100


In [7]:
# Sequence length handed to the data generator and the attention layer in the
# later cells (presumably the maximum caption length in tokens -- confirm).
max_length = 40

# Fit the tokenizer on the MSVD training captions only.
desc = to_lines(train_descriptions)
tokenizer = Tokenizer()
tokenizer.fit_on_texts(desc)

# The Keras Tokenizer numbers words from 1, so one extra slot is needed for
# index 0.
vocab_size = 1 + len(tokenizer.word_index)
print('Vocabulary Size: %d' % vocab_size)

# Restore the previously trained captioning model; it is reused in the next
# cell as the starting point for the new decoder.
model_old = load_model('pretrained_model_10.h5')
model_old.summary()

Vocabulary Size: 3084
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 2048)         0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 4096)         0                                            
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 128)          262272      input_1[0][0]                    
__________________________________________________________________________________________________
dense_2 (Dense)                 (None, 128)          524416      input_2[0][0]                    
_______________________________________________________________________________________

# Create the model with a new domain-specific decoder

In [16]:
# Build the domain-specific captioner: reuse the pretrained network up to its
# concatenation layer and attach a new BLSTM -> attention -> softmax decoder.

# Cut the pretrained model at the named concatenate layer.
layer_name = 'concatenate_3'
encoder = Model(inputs=model_old.input, outputs=model_old.get_layer(layer_name).output)

# Freeze the reused layers so that only the new decoder is trained.
# NOTE(review): the cut-off of 13 is kept from the original; confirm it covers
# every layer up to `layer_name`.
for layer in encoder.layers[:13]:
    layer.trainable = False

# Output of the reused layers feeds the new decoder.
x = encoder.output

# Bidirectional LSTM. return_sequences=True keeps one output per time step so
# the attention layer below has a sequence to pool over.
blstm = Bidirectional(LSTM(64, return_sequences=True))(x)

# Attention over the BLSTM sequence.
# NOTE(review): assumes `Attention` (imported from utils) reduces a
# (batch, max_length, features) tensor to (batch, features) -- confirm against
# its definition.
att = Attention(max_length)(blstm)

# Softmax over the vocabulary. It is fed the attention output; previously it
# was fed `blstm`, which left the attention layer out of the model graph.
outputs = Dense(vocab_size, activation='softmax', name='final_dense')(att)

model = Model(inputs=model_old.input, outputs=outputs)

# Compile the model.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()
# Save a diagram of the architecture.
# NOTE(review): absolute, machine-specific output path kept from the original.
plot_model(model, to_file='/home/mh/mywork/video_caption_domain/model_cook/model_cook.png', show_shapes=True)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 2048)         0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 4096)         0                                            
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 128)          262272      input_1[0][0]                    
__________________________________________________________________________________________________
dense_2 (Dense)                 (None, 128)          524416      input_2[0][0]                    
__________________________________________________________________________________________________
input_4 (I

In [None]:
# Train the domain-specific model and save it once training has finished.

# Number of passes over the training data; also used to name the saved file.
n_epochs = 50

# One generator step per entry in train_descriptions.
steps = len(train_descriptions)

# Create the data generator.
generator = data_generator(train_descriptions, train_c2d_features, train_c3d_features, train_semantic_features, tokenizer, max_length)

# Fit the model to train the decoder.
model.fit_generator(generator, epochs=n_epochs, steps_per_epoch=steps, verbose=1)

# Save the trained model. The file name previously used `i`, which is not
# defined in this notebook and would raise a NameError only after the whole
# fit had run; the epoch count is used instead.
# NOTE(review): absolute, machine-specific output path kept from the original.
model.save('/home/mh/mywork/video_caption_domain/model_cook/model_' + str(n_epochs) + '.h5')