In [1]:
import importlib
import os
import pickle
import string
from pickle import load

from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.callbacks import ModelCheckpoint
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Embedding
from keras.layers import Input
from keras.layers import LSTM
from keras.layers.merge import add
from keras.models import Model
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from keras.utils import plot_model
from keras.utils import to_categorical
from numpy import array
from tqdm import tqdm

import prepare

Using TensorFlow backend.


In [2]:
# Settings handed positionally to prepare.Prepare in the following cell.
# NOTE(review): the exact meaning of `sample`, `clan_caption` and `load_pickle`
# is defined inside the prepare module, which is not visible here - confirm there.
model_name = "VGG16"
image_dir = "data/images"
caption_file = "data/caption/Flickr8k.token.txt"
sample = False
clan_caption = False
load_pickle = True

In [3]:
# Build the pipeline helper used by every later cell.
# The name `prepare` is deliberately rebound from the module to the instance
# (later cells call `prepare.<method>`). Looking the module up through
# importlib instead of through the `prepare` name keeps this cell re-runnable:
# on a second run `prepare` is already the instance, so `prepare.Prepare`
# would no longer refer to the class in the module.
prepare = importlib.import_module('prepare').Prepare(
    sample, model_name, image_dir, caption_file, clan_caption, load_pickle
)

Loading model  VGG16
Instructions for updating:
keep_dims is deprecated, use keepdims instead
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
________________________________________________

In [4]:
# Peek at a single (image id, list of captions) entry of the description
# mapping; prints nothing when the mapping is empty.
first_entry = next(iter(prepare.description.items()), None)
if first_entry is not None:
    print(*first_entry)

1000268201_693b08cb0e ['A child in a pink dress is climbing up a set of stairs in an entry way .', 'A girl going into a wooden building .', 'A little girl climbing into a wooden playhouse .', 'A little girl climbing the stairs to her playhouse .', 'A little girl in a pink dress going into a wooden cabin .']


In [6]:
# Read the identifier lists for the train / test / dev splits, in that order.
training_dataset, testing_dataset, validation_dataset = map(
    prepare.load_identifiers,
    (
        'data/caption/Flickr_8k.trainImages.txt',
        'data/caption/Flickr_8k.testImages.txt',
        'data/caption/Flickr_8k.devImages.txt',
    ),
)


In [7]:
# First three training identifiers on one line, split size on the next.
print(training_dataset[0:3], len(training_dataset), sep='\n')

['2513260012_03d33305cf', '2903617548_d3e38d7f88', '3338291921_fe7ae0c8f8']
6000


In [8]:
# First three test identifiers on one line, split size on the next.
print(testing_dataset[0:3], len(testing_dataset), sep='\n')

['3385593926_d3e9c21170', '2677656448_6b7e7702af', '311146855_0b65fdb169']
1000


In [9]:
# First three validation identifiers on one line, split size on the next.
print(validation_dataset[0:3], len(validation_dataset), sep='\n')

['2090545563_a4e66ec76b', '3393035454_2d2370ffd4', '3695064885_a6922f06b2']
1000


In [10]:
# Look up the image features for each split's identifiers
# (train, test, validation - same order as the targets on the left).
train_feature, test_feature, validation_feature = (
    prepare.load_features(split_ids)
    for split_ids in (training_dataset, testing_dataset, validation_dataset)
)

In [11]:
# Sanity check: number of feature entries loaded for the training split.
len(train_feature)

6000

In [12]:
# Sanity check: number of feature entries loaded for the test split.
len(test_feature)

1000

In [13]:
# Sanity check: number of feature entries loaded for the validation split.
len(validation_feature)

1000

In [14]:
# Fetch the descriptions belonging to each split's identifiers
# (train, test, validation - same order as the targets on the left).
train_description, test_description, validation_description = [
    prepare.load_description(identifiers)
    for identifiers in (training_dataset, testing_dataset, validation_dataset)
]

In [15]:
# Sanity check: number of description entries in the training split.
len(train_description)

6000

In [16]:
# Sanity check: number of description entries in the test split.
len(test_description)

1000

In [17]:
# Sanity check: number of description entries in the validation split.
len(validation_description)

1000

In [21]:
# Tokenizer, vocabulary size and maximum description length computed over the
# full description mapping (all splits); the call prints both sizes.
tokenizer, vocab_size, max_length = prepare.create_tokens(prepare.description)

Vocabulary Size: 8494
Max Description Length: 38


In [25]:
# Peek at a single (word, index) pair of the tokenizer's word index;
# prints nothing when the index is empty.
first_pair = next(iter(tokenizer.word_index.items()), None)
if first_pair is not None:
    print(*first_pair)

a 1


In [20]:
# Widen max_length so it is at least the longest description reported for the
# train, test and validation splits (create_tokens prints its sizes per call).
# NOTE(review): the recorded outputs show 38 for the full corpus but 40 for the
# training split - confirm why the per-split descriptions are longer.
split_max_lengths = [
    longest
    for _tokenizer, _vocab, longest in map(
        prepare.create_tokens,
        (train_description, test_description, validation_description),
    )
]
max_length = max(max_length, *split_max_lengths)

Vocabulary Size: 7378
Max Description Length: 40
Vocabulary Size: 3149
Max Description Length: 36
Vocabulary Size: 3288
Max Description Length: 35


In [None]:
# Persist the fitted tokenizer and the sizing metadata (vocab_size, max_length)
# as pickle files in the working directory.
detail = {'vocab_size': vocab_size, 'max_length': max_length}

# Context managers make sure each file is flushed and closed once written;
# a bare open() passed straight to pickle.dump leaves the handle open.
with open('tokenizer.pkl', 'wb') as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file)
with open('detail.pkl', 'wb') as detail_file:
    pickle.dump(detail, detail_file)

In [18]:
# Build the three training arrays from the training descriptions and features.
X1train, X2train, ytrain = prepare.create_sequence(
    train_description,
    tokenizer,
    max_length,
    vocab_size,
    train_feature,
)

creating sequence 


In [19]:
# Build the three test arrays from the test descriptions and features.
X1test, X2test, ytest = prepare.create_sequence(
    test_description,
    tokenizer,
    max_length,
    vocab_size,
    test_feature,
)

creating sequence 


In [20]:
# Build the three validation arrays from the validation descriptions and features.
X1validation, X2validation, yvalidation = prepare.create_sequence(
    validation_description,
    tokenizer,
    max_length,
    vocab_size,
    validation_feature,
)

creating sequence 


In [21]:
# Training data summary: the three array lengths, then the first X1 row,
# the first three X2 rows and the first y row.
print(len(X1train), len(X2train), len(ytrain))
print(X1train[0], X2train[0:3], ytrain[0], sep='\n')

354479 354479 354479
[ 0.55907613  0.          0.09575209 ...,  0.          0.          0.        ]
[[ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  2]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  2  1]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  2  1 15]]
[ 0.  1.  0. ...,  0.  0.  0.]


In [22]:
# Testing data summary: the three array lengths, then the first X1 row,
# the first three X2 rows and the first y row.
print(len(X1test), len(X2test), len(ytest))
print(X1test[0], X2test[0:3], ytest[0], sep='\n')

59336 59336 59336
[ 0.          0.          0.32350716 ...,  0.          0.          0.        ]
[[ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  2]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  2  4]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  2  4 32]]
[ 0.  0.  0. ...,  0.  0.  0.]


In [23]:
# Validation data summary: the three array lengths, then the first X1 row,
# the first three X2 rows and the first y row.
print(len(X1validation), len(X2validation), len(yvalidation))
print(X1validation[0], X2validation[0:3], yvalidation[0], sep='\n')

59643 59643 59643
[ 0.09347612  0.          3.68288469 ...,  0.          0.13240606
  0.38655221]
[[ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  2]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  2  5]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  2  5 19]]
[ 0.  0.  0. ...,  0.  0.  0.]


In [24]:
# NOTE(review): presumably sets up model checkpointing before training
# (ModelCheckpoint is imported at the top) - confirm in the prepare module.
prepare.checkpoint_prepare()

In [25]:
# Define the captioning model from the maximum sequence length and the
# vocabulary size; the call prints the layer summary.
prepare.define_caption_model(max_length, vocab_size)

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 40)           0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 4096)         0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 40, 256)      2174464     input_3[0][0]                    
__________________________________________________________________________________________________
dropout_1 (Dropout)             (None, 4096)         0        

  self.model_new = Model(inputs=[input_image_feature,input_sequence], output=output)


In [26]:
# Fit on the training arrays and validate on the validation arrays.
# The recorded run was interrupted manually (KeyboardInterrupt) during epoch 1.
prepare.model_fit(X1train, X2train, ytrain,X1validation, X2validation, yvalidation)

Train on 354479 samples, validate on 59643 samples
Epoch 1/20


KeyboardInterrupt: 