In [0]:
import numpy as np
import os
import matplotlib.pyplot as plt
import pickle
import pandas as pd

In [3]:
from keras.preprocessing import image, sequence
from keras.layers import Dense, Convolution2D, Dropout, LSTM, TimeDistributed, Embedding, Bidirectional, Activation, RepeatVector
from keras.layers import concatenate, Input
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras.applications import ResNet50

Using TensorFlow backend.


In [0]:
from keras.models import Model
from keras.callbacks import ModelCheckpoint, TensorBoard

In [5]:
pd_dataset = pd.read_csv("flickr_8k_train_dataset.txt", delimiter='\t')
ds = pd_dataset.values
print (ds.shape)

(30000, 2)


In [0]:
from time import time

In [7]:
sentences = []
for ix in range(ds.shape[0]):
    sentences.append(ds[ix, 1])
    
print (len(sentences))

30000


In [0]:
words = [i.split() for i in sentences]

In [0]:
unique = []
for i in words:
    unique.extend(i)

In [10]:
unique = list(set(unique))
print (len(unique))

vocab_size = len(unique)

8253


In [0]:
#Vectorization
word_2_indices = {val:index for index, val in enumerate(unique)}
indices_2_word = {index:val for index, val in enumerate(unique)}

In [0]:
word_2_indices['UNK'] = 0
word_2_indices['raining'] = 8253

indices_2_word[0] = 'UNK'
indices_2_word[8253] = 'raining'

In [13]:
print (word_2_indices['<start>'])
print (indices_2_word[8020])
print (word_2_indices['<end>'])
print (indices_2_word[204])

3041
ledge
761
seagulls


In [14]:
vocab_size = len(word_2_indices.keys())
print (vocab_size)

8254


In [15]:
captions = np.load("captions3500.npy")
next_words = np.load("next_words3500.npy")

print (captions.shape)
print (next_words.shape)

(25493, 40)
(25493, 8254)


In [16]:
images = np.load("images3500.npy")

print (images.shape)

(25493, 2048)


In [17]:
imag = np.load("image_names3500.npy")
        
print (imag.shape)

(25493,)


In [0]:
embedding_size = 128
max_len = 40

In [19]:
input1 = Input(shape=(2048,))

model1_dense1 = Dense(embedding_size, input_shape=(2048,), activation='relu')(input1)
model1_rep1 = RepeatVector(max_len)(model1_dense1)

model1 = Model(inputs=input1, outputs=model1_rep1)
model1.summary()

Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 2048)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               262272    
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 40, 128)           0         
Total params: 262,272
Trainable params: 262,272
Non-trainable params: 0
_________________________________________________________________


In [20]:
input2 = Input(shape=(max_len,))
model2_emb1 = Embedding(input_dim=vocab_size, output_dim=embedding_size, input_length=max_len)(input2)
model2_lstm1 = LSTM(256, return_sequences=True)(model2_emb1)
model2_timeDist1 = TimeDistributed(Dense(embedding_size))(model2_lstm1)
                                   
model2 = Model(inputs=input2, outputs=model2_timeDist1)
model2.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 40)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 40, 128)           1056512   
_________________________________________________________________
lstm_1 (LSTM)                (None, 40, 256)           394240    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 40, 128)           32896     
Total params: 1,483,648
Trainable params: 1,483,648
Non-trainable params: 0
_________________________________________________________________


In [21]:
concat = concatenate([model1_rep1, model2_timeDist1])
final_lstm1 = LSTM(128, return_sequences=True)(concat)
final_lstm2 = LSTM(512, return_sequences=False)(final_lstm1)
final_dense = Dense(vocab_size, activation='softmax')(final_lstm2)
model = Model(inputs=[input1, input2], outputs=final_dense)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 40)           0                                            
__________________________________________________________________________________________________
input_1 (InputLayer)            (None, 2048)         0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 40, 128)      1056512     input_2[0][0]                    
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 128)          262272      input_1[0][0]                    
__________________________________________________________________________________________________
lstm_1 (LS

In [0]:
model.compile(loss='categorical_crossentropy', optimizer='RMSprop', metrics=['accuracy'])

In [0]:
tensorboard = TensorBoard(log_dir="logs4/{}".format(time()))

In [24]:
model.load_weights('model_weightsw2.h5')
hist = model.fit([images, captions], next_words, batch_size=512, epochs=20, callbacks=([tensorboard]))
model.save_weights("model_weights1024w3.h5")

Instructions for updating:
Use tf.cast instead.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [0]:
model_json = model.to_json()
with open("model1.json", "w") as json_file:
    json_file.write(model_json)