In [1]:
# load pickled data
import pickle

In [5]:
# Show the kernel's current working directory (IPython automagic for %pwd).
pwd

'/home/ubuntu'

In [6]:
# Restore the pickled `test_encoded` object from disk.
# NOTE: pickle.load can execute arbitrary code; only open files you trust.
with open('/home/ubuntu/test_encoded.pickle', mode='rb') as pickle_file:
    test_encoded = pickle.load(pickle_file)

In [7]:
# Restore the pickled `train_encoded` object; later cells index it by image
# file name to get that image's feature vector.
with open('/home/ubuntu/train_encoded.pickle', mode='rb') as pickle_file:
    train_encoded = pickle.load(pickle_file)

In [8]:
# Restore the pickled `embedding_matrix`; it is later installed as the weights
# of the model's Embedding layer.
with open('/home/ubuntu/embedding_matrix.pickle', mode='rb') as pickle_file:
    embedding_matrix = pickle.load(pickle_file)

In [9]:
# Restore the pickled `train_data` mapping; the data generator iterates its
# items as (photo key, list of caption strings).
with open('/home/ubuntu/train_data.pickle', mode='rb') as pickle_file:
    train_data = pickle.load(pickle_file)

In [10]:
# Restore the pickled `word_to_idx` lookup used to turn caption words into
# integer indices.
with open('/home/ubuntu/word_to_idx.pickle', mode='rb') as pickle_file:
    word_to_idx = pickle.load(pickle_file)

## Define Model

In [33]:
import numpy as np

In [24]:
# transfer learning model
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.models import Model
from keras.preprocessing import sequence, image 

In [13]:
# deep learning model
from keras import Input, layers, optimizers
from keras.layers import LSTM, Embedding, Dense, Activation, Flatten, Reshape, Dropout
from keras.layers.merge import add
from keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

In [38]:
import matplotlib.pyplot as plt

In [14]:
# Measure the length of one encoded image vector; this becomes the width of
# the model's image input. Any image would do, so a known training file is used.
sample_key = '1000268201.jpg'
input_shape = len(train_encoded[sample_key])
print(input_shape)

2048


In [15]:
# Image-feature branch of the captioning model.
# Takes one pre-computed feature vector per image (length `input_shape`),
# applies 50% dropout, then projects it to 256 units with ReLU so it has the
# same width as the LSTM output it is later added to in the decoder.
inputs_1 = Input(shape=(input_shape,))
fe1 = Dropout(0.5)(inputs_1)
fe2 = Dense(256, activation='relu')(fe1)

In [30]:
# Hyperparameters shared by the text branch, the decoder output and the data generator.
# NOTE(review): vocab_size and embedding_dim presumably have to match the shape of
# the pickled `embedding_matrix` that is loaded into the Embedding layer — confirm.
vocab_size = 6791  # Embedding input dimension and number of softmax output classes
embedding_dim = 200  # size of each word vector produced by the Embedding layer
max_length = 80  # length of the padded caption input sequence

In [31]:
# Caption (text) branch of the model.
# Takes a sequence of `max_length` word indices and embeds each index into
# `embedding_dim` dimensions. mask_zero=True tells Keras to treat index 0 as
# padding so downstream layers can skip those timesteps. The embedded sequence
# goes through 50% dropout and is summarised by a 256-unit LSTM.
inputs_2 = Input(shape=(max_length,))
se1 = Embedding(vocab_size, embedding_dim, mask_zero=True)(inputs_2)
se2 = Dropout(0.5)(se1)
se3 = LSTM(256)(se2)

In [22]:
# Decoder: merge both branches and predict the next word.
# `add` sums the 256-unit image projection (fe2) and the 256-unit LSTM output
# (se3) element-wise. A 256-unit ReLU layer follows, and the final softmax
# layer emits one probability per vocabulary entry (`vocab_size` classes).
decoder1 = add([fe2, se3])
decoder2 = Dense(256, activation='relu')(decoder1)
outputs = Dense(vocab_size, activation='softmax')(decoder2)

In [25]:
# Assemble the two-input model: (image features, partial caption) -> softmax
# over the vocabulary for the next word, then print the layer table.
model = Model(inputs=[inputs_1, inputs_2], outputs=outputs)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_4 (InputLayer)           [(None, 80)]         0           []                               
                                                                                                  
 input_1 (InputLayer)           [(None, 2048)]       0           []                               
                                                                                                  
 embedding (Embedding)          (None, 80, 200)      1358200     ['input_4[0][0]']                
                                                                                                  
 dropout (Dropout)              (None, 2048)         0           ['input_1[0][0]']                
                                                                                              

## Model Training

In [26]:
# Install the pre-trained word vectors in the Embedding layer and freeze it so
# training does not update them. This must run before model.compile().
# The layer is located by type rather than by a hard-coded position in
# model.layers, so the cell keeps working if the architecture (and therefore
# the layer order) changes.
embedding_layer = next(layer for layer in model.layers if isinstance(layer, Embedding))
embedding_layer.set_weights([embedding_matrix])
embedding_layer.trainable = False

In [27]:
# Configure training: Adam optimizer minimising categorical cross-entropy
# (the generator supplies one-hot next-word targets).
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
)

In [28]:
# train in batches
def data_generator(descriptions, photos, wordtoix, max_length, num_photos_per_batch,
                   num_classes=None):
    """Endlessly yield training batches for the captioning model.

    Each caption of each photo is converted to word indices (words missing
    from ``wordtoix`` are dropped) and expanded into one sample per prefix:
    the first ``i`` indices are the input and index ``i`` is the target.

    Args:
        descriptions: mapping of photo key -> iterable of caption strings.
        photos: mapping of photo key -> feature vector for that photo.
        wordtoix: mapping of word -> integer index.
        max_length: length each input index sequence is padded to by
            ``pad_sequences`` (default padding/truncation settings).
        num_photos_per_batch: number of photos whose samples are grouped into
            one yielded batch. The number of samples per batch varies with
            the caption lengths.
        num_classes: width of the one-hot target vectors. Defaults to the
            notebook-level ``vocab_size`` so existing calls keep working.

    Yields:
        ``([image_features, padded_sequences], one_hot_targets)`` as NumPy
        arrays with one row per sample.

    Raises:
        ValueError: when the first batch is requested and ``descriptions`` is
            empty or ``num_photos_per_batch`` is smaller than 1. Either case
            would otherwise loop forever without yielding anything.
    """
    if not descriptions:
        raise ValueError('descriptions is empty; no batch could ever be produced')
    if num_photos_per_batch < 1:
        raise ValueError('num_photos_per_batch must be at least 1')
    if num_classes is None:
        # Backward-compatible default: fall back to the notebook-level constant.
        num_classes = vocab_size

    X1, X2, y = [], [], []
    n = 0  # photos accumulated into the current batch
    # Loop over the data set forever; a partially filled batch carries over
    # from the end of one pass into the start of the next.
    while True:
        for key, desc_list in descriptions.items():
            n += 1
            # retrieve the photo feature
            photo = photos[key]
            for desc in desc_list:
                # encode the caption, silently skipping out-of-vocabulary words
                seq = [wordtoix[word] for word in desc.split(' ') if word in wordtoix]
                # split one sequence into multiple (prefix, next word) pairs
                for i in range(1, len(seq)):
                    in_seq, out_seq = seq[:i], seq[i]
                    # pad the prefix to the fixed model input length
                    in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
                    # one-hot encode the target word
                    out_seq = to_categorical([out_seq], num_classes=num_classes)[0]
                    X1.append(photo)
                    X2.append(in_seq)
                    y.append(out_seq)

            if n == num_photos_per_batch:
                yield ([np.array(X1), np.array(X2)], np.array(y))
                X1, X2, y = [], [], []
                n = 0

In [34]:
# Training run: 10 epochs of 3500 generator steps, each step covering the
# samples of 3 photos. Expect this to take hours.
epochs = 10
batch_size = 3
steps = 3500

generator = data_generator(
    descriptions=train_data,
    photos=train_encoded,
    wordtoix=word_to_idx,
    max_length=max_length,
    num_photos_per_batch=batch_size,
)
model.fit(
    generator,
    steps_per_epoch=steps,
    epochs=epochs,
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f47a4571a60>

## Save Model

In [41]:
from keras.models import load_model

In [42]:
# Write the trained model to an HDF5 (.h5) file in the working directory.
model.save(filepath='cloud_trained_model.h5')

  layer_config = serialize_layer_fn(layer)


In [43]:
# Read the saved HDF5 file back into a separate model object.
test_model = load_model(filepath='cloud_trained_model.h5')