In [14]:
import os
import joblib
from keras.preprocessing.text import Tokenizer, one_hot
from keras.preprocessing.sequence import pad_sequences
from keras.models import Model, Sequential, model_from_json
from keras.utils import to_categorical
from keras.layers.core import Dense, Dropout, Flatten
from keras.optimizers import RMSprop
from keras.layers.convolutional import Conv2D
from keras.callbacks import ModelCheckpoint, CSVLogger
from keras.layers import Embedding, TimeDistributed, RepeatVector, LSTM, GRU, concatenate , Input, Reshape, Dense
import numpy as np

In [4]:
max_length = 20
tokenizer_split = '\n'
img_dim = 256

epochs = 300
batch_size = 128

img_dir = '../ressources/_img_test/'
page_dir = '../ressources/_xml_test/'

images = joblib.load(img_dir + 'img.dmp')
pages = joblib.load(page_dir + 'page.dmp')

In [29]:
# Initialize the function to create the vocabulary 
tokenizer = Tokenizer(filters='', split=tokenizer_split, lower=False)
tokenizer.fit_on_texts(pages)

joblib.dump(tokenizer, 'tokenizer.dmp')

# Add one spot for the empty word in the vocabulary 
vocab_size = len(tokenizer.word_index) + 1
# Map the input sentences into the vocabulary indexes
train_sequences = tokenizer.texts_to_sequences(pages)
# The longest set of boostrap tokens
max_sequence = max(len(s) for s in train_sequences)

# limit max_length -> max_sequence
if max_length > max_sequence:
    max_length = max_sequence

def preprocess_data(sequences, features):
    X, y, image_data = list(), list(), list()
    for img_no, seq in enumerate(sequences):
        for i in range(1, len(seq)):
            # Add the sentence until the current count(i) and add the current count to the output
            in_seq, out_seq = seq[:i], seq[i]
            # Pad all the input token sentences to max_sequence
            in_seq = pad_sequences([in_seq], maxlen=max_sequence)[0]
            # Turn the output into one-hot encoding
            out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
            # Add the corresponding image to the boostrap token file
            image_data.append(features[img_no])
            # Cap the input sentence to max_length tokens and add it
            X.append(in_seq[-max_length:])
            y.append(out_seq)
    return np.array(X), np.array(y), np.array(image_data)

X, y, image_data = preprocess_data(train_sequences, images)

In [12]:
#Create the encoder
image_model = Sequential()
image_model.add(Conv2D(16, (3, 3), padding='valid', activation='relu', input_shape=images[0].shape))
image_model.add(Conv2D(16, (3,3), activation='relu', padding='same', strides=2))
image_model.add(Conv2D(32, (3,3), activation='relu', padding='same'))
image_model.add(Conv2D(32, (3,3), activation='relu', padding='same', strides=2))
image_model.add(Conv2D(64, (3,3), activation='relu', padding='same'))
image_model.add(Conv2D(64, (3,3), activation='relu', padding='same', strides=2))
image_model.add(Conv2D(128, (3,3), activation='relu', padding='same'))

image_model.add(Flatten())
image_model.add(Dense(1024, activation='relu'))
image_model.add(Dropout(0.3))
image_model.add(Dense(1024, activation='relu'))
image_model.add(Dropout(0.3))

image_model.add(RepeatVector(max_length))

visual_input = Input(shape=images[0].shape)
encoded_image = image_model(visual_input)

language_input = Input(shape=(max_length,))
language_model = Embedding(vocab_size, 50, input_length=max_length, mask_zero=True)(language_input)
language_model = GRU(128, return_sequences=True)(language_model)
language_model = GRU(128, return_sequences=True)(language_model)

#Create the decoder
decoder = concatenate([encoded_image, language_model])
decoder = GRU(512, return_sequences=True)(decoder)
decoder = GRU(512, return_sequences=False)(decoder)
decoder = Dense(vocab_size, activation='softmax')(decoder)

# Compile the model
model = Model(inputs=[visual_input, language_input], outputs=decoder)
optimizer = RMSprop(lr=0.0001, clipvalue=1.0)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

# save model
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)

In [15]:
csvlogger = CSVLogger('log.csv', append=False, separator=';')
filepath ="epoch-{epoch:04d}--val_loss-{val_loss:.4f}--loss-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_weights_only=True, period=2)
callbacks = [checkpoint,csvlogger]

# Train the model
model.fit([image_data, X], y, 
          batch_size=batch_size, 
          shuffle=False, 
          validation_split=0.1, 
          callbacks=callbacks, 
          verbose=1, 
          epochs=epochs)

Train on 234 samples, validate on 26 samples
Epoch 1/300
Epoch 2/300

Epoch 00002: saving model to epoch-0002--val_loss-2.6732--loss-2.7525.hdf5
Epoch 3/300
Epoch 4/300

Epoch 00004: saving model to epoch-0004--val_loss-2.5883--loss-2.5495.hdf5
Epoch 5/300
Epoch 6/300

Epoch 00006: saving model to epoch-0006--val_loss-2.5083--loss-2.4852.hdf5
Epoch 7/300
Epoch 8/300

Epoch 00008: saving model to epoch-0008--val_loss-2.4608--loss-2.4218.hdf5
Epoch 9/300
Epoch 10/300

Epoch 00010: saving model to epoch-0010--val_loss-2.3723--loss-2.3365.hdf5
Epoch 11/300
Epoch 12/300

Epoch 00012: saving model to epoch-0012--val_loss-2.3375--loss-2.2950.hdf5
Epoch 13/300
Epoch 14/300

Epoch 00014: saving model to epoch-0014--val_loss-2.2672--loss-2.2283.hdf5
Epoch 15/300
Epoch 16/300

Epoch 00016: saving model to epoch-0016--val_loss-2.2602--loss-2.2117.hdf5
Epoch 17/300
Epoch 18/300

Epoch 00018: saving model to epoch-0018--val_loss-2.2173--loss-2.1552.hdf5
Epoch 19/300
Epoch 20/300

Epoch 00020: saving

Epoch 60/300

Epoch 00060: saving model to epoch-0060--val_loss-2.0529--loss-1.7815.hdf5
Epoch 61/300
Epoch 62/300

Epoch 00062: saving model to epoch-0062--val_loss-1.8116--loss-1.6964.hdf5
Epoch 63/300
Epoch 64/300

Epoch 00064: saving model to epoch-0064--val_loss-1.9265--loss-1.7098.hdf5
Epoch 65/300
Epoch 66/300

Epoch 00066: saving model to epoch-0066--val_loss-1.8167--loss-1.7618.hdf5
Epoch 67/300
Epoch 68/300

Epoch 00068: saving model to epoch-0068--val_loss-1.8302--loss-1.6688.hdf5
Epoch 69/300
Epoch 70/300

Epoch 00070: saving model to epoch-0070--val_loss-1.8837--loss-1.7524.hdf5
Epoch 71/300
Epoch 72/300

Epoch 00072: saving model to epoch-0072--val_loss-1.7622--loss-1.6615.hdf5
Epoch 73/300
Epoch 74/300

Epoch 00074: saving model to epoch-0074--val_loss-1.8742--loss-1.6498.hdf5
Epoch 75/300
Epoch 76/300

Epoch 00076: saving model to epoch-0076--val_loss-1.7444--loss-1.7790.hdf5
Epoch 77/300
Epoch 78/300

Epoch 00078: saving model to epoch-0078--val_loss-1.6942--loss-1.617

Epoch 118/300

Epoch 00118: saving model to epoch-0118--val_loss-1.6142--loss-1.4022.hdf5
Epoch 119/300
Epoch 120/300


KeyboardInterrupt: 

In [25]:
# map an integer to a word
def word_for_id(integer, tokenizer):
    for word, index in tokenizer.word_index.items():
        if index == integer:
            return word
    return None

# generate a description for an image
def generate_desc(model, tokenizer, photo, max_length):
    photo = np.array([photo])
    # seed the generation process
    in_text = '<!--START-->\n'
    # iterate over the whole length of the sequence
    print('\nPrediction---->\n\n<START> ', end='')
    for i in range(150):
        # integer encode input sequence
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        # pad input
        sequence = pad_sequences([sequence], maxlen=max_length)
        # predict next word
        yhat = model.predict([photo, sequence], verbose=0)
        # convert probability to integer
        yhat = np.argmax(yhat)
        # map integer to word
        word = word_for_id(yhat, tokenizer)
        # stop if we cannot map the word
        if word is None:
            break
        # append as input for generating the next word
        in_text += word + ' '
        # stop if we predict the end of the sequence
        print(word + '\n', end='')
        if word == '<!--END-->':
            break
    return in_text

In [26]:
test_image = images[0]
generate_desc(model, tokenizer, np.array(test_image), max_length)


Prediction---->

<START> <mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="t

<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:m

'<!--START-->\n<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc"> <mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc"> <mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc"> <mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc"> <mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc"> <mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc"> <mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc"> <mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc"> <mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc"> <mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc"> <mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc"> <mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc"> <mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc"> <mvc:View displayBlock="true" xmlns=

In [28]:
print(pages[0])

<!--START-->
<mvc:View displayBlock="true" xmlns="sap.m" xmlns:mvc="sap.ui.core.mvc">
<VBox class="sapUiSmallMargin">
<Button text="Default" />
<Switch state="true" />
<CheckBox text="Option b" />
<Input width="250px" class="sapUiSmallMarginBottom" />
<CheckBox text="Option b" />
<Button text="Accept" type="Accept" />
<CheckBox text="Option b" />
<Button text="Accept" type="Accept" />
</VBox>
</mvc:View>
<!--END-->

