In [1]:
from preprocess.dataset import invoke_emotion_to_dataset, invoke_edited_to_dataset, load_caption

path = './dataset'
flickr_folder = path + '/flickr10k'

# NOTE(review): these two per-style dicts are never filled or read in the
# visible cells; they are kept untouched in case other code relies on them.
all_filenames = {
    'factual': [],
    'happy': [],
    'sad': [],
    'angry': []
}
all_captions = {
    'factual': [],
    'happy': [],
    'sad': [],
    'angry': []
}

modes = ['happy', 'sad', 'angry']


def load_all_splits(mode):
    """Load one caption style and merge its train/val/test splits.

    Args:
        mode: name of the style sub-folder under ``flickr_folder``
            (e.g. 'happy' or 'factual').

    Returns:
        A ``(filenames, captions)`` pair; each list is the concatenation of
        the train, validation and test lists, in that order.
    """
    train, val, test = load_caption(flickr_folder + '/' + mode)
    filenames_train, captions_train = train
    filenames_val, captions_val = val
    filenames_test, captions_test = test
    return (filenames_train + filenames_val + filenames_test,
            captions_train + captions_val + captions_test)


# Read every style from disk exactly once and reuse the result below
# (previously 'happy' and 'factual' were each loaded a second time).
splits_by_mode = {mode: load_all_splits(mode) for mode in modes + ['factual']}

# `captions` pools the captions of all styles (emotional styles first, factual
# last). `filenames` only holds the factual file names, so the two lists are
# NOT index-aligned with each other.
captions = []
for mode in modes + ['factual']:
    captions += splits_by_mode[mode][1]
filenames = list(splits_by_mode['factual'][0])

# Only the 'happy' style is paired with the factual captions here
# (original note: "for test only").
filenames_train_happy, captions_train_happy = splits_by_mode['happy']
filenames_train_factual, captions_train_factual = splits_by_mode['factual']

# File name -> factual caption entry. If a file name occurs more than once,
# the last entry wins.
mp = dict(zip(filenames_train_factual, captions_train_factual))

# Build the training pairs: every factual caption of an image is paired with
# the first 'happy' caption of the same image. Each side is wrapped in a
# one-element list.
encoder_input = []
decoder_input = []
for filename, caption in zip(filenames_train_happy, captions_train_happy):
    # Raises KeyError if a 'happy' image has no factual counterpart.
    for cap in mp[filename]:
        encoder_input.append([cap])
        decoder_input.append([caption[0]])

In [2]:
from preprocess.tokenizer import mark_captions, flatten, TokenizerWrap, mark_start, mark_end

Using TensorFlow backend.


In [3]:
%%time
# Vocabulary size limit handed to the tokenizer wrapper.
num_words = 10000
# The tokenizer is built from `captions`, i.e. the captions of every style
# collected in the first cell.
# presumably mark_captions wraps each caption in the start/end markers and
# flatten collapses the nested lists into one flat list of strings --
# TODO confirm against preprocess.tokenizer.
captions_marked = mark_captions(captions)
captions_flat = flatten(captions_marked)
tokenizer = TokenizerWrap(texts=captions_flat, num_words=num_words)
# Disabled alternative: take the size from the fitted vocabulary instead of
# the fixed limit above.
# num_words = len(tokenizer.word_index)
print(num_words)

10000
CPU times: user 1 s, sys: 14.4 ms, total: 1.02 s
Wall time: 1.02 s


In [4]:
%%time
# Encoder side: the factual captions, marked and converted to integer tokens.
# Each resulting entry is a list wrapping a single token list (see the sample
# displayed further down).
encoder_input_marked = mark_captions(encoder_input)
tokens_encoder_input = tokenizer.captions_to_tokens(encoder_input_marked)

# Decoder side: the matching 'happy' captions, processed the same way so that
# entry i here belongs to entry i of the encoder tokens.
decoder_input_marked = mark_captions(decoder_input)
tokens_decoder_input = tokenizer.captions_to_tokens(decoder_input_marked)

CPU times: user 180 ms, sys: 4.55 ms, total: 184 ms
Wall time: 185 ms


In [5]:
# Integer ids of the start/end marker words in the fitted vocabulary.
# The markers are stripped before the lookup, so word_index is keyed by the
# bare word without surrounding whitespace.
token_start = tokenizer.word_index[mark_start.strip()]
token_end = tokenizer.word_index[mark_end.strip()]
print('token_start', token_start)
print('token_end', token_end)

token_start 1
token_end 2


In [6]:
import numpy as np
from keras.preprocessing.sequence import pad_sequences

def batch_generator(batch_size,
                    tokens_encoder_input,
                    tokens_decoder_input):
    """Yield an endless stream of randomly sampled, padded training batches.

    Args:
        batch_size: number of (encoder, decoder) pairs drawn per batch.
        tokens_encoder_input: sequence in which element ``i`` wraps one token
            list, i.e. the tokens are read from ``tokens_encoder_input[i][0]``.
        tokens_decoder_input: same layout as ``tokens_encoder_input``; element
            ``i`` is the target that belongs to encoder element ``i``.

    Yields:
        ``(x_data, y_data)`` where ``x_data`` has the keys ``'encoder_input'``
        and ``'decoder_input'`` and ``y_data`` has the key
        ``'decoder_output'``. Sequences are zero-padded at the end to the
        longest sequence of the batch. The decoder input is the padded decoder
        batch without its last column and the decoder output is the same batch
        without its first column (i.e. shifted by one step).

    Raises:
        ValueError: if ``batch_size`` is smaller than 1, the dataset is empty,
            or the two token lists differ in length. Because this is a
            generator, the error surfaces on the first ``next()`` call.
    """
    num_samples = len(tokens_encoder_input)
    if batch_size < 1:
        raise ValueError('batch_size must be at least 1, got %r' % (batch_size,))
    if num_samples == 0:
        raise ValueError('batch_generator received an empty dataset')
    if len(tokens_decoder_input) != num_samples:
        raise ValueError(
            'encoder and decoder inputs must have the same length, got %d and %d'
            % (num_samples, len(tokens_decoder_input)))

    while True:
        # Indices are drawn independently, so a sample may repeat within a
        # batch and an "epoch" is not guaranteed to visit every sample.
        ids = np.random.randint(num_samples, size=batch_size)
        partial_tokens_encoder_input = [tokens_encoder_input[i][0] for i in ids]
        partial_tokens_decoder_input = [tokens_decoder_input[i][0] for i in ids]

        # Pad only up to the longest sequence of this batch, not of the
        # whole dataset.
        max_tokens_encoder = max(len(t) for t in partial_tokens_encoder_input)
        max_tokens_decoder = max(len(t) for t in partial_tokens_decoder_input)

        tokens_encoder_padded = pad_sequences(
            partial_tokens_encoder_input,
            maxlen=max_tokens_encoder,
            padding='post',
            truncating='post')
        tokens_decoder_padded = pad_sequences(
            partial_tokens_decoder_input,
            maxlen=max_tokens_decoder,
            padding='post',
            truncating='post')

        encoder_input_data = tokens_encoder_padded
        # Decoder input and expected output are the same sequence offset by
        # one position.
        decoder_input_data = tokens_decoder_padded[:, 0:-1]
        decoder_output_data = tokens_decoder_padded[:, 1:]

        x_data = {
            'encoder_input': encoder_input_data,
            'decoder_input': decoder_input_data
        }

        y_data = {'decoder_output': decoder_output_data}

        yield (x_data, y_data)

In [7]:
# Single source of truth for the batch size: the generator and the step count
# below must agree, otherwise an "epoch" no longer matches the dataset size.
batch_size = 32
generator_train = batch_generator(batch_size=batch_size,
                                  tokens_encoder_input=tokens_encoder_input,
                                  tokens_decoder_input=tokens_decoder_input)
# Number of batches per epoch (integer division drops a trailing partial batch).
step = len(tokens_encoder_input) // batch_size
step

140

In [8]:
from model import Seq2Seq

# Build the project's Seq2Seq wrapper for the 'happy' style, the same style
# whose captions were used as decoder targets above.
# NOTE(review): what `mode` controls inside Seq2Seq is not visible here --
# confirm against model.py.
seq2seq = Seq2Seq(mode='happy')

Instructions for updating:
Colocations handled automatically by placer.


In [9]:
from tensorflow.core.protobuf import rewriter_config_pb2
from keras import backend as K
import tensorflow as tf

# Create a TF1-style session whose graph rewriter has the memory optimization
# pass switched off, and register it as the session used by the Keras backend.
# NOTE(review): this cell runs after the model was already constructed in the
# previous cell; confirm that replacing the backend session at this point is
# intended, or move this configuration above the model construction.
config = tf.ConfigProto()
off = rewriter_config_pb2.RewriterConfig.OFF
config.graph_options.rewrite_options.memory_optimization = off

K.tensorflow_backend.set_session(tf.Session(config=config))

In [10]:
# Train from the endless batch generator; `step` batches make up one epoch.
# No `epochs` argument is passed; the recorded run shows a single epoch
# ("Epoch 1/1"), and it was interrupted after 11 of 140 steps.
seq2seq.model.fit_generator(generator=generator_train,
                           steps_per_epoch=step)

Instructions for updating:
Use tf.cast instead.
Epoch 1/1
 11/140 [=>............................] - ETA: 3:59 - loss: 5.2507

KeyboardInterrupt: 

In [19]:
# Inspect one tokenised encoder sample: a list wrapping a single token list
# that starts with token_start (1) and ends with token_end (2).
tokens_encoder_input[1]

[[1, 4, 6, 23, 3, 199, 160, 5, 2166, 8, 1256, 3, 29, 249, 3, 258, 2]]

In [20]:
# Decode the sample inspected above, passing the start/end token ids.
# NOTE(review): the recorded output is the start token followed only by 0s;
# the recorded training run was interrupted after 11 steps, so presumably the
# model is still essentially untrained -- confirm after a full training run.
seq2seq.predict(tokens_encoder_input[1], token_start, token_end)

[1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]