In [1]:
import keras
import tensorflow as tf
from keras.models import Sequential, Model
from keras.utils import to_categorical
from keras.layers import Input, Dense, Dropout, \
                         RepeatVector, LSTM, concatenate, \
                         Conv2D, MaxPooling2D, Flatten
from keras.optimizers import RMSprop
from keras.preprocessing.image import array_to_img, \
                                      img_to_array, load_img
from brainfart5 import chars_to_tokens, tokens_to_code
import numpy as np
import os

Using TensorFlow backend.


In [2]:
filter_sizes = [32, 64, 128]
kernel_sizes = [(3, 3), (3, 3), (3, 3)]
pool_sizes = [(2, 2), (2, 2), (2, 2)]
cnn_activation = 'relu'
cnn_dropout = 0.25

mlp_units = [1024, 1024]
mlp_activation = 'relu'
mlp_dropout = 0.3

text_rnn_sizes = [128, 128]
text_rnn_dropout = 0.0

decoder_sizes = [512, 512]
decoder_dropout = 0.0

In [3]:
sequence_length = 15
learning_rate = 0.0001

max_dataset_size = 600 #-1

In [4]:
START_TOKEN = '<start_token>'
END_TOKEN = '<end_token>'
PAD_TOKEN = '<pad_token>'


def unison_shuffle(a, b, c):
    p = np.random.permutation(len(a))
    return a[p], b[p], c[p]


def load_image(path):
    image = load_img(path)
    return img_to_array(image)


def make_if_not_exist(path):
    if not os.path.exists(path):
        os.makedirs(path)

In [5]:
# Where all the data is.
dataset_directory = 'size_60000_ln_len_7'

# Where the image output is.
image_directory = os.path.join(dataset_directory, 'images')

# Where the corresponding chars are.
tokens_directory = os.path.join(dataset_directory, 'tokens')

# Dataset size. 
dataset_size = max_dataset_size if max_dataset_size > -1 \
               else len(os.listdir(image_directory))

# Get the shape of the images we are working with.
first_image_path = os.path.join(image_directory, 'output_0.png')
first_image_shape = load_image(first_image_path).shape

# Create a container to hold all of the images.
all_images = np.zeros((dataset_size, *first_image_shape))

all_chars_list = [[START_TOKEN, END_TOKEN, PAD_TOKEN]]

# Load all the images into the container.
for i in range(dataset_size):
    
    image_name = 'output_{}.png'.format(i)
    image_path = os.path.join(image_directory, image_name)
    all_images[i] = load_image(image_path) / 255.0
    
    tokens_name = 'tokens_{}.bf5'.format(i)
    
    with open(os.path.join(tokens_directory, tokens_name)) as file:
        chars = list(file.read())
        all_chars_list.append(chars)

In [6]:
all_chars_dict = dict()

for char_list in all_chars_list:
    
    for char in char_list:
        
        try:
            all_chars_dict[char] += 1
        except:
            all_chars_dict[char] = 0

# These are the chars that are used.
len(all_chars_dict)

tokens_to_chars = list()
chars_to_tokens = dict()
counter = 0

for key, value in all_chars_dict.items():
    tokens_to_chars.append(key)
    chars_to_tokens[key] = counter
    counter += 1
    
print(tokens_to_chars, '\n\n\n', chars_to_tokens)

output_size = len(tokens_to_chars)

['p', 'd', '}', '"', 'l', '%', ')', '1', 'q', 'V', '$', 'n', 'h', 'O', ',', '8', '&', '.', '@', 'w', '<', 'k', 'B', 'v', 'j', '9', '!', '6', 'u', '7', 'e', 'F', '_', '|', '*', '<end_token>', 'm', 'W', 'T', '\\', 's', 'f', '^', '+', ';', 'y', '<pad_token>', 'G', 'x', 'E', 'i', 'o', 'c', 'J', 'I', 'H', 'U', '-', 'X', 'g', 'K', '{', '>', 'S', 'z', '[', 'Y', "'", '#', '(', 'a', '?', '0', '5', 'P', '/', 'Q', 't', 'N', '=', 'A', '`', 'D', ']', 'R', 'r', 'L', '<start_token>', 'C', '4', 'b', 'Z', '~', ':', '3', '2', 'M'] 


 {'p': 0, 'd': 1, '}': 2, '"': 3, 'l': 4, '%': 5, ')': 6, '1': 7, 'r': 85, 'H': 55, '$': 10, 'n': 11, 'h': 12, 'O': 13, ',': 14, '&': 16, '.': 17, '@': 18, 'w': 19, 'k': 21, 'B': 22, 'v': 23, 'j': 24, '9': 25, '!': 26, '6': 27, 'u': 28, '7': 29, 'e': 30, '>': 62, '_': 32, '|': 33, '<end_token>': 35, 'm': 36, 'W': 37, 'T': 38, '\\': 39, 's': 40, '=': 79, '^': 42, '+': 43, ';': 44, 'y': 45, '<pad_token>': 46, 'G': 47, 'x': 48, 'L': 86, 'E': 49, 'i': 50, 'o': 51, 'c': 52, 'I':

In [7]:
pad_tokens = [PAD_TOKEN for _ in range(sequence_length)]

data_x = []
data_y = []

for i in range(dataset_size):
    
    tokens_name = 'tokens_{}.bf5'.format(i)
    
    with open(os.path.join(tokens_directory, tokens_name)) as file:
        chars = list(file.read())
        
        char_sequence = pad_tokens + [START_TOKEN] + chars + [END_TOKEN]
        tokens = [chars_to_tokens[c] for c in char_sequence]
        
    sequence_one_hot = to_categorical(tokens, num_classes=output_size)
    images = [all_images[i] for _ in range(sequence_length)]
    
    for start in range(len(sequence_one_hot) - sequence_length):
        
        end = start + sequence_length
        data_x.append([images, sequence_one_hot[start:end]])
        data_y.append(sequence_one_hot[end])

In [8]:
cnn_hyper_params = list(zip(range(len(filter_sizes)), filter_sizes, kernel_sizes, pool_sizes))

In [9]:
tf.reset_default_graph()

image_model = Sequential()
image_shape = (100, 100, 3)

for layer, filters, kernel_size, pool_size in cnn_hyper_params:
    if layer == 0:
        image_model.add(Conv2D(filters, 
                               kernel_size, 
                               padding='valid', 
                               activation=cnn_activation, 
                               input_shape=image_shape))
    else:
        image_model.add(Conv2D(filters, 
                               kernel_size, 
                               padding='valid',
                               activation=cnn_activation))
    image_model.add(Conv2D(filters, kernel_size, 
                           padding='valid',
                           activation=cnn_activation))
    image_model.add(MaxPooling2D(pool_size))
    image_model.add(Dropout(cnn_dropout))

image_model.add(Flatten())
    
for units in mlp_units:
    image_model.add(Dense(units, activation=mlp_activation))
    image_model.add(Dropout(mlp_dropout))

image_model.add(RepeatVector(sequence_length))
    
image_input = Input(shape=image_shape)
encoded_image = image_model(image_input)   


text_shape = (sequence_length, output_size)
text_model = Sequential()

for layer, size in enumerate(text_rnn_sizes):
    if layer == 0:
        text_model.add(LSTM(size, return_sequences=True, 
                            recurrent_dropout=text_rnn_dropout,
                            input_shape=text_shape))
    else:
        text_model.add(LSTM(size, return_sequences=True,
                            recurrent_dropout=text_rnn_dropout))

text_input = Input(shape=text_shape)
encoded_text = text_model(text_input)

decoder = concatenate([encoded_image, encoded_text])

for layer, size in enumerate(decoder_sizes):
    decoder = LSTM(size, 
                   recurrent_dropout=decoder_dropout,
                   return_sequences=(layer != (len(decoder_sizes) - 1)))(decoder)
decoder = Dense(output_size, activation='softmax')(decoder)
                       
model = Model(inputs=[image_input, text_input], outputs=decoder)
optimiser = RMSprop(lr=learning_rate, clipvalue=1.0)
model.compile(loss='categorical_crossentropy', optimizer=optimiser)

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [11]:
batch_size = 1
epochs = 20

model.fit(x=data_x, 
          y=data_y, 
          batch_size=batch_size, 
          epochs=epochs, 
          shuffle=True, 
          validation_split=0.1,
          verbose=1)

KeyboardInterrupt: 