# Compile

Compile a Keras model for character-level language modelling and save it to disk.

### Load Data

In [1]:
import pickle

# Load the preprocessed character-level dataset.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# 'data.p' must come from a trusted source.
# The context manager closes the file handle as soon as loading finishes
# instead of leaving that to the garbage collector.
with open('data.p', 'rb') as data_file:
    abstracts_char_lm = pickle.load(data_file)

# Weight matrix later handed to the Embedding layer.
embeddings = abstracts_char_lm['embeddings']
# Padded abstracts (presumably sequences of character indices -- confirm against the producer of data.p).
abstracts_padded = abstracts_char_lm['abstracts_padded']
# Lookup tables between characters and indices, as named by their keys.
char2idx, idx2char = abstracts_char_lm['char2idx'], abstracts_char_lm['idx2char']

# maxlen is taken from the first abstract; this assumes every row was padded
# to the same length -- TODO confirm.
num_abstracts, maxlen = len(abstracts_padded), len(abstracts_padded[0])
# Rows and columns of the embedding matrix, respectively.
vocab_dim, num_chars = embeddings.shape

### Build Keras Model

#### Train model

The train and sample models differ only in the sequence length on which they operate. During training we want to roll out the network over many time steps so we can backpropagate error and handle long-term dependencies, whereas during sampling we need to be able to feed the network's output back into itself one character at a time.

In [2]:
from keras.models import Sequential
from keras.layers.core import TimeDistributedDense, Activation, Dropout
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM

def create_model(seq_length, hidden_dim=32):
    """Assemble and compile a two-layer LSTM character-level language model.

    Layer stack, in order: frozen embedding -> LSTM -> dropout -> LSTM ->
    dropout -> per-timestep softmax with ``vocab_dim`` outputs. The model is
    compiled with categorical cross-entropy loss and the RMSprop optimizer.

    Reads the notebook-level globals ``vocab_dim``, ``num_chars``,
    ``embeddings`` and ``num_abstracts``.

    Parameters
    ----------
    seq_length : int
        Number of time steps in each input sequence.
    hidden_dim : int, optional
        Output size of each of the two LSTM layers (default 32).

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    model = Sequential()

    char_embedding = Embedding(
        input_dim=vocab_dim,
        output_dim=num_chars,
        weights=[embeddings],
        # The batch dimension is pinned to the number of abstracts.
        batch_input_shape=(num_abstracts, seq_length),
        input_length=seq_length,
        # Keep the one-hot embeddings fixed during training.
        trainable=False,
        # Front-padded zeros are masked out rather than treated as characters.
        mask_zero=True,
    )

    # Layers are created in the same order in which they are stacked.
    # NOTE(review): only the first LSTM is stateful; the second is not --
    # confirm this asymmetry is intentional.
    stack = [
        char_embedding,
        LSTM(output_dim=hidden_dim, return_sequences=True, stateful=True),
        Dropout(0.2),
        LSTM(output_dim=hidden_dim, return_sequences=True),
        Dropout(0.2),
        TimeDistributedDense(output_dim=vocab_dim, activation='softmax'),
    ]
    for layer in stack:
        model.add(layer)

    model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
    return model
    
# Length of each input sequence fed to the training model.
SEQ_LENGTH = 20

# Build and compile the training network.
train_model = create_model(SEQ_LENGTH, hidden_dim=32)
# Sampling counterpart, currently disabled:
# sample_model = create_model(seq_length=1, hidden_dim=512)

Using Theano backend.


### Save to Disk

#### Train Model

In [3]:
def save_model(model, model_type):
    """Write a model's JSON description to ``<model_type>_model.json``.

    The file is created (or overwritten) in the current working directory and
    contains exactly the string returned by ``model.to_json()``.

    Parameters
    ----------
    model : object
        Anything exposing a ``to_json()`` method that returns a string,
        e.g. the model built by ``create_model``.
    model_type : str
        Prefix of the output file name, e.g. ``'train'`` produces
        ``train_model.json``.

    Returns
    -------
    None
    """
    jsonified_model = model.to_json()

    # The context manager guarantees the data is flushed and the handle is
    # closed before returning, even if the write raises.
    with open('{}_model.json'.format(model_type), 'w') as json_file:
        json_file.write(jsonified_model)
    
# Persist the training model's JSON description as 'train_model.json'.
save_model(model=train_model, model_type='train')
# Sampling counterpart, currently disabled:
# save_model(sample_model, 'sample')