# Compile

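Build and compile a Keras model for character-level language modelling, then save its architecture to disk as JSON. Note that `model.to_json()` serialises only the architecture — not the weights or the compile settings (loss/optimizer).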

### Load Data

In [4]:
import pickle

# Load the preprocessed dataset bundle from 'data.p'.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load 'data.p' if it comes from a trusted source.
# The context manager closes the file handle deterministically instead of
# leaving it to the garbage collector.
with open('data.p', 'rb') as data_file:
    abstracts_char_lm = pickle.load(data_file)

# Unpack the entries of the bundle that the rest of the notebook uses.
embeddings = abstracts_char_lm['embeddings']
abstracts_padded = abstracts_char_lm['abstracts_padded']
char2idx, idx2char = abstracts_char_lm['char2idx'], abstracts_char_lm['idx2char']

# Number of abstracts and the length of the first one
# (presumably every abstract is padded to the same length -- TODO confirm).
num_abstracts, maxlen = len(abstracts_padded), len(abstracts_padded[0])

# `embeddings` is 2-D: its first dimension is used as the Embedding layer's
# input_dim and its second as the output_dim when the model is built.
vocab_dim, num_chars = embeddings.shape

### Build Keras Model

In [5]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM

HIDDEN_DIM = 32  # dimensionality of the LSTM output

# Lookup layer initialised from the precomputed `embeddings` matrix.
# The batch size is fixed through batch_input_shape and each input is a
# single timestep (one character index per sample).
char_embedding = Embedding(
    input_dim=vocab_dim,
    output_dim=num_chars,
    weights=[embeddings],
    batch_input_shape=(num_abstracts, 1),
    input_length=1,
    trainable=False,  # freeze the one-hot embeddings
    mask_zero=True,  # don't count front-padded zeros as characters
)

# Stateful recurrent layer followed by a softmax projection of size vocab_dim.
recurrent = LSTM(output_dim=HIDDEN_DIM, stateful=True)
char_probs = Dense(output_dim=vocab_dim, activation='softmax')

# Stack the layers in order: embedding -> LSTM -> softmax.
model = Sequential()
for layer in (char_embedding, recurrent, char_probs):
    model.add(layer)

model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

### Save to Disk

In [7]:
# Serialise the model to a JSON string.
jsonified_model = model.to_json()

# Write the JSON to 'model.json'. The context manager guarantees the data is
# flushed and the handle closed as soon as the write completes, rather than
# whenever the anonymous file object happens to be garbage-collected.
with open('model.json', 'w') as model_file:
    model_file.write(jsonified_model)