# Building language model candidates

Testing of different architectures will be done in a Python script. This notebook is mainly for building and initializing model candidates that can then be loaded from the script.

## Load data

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edited modules are
# re-imported before each cell executes.
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
# List the contents of the language-model data directory.
!ls data/lang_model/

[1m[36memb_mat.bc[m[m      [1m[36mlabels.bc[m[m       word_index.json
[1m[36minputs.bc[m[m       [1m[36mseqs.bc[m[m         word_labels.tsv


In [3]:
!mkdir models/lang_model

mkdir: models/lang_model: File exists


In [4]:
from tep.utils import load_array

In [5]:
# Load the inputs, labels and embedding-matrix arrays from disk, then report
# each array's shape in the same order they were loaded.
inputs = load_array('data/lang_model/inputs.bc')
labels = load_array('data/lang_model/labels.bc')
emb_mat = load_array('data/lang_model/emb_mat.bc')

print(inputs.shape)
print(labels.shape)
print(emb_mat.shape)

(895, 20)
(895, 101)
(101, 200)


## Baseline LSTM model

In [6]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [7]:
# Baseline candidate: embedding layer initialized from emb_mat -> 64-unit LSTM
# -> softmax layer with one output unit per row of emb_mat.
model = Sequential([
    Embedding(
        emb_mat.shape[0],
        emb_mat.shape[1],
        input_length=inputs.shape[1],
        weights=[emb_mat],
        name='embedding',
    ),
    LSTM(64, name='lstm'),
    Dense(emb_mat.shape[0], activation='softmax', name='output'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [8]:
# Show per-layer output shapes and parameter counts for the baseline model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 20, 200)           20200     
_________________________________________________________________
lstm (LSTM)                  (None, 64)                67840     
_________________________________________________________________
output (Dense)               (None, 101)               6565      
Total params: 94,605
Trainable params: 94,605
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Persist the freshly built (not yet fitted) baseline model: the architecture
# goes to <filename>.json via save_architecture, the weights to <filename>.hdf5.
from tep.modelUtils import save_architecture
filename = 'models/lang_model/baseline_lstm'
save_architecture(model, '{}.json'.format(filename))
model.save_weights('{}.hdf5'.format(filename))

## Standard LSTM model

In [10]:
# Standard candidate: same stack as the baseline but with a 128-unit LSTM.
model = Sequential([
    Embedding(
        emb_mat.shape[0],
        emb_mat.shape[1],
        input_length=inputs.shape[1],
        weights=[emb_mat],
        name='embedding',
    ),
    LSTM(128, name='lstm'),
    Dense(emb_mat.shape[0], activation='softmax', name='output'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [11]:
# Show per-layer output shapes and parameter counts for the standard model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 20, 200)           20200     
_________________________________________________________________
lstm (LSTM)                  (None, 128)               168448    
_________________________________________________________________
output (Dense)               (None, 101)               13029     
Total params: 201,677
Trainable params: 201,677
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Persist the standard LSTM candidate: architecture to <filename>.json,
# weights to <filename>.hdf5.
filename = 'models/lang_model/standard_lstm'
save_architecture(model, '{}.json'.format(filename))
model.save_weights('{}.hdf5'.format(filename))

## Standard LSTM with very low weight decay

In [13]:
from keras.regularizers import l2

# Standard 128-unit LSTM stack with an L2 penalty of 1e-10 on every kernel,
# recurrent kernel and bias of the LSTM and output layers. Each penalty is its
# own l2 instance.
model = Sequential([
    Embedding(
        emb_mat.shape[0],
        emb_mat.shape[1],
        input_length=inputs.shape[1],
        weights=[emb_mat],
        name='embedding',
    ),
    LSTM(
        128,
        kernel_regularizer=l2(1e-10),
        recurrent_regularizer=l2(1e-10),
        bias_regularizer=l2(1e-10),
        name='lstm',
    ),
    Dense(
        emb_mat.shape[0],
        activation='softmax',
        kernel_regularizer=l2(1e-10),
        bias_regularizer=l2(1e-10),
        name='output',
    ),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [14]:
# Show per-layer output shapes and parameter counts for the weight-decay model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 20, 200)           20200     
_________________________________________________________________
lstm (LSTM)                  (None, 128)               168448    
_________________________________________________________________
output (Dense)               (None, 101)               13029     
Total params: 201,677
Trainable params: 201,677
Non-trainable params: 0
_________________________________________________________________


In [15]:
# Persist the weight-decay candidate: architecture to <filename>.json,
# weights to <filename>.hdf5.
filename = 'models/lang_model/standard_lstm_wd'
save_architecture(model, '{}.json'.format(filename))
model.save_weights('{}.hdf5'.format(filename))

## Standard LSTM with standard dropout

In [16]:
from keras.layers import Dropout

In [17]:
# Standard 128-unit LSTM stack with Dropout(0.25) layers after the embedding
# and after the LSTM, plus an L2 penalty of 1e-4 on the LSTM and output
# layer parameters.
model = Sequential([
    Embedding(
        emb_mat.shape[0],
        emb_mat.shape[1],
        input_length=inputs.shape[1],
        weights=[emb_mat],
        name='embedding',
    ),
    Dropout(0.25, name='embedding_dropout'),
    LSTM(
        128,
        kernel_regularizer=l2(1e-4),
        recurrent_regularizer=l2(1e-4),
        bias_regularizer=l2(1e-4),
        name='lstm',
    ),
    Dropout(0.25, name='output_dropout'),
    Dense(
        emb_mat.shape[0],
        activation='softmax',
        kernel_regularizer=l2(1e-4),
        bias_regularizer=l2(1e-4),
        name='output',
    ),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [18]:
# Show per-layer output shapes and parameter counts for the dropout model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 20, 200)           20200     
_________________________________________________________________
embedding_dropout (Dropout)  (None, 20, 200)           0         
_________________________________________________________________
lstm (LSTM)                  (None, 128)               168448    
_________________________________________________________________
output_dropout (Dropout)     (None, 128)               0         
_________________________________________________________________
output (Dense)               (None, 101)               13029     
Total params: 201,677
Trainable params: 201,677
Non-trainable params: 0
_________________________________________________________________


In [19]:
# Persist the dropout candidate: architecture to <filename>.json,
# weights to <filename>.hdf5.
filename = 'models/lang_model/standard_lstm_dropout'
save_architecture(model, '{}.json'.format(filename))
model.save_weights('{}.hdf5'.format(filename))

## Variational LSTM with standard dropout

In [20]:
# Same stack as the standard-dropout candidate, but the LSTM additionally uses
# its built-in input dropout and recurrent dropout, both at rate 0.25.
model = Sequential([
    Embedding(
        emb_mat.shape[0],
        emb_mat.shape[1],
        input_length=inputs.shape[1],
        weights=[emb_mat],
        name='embedding',
    ),
    Dropout(0.25, name='embedding_dropout'),
    LSTM(
        128,
        kernel_regularizer=l2(1e-4),
        recurrent_regularizer=l2(1e-4),
        bias_regularizer=l2(1e-4),
        dropout=0.25,
        recurrent_dropout=0.25,
        name='lstm',
    ),
    Dropout(0.25, name='output_dropout'),
    Dense(
        emb_mat.shape[0],
        activation='softmax',
        kernel_regularizer=l2(1e-4),
        bias_regularizer=l2(1e-4),
        name='output',
    ),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [21]:
# Show per-layer output shapes and parameter counts for the variational model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 20, 200)           20200     
_________________________________________________________________
embedding_dropout (Dropout)  (None, 20, 200)           0         
_________________________________________________________________
lstm (LSTM)                  (None, 128)               168448    
_________________________________________________________________
output_dropout (Dropout)     (None, 128)               0         
_________________________________________________________________
output (Dense)               (None, 101)               13029     
Total params: 201,677
Trainable params: 201,677
Non-trainable params: 0
_________________________________________________________________


In [22]:
# Persist the variational-dropout candidate: architecture to <filename>.json,
# weights to <filename>.hdf5.
filename = 'models/lang_model/variational_lstm_dropout'
save_architecture(model, '{}.json'.format(filename))
model.save_weights('{}.hdf5'.format(filename))

## Conclusion

In [23]:
# List the saved architecture (.json) and weight (.hdf5) files for all candidates.
!ls models/lang_model/

baseline_lstm.hdf5            standard_lstm_dropout.json
baseline_lstm.json            standard_lstm_wd.hdf5
standard_lstm.hdf5            standard_lstm_wd.json
standard_lstm.json            variational_lstm_dropout.hdf5
standard_lstm_dropout.hdf5    variational_lstm_dropout.json
