# Baseline Models

In [9]:
import numpy as np

import gensim

from tensorflow.keras import utils
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import GRU, LSTM, Activation, Dense, Dropout, Embedding



In [10]:
w2v_model = gensim.models.KeyedVectors.load("w2v.model", mmap='r')

In [None]:
vocab_size, emdedding_size = w2v_model.wv.vectors.shape

In [37]:
x = np.load('data/x.npy')
y = np.load('data/y.npy')

## Baseline #1: GRU

In [41]:
gru = Sequential()

gru.add(Embedding(input_dim=vocab_size, output_dim=emdedding_size))
gru.add(GRU(256, input_shape=(vocab_size, emdedding_size), return_sequences=True))
gru.add(Dropout(0.2))
gru.add(GRU(256, return_sequences=True))
gru.add(Dropout(0.2))
gru.add(GRU(128))
gru.add(Dropout(0.2))
gru.add(Dense(vocab_size, activation='softmax'))

In [42]:
gru.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

In [43]:
filepath = "weights/gru.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks = [checkpoint]

In [44]:
gru.fit(x, y, batch_size=128, epochs=20, callbacks=callbacks)

Train on 1485 samples
Epoch 1/20
Epoch 00001: loss improved from inf to 9.08099, saving model to weights/gru.hdf5
Epoch 2/20
Epoch 00002: loss improved from 9.08099 to 7.67719, saving model to weights/gru.hdf5
Epoch 3/20
Epoch 00003: loss improved from 7.67719 to 6.44648, saving model to weights/gru.hdf5
Epoch 4/20
Epoch 00004: loss improved from 6.44648 to 6.12681, saving model to weights/gru.hdf5
Epoch 5/20
Epoch 00005: loss improved from 6.12681 to 6.04947, saving model to weights/gru.hdf5
Epoch 6/20
Epoch 00006: loss improved from 6.04947 to 6.01415, saving model to weights/gru.hdf5
Epoch 7/20
Epoch 00007: loss improved from 6.01415 to 5.99797, saving model to weights/gru.hdf5
Epoch 8/20
Epoch 00008: loss improved from 5.99797 to 5.98792, saving model to weights/gru.hdf5
Epoch 9/20
Epoch 00009: loss improved from 5.98792 to 5.98631, saving model to weights/gru.hdf5
Epoch 10/20
Epoch 00010: loss improved from 5.98631 to 5.98503, saving model to weights/gru.hdf5
Epoch 11/20
Epoch 000

<tensorflow.python.keras.callbacks.History at 0x217d95e0d68>

## Baseline #2: GRU + Word2Vec

In [17]:
gru_w2v = Sequential()

gru_w2v.add(Embedding(input_dim=vocab_size, output_dim=emdedding_size, weights=[w2v_model.wv.vectors]))
gru_w2v.add(GRU(256, input_shape=(vocab_size, emdedding_size), return_sequences=True))
gru_w2v.add(Dropout(0.2))
gru_w2v.add(GRU(256, return_sequences=True))
gru_w2v.add(Dropout(0.2))
gru_w2v.add(GRU(128))
gru_w2v.add(Dropout(0.2))
gru_w2v.add(Dense(vocab_size, activation='softmax'))

In [18]:
gru_w2v.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

In [45]:
filepath = "weights/gru_w2v.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks = [checkpoint]

In [47]:
gru_w2v.fit(x, y, batch_size=128, epochs=20, callbacks=callbacks)

Train on 1485 samples
Epoch 1/20
Epoch 00001: loss improved from 7.27410 to 6.30135, saving model to weights/gru_w2v.hdf5
Epoch 2/20
Epoch 00002: loss improved from 6.30135 to 6.09080, saving model to weights/gru_w2v.hdf5
Epoch 3/20
Epoch 00003: loss improved from 6.09080 to 6.03003, saving model to weights/gru_w2v.hdf5
Epoch 4/20
Epoch 00004: loss improved from 6.03003 to 6.00779, saving model to weights/gru_w2v.hdf5
Epoch 5/20
Epoch 00005: loss improved from 6.00779 to 5.99053, saving model to weights/gru_w2v.hdf5
Epoch 6/20
Epoch 00006: loss did not improve from 5.99053
Epoch 7/20
Epoch 00007: loss improved from 5.99053 to 5.98847, saving model to weights/gru_w2v.hdf5
Epoch 8/20
Epoch 00008: loss improved from 5.98847 to 5.98231, saving model to weights/gru_w2v.hdf5
Epoch 9/20
Epoch 00009: loss improved from 5.98231 to 5.97804, saving model to weights/gru_w2v.hdf5
Epoch 10/20
Epoch 00010: loss improved from 5.97804 to 5.97348, saving model to weights/gru_w2v.hdf5
Epoch 11/20
Epoch 0

<tensorflow.python.keras.callbacks.History at 0x21780016da0>

## Baseline #3: LSTM 

In [22]:
lstm = Sequential()

lstm.add(Embedding(input_dim=vocab_size, output_dim=emdedding_size))
lstm.add(GRU(256, input_shape=(vocab_size, emdedding_size), return_sequences=True))
lstm.add(Dropout(0.2))
lstm.add(GRU(256, return_sequences=True))
lstm.add(Dropout(0.2))
lstm.add(GRU(128))
lstm.add(Dropout(0.2))
lstm.add(Dense(vocab_size, activation='softmax'))

In [23]:
lstm.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

In [48]:
filepath = "weights/lstm.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks = [checkpoint]

In [49]:
lstm.fit(x, y, batch_size=128, epochs=20, callbacks=callbacks)

Train on 1485 samples
Epoch 1/20
Epoch 00001: loss improved from inf to 7.61531, saving model to weights/lstm.hdf5
Epoch 2/20
Epoch 00002: loss improved from 7.61531 to 6.42914, saving model to weights/lstm.hdf5
Epoch 3/20
Epoch 00003: loss improved from 6.42914 to 6.12401, saving model to weights/lstm.hdf5
Epoch 4/20
Epoch 00004: loss improved from 6.12401 to 6.04974, saving model to weights/lstm.hdf5
Epoch 5/20
Epoch 00005: loss improved from 6.04974 to 6.01366, saving model to weights/lstm.hdf5
Epoch 6/20
Epoch 00006: loss improved from 6.01366 to 6.00278, saving model to weights/lstm.hdf5
Epoch 7/20
Epoch 00007: loss improved from 6.00278 to 5.99018, saving model to weights/lstm.hdf5
Epoch 8/20
Epoch 00008: loss improved from 5.99018 to 5.98621, saving model to weights/lstm.hdf5
Epoch 9/20
Epoch 00009: loss improved from 5.98621 to 5.98172, saving model to weights/lstm.hdf5
Epoch 10/20
Epoch 00010: loss did not improve from 5.98172
Epoch 11/20
Epoch 00011: loss improved from 5.9817

<tensorflow.python.keras.callbacks.History at 0x21780052f98>

## Baseline #4: LSTM + Word2Vec 

In [26]:
lstm_w2v = Sequential()

lstm_w2v.add(Embedding(input_dim=vocab_size, output_dim=100, weights=[w2v_model.wv.vectors]))
lstm_w2v.add(LSTM(256, input_shape=(vocab_size, 100), return_sequences=True))
lstm_w2v.add(Dropout(0.2))
lstm_w2v.add(LSTM(256, return_sequences=True))
lstm_w2v.add(Dropout(0.2))
lstm_w2v.add(LSTM(128))
lstm_w2v.add(Dropout(0.2))
lstm_w2v.add(Dense(vocab_size, activation='softmax'))

In [27]:
lstm_w2v.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

In [28]:
filepath = "weights/lstm_w2v.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks = [checkpoint]

In [39]:
lstm_w2v.fit(x, y, batch_size=128, epochs=20, callbacks=callbacks)

Train on 1485 samples
Epoch 1/20
Epoch 00001: loss improved from 8.92470 to 7.33414, saving model to weights/lstm_w2v.hdf5
Epoch 2/20
Epoch 00002: loss improved from 7.33414 to 6.29197, saving model to weights/lstm_w2v.hdf5
Epoch 3/20
Epoch 00003: loss improved from 6.29197 to 6.08482, saving model to weights/lstm_w2v.hdf5
Epoch 4/20
Epoch 00004: loss improved from 6.08482 to 6.02693, saving model to weights/lstm_w2v.hdf5
Epoch 5/20
Epoch 00005: loss improved from 6.02693 to 6.00411, saving model to weights/lstm_w2v.hdf5
Epoch 6/20
Epoch 00006: loss improved from 6.00411 to 5.99680, saving model to weights/lstm_w2v.hdf5
Epoch 7/20
Epoch 00007: loss improved from 5.99680 to 5.98746, saving model to weights/lstm_w2v.hdf5
Epoch 8/20
Epoch 00008: loss improved from 5.98746 to 5.98054, saving model to weights/lstm_w2v.hdf5
Epoch 9/20
Epoch 00009: loss improved from 5.98054 to 5.97674, saving model to weights/lstm_w2v.hdf5
Epoch 10/20
Epoch 00010: loss did not improve from 5.97674
Epoch 11/2

<tensorflow.python.keras.callbacks.History at 0x217de5de240>

## Generation

In [30]:
def sample(preds, temperature):
    if temperature <= 0:
        return np.argmax(preds)

    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
  
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
  
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [33]:
def word_to_id(word):
    return w2v_model.wv.key_to_index[word]

def id_to_word(id):
    return w2v_model.wv.index_to_key[id]

In [71]:
def generate(model=lstm, prompt="In this paper", words=20, temperature=0.2):
    word_ids = [word_to_id(word) for word in prompt.lower().split()]
    
    for i in range(words):
        prediction = model.predict(x=np.array(word_ids))
        id = sample(prediction[-1], temperature)
        word_ids.append(id)
    
    return ' '.join(id_to_word(id) for id in word_ids)

In [72]:
generate(model=lstm_w2v, prompt="In this paper", words=20)

'in this paper experimented technical ignorant literary agent moderation mos falls gaokao readily parlaclarin constellations mass sufficient jupyter opus porting point synonym pgn'

## Evaluation