# Baseline Models

In [9]:
import numpy as np

import gensim

from tensorflow.keras import utils
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import GRU, LSTM, Activation, Dense, Dropout, Embedding



In [10]:
w2v_model = gensim.models.KeyedVectors.load("w2v.model", mmap='r')

In [None]:
vocab_size, emdedding_size = w2v_model.wv.vectors.shape

In [12]:
x = np.load('data/x.npy')
y = np.load('data/y.npy')

## Baseline #1: GRU

In [13]:
gru = Sequential()

gru.add(Embedding(input_dim=vocab_size, output_dim=emdedding_size))
gru.add(GRU(256, input_shape=(vocab_size, emdedding_size), return_sequences=True))
gru.add(Dropout(0.2))
gru.add(GRU(256, return_sequences=True))
gru.add(Dropout(0.2))
gru.add(GRU(128))
gru.add(Dropout(0.2))
gru.add(Dense(vocab_size, activation='softmax'))

In [14]:
gru.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

In [15]:
filepath = "weights/gru.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks = [checkpoint]

In [16]:
gru.fit(x, y, batch_size=128, epochs=1, callbacks=callbacks)

Train on 1485 samples
Epoch 00001: loss improved from inf to 9.05919, saving model to weights/gru.hdf5


<tensorflow.python.keras.callbacks.History at 0x21796becc18>

## Baseline #2: GRU + Word2Vec

In [17]:
gru_w2v = Sequential()

gru_w2v.add(Embedding(input_dim=vocab_size, output_dim=emdedding_size, weights=[w2v_model.wv.vectors]))
gru_w2v.add(GRU(256, input_shape=(vocab_size, emdedding_size), return_sequences=True))
gru_w2v.add(Dropout(0.2))
gru_w2v.add(GRU(256, return_sequences=True))
gru_w2v.add(Dropout(0.2))
gru_w2v.add(GRU(128))
gru_w2v.add(Dropout(0.2))
gru_w2v.add(Dense(vocab_size, activation='softmax'))

In [18]:
gru_w2v.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

In [19]:
filepath = "weights/gru_w2v.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks = [checkpoint]

In [21]:
gru_w2v.fit(x, y, batch_size=128, epochs=1, callbacks=callbacks)

Train on 1485 samples
Epoch 00001: loss improved from inf to 8.85612, saving model to weights/gru_w2v.hdf5


<tensorflow.python.keras.callbacks.History at 0x217a0b15978>

## Baseline #3: LSTM

In [22]:
lstm = Sequential()

lstm.add(Embedding(input_dim=vocab_size, output_dim=emdedding_size))
lstm.add(GRU(256, input_shape=(vocab_size, emdedding_size), return_sequences=True))
lstm.add(Dropout(0.2))
lstm.add(GRU(256, return_sequences=True))
lstm.add(Dropout(0.2))
lstm.add(GRU(128))
lstm.add(Dropout(0.2))
lstm.add(Dense(vocab_size, activation='softmax'))

In [23]:
lstm.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

In [24]:
filepath = "weights/lstm.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks = [checkpoint]

In [25]:
lstm.fit(x, y, batch_size=128, epochs=1, callbacks=callbacks)

Train on 1485 samples
Epoch 00001: loss improved from inf to 9.07976, saving model to weights/lstm.hdf5


<tensorflow.python.keras.callbacks.History at 0x217af897a20>

## Baseline #4: LSTM + Word2Vec 

In [26]:
lstm_w2v = Sequential()

lstm_w2v.add(Embedding(input_dim=vocab_size, output_dim=100, weights=[w2v_model.wv.vectors]))
lstm_w2v.add(LSTM(256, input_shape=(vocab_size, 100), return_sequences=True))
lstm_w2v.add(Dropout(0.2))
lstm_w2v.add(LSTM(256, return_sequences=True))
lstm_w2v.add(Dropout(0.2))
lstm_w2v.add(LSTM(128))
lstm_w2v.add(Dropout(0.2))
lstm_w2v.add(Dense(vocab_size, activation='softmax'))

In [27]:
lstm_w2v.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

In [28]:
filepath = "weights/lstm_w2v.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks = [checkpoint]

In [29]:
lstm_w2v.fit(x, y, batch_size=128, epochs=1, callbacks=callbacks)

Train on 1485 samples
Epoch 00001: loss improved from inf to 8.92470, saving model to weights/lstm_w2v.hdf5


<tensorflow.python.keras.callbacks.History at 0x217cc692da0>

## Generation

In [30]:
def sample(preds, temperature):
    if temperature <= 0:
        return np.argmax(preds)

    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
  
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
  
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [33]:
def word_to_id(word):
    return w2v_model.wv.key_to_index[word]

def id_to_word(id):
    return w2v_model.wv.index_to_key[id]

In [34]:
def generate(model=lstm, prompt="In this paper", words=20, temperature=1.0):
    word_ids = [word_to_id(word) for word in prompt.lower().split()]
    
    for i in range(words):
        prediction = model.predict(x=np.array(word_ids))
        id = sample(prediction[-1], temperature)
        word_ids.append(id)
    
    return ' '.join(id_to_word(id) for id in word_ids)

In [36]:
generate(model=lstm_w2v, words=20)

'in this paper marker communication pythagoras qlm logistics cnec analytic sites publishers hension machinery kotonoha adjusted inter operator comes cursory trajectory komi seasons'

## Evaluation