# Baseline Models

In [1]:
import numpy as np

import gensim

import math

from tensorflow.keras import utils
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import GRU, LSTM, Activation, Dense, Dropout, Embedding
from tensorflow.keras.callbacks import History 



In [2]:
# Load the saved word-vector artifact read-only via memory mapping.
# Later cells access it through `.wv`, so the file presumably holds a full
# Word2Vec model rather than bare KeyedVectors — TODO confirm.
w2v_model = gensim.models.KeyedVectors.load(
    "w2v.model",
    mmap='r',
)

In [3]:
# Unpack the embedding matrix shape; both values size the Embedding and
# output layers of every model below.
(vocab_size, emdedding_size) = w2v_model.wv.vectors.shape
(vocab_size, emdedding_size)

(17862, 100)

In [4]:
# Inputs are used as stored; only column 0 of the stored targets is kept.
x = np.load(file='data/x.npy')
y = np.load(file='data/y.npy')[:, 0]

In [5]:
# Sanity check: inputs and targets must have the same number of rows.
(np.shape(x), np.shape(y))

((5841, 100), (5841,))

In [6]:
# instantiate history to save losses
# NOTE(review): `history` is not passed to any fit() call visible in this
# notebook; each fit() call below stores its own returned object instead.
history = History()

## Baseline #1: GRU

In [7]:
# Baseline #1: token ids -> trainable embeddings -> three stacked GRU layers
# (each followed by dropout) -> softmax over the vocabulary.
# No `input_shape` is passed to the recurrent layers: Keras only honours it on
# the first layer of a Sequential model, and the value previously given put
# vocab_size where the sequence length belongs.
gru = Sequential()

gru.add(Embedding(input_dim=vocab_size, output_dim=emdedding_size))
gru.add(GRU(256, return_sequences=True))
gru.add(Dropout(0.2))
gru.add(GRU(256, return_sequences=True))
gru.add(Dropout(0.2))
# The last recurrent layer returns only its final state, which feeds the classifier.
gru.add(GRU(128))
gru.add(Dropout(0.2))
gru.add(Dense(vocab_size, activation='softmax'))

In [8]:
# Sparse cross-entropy takes integer class labels rather than one-hot rows.
gru.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
)

In [9]:
# Write a checkpoint only when the validation loss reaches a new minimum.
filepath = "weights/gru.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks = [checkpoint]

In [None]:
# Train with 20% of the samples held out for validation; the returned object
# is what the evaluation section reads `val_loss` from.
gru_loss = gru.fit(
    x,
    y,
    batch_size=64,
    epochs=10,
    validation_split=0.2,
    callbacks=callbacks,
)

Train on 4672 samples, validate on 1169 samples
Epoch 1/10
Epoch 00001: val_loss improved from inf to 7.53704, saving model to weights/gru.hdf5
Epoch 2/10
Epoch 00002: val_loss did not improve from 7.53704
Epoch 3/10
Epoch 00003: val_loss did not improve from 7.53704
Epoch 4/10
Epoch 00004: val_loss did not improve from 7.53704
Epoch 5/10
Epoch 00005: val_loss did not improve from 7.53704
Epoch 6/10
Epoch 00006: val_loss did not improve from 7.53704
Epoch 7/10
Epoch 00007: val_loss did not improve from 7.53704
Epoch 8/10
Epoch 00008: val_loss did not improve from 7.53704
Epoch 9/10

In [None]:
# NOTE(review): this calls fit() again on the same `gru` model for 5 more
# epochs and rebinds `gru_loss`, so the history object from the previous run
# is no longer referenced by that name.
gru_loss = gru.fit(
    x,
    y,
    batch_size=64,
    epochs=5,
    validation_split=0.2,
    callbacks=callbacks,
)

Train on 4672 samples, validate on 1169 samples
Epoch 1/10
Epoch 00001: val_loss improved from inf to 7.53704, saving model to weights/gru.hdf5
Epoch 2/10
Epoch 00002: val_loss did not improve from 7.53704
Epoch 3/10
Epoch 00003: val_loss did not improve from 7.53704
Epoch 4/10
Epoch 00004: val_loss did not improve from 7.53704
Epoch 5/10
Epoch 00005: val_loss did not improve from 7.53704
Epoch 6/10
Epoch 00006: val_loss did not improve from 7.53704
Epoch 7/10
Epoch 00007: val_loss did not improve from 7.53704
Epoch 8/10
Epoch 00008: val_loss did not improve from 7.53704
Epoch 9/10

## Baseline #2: GRU + Word2Vec

In [None]:
# Baseline #2: same stacked-GRU architecture as Baseline #1, but the Embedding
# layer starts from the Word2Vec vectors instead of a random initialisation.
# No `input_shape` is passed to the recurrent layers: Keras only honours it on
# the first layer of a Sequential model, and the value previously given put
# vocab_size where the sequence length belongs.
gru_w2v = Sequential()

gru_w2v.add(Embedding(input_dim=vocab_size, output_dim=emdedding_size, weights=[w2v_model.wv.vectors]))
gru_w2v.add(GRU(256, return_sequences=True))
gru_w2v.add(Dropout(0.2))
gru_w2v.add(GRU(256, return_sequences=True))
gru_w2v.add(Dropout(0.2))
# The last recurrent layer returns only its final state, which feeds the classifier.
gru_w2v.add(GRU(128))
gru_w2v.add(Dropout(0.2))
gru_w2v.add(Dense(vocab_size, activation='softmax'))

In [None]:
# Same objective and optimizer as the other baselines so losses are comparable.
gru_w2v.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
)

In [None]:
# Write a checkpoint only when the validation loss reaches a new minimum.
# Rebinds `filepath`, `checkpoint` and `callbacks` for this model.
filepath = "weights/gru_w2v.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks = [checkpoint]

In [None]:
# Train with 20% of the samples held out for validation; the returned object
# is what the evaluation section reads `val_loss` from.
gru_w2v_loss = gru_w2v.fit(
    x,
    y,
    batch_size=64,
    epochs=10,
    validation_split=0.2,
    callbacks=callbacks,
)

## Baseline #3: LSTM 

In [None]:
# Baseline #3: token ids -> trainable embeddings -> three stacked LSTM layers
# (each followed by dropout) -> softmax over the vocabulary.
# This model previously stacked GRU layers, which made it a duplicate of
# Baseline #1 rather than an LSTM baseline.
# No `input_shape` is passed to the recurrent layers: Keras only honours it on
# the first layer of a Sequential model.
lstm = Sequential()

lstm.add(Embedding(input_dim=vocab_size, output_dim=emdedding_size))
lstm.add(LSTM(256, return_sequences=True))
lstm.add(Dropout(0.2))
lstm.add(LSTM(256, return_sequences=True))
lstm.add(Dropout(0.2))
# The last recurrent layer returns only its final state, which feeds the classifier.
lstm.add(LSTM(128))
lstm.add(Dropout(0.2))
lstm.add(Dense(vocab_size, activation='softmax'))

In [None]:
# Same objective and optimizer as the other baselines so losses are comparable.
lstm.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
)

In [None]:
# Write a checkpoint only when the validation loss reaches a new minimum.
# Rebinds `filepath`, `checkpoint` and `callbacks` for this model.
filepath = "weights/lstm.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks = [checkpoint]

In [None]:
# Train with 20% of the samples held out for validation; the returned object
# is what the evaluation section reads `val_loss` from.
lstm_loss = lstm.fit(
    x,
    y,
    batch_size=64,
    epochs=10,
    validation_split=0.2,
    callbacks=callbacks,
)

## Baseline #4: LSTM + Word2Vec 

In [None]:
# Baseline #4: same stacked-LSTM architecture as Baseline #3, but the Embedding
# layer starts from the Word2Vec vectors instead of a random initialisation.
# The embedding width comes from `emdedding_size` (as in the other models)
# rather than a hard-coded 100, so it always matches the loaded vectors.
# No `input_shape` is passed to the recurrent layers: Keras only honours it on
# the first layer of a Sequential model.
lstm_w2v = Sequential()

lstm_w2v.add(Embedding(input_dim=vocab_size, output_dim=emdedding_size, weights=[w2v_model.wv.vectors]))
lstm_w2v.add(LSTM(256, return_sequences=True))
lstm_w2v.add(Dropout(0.2))
lstm_w2v.add(LSTM(256, return_sequences=True))
lstm_w2v.add(Dropout(0.2))
# The last recurrent layer returns only its final state, which feeds the classifier.
lstm_w2v.add(LSTM(128))
lstm_w2v.add(Dropout(0.2))
lstm_w2v.add(Dense(vocab_size, activation='softmax'))

In [None]:
# Same objective and optimizer as the other baselines so losses are comparable.
lstm_w2v.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
)

In [None]:
# Write a checkpoint only when the validation loss reaches a new minimum.
# Rebinds `filepath`, `checkpoint` and `callbacks` for this model.
filepath = "weights/lstm_w2v.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks = [checkpoint]

In [None]:
# Train with 20% of the samples held out for validation; the returned object
# is what the evaluation section reads `val_loss` from.
lstm_w2v_loss = lstm_w2v.fit(
    x,
    y,
    batch_size=64,
    epochs=10,
    validation_split=0.2,
    callbacks=callbacks,
)

## Generation

In [None]:
def sample(preds, temperature):
    """Pick an index from a probability vector after temperature rescaling.

    Args:
        preds: 1-D array-like of non-negative weights (typically a softmax
            output).
        temperature: Rescaling factor applied in log space. Values below 1
            sharpen the distribution, values above 1 flatten it. Zero or a
            negative value disables sampling and returns the argmax.

    Returns:
        The selected index (a NumPy integer).
    """
    if temperature <= 0:
        return np.argmax(preds)

    preds = np.asarray(preds).astype('float64')
    # log(0) yields -inf, which maps back to probability 0 after exp(); the
    # divide-by-zero warning it triggers is therefore noise.
    with np.errstate(divide='ignore'):
        scaled = np.log(preds) / temperature

    # Shift so the largest term is exp(0) == 1. Without this, a small
    # temperature makes every exp() underflow to 0 and the normalisation
    # below becomes 0/0 (NaN probabilities).
    scaled = scaled - np.max(scaled)
    exp_preds = np.exp(scaled)
    preds = exp_preds / np.sum(exp_preds)

    # One multinomial trial gives a one-hot row; argmax recovers its index.
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [None]:
def word_to_id(word):
    """Look up the vocabulary index of `word` in the loaded word vectors.

    This is a plain lookup with no fallback for words missing from the
    vocabulary.
    """
    vocabulary_index = w2v_model.wv.key_to_index
    return vocabulary_index[word]

def id_to_word(id):
    """Return the vocabulary word stored at index `id` in the loaded word vectors."""
    # The parameter shadows the builtin `id`; the name is kept so keyword
    # callers continue to work.
    vocabulary_words = w2v_model.wv.index_to_key
    return vocabulary_words[id]

In [None]:
def generate(model=gru, prompt='In this paper', words=20, temperature=0.2):
    """Extend `prompt` by sampling `words` additional words from `model`.

    Args:
        model: Trained Keras model mapping a batch of word-id sequences to a
            next-word distribution. The default is bound to `gru` when this
            cell is executed.
        prompt: Seed text; it is lower-cased and split on whitespace, and every
            resulting word is looked up with `word_to_id`.
        words: Number of words to append.
        temperature: Passed through to `sample`.

    Returns:
        The prompt words followed by the generated words, joined by spaces.
    """
    word_ids = [word_to_id(word) for word in prompt.lower().split()]

    for _ in range(words):
        # Feed the whole running sequence as ONE sample of shape
        # (1, len(word_ids)). A 1-D array would be read as len(word_ids)
        # separate samples of length 1, so the prediction would depend on the
        # last word only.
        batch = np.array([word_ids])
        next_word_probs = model.predict(x=batch)[0]
        word_ids.append(int(sample(next_word_probs, temperature)))

    return ' '.join(id_to_word(word_id) for word_id in word_ids)

In [None]:
# Sample a continuation from the LSTM baseline at a moderate temperature.
generate(
    prompt='our approach',
    model=lstm,
    temperature=0.4,
)

## Evaluation

In [None]:
# Display label for each training-history object, used by the report below.
name = {
    gru_loss: 'GRU',
    gru_w2v_loss: 'GRU + Word2Vec',
    lstm_loss: 'LSTM',
    lstm_w2v_loss: 'LSTM + Word2Vec',
}

In [None]:
def min_val_loss(model):
    """Return the smallest validation loss recorded in a training history.

    Args:
        model: Object exposing a `history` mapping with a 'val_loss' sequence
            (the report loop passes the objects returned by `fit`).

    Returns:
        The minimum of `model.history['val_loss']`.
    """
    return min(model.history['val_loss'])

In [None]:
# Report each baseline's best validation loss and the matching perplexity.
# Perplexity is exp(cross-entropy), since the loss uses the natural logarithm.
for model in [gru_loss, gru_w2v_loss, lstm_loss, lstm_w2v_loss]:
    best_val_loss = min_val_loss(model)
    print("Minimum validation loss for {}: {:.5f}".format(name[model], best_val_loss))
    # The label previously began with an invalid "\P" escape, which printed a stray backslash.
    print("Perplexity for model {}: {:.2f}".format(name[model], math.exp(best_val_loss)))