# Baseline Models

In [31]:
import numpy as np

import gensim

from tensorflow.keras import utils
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import GRU, LSTM, Activation, Dense, Dropout, Embedding
from tensorflow.keras.callbacks import History 

In [32]:
history = History()

In [2]:
w2v_model = gensim.models.KeyedVectors.load("w2v.model", mmap='r')

In [3]:
vocab_size, emdedding_size = w2v_model.wv.vectors.shape
vocab_size, emdedding_size

(17862, 100)

In [4]:
x = np.load('data/x.npy')
y = np.load('data/y.npy')[:,0]

In [5]:
x.shape, y.shape

((5841, 100), (5841,))

## Baseline #1: GRU

In [6]:
gru = Sequential()

gru.add(Embedding(input_dim=vocab_size, output_dim=emdedding_size))
gru.add(GRU(256, input_shape=(vocab_size, emdedding_size), return_sequences=True))
gru.add(Dropout(0.2))
gru.add(GRU(256, return_sequences=True))
gru.add(Dropout(0.2))
gru.add(GRU(128))
gru.add(Dropout(0.2))
gru.add(Dense(vocab_size, activation='softmax'))

In [7]:
gru.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

In [35]:
filepath = "weights/gru.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
callbacks = [checkpoint]

In [36]:
gru_history = gru.fit(x, y, validation_split = 0.2, batch_size=64, epochs=20, callbacks=callbacks)

Train on 4672 samples, validate on 1169 samples
Epoch 1/20
 320/4672 [=>............................] - ETA: 1:06 - loss: 3.6596
Epoch 00001: loss improved from inf to 2.92768, saving model to weights/gru.hdf5
 320/4672 [=>............................] - ETA: 1:08 - loss: 3.6596

KeyboardInterrupt: 

## Baseline #2: GRU + Word2Vec

In [10]:
gru_w2v = Sequential()

gru_w2v.add(Embedding(input_dim=vocab_size, output_dim=emdedding_size, weights=[w2v_model.wv.vectors]))
gru_w2v.add(GRU(256, input_shape=(vocab_size, emdedding_size), return_sequences=True))
gru_w2v.add(Dropout(0.2))
gru_w2v.add(GRU(256, return_sequences=True))
gru_w2v.add(Dropout(0.2))
gru_w2v.add(GRU(128))
gru_w2v.add(Dropout(0.2))
gru_w2v.add(Dense(vocab_size, activation='softmax'))

In [11]:
gru_w2v.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

In [12]:
filepath = "weights/gru_w2v.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
callbacks = [checkpoint]

In [13]:
gru_w2v_history = gru_w2v.fit(x, y, validation_split = 0.2, batch_size=64, epochs=20, callbacks=callbacks)

Train on 5841 samples
Epoch 1/50
Epoch 00001: loss improved from inf to 7.97445, saving model to weights/gru_w2v.hdf5
Epoch 2/50
Epoch 00002: loss improved from 7.97445 to 6.81537, saving model to weights/gru_w2v.hdf5
Epoch 3/50
Epoch 00003: loss improved from 6.81537 to 6.57446, saving model to weights/gru_w2v.hdf5
Epoch 4/50
Epoch 00004: loss improved from 6.57446 to 6.34531, saving model to weights/gru_w2v.hdf5
Epoch 5/50
Epoch 00005: loss improved from 6.34531 to 6.15654, saving model to weights/gru_w2v.hdf5
Epoch 6/50
Epoch 00006: loss improved from 6.15654 to 5.99111, saving model to weights/gru_w2v.hdf5
Epoch 7/50
Epoch 00007: loss improved from 5.99111 to 5.82747, saving model to weights/gru_w2v.hdf5
Epoch 8/50
Epoch 00008: loss improved from 5.82747 to 5.61259, saving model to weights/gru_w2v.hdf5
Epoch 9/50
Epoch 00009: loss improved from 5.61259 to 5.34501, saving model to weights/gru_w2v.hdf5
Epoch 10/50
Epoch 00010: loss improved from 5.34501 to 5.09102, saving model to we

<tensorflow.python.keras.callbacks.History at 0x1b886378dd8>

## Baseline #3: LSTM 

In [None]:
lstm = Sequential()

lstm.add(Embedding(input_dim=vocab_size, output_dim=emdedding_size))
lstm.add(GRU(256, input_shape=(vocab_size, emdedding_size), return_sequences=True))
lstm.add(Dropout(0.2))
lstm.add(GRU(256, return_sequences=True))
lstm.add(Dropout(0.2))
lstm.add(GRU(128))
lstm.add(Dropout(0.2))
lstm.add(Dense(vocab_size, activation='softmax'))

In [None]:
lstm.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

In [None]:
filepath = "weights/lstm.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
callbacks = [checkpoint]

In [None]:
lstm_history = lstm.fit(x, y, validation_split = 0.2, batch_size=64, epochs=20, callbacks=callbacks)

## Baseline #4: LSTM + Word2Vec 

In [37]:
lstm_w2v = Sequential()

lstm_w2v.add(Embedding(input_dim=vocab_size, output_dim=100, weights=[w2v_model.wv.vectors]))
lstm_w2v.add(LSTM(256, input_shape=(vocab_size, 100), return_sequences=True))
lstm_w2v.add(Dropout(0.2))
lstm_w2v.add(LSTM(256, return_sequences=True))
lstm_w2v.add(Dropout(0.2))
lstm_w2v.add(LSTM(128))
lstm_w2v.add(Dropout(0.2))
lstm_w2v.add(Dense(vocab_size, activation='softmax'))

In [38]:
lstm_w2v.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

In [39]:
filepath = "weights/lstm_w2v.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
callbacks = [checkpoint]

In [None]:
lstm_w2v_history = lstm_w2v.fit(x, y, validation_split = 0.2, batch_size=64, epochs=20, callbacks=callbacks)

## Generation

In [14]:
def sample(preds, temperature):
    if temperature <= 0:
        return np.argmax(preds)

    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
  
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
  
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [15]:
def word_to_id(word):
    return w2v_model.wv.key_to_index[word]

def id_to_word(id):
    return w2v_model.wv.index_to_key[id]

In [23]:
def generate(model=gru, prompt='In this paper', words=20, temperature=0.1):
    word_ids = [word_to_id(word) for word in prompt.lower().split()]
    
    for i in range(words):
        prediction = model.predict(x=np.array(word_ids))
        id = sample(prediction[-1], temperature)
        word_ids.append(id)
    
    return ' '.join(id_to_word(id) for id in word_ids)

In [26]:
generate(model=gru_w2v, prompt='temporal embeddings are', words=20)

'temporal embeddings are accomplishing memorynetwork bhe optimale determination breaking corola groupes wadden checkups similarly ignores vall andreasvc wikibert represen exerted closeness abstaining extrins'

## Evaluation