# LSTM Generation Study
---
Glenn Abastillas

In [9]:
from keras.layers import Input, Embedding, LSTM, Dense, Reshape, Flatten
from keras.utils import to_categorical
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences, TimeseriesGenerator
from keras.models import Model, Sequential

from keras.datasets import imdb
import numpy as np

# np_load_old = np.load
# np.load = lambda *a, **k : np_load_old(*a, allow_pickle=True, **k)

In [6]:
# Load the IMDB reviews as sequences of integer word indices, with the
# vocabulary restricted through num_words=1000.
(x_train, y_train), (x_test, y_test) = imdb.load_data(
    num_words=1000,
)

TypeError: <lambda>() got multiple values for keyword argument 'allow_pickle'

In [18]:
# Pad (at the end) or truncate every review to exactly 200 tokens so both
# splits become rectangular arrays. The same settings are applied to each split.
x_train, x_test = (
    pad_sequences(split, maxlen=200, padding='post')
    for split in (x_train, x_test)
)

#### Preprocess Data

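The input to `TimeseriesGenerator` has to be an `np.ndarray`; below, the padded sequences are flattened into one 1-D array before being passed in.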

In [97]:
# Flatten the padded reviews once into a single 1-D token stream and reuse it
# as both data and targets (previously x_train was flattened twice, allocating
# two identical copies of the whole corpus).
# NOTE(review): flattening joins consecutive reviews and keeps any padding
# values added by pad_sequences, so some windows span review boundaries or
# contain padding -- confirm this is intended.
train_tokens = x_train.flatten()
ts_train = TimeseriesGenerator(
    data=train_tokens,
    targets=train_tokens,
    length=10,        # each context window holds 10 consecutive tokens
    sampling_rate=1,  # use every token inside a window
    batch_size=10,    # windows per batch
)

In [98]:
# Number of batches the generator yields.
len(ts_train)

499999

In [99]:
# Pull the first batch: `context` holds the input windows and `target` the
# token that follows each window.
context, target = ts_train[0]

In [100]:
# Shapes of the first batch: (windows, window length) and (windows,).
context.shape, target.shape

((10, 10), (10,))

In [104]:
# Inspect the windows: each row is the previous row shifted by one token.
context

array([[  5.,  25., 100.,  43., 838., 112.,  50., 670.,   2.,   9.],
       [ 25., 100.,  43., 838., 112.,  50., 670.,   2.,   9.,  35.],
       [100.,  43., 838., 112.,  50., 670.,   2.,   9.,  35., 480.],
       [ 43., 838., 112.,  50., 670.,   2.,   9.,  35., 480., 284.],
       [838., 112.,  50., 670.,   2.,   9.,  35., 480., 284.,   5.],
       [112.,  50., 670.,   2.,   9.,  35., 480., 284.,   5., 150.],
       [ 50., 670.,   2.,   9.,  35., 480., 284.,   5., 150.,   4.],
       [670.,   2.,   9.,  35., 480., 284.,   5., 150.,   4., 172.],
       [  2.,   9.,  35., 480., 284.,   5., 150.,   4., 172., 112.],
       [  9.,  35., 480., 284.,   5., 150.,   4., 172., 112., 167.]])

In [103]:
# Inspect the targets: each entry is the token that follows the matching
# context row.
target

array([ 35., 480., 284.,   5., 150.,   4., 172., 112., 167.,   2.])

In [105]:
# Working aliases for the first batch, used by the cells that follow.
context_ = context
target_ = target

In [106]:
# One-hot encode the next-token targets into a (batch, 999) matrix with a
# single vectorized call.
# Encoding from `target` (not `target_`) keeps this cell idempotent: re-running
# it no longer one-hot encodes the already encoded matrix. It also avoids using
# `_` as a loop variable, which IPython reserves for the last cell output.
# NOTE(review): the data was loaded with num_words=1000, so token index 999 may
# occur and would not fit in 999 classes -- confirm the class count together
# with the Dense(999) output layer.
target_ = to_categorical(target, 999)

In [107]:
# Confirm the shapes after encoding: targets are now (windows, classes).
context_.shape, target_.shape

((10, 10), (10, 999))

#### Build Model

In [115]:
# Functional-API graph: 10 timesteps with 1 feature each -> LSTM -> softmax
# over 999 classes.
# NOTE(review): raw token ids are fed to the LSTM as scalar features; Embedding
# is imported but not used -- confirm this is intended.
# NOTE(review): 999 output classes vs. num_words=1000 at load time -- confirm.
I = Input(shape=(10, 1))
E = LSTM(units=128)(I)
D = Dense(units=999, activation='softmax')(E)

In [116]:
# Wrap the input and softmax output tensors into a trainable model.
model = Model(
    inputs=I,
    outputs=D,
)
# Categorical cross-entropy because the targets are one-hot vectors;
# accuracy is tracked alongside the loss.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['acc'],
)
model.summary()

Model: "model_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_10 (InputLayer)        (None, 10, 1)             0         
_________________________________________________________________
lstm_10 (LSTM)               (None, 128)               66560     
_________________________________________________________________
dense_9 (Dense)              (None, 999)               128871    
Total params: 195,431
Trainable params: 195,431
Non-trainable params: 0
_________________________________________________________________


---
Single-batch test: fit the model on one batch (10 windows) only.

In [118]:
# Train on the single batch only, for 100 epochs.
model.fit(
    x=context_.reshape(10, 10, 1),  # (samples, timesteps, features) as declared by Input((10, 1))
    y=target_,
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.callbacks.History at 0x125da9160>

#### Generate Text

In [121]:
# Pick one context window at random as the seed sequence for generation.
# NOTE(review): no random seed is set, so the chosen row differs between runs.
g = context_[
    np.random.choice(len(context_)),  # random row index
    :10,                              # first 10 tokens of that row
]

In [132]:
# Replace the seed with a fixed, deterministic window: the integers 0..9.
g = np.arange(10)

In [133]:
# Predict the most likely next token for the seed window
# (reshaped to one sample of 10 timesteps with 1 feature).
np.argmax(model.predict(g.reshape(1, 10, 1)))

480

In [136]:
# Generate the second token by feeding the model's own first prediction back in.
# The token is computed here rather than hard-coded (previously the literal 480
# was copied from an earlier cell's output, which is only valid for that
# particular training run).
next_token = model.predict(g.reshape(1, 10, 1)).argmax()
# Slide the window: drop the oldest token and append the predicted one.
shifted_window = np.append(g[1:], [next_token])
model.predict(shifted_window.reshape(1, 10, 1)).argmax()

284