In [1]:
import os
from ipynb.fs.full.preprocessing import parse_observations, sample_sentence
import tensorflow as tf
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import LSTM
from keras.layers import Conv1D, MaxPooling1D
from keras.datasets import imdb

import numpy as np

Using TensorFlow backend.


In [2]:
def obs_map_reverser(obs_map):
    obs_map_r = {}

    for key in obs_map:
        obs_map_r[obs_map[key]] = key

    return obs_map_r

def sample_sentence(hmm, obs_map, n_words=100):
    # Get reverse map.
    obs_map_r = obs_map_reverser(obs_map)

    # Sample and convert sentence.
    emission, states = hmm.generate_emission(n_words)
    sentence = [obs_map_r[i] for i in emission]

    return ' '.join(sentence).capitalize()

def generate_obs(hmm, obs_map):
    '''
    Naively generates 14-line sonnet with 10 words each.
    
    Inputs:
    hmm: trained hmm
    obs_map: maps word to observation index
    
    Outputs:
    None
    '''
    # generate all words in sonnet
    all_words = sample_sentence(hmm, obs_map, 140)
    
    # split into 14 lines and add capitalization/naive punctuation
    for i in range(14):
        count = 0
        line = ' '.join(all_words[i*10:(i+1)*10]).capitalize()
        if i == 11 or i == 12:
            line += ','
        else:
            line += '.'
        print(line)

In [3]:
text = open(os.path.join(os.getcwd(), 'data/shakespeare.txt')).read()
obs, obs_map = parse_observations(text)

In [4]:
print(obs)

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 10, 24, 21, 25], [13, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 34, 46, 34, 47, 28, 34, 48, 46, 49, 50], [26, 6, 51, 52, 15, 53, 54, 55, 56, 57, 58, 28, 15, 59, 60, 61, 29, 30, 62, 63, 34, 64, 56, 22, 65, 66, 67, 68, 69], [70, 15, 71, 72, 73, 74, 75, 76, 28, 77, 15, 53, 78, 18, 15, 79, 56, 80], [81, 82, 83, 84, 85, 34, 86, 56, 87, 88, 89, 68, 34, 8, 90, 34, 91, 92, 93, 94, 95, 96, 52, 97, 76, 41, 98, 99, 100, 101, 102, 103], [104, 105, 106, 43, 107, 34, 108, 45, 43, 107, 15, 109, 100, 34, 110, 111, 28, 112, 61, 29, 30, 88, 113, 32, 114, 115, 116, 117, 56, 118, 119], [120, 121, 122, 119, 123, 34, 8, 124, 125, 26, 126, 127, 74, 128, 129, 100, 130, 84, 131, 132, 133, 56, 134, 132, 135, 136, 137, 21, 108, 18, 138, 29], [74, 114, 28, 76, 139, 140, 81, 26, 51, 135, 56, 141, 34, 142, 143, 81, 26, 144, 145, 146], [147, 68, 34, 148, 56, 149, 15, 150, 26, 151, 52, 152, 15

In [5]:
print(obs_map)



In [6]:
print(text)

                   1
From fairest creatures we desire increase,
That thereby beauty's rose might never die,
But as the riper should by time decease,
His tender heir might bear his memory:
But thou contracted to thine own bright eyes,
Feed'st thy light's flame with self-substantial fuel,
Making a famine where abundance lies,
Thy self thy foe, to thy sweet self too cruel:
Thou that art now the world's fresh ornament,
And only herald to the gaudy spring,
Within thine own bud buriest thy content,
And tender churl mak'st waste in niggarding:
  Pity the world, or else this glutton be,
  To eat the world's due, by the grave and thee.


                   2
When forty winters shall besiege thy brow,
And dig deep trenches in thy beauty's field,
Thy youth's proud livery so gazed on now,
Will be a tattered weed of small worth held:
Then being asked, where all thy beauty lies,
Where all the treasure of thy lusty days;
To say within thine own deep sunken eyes,
Were an all-eating shame, and thriftle

In [24]:
x_train = []
data=[]
for i in range(len(obs)):
    for j in range(len(obs[i])):
        if (len(data) == 40):
            x_train.append(data)
            data = []
        else:
            data.append(obs[i][j])
x_train = np.array(x_train)
print(x_train)

[[   0    1    2 ...   34   35   36]
 [  38   39   40 ...   63   34   64]
 [  22   65   66 ...    8   90   34]
 ...
 [  15 1312  100 ... 2908   18   21]
 [  21 3304 2650 ...   15 2354  100]
 [   4  288 3312 ... 3318 3052  268]]


In [28]:
max_features = len(obs_map)
embedding_size = 150
maxlen = 40
batch_size = 20
epochs = 20

model = Sequential()
# Add an Embedding layer expecting input vocab of size 1000, and
# output embedding dimension of size 64.
model.add(Embedding(max_features, embedding_size, input_length=maxlen))

# Add a LSTM layer with 128 internal units.
model.add(LSTM(150))

# Add a Dense layer with 10 units.
model.add(Dense(40, activation = 'softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['mse'])

model.summary()
model.fit(x_train, x_train,
          batch_size=batch_size,
          epochs=epochs)

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_9 (Embedding)      (None, 40, 150)           498150    
_________________________________________________________________
lstm_9 (LSTM)                (None, 150)               180600    
_________________________________________________________________
dense_9 (Dense)              (None, 40)                6040      
Total params: 684,790
Trainable params: 684,790
Non-trainable params: 0
_________________________________________________________________


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.callbacks.History at 0x1a656cfec88>