## RNN Model

In [86]:
import os
import numpy as np
from IPython.display import HTML
import json
from HMM import unsupervised_HMM
from HMM_helper import (
    text_to_wordcloud,
    states_to_wordclouds,
    parse_observations,
    sample_sentence,
    visualize_sparsities,
    animate_emission
)
import re
import string

In [2]:
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.preprocessing.sequence import pad_sequences

Using TensorFlow backend.


In [4]:
# Load the raw corpus. The context manager closes the file handle as soon as
# the contents have been read instead of leaving it open for the whole session.
with open(os.path.join(os.getcwd(), 'data_Shakespeare/shakespeare.txt')) as corpus_file:
    text = corpus_file.read()

### Data Preprocessing

In [5]:
# Clean the corpus line by line: drop digit-only lines and blank lines, trim
# surrounding whitespace, lower-case the text and remove bracket characters.
lines = text.splitlines()
all_lines = []

for line in lines:
    # Lines holding at most one character are discarded.
    if len(line) <= 1:
        continue
    clean_line = line.strip()
    # Whitespace-only lines become empty after stripping; skip them so they are
    # not written out as blank lines. Lines made up solely of digits are skipped too.
    if not clean_line or clean_line.isdigit():
        continue
    clean_line = re.sub('[(){}<>]', '', clean_line.lower())
    # Every kept line is stored with its own trailing newline.
    all_lines.append(clean_line + '\n')

In [6]:
# Save clean poetry lines into a new file. Every entry of all_lines carries its
# own trailing newline, so the list can be written as-is; the with-statement
# closes the file on exit, which makes an explicit close() unnecessary.
with open('ShakespeareLines.txt', 'w') as f:
    f.writelines(all_lines)

In [7]:
# Read the cleaned Shakespeare lines back in as one string. The context manager
# guarantees the file handle is closed once the text has been read.
with open(os.path.join(os.getcwd(), 'ShakespeareLines.txt')) as clean_file:
    raw_text = clean_file.read()

In [8]:
# Slide a window over the corpus one character at a time. Each window holds
# seq_len + 1 characters: seq_len characters of context followed by the single
# character that comes next in the text.
seq_len = 40
all_sequences = [
    raw_text[end - seq_len:end + 1]
    for end in range(seq_len, len(raw_text))
]
# Every window followed by '\n', concatenated in one pass with join rather
# than by growing a string inside the loop.
raw_sequences = ''.join(window + '\n' for window in all_sequences)

In [9]:
# Vocabulary: every distinct character found in the sequences, in sorted order
chars = sorted(set(raw_sequences))

In [10]:
# Lookup tables between characters and their integer indices, one per direction
# (character -> index and index -> character)
char_indices = {ch: idx for idx, ch in enumerate(chars)}
indices_char = dict(enumerate(chars))

In [11]:
# Translate every sequence into the list of integer indices of its characters
encoded_seq = [
    [char_indices[ch] for ch in window]
    for window in all_sequences
]

In [12]:
# Number of distinct characters, taken from the size of the char -> index map
vocab_size = len(char_indices)

In [14]:
# Split the encoded sequences into model inputs X and targets y, one-hot
# encoding both. Each stage gets its own name so that `encoded_seq` is left
# untouched and this cell produces the same result when it is run again.
seq_array = np.asarray(encoded_seq)
# All characters but the last form the input; the last one is the target.
X_indices, y_indices = seq_array[:, :-1], seq_array[:, -1]
# to_categorical appends a one-hot axis of length vocab_size to its input.
X = to_categorical(X_indices, num_classes=vocab_size)
y = to_categorical(y_indices, num_classes=vocab_size)

### Model Training

In [15]:
# Network definition: one LSTM layer with 128 units reading sequences of
# seq_len one-hot vectors, followed by a softmax layer over the vocabulary
model = Sequential([
    LSTM(128, input_shape=(seq_len, vocab_size)),
    Dense(vocab_size, activation='softmax'),
])

In [16]:
# Compile model: categorical cross-entropy loss (the targets y are one-hot
# vectors), the Adam optimizer, and accuracy tracked as an additional metric
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [17]:
# Fit model: 60 epochs over the full (X, y) training set; verbose=2 logs one
# summary line per epoch. No validation data is passed, so the reported loss
# and accuracy are measured on the training data only.
model.fit(X, y, epochs=60, verbose=2)

Epoch 1/60
 - 96s - loss: 2.3375 - accuracy: 0.3267
Epoch 2/60
 - 95s - loss: 1.9434 - accuracy: 0.4212
Epoch 3/60
 - 95s - loss: 1.7992 - accuracy: 0.4581
Epoch 4/60
 - 107s - loss: 1.7101 - accuracy: 0.4807
Epoch 5/60
 - 97s - loss: 1.6444 - accuracy: 0.4970
Epoch 6/60
 - 121s - loss: 1.5921 - accuracy: 0.5123
Epoch 7/60
 - 111s - loss: 1.5473 - accuracy: 0.5235
Epoch 8/60
 - 102s - loss: 1.5078 - accuracy: 0.5349
Epoch 9/60
 - 102s - loss: 1.4736 - accuracy: 0.5439
Epoch 10/60
 - 114s - loss: 1.4413 - accuracy: 0.5533
Epoch 11/60
 - 118s - loss: 1.4118 - accuracy: 0.5612
Epoch 12/60
 - 107s - loss: 1.3838 - accuracy: 0.5676
Epoch 13/60
 - 117s - loss: 1.3567 - accuracy: 0.5755
Epoch 14/60
 - 123s - loss: 1.3311 - accuracy: 0.5825
Epoch 15/60
 - 127s - loss: 1.3064 - accuracy: 0.5900
Epoch 16/60
 - 136s - loss: 1.2824 - accuracy: 0.5965
Epoch 17/60
 - 112s - loss: 1.2595 - accuracy: 0.6042
Epoch 18/60
 - 114s - loss: 1.2385 - accuracy: 0.6095
Epoch 19/60
 - 114s - loss: 1.2155 - accu

<keras.callbacks.callbacks.History at 0x13d8ebd10>

### Generate poem

In [18]:
# Temperature - https://github.com/keras-team/keras/blob/master/examples/lstm_text_generation.py
def sample(preds, temperature):
    """Draw one index from the distribution `preds`, rescaled by `temperature`.

    The probabilities are taken to log space, divided by the temperature,
    exponentiated and renormalised; a single multinomial draw from the result
    selects the returned index.
    """
    log_probs = np.log(np.asarray(preds).astype('float64'))
    weights = np.exp(log_probs / temperature)
    rescaled = weights / np.sum(weights)
    draw = np.random.multinomial(1, rescaled, 1)
    return np.argmax(draw)

In [87]:
# Generate poem from RNN model
def generate_poem(seed_text, temp, num_lines=14, max_chars=10000):
    """Generate a poem character by character with the trained model.

    Parameters
    ----------
    seed_text : str
        Text the poem starts with; it is counted as the first line. Every
        character must be a key of `char_indices`, otherwise a KeyError is raised.
    temp : float
        Sampling temperature forwarded to `sample`.
    num_lines : int, optional
        Line count (seed line included) at which generation stops.
    max_chars : int, optional
        Upper bound on the number of generated characters.

    Returns
    -------
    str
        The poem without its trailing newline, ending in '.', '!' or '?'.
    """
    output = seed_text
    lines = 1
    for _ in range(max_chars):
        # Only the last seq_len characters are fed to the model (truncating='pre'
        # cuts from the front), so encoding just that tail is sufficient.
        encoded = [char_indices[char] for char in output[-seq_len:]]
        # NOTE(review): seeds shorter than seq_len are left-padded by
        # pad_sequences; its padding value presumably defaults to 0, which is
        # also a valid character index here - confirm this is acceptable.
        encoded = pad_sequences([encoded], maxlen=seq_len, truncating='pre')
        encoded = to_categorical(encoded, num_classes=len(char_indices))
        # Predict next character (using temperature)
        pred = model.predict(encoded, verbose=0)[0]
        next_index = sample(pred, temp)
        next_char = indices_char[next_index]
        output += next_char
        if next_char == '\n':
            lines += 1
        if lines >= num_lines:
            break
    return _close_poem(output)


def _close_poem(poem):
    """Drop one trailing newline and make the poem end with '.', '!' or '?'."""
    # The newline is only present when generation stopped on a completed line;
    # if the character cap was hit instead, no real character is thrown away.
    if poem.endswith('\n'):
        poem = poem[:-1]
    if poem and poem[-1] in string.punctuation:
        # Sentence-ending punctuation is kept as generated ...
        if poem[-1] in '.!?':
            return poem
        # ... any other punctuation mark is replaced by a full stop.
        return poem[:-1] + '.'
    return poem + '.'

In [92]:
poem_1 = generate_poem("shall i compare thee to a summer's day?\n", 1.5)

In [93]:
print(poem_1)

shall i compare thee to a summer's day?
to-grable hath in that hatethrep's outblack sin elt!
beansard which i stedpy day for wheredyy?
thou woul yets now grows of aliquliethte.
the ghymy', though blenss so dismine,
him most trought me bountion forge'e foubed,
simer to them, nim st, betreanes both's living enemen;
gavent far thou, her coveretc uutervide
so mar ifth from a faced do hure ther,
sum, dost long your eye i cheple sevire cheek dit,
and therefore shalows dwellst desseed, wilks boust kind,
the herour in thy love no, for love impain.
a warns a reet-bairted roched equisage;
more ngasung tranked, upon my povaping.


In [96]:
poem_2 = generate_poem("shall i compare thee to a summer's day?\n", 0.75)

In [97]:
print(poem_2)

shall i compare thee to a summer's day?
then all my hold, len their love doth thought,
and need berouty to time all earth.
for then vice is do fame in all write!
the ray by thy rich ronc, when he waknes bond,
or he am a warthour in my mistress
when thy heart do not precious uptainge.
o no, that nearally dreye with the cunst
and ear thy sweet self thy recomp love away,
since which hair should be distind to be,
to leaves to the romes to plece-cheat, when the receives on san,
which i alter pentle the strangery,
steal nothing or a cankle of the heart,
when i have sweet heat'st this swayldy stipe.


In [100]:
poem_3 = generate_poem("shall i compare thee to a summer's day?\n", 0.25)

In [101]:
print(poem_3)

shall i compare thee to a summer's day?
thou art some and more that this hell no mon.
i know the constance of their glass and face,
without me her be see thy sweet love have,
when eyes it with fair after their prize
to see the worst, whose most in one rest,
call the fair which thou rememply it bear.
let those that more presons the dear to be.
for i have pleasure of thy for their prove,
where then my love and shame all my silf,
why whos ye worth in your sweet self to be,
to see the form were born their state woush thy heart,
the owe of so, mine eye is the trow of me,
san your power to make the worst me her sight.
