## RNN Model

In [1]:
import os
import numpy as np
from IPython.display import HTML
import json
from HMM import unsupervised_HMM
from HMM_helper import (
    text_to_wordcloud,
    states_to_wordclouds,
    parse_observations,
    sample_sentence,
    visualize_sparsities,
    animate_emission
)
import re
import string

In [2]:
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.preprocessing.sequence import pad_sequences

Using TensorFlow backend.


In [3]:
# Read the raw Shakespeare text file. The context manager closes the file
# handle as soon as the contents are in memory instead of leaving it open.
with open(os.path.join(os.getcwd(), 'data_Shakespeare/shakespeare.txt')) as corpus_file:
    text = corpus_file.read()

### Data Preprocessing

In [4]:
# Clean the corpus line by line:
#   * drop lines that are empty or contain only whitespace,
#   * drop lines that contain only digits,
#   * strip surrounding whitespace, lowercase, and remove bracket characters.
# Every kept line is stored with a single trailing newline.
lines = text.splitlines()
all_lines = []

for line in lines:
    stripped = line.strip()
    # Test the stripped text rather than the raw line, so whitespace-only
    # lines count as blank instead of slipping through as bare newlines.
    if not stripped or stripped.isdigit():
        continue
    clean_line = re.sub('[(){}<>]', '', stripped.lower())
    all_lines.append(clean_line + '\n')

In [5]:
# Save clean poetry lines into a new file. Every entry in all_lines already
# ends with '\n', so the entries are written back to back. The `with` block
# closes the file on exit, so no explicit close() call is needed.
with open('ShakespeareLines.txt', 'w') as f:
    f.writelines(all_lines)

In [6]:
# Import raw Shakespeare lines
raw_text = open(os.path.join(os.getcwd(), 'ShakespeareLines.txt')).read()

In [7]:
# Slide a window over the text one character at a time. Each window holds
# seq_len characters plus the single character that follows them, so every
# sequence is seq_len + 1 characters long.
seq_len = 40
all_sequences = [
    raw_text[end - seq_len:end + 1]
    for end in range(seq_len, len(raw_text))
]
# Every window followed by a newline, concatenated into one string
raw_sequences = ''.join(window + '\n' for window in all_sequences)

In [8]:
# Every distinct character that occurs in the sequences, in sorted order
unique_chars = set(raw_sequences)
chars = sorted(unique_chars)

In [9]:
# Lookup tables between characters and their integer indices:
# char -> index for encoding, index -> char for decoding
char_indices = {char: index for index, char in enumerate(chars)}
indices_char = dict(enumerate(chars))

In [10]:
# Translate every character sequence into its list of integer indices
encoded_seq = [
    [char_indices[symbol] for symbol in sequence]
    for sequence in all_sequences
]

In [11]:
# Vocabulary size: number of entries in the char -> index lookup table
vocab_size = len(char_indices)

In [12]:
# Split encoded sequence into X and y for training the model.
# The intermediate results get their own names and encoded_seq is left
# untouched, so this cell can be re-run without feeding already one-hot
# data back into the slicing step.
sequence_array = np.asarray(encoded_seq)
# All but the last index of each row are the inputs; the last index is the
# character the model should predict.
X_indices, y_indices = sequence_array[:, :-1], sequence_array[:, -1]
# One-hot encode the inputs (row by row) and the targets.
X = np.asarray([to_categorical(row, num_classes=vocab_size) for row in X_indices])
y = to_categorical(y_indices, num_classes=vocab_size)

### Model Training

In [40]:
# Build model: two stacked LSTM layers followed by a softmax layer with one
# output per vocabulary entry. The first LSTM returns its full output
# sequence so the second LSTM receives one vector per timestep.
model = Sequential([
    LSTM(128, return_sequences=True, input_shape=(seq_len, vocab_size)),
    LSTM(64),
    Dense(vocab_size, activation='softmax'),
])

In [41]:
# Compile model: categorical cross-entropy loss, Adam optimizer, and
# accuracy reported alongside the loss
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [42]:
# Fit model on the one-hot inputs and targets for 80 epochs;
# verbose=2 logs one summary line per epoch
model.fit(
    x=X,
    y=y,
    epochs=80,
    verbose=2,
)

Epoch 1/80
 - 207s - loss: 2.4079 - accuracy: 0.3085
Epoch 2/80
 - 199s - loss: 1.9829 - accuracy: 0.4104
Epoch 3/80
 - 201s - loss: 1.8476 - accuracy: 0.4451
Epoch 4/80
 - 200s - loss: 1.7406 - accuracy: 0.4741
Epoch 5/80
 - 175s - loss: 1.6680 - accuracy: 0.4940
Epoch 6/80
 - 180s - loss: 1.6112 - accuracy: 0.5078
Epoch 7/80
 - 168s - loss: 1.5653 - accuracy: 0.5193
Epoch 8/80
 - 169s - loss: 1.5233 - accuracy: 0.5313
Epoch 9/80
 - 179s - loss: 1.4862 - accuracy: 0.5410
Epoch 10/80
 - 187s - loss: 1.7284 - accuracy: 0.4800
Epoch 11/80
 - 187s - loss: 1.7342 - accuracy: 0.4753
Epoch 12/80
 - 200s - loss: 1.5178 - accuracy: 0.5334
Epoch 13/80
 - 199s - loss: 1.4748 - accuracy: 0.5455
Epoch 14/80
 - 192s - loss: 1.4475 - accuracy: 0.5514
Epoch 15/80
 - 208s - loss: 1.4247 - accuracy: 0.5571
Epoch 16/80
 - 211s - loss: 1.4000 - accuracy: 0.5641
Epoch 17/80
 - 201s - loss: 1.3886 - accuracy: 0.5672
Epoch 18/80
 - 177s - loss: 1.3580 - accuracy: 0.5749
Epoch 19/80
 - 202s - loss: 1.3284 - 

<keras.callbacks.callbacks.History at 0x165623f90>

In [39]:
# Persist the trained model to a file in the current working directory.
# NOTE(review): the filename says "dropout", but the model built in this
# notebook contains no Dropout layer — confirm the name is intentional.
model.save('model_dropout.h5')

### Generate poem

In [44]:
# Temperature - https://github.com/keras-team/keras/blob/master/examples/lstm_text_generation.py
def sample(preds, temperature):
    """Sample an index from a probability array after temperature scaling.

    The log-probabilities are divided by ``temperature``, turned back into a
    normalised distribution, and a single index is drawn from it with
    ``np.random.multinomial``.

    Args:
        preds: 1-D array-like of probabilities.
        temperature: Scaling factor for the log-probabilities. Values below 1
            sharpen the distribution and values above 1 flatten it. A value
            of exactly 0 selects the most probable index deterministically.

    Returns:
        The sampled index (a NumPy integer, as returned by ``np.argmax``).
    """
    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # Dividing by zero is undefined; the limit of temperature -> 0 is
        # greedy selection of the most probable entry.
        return np.argmax(preds)
    # log(0) yields -inf, which correctly becomes probability 0 after exp(),
    # so the divide-by-zero warning is only noise here.
    with np.errstate(divide='ignore'):
        scaled = np.log(preds) / temperature
    # Shifting by the maximum does not change the normalised result, but it
    # pins the largest exponent at 0 so that small temperatures cannot
    # underflow every term to zero (which would give 0/0 = NaN below).
    scaled = scaled - np.max(scaled)
    exp_preds = np.exp(scaled)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [45]:
# Generate poem from RNN model
def _polish_poem(raw_poem):
    """Fix the ending punctuation of a poem and capitalize each of its lines."""
    # Drop the newline that terminated the last generated line, if present.
    if raw_poem.endswith('\n'):
        raw_poem = raw_poem[:-1]
    if raw_poem and raw_poem[-1] in string.punctuation:
        # A sentence-ending mark is kept as is; any other trailing
        # punctuation is swapped for a period.
        if raw_poem[-1] not in '.!?':
            raw_poem = raw_poem[:-1] + '.'
    else:
        raw_poem += '.'
    # Capitalize first letter of each line (str.capitalize also lowercases
    # the remainder of the line).
    return '\n'.join(line.capitalize() for line in raw_poem.splitlines())


def generate_poem(seed_text, temp, num_lines=14, max_chars=10000):
    """Generate a poem character by character from the trained model.

    Relies on the notebook globals ``model``, ``char_indices``,
    ``indices_char`` and ``seq_len``.

    Args:
        seed_text: Text the poem starts with. It is counted as the first
            line, so it is expected to be one complete line ending in '\\n'.
        temp: Sampling temperature passed to ``sample``.
        num_lines: Total number of lines (seed line included) after which
            generation stops.
        max_chars: Upper bound on the number of generated characters, in
            case the model does not emit enough newlines.

    Returns:
        The poem as a string: one capitalized line per row, closed by a
        sentence-ending punctuation mark.

    Raises:
        KeyError: If ``seed_text`` contains a character that is not a key of
            ``char_indices``.
    """
    # Encode the seed once; each generated index is appended afterwards so
    # no character is ever encoded twice.
    encoded_text = [char_indices[char] for char in seed_text]
    generated = []
    # Keep track of the number of lines so far; the seed is line one.
    lines = 1
    for _ in range(max_chars):
        # Stop once the requested number of lines is complete
        if lines >= num_lines:
            break
        # Only the last seq_len characters are fed to the model; shorter
        # input is left-padded by pad_sequences.
        window = pad_sequences([encoded_text[-seq_len:]], maxlen=seq_len, truncating='pre')
        window = to_categorical(window, num_classes=len(char_indices))
        # Predict next character (using temperature)
        pred = model.predict(window, verbose=0)[0]
        next_index = sample(pred, temp)
        next_char = indices_char[next_index]
        encoded_text.append(next_index)
        generated.append(next_char)
        if next_char == '\n':
            lines += 1

    return _polish_poem(seed_text + ''.join(generated))

In [56]:
# Poem sampled at a high temperature (1.5)
opening_line = "shall i compare thee to a summer's day?\n"
poem_1 = generate_poem(seed_text=opening_line, temp=1.5)
print(poem_1)

Shall i compare thee to a summer's day?
Thou must feeso eyesure doth shade my burt,
And swrenks no dyet gens in their might
To the infowing chesiclave i seen!
From the than your love repoves to his skide?
For hell as thou shalt un a lovinh hearts?
For leth 'ounthing, thou she wint sughts dot, crussed,
By all und in singlate joy an the but lie,
Doshy what it me, like framore of good spite.
If i nor again, or canter fladvoobessy rie,
Scan rung in me checked 'tlo knlsge wiin,
Which swift tears your love 'tis canequear.
They to your fave like her id will wart seem,
On love to groath after nor-are.


In [48]:
# Poem sampled at a moderate temperature (0.75)
opening_line = "shall i compare thee to a summer's day?\n"
poem_2 = generate_poem(seed_text=opening_line, temp=0.75)
print(poem_2)

Shall i compare thee to a summer's day?
Thou art all thy gives to her time?
O no, thus in thee, and thred all tell puctlest ever,
And so thee i fold as an eyes wes,
Is in the breath, by art ornow, she is not,
To with the parts of thee, which live you with gentle soun
Doth heavened think on thine eye hath none:
Then you hand in other the sweeted beauty's pains,
And truly besiel fortuous, than theire age,
Maker shall i then in thee gazed nor well,
Than thou awakned in theich untaintay, are,
My alont to second foundon was blactioned
With canter even which now too my trust,
And in a kind and i be the past that looks.


In [49]:
# Poem sampled at a low temperature (0.25)
opening_line = "shall i compare thee to a summer's day?\n"
poem_3 = generate_poem(seed_text=opening_line, temp=0.25)
print(poem_3)

Shall i compare thee to a summer's day?
Thou art all their sweep she mind is love.
Now list i amain of thy love is subs,
Then have i do i lose of sinsumented line,
And both proidery? herefomen seat,
Me to thee ale the time to summer's days.
At the world to be wide my sweet so brow,
To trifled it self decaye, age, that so write
To your self to waste of things user-ling,
For they ere to worserosed for the part,
To leath, which is not so ill wretheres used,
And sweet self thou stick thee offarche,
Than thou shouldst be bott, a dud'st that breast
That thine eyes my mind nates be kind, express.
