## RNN Model

In [86]:
import os
import numpy as np
from IPython.display import HTML
import json
from HMM import unsupervised_HMM
from HMM_helper import (
    text_to_wordcloud,
    states_to_wordclouds,
    parse_observations,
    sample_sentence,
    visualize_sparsities,
    animate_emission
)
import re
import string

In [2]:
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.preprocessing.sequence import pad_sequences

Using TensorFlow backend.


In [4]:
# Read the raw Shakespeare corpus; the context manager guarantees the file
# handle is closed as soon as the contents have been read.
with open(os.path.join(os.getcwd(), 'data_Shakespeare/shakespeare.txt')) as raw_file:
    text = raw_file.read()

#### Data Preprocessing

In [5]:
# Remove digit-only lines, blank lines, and surrounding whitespace; lowercase
# the rest and drop bracket characters. Every kept line ends with '\n'.
lines = text.splitlines()
all_lines = []

for line in lines:
    stripped = line.strip()
    # Test the *stripped* text: a whitespace-only line is blank too and must
    # not end up in the output as a bare '\n'.
    if not stripped or stripped.isdigit():
        continue
    clean_line = re.sub('[(){}<>]', '', stripped.lower())
    # A line made only of brackets is empty after the substitution; skip it.
    if clean_line:
        all_lines.append(clean_line + '\n')

In [6]:
# Save clean poetry lines into a new file. Each entry already carries its
# trailing newline, and the `with` block closes the file on exit, so no
# explicit close() call is needed.
with open('ShakespeareLines.txt', 'w') as f:
    f.writelines(all_lines)

In [7]:
# Import the cleaned Shakespeare lines; the context manager closes the file
# handle once the contents have been read.
with open(os.path.join(os.getcwd(), 'ShakespeareLines.txt')) as clean_file:
    raw_text = clean_file.read()

In [8]:
# Generate every sliding window of seq_len input characters plus the one
# character that follows them (so each window holds seq_len + 1 characters).
seq_len = 40
all_sequences = [
    raw_text[i - seq_len:i + 1]
    for i in range(seq_len, len(raw_text))
]
# Newline-terminated concatenation of all windows. Built with a single join
# instead of repeated `+=`, which re-copies the growing string on each step.
raw_sequences = ''.join(seq + '\n' for seq in all_sequences)

In [9]:
# Sorted vocabulary: every distinct character that occurs in the windows
chars = sorted(set(raw_sequences))

In [10]:
# Lookup tables: character -> integer index, and integer index -> character
char_indices = {ch: idx for idx, ch in enumerate(chars)}
indices_char = dict(enumerate(chars))

In [11]:
# Translate every character window into its list of integer indices
encoded_seq = [
    [char_indices[ch] for ch in window]
    for window in all_sequences
]

In [12]:
# Number of distinct characters; used as the one-hot width of the inputs and
# as the size of the softmax output layer.
vocab_size = len(char_indices)

In [14]:
# Split each encoded window into its inputs (all indices but the last) and
# its target (the last index), then one-hot encode both.
# Fresh names are used instead of rebinding `encoded_seq`, so re-running this
# cell always starts from the integer-encoded windows and gives the same X, y.
seq_array = np.asarray(encoded_seq)
X_idx, y_idx = seq_array[:, :-1], seq_array[:, -1]
# to_categorical appends a class axis to the index array:
# X -> (num_windows, seq_len, vocab_size), y -> (num_windows, vocab_size).
X = to_categorical(X_idx, num_classes=vocab_size)
y = to_categorical(y_idx, num_classes=vocab_size)

### Model Training

In [15]:
# Network definition: one 128-unit LSTM layer reading (seq_len, vocab_size)
# one-hot windows, followed by a softmax over the character vocabulary.
model = Sequential([
    LSTM(128, input_shape=(seq_len, vocab_size)),
    Dense(vocab_size, activation='softmax'),
])

In [16]:
# Configure training: Adam optimizer, cross-entropy against the one-hot
# target character, with accuracy reported alongside the loss.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [17]:
# Train for 60 epochs on the full set of windows; verbose=2 prints one
# summary line per epoch.
model.fit(
    X,
    y,
    epochs=60,
    verbose=2,
)

Epoch 1/60
 - 96s - loss: 2.3375 - accuracy: 0.3267
Epoch 2/60
 - 95s - loss: 1.9434 - accuracy: 0.4212
Epoch 3/60
 - 95s - loss: 1.7992 - accuracy: 0.4581
Epoch 4/60
 - 107s - loss: 1.7101 - accuracy: 0.4807
Epoch 5/60
 - 97s - loss: 1.6444 - accuracy: 0.4970
Epoch 6/60
 - 121s - loss: 1.5921 - accuracy: 0.5123
Epoch 7/60
 - 111s - loss: 1.5473 - accuracy: 0.5235
Epoch 8/60
 - 102s - loss: 1.5078 - accuracy: 0.5349
Epoch 9/60
 - 102s - loss: 1.4736 - accuracy: 0.5439
Epoch 10/60
 - 114s - loss: 1.4413 - accuracy: 0.5533
Epoch 11/60
 - 118s - loss: 1.4118 - accuracy: 0.5612
Epoch 12/60
 - 107s - loss: 1.3838 - accuracy: 0.5676
Epoch 13/60
 - 117s - loss: 1.3567 - accuracy: 0.5755
Epoch 14/60
 - 123s - loss: 1.3311 - accuracy: 0.5825
Epoch 15/60
 - 127s - loss: 1.3064 - accuracy: 0.5900
Epoch 16/60
 - 136s - loss: 1.2824 - accuracy: 0.5965
Epoch 17/60
 - 112s - loss: 1.2595 - accuracy: 0.6042
Epoch 18/60
 - 114s - loss: 1.2385 - accuracy: 0.6095
Epoch 19/60
 - 114s - loss: 1.2155 - accu

<keras.callbacks.callbacks.History at 0x13d8ebd10>

### Generate poem

In [18]:
# Temperature - https://github.com/keras-team/keras/blob/master/examples/lstm_text_generation.py
def sample(preds, temperature=1.0):
    """Sample one index from a probability array after temperature scaling.

    The probabilities are re-weighted as ``p ** (1 / temperature)`` and
    renormalised: values below 1 sharpen the distribution towards the most
    likely index, values above 1 flatten it.

    Parameters
    ----------
    preds : array-like of float
        Probabilities, one per index (e.g. a softmax output).
    temperature : float, optional
        Strictly positive scaling factor. Defaults to 1.0 (no re-weighting).

    Returns
    -------
    The sampled index (NumPy integer).

    Raises
    ------
    ValueError
        If ``temperature`` is not strictly positive.
    """
    if temperature <= 0:
        raise ValueError('temperature must be > 0, got %r' % (temperature,))
    preds = np.asarray(preds).astype('float64')
    # log(0) is a legitimate -inf here (that index keeps probability 0), so
    # silence the divide-by-zero warning instead of letting it flood the output.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift so the largest logit is 0. The softmax is unchanged by the shift,
    # but exp() can no longer underflow *every* entry to 0 at very small
    # temperatures, which previously produced a 0/0 (NaN) distribution.
    logits -= np.max(logits)
    exp_preds = np.exp(logits)
    probs = exp_preds / np.sum(exp_preds)
    # Draw a single one-hot sample and return the position of the 1.
    probas = np.random.multinomial(1, probs, 1)
    return np.argmax(probas)

In [111]:
# Generate poem from RNN model
def _finish_with_period(poem_text):
    """Strip trailing whitespace and make the text end with '.', '!' or '?'."""
    poem_text = poem_text.rstrip()
    if poem_text and poem_text[-1] in string.punctuation:
        # Keep an existing sentence terminator; replace any other trailing
        # punctuation mark (',', ':', ';', ...) with a period.
        if poem_text[-1] not in '.!?':
            poem_text = poem_text[:-1] + '.'
    else:
        poem_text += '.'
    return poem_text


def generate_poem(seed_text, temp, n_lines=14, max_chars=10000):
    """Generate a poem character by character from the trained model.

    Starting from ``seed_text``, the last ``seq_len`` characters are fed to
    ``model`` repeatedly and the next character is drawn with
    ``sample(pred, temp)`` until enough newlines have been produced.

    Parameters
    ----------
    seed_text : str
        Initial text. Every character must be a key of ``char_indices``.
    temp : float
        Sampling temperature forwarded to ``sample``.
    n_lines : int, optional
        Line budget. The counter starts at 1 for the seed and grows by one
        per generated newline; generation stops once it reaches ``n_lines``.
        Defaults to 14.
    max_chars : int, optional
        Upper bound on the number of generated characters. Defaults to 10000.

    Returns
    -------
    str
        The generated text with trailing whitespace removed, a sentence
        terminator at the end, and ``str.capitalize`` applied to every line.

    Raises
    ------
    KeyError
        If ``seed_text`` contains a character missing from ``char_indices``.
    """
    output = seed_text
    # Keep track of the number of lines so far (the seed counts as line 1)
    lines = 1
    for _ in range(max_chars):
        if lines >= n_lines:
            break
        # Only the last seq_len characters reach the model (the padding step
        # truncates from the front anyway), so encode just that window rather
        # than the whole text on every step.
        window = output[-seq_len:]
        encoded = [char_indices[char] for char in window]
        # Contexts shorter than seq_len are padded at the front with index 0.
        encoded = pad_sequences([encoded], maxlen=seq_len, truncating='pre')
        encoded = to_categorical(encoded, num_classes=len(char_indices))
        # Predict next character (using temperature)
        pred = model.predict(encoded, verbose=0)[0]
        next_index = sample(pred, temp)
        next_char = indices_char[next_index]
        output += next_char
        if next_char == '\n':
            lines += 1

    # Handle final punctuation: keep '.', '!' or '?', otherwise end with '.'.
    # Trailing whitespace is stripped first, so no real character is lost
    # when the loop stopped at max_chars without a closing newline.
    output = _finish_with_period(output)

    # Capitalize first letter of each line
    final_output = '\n'.join([line.capitalize() for line in output.splitlines()])

    return final_output

In [180]:
# Sample a poem at temperature 1.5 (the flattest, most random of the three runs)
poem_1 = generate_poem(
    seed_text="shall i compare thee to a summer's day?\n",
    temp=1.5,
)

In [181]:
# Show the poem sampled at temperature 1.5
print(poem_1)

Shall i compare thee to a summer's day?
To-most hend viowing her himed but tend,
Give's woold my judgment's wound faloverys,
Ye in my ranse thy quocter tyranning give,
The famess art my doy in turns straight,
Bo that be, of painy with forture of edoe,
The many a comperit my chisht'st meries,
Those deam hip and hendarly seemed bund.
Thet world it see kind to hime?
Is thou moon, 'tis no wasce writ doth pride.
Most love's fire diabeld's blunt no tonfore!
Where fooly forw in oquent to ellabtel?
How man staly i never fluth will alat shade,
Bith is to true find it gest'st by sicce.


In [188]:
# Sample a poem at temperature 0.75 from the same seed line
poem_2 = generate_poem(
    seed_text="shall i compare thee to a summer's day?\n",
    temp=0.75,
)

In [189]:
# Show the poem sampled at temperature 0.75
print(poem_2)

Shall i compare thee to a summer's day?
To hide remove thee thy sweet follows rivered,
And stails as a quyiour there removed
The thought to me, behire bare will be forted,
For she armble gazer on self-lie tencher,
And love in the very power more me,
And therefore have how truth love is in their ranter,
Whilst thy self thou grow's compair thy woe,
Be one faired in their power thy prevase,
And then we pirgly appireth part were.
Or impilit an thy crest, they horns,
The self that virgent assair of the image,
And sin my heart's prince im in thy compost,
Thy ridy in party for i think doth lies.


In [184]:
# Sample a poem at temperature 0.25 (the sharpest, most conservative run)
poem_3 = generate_poem(
    seed_text="shall i compare thee to a summer's day?\n",
    temp=0.25,
)

In [185]:
# Show the poem sampled at temperature 0.25
print(poem_3)

Shall i compare thee to a summer's day?
Then my song fear why behies his scope,
Whom then mine eye is still when the reginge:
The thought in their more with the worlds to swort,
When the charger harst i am a wonarding flee.
When thou art to ways my mistress' eye lies,
Where will the betreful she lice they wills to tongured,
And still now to the world with this postick,
And thou art the their change the dewell excused
By the thoughts of thy beauty slivined thy face,
For his great beauty's hours more than time
Who have i my lovely on thy stoply her with man.
Thou best how thy presare the change of truth,
For when i say my love to the heartured sweet.
