In [1]:
# Some of the methods for this problem were reused from homework 5

import pickle
from keras.models import Sequential
from keras.layers import Dense, LSTM, Activation, Lambda
import numpy as np
from keras import optimizers

Using TensorFlow backend.


In [2]:
# Load the corpus of poems (each poem is a sequence of letters).
# NOTE: pickle can execute arbitrary code while loading; only load files you trust.
# The context manager closes the file handle as soon as loading finishes.
with open("data/letter_data.p", "rb") as letter_file:
    letter_data = pickle.load(letter_file)

In [3]:
# Lookup tables in both directions: letter -> index and index -> letter.
# Indices are handed out in order of first appearance across all poems.
letter_int, int_letter = {}, {}
i = 0
for poem in letter_data:
    for letter in poem:
        if letter in letter_int:
            continue  # already indexed
        letter_int[letter] = i
        int_letter[i] = letter
        i += 1

In [4]:
def get_word_repr(letter_to_int, word):
    """
    Build the one-hot vector for a single token.

    Arguments:
        letter_to_int: Dictionary mapping each token to its one-hot index.
        word:          Token to encode; must be a key of letter_to_int.

    Returns:
        A 1-D float array with one entry per dictionary key, all zeros except
        for a 1 at position letter_to_int[word].
    """
    one_hot = np.zeros(len(letter_to_int))
    one_hot[letter_to_int[word]] = 1
    return one_hot

In [5]:
def generate_traindata(word_list, word_to_index, window_size=40, skip = 7):
    """
    Generates sliding-window training data for next-item prediction.

    Every `skip`-th position i (starting at `window_size`) becomes one sample:
    the `window_size` items immediately before i form the input sequence and
    the item at i is the label.

    Arguments:
        word_list:     Sequential list of letters (strings).
        word_to_index: Dictionary mapping words to their corresponding index
                       in a one-hot-encoded representation of our corpus.

        window_size:   Number of preceding items in each input sequence.

        skip:          Step between consecutive target positions.

    Returns:
        (trainX, trainY):     trainX has shape (samples, window_size, vocab) and holds
                              the one-hot-encoded input windows; trainY has shape
                              (samples, vocab) and holds the one-hot-encoded labels.
                              When word_list is too short to fill a single window the
                              arrays keep those ranks with zero samples, so they can
                              still be concatenated with the output of other calls.

    Raises:
        KeyError: If an item used in a window or as a label is missing from word_to_index.
    """
    vocab_size = len(word_to_index)
    target_positions = range(window_size, len(word_list), skip)

    # Preallocate so the result has a fixed rank even when there are no samples.
    trainX = np.zeros((len(target_positions), window_size, vocab_size))
    trainY = np.zeros((len(target_positions), vocab_size))

    for sample, i in enumerate(target_positions):
        # i >= window_size, so the slice always holds exactly window_size items.
        for step, adjacent_word in enumerate(word_list[i - window_size:i]):
            trainX[sample, step, word_to_index[adjacent_word]] = 1
        trainY[sample, word_to_index[word_list[i]]] = 1

    return (trainX, trainY)

In [6]:
# Create the training set
# Each poem is windowed on its own, so no input window spans two poems.
x_chunks = []
y_chunks = []
for poem in letter_data:
    poem_train_x, poem_train_y = generate_traindata(poem, letter_int)
    # A poem too short to fill one window contributes no samples; skip it so
    # the concatenation below only ever sees arrays of matching rank.
    if len(poem_train_x) == 0:
        continue
    x_chunks.append(poem_train_x)
    y_chunks.append(poem_train_y)

if not x_chunks:
    raise ValueError("no training samples were generated from letter_data")

# Concatenate once at the end instead of re-copying the growing array per poem.
train_x = np.concatenate(x_chunks)
train_y = np.concatenate(y_chunks)

In [7]:
# Generate the neural network
LSTM_UNITS = 180
NUM_EPOCHS = 75
BATCH_SIZE = 128

# Each sample in train_x is a (window length, one-hot size) sequence.
window_length, one_hot_size = train_x.shape[1:]

model = Sequential([
    LSTM(LSTM_UNITS, input_shape = (window_length, one_hot_size)),
    # One output per entry of the one-hot label vector.
    Dense(train_y.shape[1]),
    Activation('softmax'),
    # NOTE(review): this divides the softmax output by 1.5, so predictions no
    # longer sum to 1. If temperature scaling was intended, it is normally
    # applied to the logits before the softmax -- confirm the intent.
    Lambda(lambda x: x / 1.5),
])
model.summary()

model.compile(loss = "categorical_crossentropy", optimizer = "rmsprop", metrics = ['accuracy'])
model.fit(train_x, train_y, epochs = NUM_EPOCHS, batch_size = BATCH_SIZE)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 180)               157680    
_________________________________________________________________
dense_1 (Dense)              (None, 38)                6878      
_________________________________________________________________
activation_1 (Activation)    (None, 38)                0         
_________________________________________________________________
lambda_1 (Lambda)            (None, 38)                0         
Total params: 164,558
Trainable params: 164,558
Non-trainable params: 0
_________________________________________________________________
Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
E

Epoch 73/75
Epoch 74/75
Epoch 75/75


<keras.callbacks.History at 0x7fdc73894da0>

In [8]:
def convert_line_to_one_hot(line, letter_int):
    """
    One-hot encode every letter of a line.

    Arguments:
        line:       Iterable of letters; each must be a key of letter_int.
        letter_int: Dictionary mapping letter -> one-hot index.

    Returns:
        Array stacking one one-hot vector per letter, in the order the
        letters appear in line.
    """
    return np.array([get_word_repr(letter_int, letter) for letter in line])

In [9]:
def vect_to_letter(vect, int_letter):
    """
    Map a score vector back to the letter with the highest score.

    Arguments:
        vect:       1-D sequence of numeric scores, one per letter index.
        int_letter: Dictionary mapping index -> letter.

    Returns:
        The letter stored at the index of the largest entry of vect. Ties go
        to the lowest index.

    Raises:
        ValueError: If vect is empty.
    """
    # np.argmax finds the true maximum even when no score is positive; a
    # running maximum seeded with 0 would fall back to index 0 in that case.
    return int_letter[int(np.argmax(vect))]

In [10]:
def generate_poem(RNN, letter_int, int_letter, window_size=40, max_chars=5000):
    """
    Generate a 14-line poem one character at a time from a fixed seed line.

    At every step the last `window_size` characters of the poem so far are
    one-hot encoded and passed to the model, and the highest-scoring letter is
    appended. Decoding is greedy, so the output is deterministic for a given
    model.

    Arguments:
        RNN:         Trained model exposing predict(); called with a batch
                     containing a single one-hot-encoded window.
        letter_int:  Dictionary mapping letter -> one-hot index.
        int_letter:  Dictionary mapping one-hot index -> letter.
        window_size: Number of trailing characters fed to the model. It should
                     match the window the model was trained with, and the
                     40-character seed line cannot fill a larger window.
        max_chars:   Upper bound on the total poem length. Greedy decoding may
                     never emit a newline, so generation stops here instead of
                     looping forever.

    Returns:
        The seed line followed by the generated text. Generation ends once 13
        further lines have been completed or max_chars is reached, whichever
        comes first.
    """
    # Initial line to seed
    first_line = "shall i compare thee to a summer\'s day?\n"

    curr_poem = first_line
    line_num = 2
    while line_num < 15 and len(curr_poem) < max_chars:
        # Encode only the trailing window; the model never sees more than that.
        curr_line = convert_line_to_one_hot(curr_poem[-window_size:], letter_int)
        # Predict the next character and decode it once
        next_char = RNN.predict(np.array([curr_line]))[0]
        next_letter = vect_to_letter(next_char, int_letter)
        curr_poem += next_letter
        if next_letter == "\n":
            line_num += 1

    return curr_poem

In [11]:
# Generate a sonnet with the trained model and print it.
# NOTE: this rebinds `poem`, which earlier cells used as their loop variable.
poem = generate_poem(model, letter_int, int_letter)
print(poem)

shall i compare thee to a summer's day?
the fremme then menp's fille whe brackey my ming,
since hack oo sulleds re ppornes withing in lleds,
and hrandy theer avered what hir pore,
whin thou wald be farton worth farring preass,
bot am im chindoun hayey in thee part,
fom choummyes thou stif there thing in dee,
and is llove bititish or sulloud hiedome have,
shat thou lats thoug and be a'th faice,
the areast of oo his foull doun all:
leans you drus an ille ey hatt,
you all you mond reer thine in the weel,
coom in therain llogg talllowhed bllodsers menjurnowndo sprobt,
whan stolll seapt tree lovery herring gicing,

