In [92]:
import numpy as np
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

# Separator line printed between sections of the console output
BORDER = "==============================================================="

In [93]:
def load_data(filename):
    '''
    Read a text file and return its content as a list of lines.

    Input: filename, path of the text file to read

    Output: list of strings obtained by stripping leading/trailing whitespace
    from the whole file content and splitting the rest on newline characters
    '''
    # The context manager closes the handle even if read() raises
    with open(filename) as handle:
        raw_text = handle.read()
    text_list = raw_text.strip().split('\n')
    return text_list

In [94]:
def remove_int(text):
    '''
    Strip every digit character out of each string of a list.

    Input: text, an iterable of strings

    Output: new list holding one string per input string, digits removed
    '''
    def drop_digits(line):
        # Keep only the characters that are not digits
        return ''.join(filter(lambda ch: not ch.isdigit(), line))

    return [drop_digits(line) for line in text]

In [95]:
def remove_empty(text):
    '''
    Drop the empty strings from a list of strings.

    Input: text, an iterable of strings

    Output: new list with every entry equal to '' filtered out; strings made
    only of whitespace are kept
    '''
    return [line for line in text if line != '']

In [96]:
def process_data_RNN(text, seq_length=40):
    '''
    Create fixed length character training sequences from the sonnet corpus.

    Digits and empty lines are removed, the remaining lines are joined with
    newlines, and a window of seq_length characters is slid over the result
    one character at a time. Each window is an input sequence; the character
    immediately after it is the target.

    Input:
        text: list of strings (the lines of the corpus)
        seq_length: number of characters per input sequence (default 40)

    Output: X, y, dataX, dataY, int_to_char, char_to_int
        X: one-hot array of shape (n_patterns, seq_length, n_vocab)
        y: one-hot array of shape (n_patterns, n_vocab)
        dataX: list of integer-encoded input sequences
        dataY: list of integer-encoded target characters
        int_to_char: dict mapping integer index to character
        char_to_int: dict mapping character to integer index
    '''

    new_text_list = remove_int(text)
    new_text_list = remove_empty(new_text_list)
    new_text = '\n'.join(new_text_list)

    print(BORDER)
    print("Processed text: ", new_text)
    print(BORDER)

    # create mapping of unique chars to integers, and a reverse mapping
    chars = sorted(set(new_text))
    char_to_int = {c: i for i, c in enumerate(chars)}
    int_to_char = {i: c for i, c in enumerate(chars)}

    # summarize the loaded data
    n_chars = len(new_text)
    n_vocab = len(chars)
    print("Total Characters: ", n_chars)
    print("Total Vocab: ", n_vocab)
    print("Int to char: ", int_to_char)

    # prepare the dataset of input to output pairs encoded as integers;
    # the range is empty when the text is not longer than seq_length
    dataX = []
    dataY = []
    for i in range(0, n_chars - seq_length):
        seq_in = new_text[i:i + seq_length]
        seq_out = new_text[i + seq_length]
        dataX.append([char_to_int[char] for char in seq_in])
        dataY.append(char_to_int[seq_out])
    n_patterns = len(dataX)
    print("Total Patterns: ", n_patterns)
    print(BORDER)

    # one-hot encode inputs (pattern, time step, char) and targets (pattern, char)
    X = np.zeros((n_patterns, seq_length, n_vocab))
    y = np.zeros((n_patterns, n_vocab))
    for i, sentence in enumerate(dataX):
        for t, ind in enumerate(sentence):
            X[i, t, ind] = 1
        y[i, dataY[i]] = 1

    return X, y, dataX, dataY, int_to_char, char_to_int

In [97]:
def train_LSTM(X, y, verbose=0, epochs=60, batch_size=128, units=128):
    '''
    Build a single-layer LSTM model, fit it on X and y, and return it.

    Input:
        X : one-hot encoded input sequences as a 3-D array; X.shape[1] and
            X.shape[2] (sequence length, vocabulary size) define the LSTM
            input shape
        y : one-hot encoding of the character coming after each sequence,
            a 2-D array whose second dimension sets the softmax output size
        verbose : verbosity flag forwarded to model.fit
        epochs : number of training epochs (default 60)
        batch_size : mini-batch size used by model.fit (default 128)
        units : number of LSTM units (default 128)

    Output: the fitted Sequential model

    To train on a subset of the data, slice X and y before calling.
    '''

    # define the LSTM model: one LSTM layer followed by a softmax over the vocab
    model = Sequential()
    model.add(LSTM(units, input_shape=(X.shape[1], X.shape[2])))
    model.add(Dense(y.shape[1], activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='RMSprop', metrics=['accuracy'])

    # fit the model
    model.fit(X, y, epochs=epochs, batch_size=batch_size, verbose=verbose)
    return model

In [98]:
def sample(preds, temperature=1.0):
    '''
    Draw one index at random from a probability array.

    The probabilities are re-weighted by temperature (log, divide, exp,
    renormalize) before a single multinomial draw is taken.

    Input:
        preds: array-like of probabilities
        temperature: divisor applied to the log-probabilities

    Output: the sampled index
    '''
    scaled_logits = np.log(np.asarray(preds).astype('float64')) / temperature
    weights = np.exp(scaled_logits)
    distribution = weights / np.sum(weights)
    # one trial, one experiment: the result is a one-hot row
    draw = np.random.multinomial(1, distribution, 1)
    return np.argmax(draw)

In [99]:
def generate_text(model, dataX, int_to_char, verbose=0, n_chars=600,
                  diversity=0.5):
    '''
    Generate text character by character with a trained model, starting from
    a randomly chosen seed sequence of dataX.

    Input:
        model: the LSTM model that we trained
        dataX: list of integer-encoded sequences (left unmodified)
        int_to_char: a dictionary matching integer to specific character
        verbose: when 1, print the seed and every sampling step; also
                 forwarded to model.predict
        n_chars: number of characters to generate (default 600)
        diversity: temperature passed to sample() (default 0.5)

    Output: the seed followed by the generated characters, as one string

    Raises: ValueError if dataX is empty
    '''
    if len(dataX) == 0:
        raise ValueError("dataX must contain at least one seed sequence")

    print(BORDER)
    print("Generating text")
    n_vocab = len(int_to_char)

    # pick a random seed; randint's upper bound is exclusive, so len(dataX)
    # makes every sequence (including the last one) eligible
    start = np.random.randint(0, len(dataX))
    # copy the seed so that appending below never mutates the caller's dataX
    pattern = list(dataX[start])
    seq = [int_to_char[value] for value in pattern]
    size = len(pattern)

    if verbose == 1:
        print("Seed: ", ''.join(seq))

    # generate characters
    for _ in range(n_chars):
        # One-hot encode the current window as a batch of one for the RNN
        x = np.zeros((1, size, n_vocab))
        for t, char in enumerate(pattern):
            x[0, t, char] = 1

        # Make prediction using trained model
        prediction = model.predict(x, verbose=verbose)[0]
        index = sample(prediction, diversity)

        # Convert prediction to character
        result = int_to_char[index]

        # Slide the window: add the prediction and drop the oldest character
        pattern.append(index)
        pattern = pattern[1:1 + size]

        # Add result to seq
        seq.append(result)

        if verbose == 1:
            # prediction[0] is the probability assigned to vocabulary index 0
            print("pred: ", prediction[0])
            print("selected index: ", index)
            print("selected char: ", result)
            print("new pattern: ",
                  ''.join([int_to_char[value] for value in pattern]))

    # Return seq as string
    return ''.join(seq)

In [100]:
def save_textfile(filename, text):
    '''
    Given filename and text, save text in file (overwriting existing content)

    Input: filename and text as string

    Output: 0 once the text has been written
    '''
    # The context manager closes the handle even if write() raises
    with open(filename, 'w') as handle:
        handle.write(text)
    return 0

In [101]:
# Input corpus path, output path for the generated text, and verbosity flag
file = 'data/shakespeare.txt'
save = 'generated/shakespeare_2.txt'
verbose = 1

# Read the corpus lines, then build the one-hot training arrays and lookups
text_list = load_data(file)
(X, y, dataX, dataY, int_to_char, char_to_int) = process_data_RNN(text_list)

Processed text:  From fairest creatures we desire increase,
That thereby beauty's rose might never die,
But as the riper should by time decease,
His tender heir might bear his memory:
But thou contracted to thine own bright eyes,
Feed'st thy light's flame with self-substantial fuel,
Making a famine where abundance lies,
Thy self thy foe, to thy sweet self too cruel:
Thou that art now the world's fresh ornament,
And only herald to the gaudy spring,
Within thine own bud buriest thy content,
And tender churl mak'st waste in niggarding:
  Pity the world, or else this glutton be,
  To eat the world's due, by the grave and thee.
                   
When forty winters shall besiege thy brow,
And dig deep trenches in thy beauty's field,
Thy youth's proud livery so gazed on now,
Will be a tattered weed of small worth held:
Then being asked, where all thy beauty lies,
Where all the treasure of thy lusty days;
To say within thine own deep sunken eyes,
Were an all-eating shame, and thriftless prai

Total Patterns:  97309


In [102]:

# Fit the LSTM on the one-hot encoded data; verbose is forwarded to training
model = train_LSTM(X, y, verbose=verbose)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


In [103]:
# Generate text from a random seed sequence, save it to disk, then print it
generated = generate_text(model, dataX, int_to_char, verbose=verbose)
save_textfile(save, generated)
print(generated)

Generating text
Seed:  hine,
And I my self am mortgaged to thy 
pred:  2.2667201e-07
selected index:  53
selected char:  s
new pattern:  ine,
And I my self am mortgaged to thy s
pred:  3.394084e-07
selected index:  49
selected char:  o
new pattern:  ne,
And I my self am mortgaged to thy so
pred:  6.6663524e-05
selected index:  52
selected char:  r
new pattern:  e,
And I my self am mortgaged to thy sor
pred:  9.96245e-05
selected index:  39
selected char:  e
new pattern:  ,
And I my self am mortgaged to thy sore
pred:  0.00012809876
selected index:  6
selected char:  ,
new pattern:  
And I my self am mortgaged to thy sore,
pred:  0.9941379
selected index:  0
selected char:  

new pattern:  And I my self am mortgaged to thy sore,

pred:  0.000371352
selected index:  30
selected char:  T
new pattern:  nd I my self am mortgaged to thy sore,
T
pred:  1.0662574e-10
selected index:  42
selected char:  h
new pattern:  d I my self am mortgaged to thy sore,
Th
pred:  3.3635433e-06
selected index

pred:  0.09585329
selected index:  39
selected char:  e
new pattern:   whet watt in howe, sure ert wast counte
pred:  0.027482398
selected index:  52
selected char:  r
new pattern:  whet watt in howe, sure ert wast counter
pred:  0.019261923
selected index:  1
selected char:   
new pattern:  het watt in howe, sure ert wast counter 
pred:  0.002356286
selected index:  49
selected char:  o
new pattern:  et watt in howe, sure ert wast counter o
pred:  1.06405205e-05
selected index:  57
selected char:  w
new pattern:  t watt in howe, sure ert wast counter ow
pred:  1.8550934e-05
selected index:  6
selected char:  ,
new pattern:   watt in howe, sure ert wast counter ow,
pred:  0.9340181
selected index:  0
selected char:  

new pattern:  watt in howe, sure ert wast counter ow,

pred:  0.0004402047
selected index:  12
selected char:  A
new pattern:  att in howe, sure ert wast counter ow,
A
pred:  2.3000219e-07
selected index:  48
selected char:  n
new pattern:  tt in howe, sure ert wast count

pred:  0.9377433
selected index:  0
selected char:  

new pattern:  ingie hath moke to my aid thee arthere,

pred:  0.00015649207
selected index:  12
selected char:  A
new pattern:  ngie hath moke to my aid thee arthere,
A
pred:  3.202751e-07
selected index:  48
selected char:  n
new pattern:  gie hath moke to my aid thee arthere,
An
pred:  3.8643606e-07
selected index:  38
selected char:  d
new pattern:  ie hath moke to my aid thee arthere,
And
pred:  2.272063e-06
selected index:  1
selected char:   
new pattern:  e hath moke to my aid thee arthere,
And 
pred:  1.7873908e-05
selected index:  55
selected char:  u
new pattern:   hath moke to my aid thee arthere,
And u
pred:  9.284577e-08
selected index:  52
selected char:  r
new pattern:  hath moke to my aid thee arthere,
And ur
pred:  4.3904e-06
selected index:  59
selected char:  y
new pattern:  ath moke to my aid thee arthere,
And ury
pred:  9.942474e-07
selected index:  1
selected char:   
new pattern:  th moke to my aid thee arther

pred:  2.124145e-05
selected index:  35
selected char:  a
new pattern:  te thut both sofrows of or mowh,
And sea
pred:  2.595443e-05
selected index:  43
selected char:  i
new pattern:  e thut both sofrows of or mowh,
And seai
pred:  9.435346e-06
selected index:  53
selected char:  s
new pattern:   thut both sofrows of or mowh,
And seais
pred:  6.1489627e-06
selected index:  1
selected char:   
new pattern:  thut both sofrows of or mowh,
And seais 
pred:  2.3563648e-07
selected index:  54
selected char:  t
new pattern:  hut both sofrows of or mowh,
And seais t
pred:  1.0661846e-10
selected index:  42
selected char:  h
new pattern:  ut both sofrows of or mowh,
And seais th
pred:  5.4097722e-08
selected index:  39
selected char:  e
new pattern:  t both sofrows of or mowh,
And seais the
pred:  2.2443542e-06
selected index:  1
selected char:   
new pattern:   both sofrows of or mowh,
And seais the 
pred:  3.0153574e-06
selected index:  54
selected char:  t
new pattern:  both sofrows of or m

pred:  3.7464977e-07
selected index:  49
selected char:  o
new pattern:  s ay my sove aid and loke,
Thes broot ho
pred:  4.238337e-06
selected index:  47
selected char:  m
new pattern:   ay my sove aid and loke,
Thes broot hom
pred:  3.2722915e-06
selected index:  1
selected char:   
new pattern:  ay my sove aid and loke,
Thes broot hom 
pred:  2.2813465e-05
selected index:  38
selected char:  d
new pattern:  y my sove aid and loke,
Thes broot hom d
pred:  5.048644e-06
selected index:  49
selected char:  o
new pattern:   my sove aid and loke,
Thes broot hom do
pred:  1.3901849e-06
selected index:  54
selected char:  t
new pattern:  my sove aid and loke,
Thes broot hom dot
pred:  2.4814785e-06
selected index:  42
selected char:  h
new pattern:  y sove aid and loke,
Thes broot hom doth
pred:  3.6774723e-06
selected index:  1
selected char:   
new pattern:   sove aid and loke,
Thes broot hom doth 
pred:  8.055353e-06
selected index:  54
selected char:  t
new pattern:  sove aid and loke,
T

pred:  6.8536605e-07
selected index:  39
selected char:  e
new pattern:   the art tho goul,
Thac of the byoun the
pred:  4.124464e-05
selected index:  1
selected char:   
new pattern:  the art tho goul,
Thac of the byoun the 
pred:  1.9749124e-05
selected index:  57
selected char:  w
new pattern:  he art tho goul,
Thac of the byoun the w
pred:  2.2929996e-06
selected index:  35
selected char:  a
new pattern:  e art tho goul,
Thac of the byoun the wa
pred:  8.476956e-07
selected index:  52
selected char:  r
new pattern:   art tho goul,
Thac of the byoun the war
pred:  6.2887834e-06
selected index:  54
selected char:  t
new pattern:  art tho goul,
Thac of the byoun the wart
pred:  1.5660733e-05
selected index:  39
selected char:  e
new pattern:  rt tho goul,
Thac of the byoun the warte
pred:  6.4761307e-06
selected index:  1
selected char:   
new pattern:  t tho goul,
Thac of the byoun the warte 
pred:  3.092295e-05
selected index:  54
selected char:  t
new pattern:   tho goul,
Thac of t

pred:  1.9008578e-06
selected index:  49
selected char:  o
new pattern:   woth the wort,
To thand the bee thend o
pred:  2.559862e-07
selected index:  40
selected char:  f
new pattern:  woth the wort,
To thand the bee thend of
pred:  2.1261835e-06
selected index:  1
selected char:   
new pattern:  oth the wort,
To thand the bee thend of 
pred:  1.0095865e-05
selected index:  36
selected char:  b
new pattern:  th the wort,
To thand the bee thend of b
pred:  1.5092568e-06
selected index:  52
selected char:  r
new pattern:  h the wort,
To thand the bee thend of br
pred:  1.3258965e-05
selected index:  49
selected char:  o
new pattern:   the wort,
To thand the bee thend of bro
pred:  2.8363e-05
selected index:  55
selected char:  u
new pattern:  the wort,
To thand the bee thend of brou
pred:  2.0577327e-05
selected index:  38
selected char:  d
new pattern:  he wort,
To thand the bee thend of broud
pred:  6.870972e-05
selected index:  1
selected char:   
new pattern:  e wort,
To thand the b

pred:  0.00024198837
selected index:  1
selected char:   
new pattern:  ate,
And sost the cencenou pour sichine 
pred:  0.012510012
selected index:  38
selected char:  d
new pattern:  te,
And sost the cencenou pour sichine d
pred:  0.00084277824
selected index:  39
selected char:  e
new pattern:  e,
And sost the cencenou pour sichine de
pred:  3.5863937e-05
selected index:  39
selected char:  e
new pattern:  ,
And sost the cencenou pour sichine dee
pred:  0.00012368393
selected index:  53
selected char:  s
new pattern:  
And sost the cencenou pour sichine dees
pred:  0.003923318
selected index:  53
selected char:  s
new pattern:  And sost the cencenou pour sichine deess
pred:  0.014951087
selected index:  6
selected char:  ,
new pattern:  nd sost the cencenou pour sichine deess,
pred:  0.9325322
selected index:  0
selected char:  

new pattern:  d sost the cencenou pour sichine deess,

pred:  0.00042918074
selected index:  13
selected char:  B
new pattern:   sost the cencenou pour sich

pred:  3.5060404e-07
selected index:  53
selected char:  s
new pattern:  tore thee ofream woll mome of tote,
Whas
pred:  3.2275263e-06
selected index:  1
selected char:   
new pattern:  ore thee ofream woll mome of tote,
Whas 
pred:  8.427135e-07
selected index:  59
selected char:  y
new pattern:  re thee ofream woll mome of tote,
Whas y
pred:  2.9262269e-08
selected index:  49
selected char:  o
new pattern:  e thee ofream woll mome of tote,
Whas yo
pred:  1.3658075e-09
selected index:  55
selected char:  u
new pattern:   thee ofream woll mome of tote,
Whas you
pred:  5.0967607e-08
selected index:  52
selected char:  r
new pattern:  thee ofream woll mome of tote,
Whas your
pred:  1.1168454e-05
selected index:  1
selected char:   
new pattern:  hee ofream woll mome of tote,
Whas your 
pred:  3.7365019e-06
selected index:  59
selected char:  y
new pattern:  ee ofream woll mome of tote,
Whas your y
pred:  3.918275e-07
selected index:  49
selected char:  o
new pattern:  e ofream woll mome 

pred:  1.3008528e-06
selected index:  35
selected char:  a
new pattern:  breath menth monet wile sell:
o me ho ba
pred:  0.00033902994
selected index:  43
selected char:  i
new pattern:  reath menth monet wile sell:
o me ho bai
pred:  0.0011343947
selected index:  52
selected char:  r
new pattern:  eath menth monet wile sell:
o me ho bair
pred:  0.0013027374
selected index:  39
selected char:  e
new pattern:  ath menth monet wile sell:
o me ho baire
pred:  4.7158548e-05
selected index:  38
selected char:  d
new pattern:  th menth monet wile sell:
o me ho baired
pred:  0.0005623834
selected index:  1
selected char:   
new pattern:  h menth monet wile sell:
o me ho baired 
pred:  0.00014744325
selected index:  54
selected char:  t
new pattern:   menth monet wile sell:
o me ho baired t
pred:  1.7964418e-06
selected index:  42
selected char:  h
new pattern:  menth monet wile sell:
o me ho baired th
pred:  4.5544362e-05
selected index:  39
selected char:  e
new pattern:  enth monet wile sel

pred:  0.000117878044
selected index:  1
selected char:   
new pattern:   thot beacend greed:
But pross ind hork 
pred:  1.5463933e-06
selected index:  49
selected char:  o
new pattern:  thot beacend greed:
But pross ind hork o
pred:  6.2151184e-06
selected index:  48
selected char:  n
new pattern:  hot beacend greed:
But pross ind hork on
pred:  2.9049543e-06
selected index:  1
selected char:   
new pattern:  ot beacend greed:
But pross ind hork on 
pred:  1.46212715e-05
selected index:  38
selected char:  d
new pattern:  t beacend greed:
But pross ind hork on d
pred:  2.6130732e-05
selected index:  39
selected char:  e
new pattern:   beacend greed:
But pross ind hork on de
pred:  7.3256146e-05
selected index:  35
selected char:  a
new pattern:  beacend greed:
But pross ind hork on dea
pred:  3.2933394e-06
selected index:  52
selected char:  r
new pattern:  eacend greed:
But pross ind hork on dear
pred:  7.173516e-05
selected index:  53
selected char:  s
new pattern:  acend greed:
But

pred:  2.8506678e-05
selected index:  54
selected char:  t
new pattern:   deare.
                 
Tha s yer th t
pred:  6.36516e-07
selected index:  42
selected char:  h
new pattern:  deare.
                 
Tha s yer th th
pred:  9.8799596e-08
selected index:  39
selected char:  e
new pattern:  eare.
                 
Tha s yer th the
pred:  8.2633405e-06
selected index:  1
selected char:   
new pattern:  are.
                 
Tha s yer th the 
pred:  1.5898899e-05
selected index:  36
selected char:  b
new pattern:  re.
                 
Tha s yer th the b
pred:  3.2938826e-07
selected index:  39
selected char:  e
new pattern:  e.
                 
Tha s yer th the be
pred:  2.1192582e-06
selected index:  53
selected char:  s
new pattern:  .
                 
Tha s yer th the bes
pred:  0.0002734785
selected index:  53
selected char:  s
new pattern:  
                 
Tha s yer th the bess
pred:  0.00024591258
selected index:  1
selected char:   
new pattern:                   
Th

hine,
And I my self am mortgaged to thy sore,
Than whet watt in howe, sure ert wast counter ow,
And ingie hath moke to my aid thee arthere,
And ury erte thut both sofrows of or mowh,
And seais the thas ay my sove aid and loke,
Thes broot hom doth the the art tho goul,
Thac of the byoun the warte the woth the wort,
To thand the bee thend of broud thate,
And sost the cencenou pour sichine deess,
Be tore thee ofream woll mome of tote,
Whas your you breath menth monet wile sell:
o me ho baired theer thot beacend greed:
But pross ind hork on dears on deare.
                 
Tha s yer th the bess be seart in moved gaect,
And siss in sees
