In [1]:
# Silence every Python warning for the rest of the session so that library
# notices do not clutter the cell outputs.
# NOTE(review): this is a blanket filter, so it also hides warnings that may
# matter; consider narrowing it to specific categories.
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load LSTM network and generate text
import sys
import numpy
from keras.models import Sequential
from keras.layers import Dense,Dropout,LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
import pandas as pd
import glob
import os

Using TensorFlow backend.


In [3]:
def _newest_new_checkpoint(pattern, previous_mtimes):
    """Return the newest checkpoint written since ``previous_mtimes`` was taken.

    pattern: glob pattern that matches the checkpoint files.
    previous_mtimes: dict of path -> modification time recorded before training.

    Returns the path with the greatest modification time among the files that
    are new or whose modification time changed, or None if there is none.
    """
    fresh = []
    for path in glob.glob(pattern):
        mtime = os.path.getmtime(path)
        if previous_mtimes.get(path) != mtime:
            fresh.append((mtime, path))
    if not fresh:
        return None
    return max(fresh)[1]


def write_weights(raw_text,seq_length,epoch_num,num,verbose=0):
    """Train a character-level LSTM on ``raw_text`` and generate sample texts.

    Parameters:
        raw_text: the training corpus as a single string.
        seq_length: number of characters in each input window; also the number
            of characters generated for each sample.
        epoch_num: number of training epochs.
        num: number of random seeds to draw, i.e. number of texts to generate.
        verbose: when greater than 0, print each seed and each generated text.

    Returns:
        A list of ``num`` strings, each ``seq_length`` characters long, produced
        by repeatedly taking the most probable next character.

    Raises:
        ValueError: if ``seq_length`` is smaller than 1 or ``raw_text`` is not
            longer than ``seq_length`` (no training pattern could be built).

    Side effects:
        Prints a data summary, and writes a ``weights-improvement-*.hdf5``
        checkpoint into the current directory whenever the training loss
        improves.
    """
    # get chars in raw_text and the char <-> int mappings
    chars = sorted(set(raw_text))
    char_to_int = {c: i for i, c in enumerate(chars)}
    int_to_char = {i: c for i, c in enumerate(chars)}

    # print out summary of data
    n_chars = len(raw_text)
    n_vocab = len(chars)
    print("Total Characters: ", n_chars)
    print("Total Vocab (Unique Characters): ", n_vocab)

    # fail early with a clear message instead of training on zero patterns
    if seq_length < 1 or n_chars <= seq_length:
        raise ValueError(
            "raw_text must be longer than seq_length (got %d characters, seq_length=%d)"
            % (n_chars, seq_length)
        )

    # X is every window of seq_length characters, Y is the character that follows it
    dataX = []
    dataY = []
    for i in range(0, n_chars - seq_length, 1):
        seq_in = raw_text[i:i + seq_length]
        seq_out = raw_text[i + seq_length]
        dataX.append([char_to_int[char] for char in seq_in])
        dataY.append(char_to_int[seq_out])
    n_patterns = len(dataX)
    # number of patterns is the number of text sequences of length seq_length we have
    print("Total Patterns: ", n_patterns,'\n')

    # reshape X to be [samples, time steps, features]
    X = numpy.reshape(dataX, (n_patterns, seq_length, 1))

    # normalize X (really simple scaler)
    X = X / float(n_vocab)

    # one hot encode the output; num_classes pins the width to the vocabulary
    # even when the highest-indexed character never appears as a target
    y = np_utils.to_categorical(dataY, num_classes=n_vocab)

    # define the LSTM model
    model = Sequential()
    model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2])))
    model.add(Dropout(0.2))
    model.add(Dense(y.shape[1], activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam')

    # define the checkpoint, make file names standard
    filepath="weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
    checkpoint_glob = "weights-improvement-*.hdf5"
    # remember what is already on disk so stale files from earlier runs are ignored
    previous_mtimes = dict((p, os.path.getmtime(p)) for p in glob.glob(checkpoint_glob))
    checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
    callbacks_list = [checkpoint]

    # fit the model
    model.fit(X, y, epochs=epoch_num, batch_size=128, callbacks=callbacks_list)

    # Restore the best (lowest-loss) weights of THIS run. Checkpoints are looked
    # up where ModelCheckpoint wrote them (the current directory) rather than in
    # a hard-coded folder, and are loaded into the model that was just trained,
    # which already has the matching architecture.
    best_checkpoint = _newest_new_checkpoint(checkpoint_glob, previous_mtimes)
    if best_checkpoint is not None:
        model.load_weights(best_checkpoint)

    # pick a random seed num times (new one for each text generated)
    seeds = []
    for n in range(num):
        # randint's upper bound is exclusive, so n_patterns lets every pattern be chosen
        start = numpy.random.randint(0, n_patterns)
        # copy, so generation never mutates the training patterns
        pattern = list(dataX[start])
        if verbose > 0:
            s = ''.join([int_to_char[value] for value in pattern])
            print("Seed #%d:" %(n+1))
            print("\"", s, "\"")
        seeds.append(pattern)

    gens = []
    for seed in seeds:
        pattern = list(seed)
        # generate seq_length characters; the window slides by one each step, so
        # at the end it holds only generated characters
        for _ in range(seq_length):
            x = numpy.reshape(pattern, (1, len(pattern), 1))
            x = x / float(n_vocab)
            prediction = model.predict(x, verbose=0)
            index = int(numpy.argmax(prediction))
            pattern = pattern[1:] + [index]

        gens.append(pattern)

    texts = [''.join([int_to_char[value] for value in text]) for text in gens]
    if verbose > 0:
        for text in texts:
            print(text)

    return texts

In [4]:
#RStories RNN
# Build the RStories corpus from every .txt file under rStories/, each file's
# text preceded by a single space.
# sorted() makes the concatenation order reproducible; glob itself returns
# paths in arbitrary order.
txt_files = sorted(glob.glob("rStories/*.txt"))
story_parts = []
for story_path in txt_files:
    # the with-block closes the handle even if read() raises
    with open(story_path, encoding='utf-8') as story_file:
        story_parts.append(' ' + story_file.read())
# join once instead of growing the string inside the loop
rstories_fulltext = ''.join(story_parts)

In [5]:
# Flatten the corpus: newlines become spaces, runs of eight spaces are removed,
# and leading/trailing whitespace is trimmed.
rstories_fulltext = (
    rstories_fulltext
    .replace('\n', ' ')
    .replace('        ', '')
    .strip()
)

In [6]:
# Train on the RStories corpus with 100-character windows for 200 epochs,
# then generate 10 sample texts (seeds and results are printed).
rstories_texts = write_weights(
    rstories_fulltext,
    seq_length=100,
    epoch_num=200,
    num=10,
    verbose=1,
)

Total Characters:  26885
Total Vocab (Unique Characters):  66
Total Patterns:  26785 

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Epoch 1/200

Epoch 00001: loss improved from inf to 3.13311, saving model to weights-improvement-01-3.1331.hdf5
Epoch 2/200

Epoch 00002: loss improved from 3.13311 to 3.07360, saving model to weights-improvement-02-3.0736.hdf5
Epoch 3/200

Epoch 00003: loss improved from 3.07360 to 3.03900, saving model to weights-improvement-03-3.0390.hdf5
Epoch 4/200

Epoch 00004: loss improved from 3.03900 to 2.94614, saving model to weights-improvement-04-2.9461.hdf5
Epoch 5/200

Epoch 00005: loss improved from 2.94614 to 2.89636, saving model to weights-improvement-05-2.8964.hdf5
Epoch 6/200

Epoch 00006: loss improved from 2.89636 to 2.87258, saving model to weights-improvement-0


Epoch 00038: loss improved from 2.23584 to 2.19726, saving model to weights-improvement-38-2.1973.hdf5
Epoch 39/200

Epoch 00039: loss improved from 2.19726 to 2.16530, saving model to weights-improvement-39-2.1653.hdf5
Epoch 40/200

Epoch 00040: loss improved from 2.16530 to 2.12591, saving model to weights-improvement-40-2.1259.hdf5
Epoch 41/200

Epoch 00041: loss improved from 2.12591 to 2.09317, saving model to weights-improvement-41-2.0932.hdf5
Epoch 42/200

Epoch 00042: loss improved from 2.09317 to 2.06771, saving model to weights-improvement-42-2.0677.hdf5
Epoch 43/200

Epoch 00043: loss improved from 2.06771 to 2.03084, saving model to weights-improvement-43-2.0308.hdf5
Epoch 44/200

Epoch 00044: loss improved from 2.03084 to 2.00364, saving model to weights-improvement-44-2.0036.hdf5
Epoch 45/200

Epoch 00045: loss improved from 2.00364 to 1.97661, saving model to weights-improvement-45-1.9766.hdf5
Epoch 46/200

Epoch 00046: loss improved from 1.97661 to 1.93698, saving mode


Epoch 00082: loss improved from 1.25437 to 1.24042, saving model to weights-improvement-82-1.2404.hdf5
Epoch 83/200

Epoch 00083: loss did not improve from 1.24042
Epoch 84/200

Epoch 00084: loss did not improve from 1.24042
Epoch 85/200

Epoch 00085: loss improved from 1.24042 to 1.21638, saving model to weights-improvement-85-1.2164.hdf5
Epoch 86/200

Epoch 00086: loss did not improve from 1.21638
Epoch 87/200

Epoch 00087: loss improved from 1.21638 to 1.17538, saving model to weights-improvement-87-1.1754.hdf5
Epoch 88/200

Epoch 00088: loss improved from 1.17538 to 1.16270, saving model to weights-improvement-88-1.1627.hdf5
Epoch 89/200

Epoch 00089: loss improved from 1.16270 to 1.15390, saving model to weights-improvement-89-1.1539.hdf5
Epoch 90/200

Epoch 00090: loss did not improve from 1.15390
Epoch 91/200

Epoch 00091: loss did not improve from 1.15390
Epoch 92/200

Epoch 00092: loss did not improve from 1.15390
Epoch 93/200

Epoch 00093: loss did not improve from 1.15390
E


Epoch 00191: loss did not improve from 0.76817
Epoch 192/200

Epoch 00192: loss did not improve from 0.76817
Epoch 193/200

Epoch 00193: loss did not improve from 0.76817
Epoch 194/200

Epoch 00194: loss did not improve from 0.76817
Epoch 195/200

Epoch 00195: loss did not improve from 0.76817
Epoch 196/200

Epoch 00196: loss did not improve from 0.76817
Epoch 197/200

Epoch 00197: loss did not improve from 0.76817
Epoch 198/200

Epoch 00198: loss did not improve from 0.76817
Epoch 199/200

Epoch 00199: loss did not improve from 0.76817
Epoch 200/200

Epoch 00200: loss did not improve from 0.76817
Seed #1:
" him could have died. If he had his choice, Death knew Reckless would have preferred to die on one of "
Seed #2:
" kly caked on her face, and her hair was a greasy shade of blonde that could have only come from a bo "
Seed #3:
" ght was clear and cold. Above him, stars sparkled in the dark blue sky, the pale moon shining. Reckl "
Seed #4:
" ises," I said.   This brought me to where

In [7]:
# Read the whole of hhg.txt; the with-block closes the file even if read() fails.
# NOTE(review): no encoding is passed, so the platform default is used, whereas
# the rStories files are opened as utf-8 — confirm the file's actual encoding.
f = 'hhg.txt'
with open(f) as file:
    hhg_fulltext = file.read()

In [8]:
# Drop the title/author header, then turn every remaining newline into a space.
hhg_fulltext = (
    hhg_fulltext
    .replace('THE HITCHHIKER\'S GUIDE TO THE GALAXY\nBY DOUGLAS ADAMS\n', '')
    .replace('\n', ' ')
)

In [9]:
# Train on the hhg text with 10-character windows for 200 epochs,
# then generate 25 sample texts (seeds and results are printed).
hhg_texts = write_weights(
    hhg_fulltext,
    seq_length=10,
    epoch_num=200,
    num=25,
    verbose=1,
)

Total Characters:  266523
Total Vocab (Unique Characters):  86
Total Patterns:  266513 

Epoch 1/200

Epoch 00001: loss improved from inf to 3.00484, saving model to weights-improvement-01-3.0048.hdf5
Epoch 2/200

Epoch 00002: loss improved from 3.00484 to 2.84868, saving model to weights-improvement-02-2.8487.hdf5
Epoch 3/200

Epoch 00003: loss improved from 2.84868 to 2.78184, saving model to weights-improvement-03-2.7818.hdf5
Epoch 4/200

Epoch 00004: loss improved from 2.78184 to 2.73365, saving model to weights-improvement-04-2.7337.hdf5
Epoch 5/200

Epoch 00005: loss improved from 2.73365 to 2.68870, saving model to weights-improvement-05-2.6887.hdf5
Epoch 6/200

Epoch 00006: loss improved from 2.68870 to 2.64723, saving model to weights-improvement-06-2.6472.hdf5
Epoch 7/200

Epoch 00007: loss improved from 2.64723 to 2.60862, saving model to weights-improvement-07-2.6086.hdf5
Epoch 8/200

Epoch 00008: loss improved from 2.60862 to 2.57275, saving model to weights-improvement-08


Epoch 00084: loss improved from 1.79119 to 1.78478, saving model to weights-improvement-84-1.7848.hdf5
Epoch 85/200

Epoch 00085: loss did not improve from 1.78478
Epoch 86/200

Epoch 00086: loss improved from 1.78478 to 1.78257, saving model to weights-improvement-86-1.7826.hdf5
Epoch 87/200

Epoch 00087: loss improved from 1.78257 to 1.77752, saving model to weights-improvement-87-1.7775.hdf5
Epoch 88/200

Epoch 00088: loss improved from 1.77752 to 1.77486, saving model to weights-improvement-88-1.7749.hdf5
Epoch 89/200

Epoch 00089: loss improved from 1.77486 to 1.77047, saving model to weights-improvement-89-1.7705.hdf5
Epoch 90/200

Epoch 00090: loss improved from 1.77047 to 1.76822, saving model to weights-improvement-90-1.7682.hdf5
Epoch 91/200

Epoch 00091: loss improved from 1.76822 to 1.76572, saving model to weights-improvement-91-1.7657.hdf5
Epoch 92/200

Epoch 00092: loss improved from 1.76572 to 1.76409, saving model to weights-improvement-92-1.7641.hdf5
Epoch 93/200

Ep


Epoch 00171: loss did not improve from 1.60538
Epoch 172/200

Epoch 00172: loss improved from 1.60538 to 1.60316, saving model to weights-improvement-172-1.6032.hdf5
Epoch 173/200

Epoch 00173: loss did not improve from 1.60316
Epoch 174/200

Epoch 00174: loss improved from 1.60316 to 1.60053, saving model to weights-improvement-174-1.6005.hdf5
Epoch 175/200

Epoch 00175: loss improved from 1.60053 to 1.59974, saving model to weights-improvement-175-1.5997.hdf5
Epoch 176/200

Epoch 00176: loss improved from 1.59974 to 1.59908, saving model to weights-improvement-176-1.5991.hdf5
Epoch 177/200

Epoch 00177: loss improved from 1.59908 to 1.59719, saving model to weights-improvement-177-1.5972.hdf5
Epoch 178/200

Epoch 00178: loss improved from 1.59719 to 1.59655, saving model to weights-improvement-178-1.5966.hdf5
Epoch 179/200

Epoch 00179: loss improved from 1.59655 to 1.59521, saving model to weights-improvement-179-1.5952.hdf5
Epoch 180/200

Epoch 00180: loss did not improve from 1.5