In [1]:
# Load Larger LSTM network and generate text
import sys
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# load ascii text and convert to lowercase
filename = "trump.txt"
# Read inside a context manager so the file handle is closed as soon as the
# text has been loaded instead of being left open for the kernel's lifetime.
with open(filename, encoding = "ISO-8859-1") as text_file:
    raw_text = text_file.read()
# Lowercasing the text means upper- and lowercase letters share one vocabulary entry.
raw_text = raw_text.lower()

In [3]:
# Build the vocabulary (every distinct character, in sorted order) together
# with lookup tables in both directions: character -> id and id -> character.
chars = sorted(set(raw_text))
char_to_int = {symbol: idx for idx, symbol in enumerate(chars)}
int_to_char = dict(enumerate(chars))

In [4]:
# Report the corpus length and the number of distinct characters it contains.
n_chars, n_vocab = len(raw_text), len(chars)
print ("Total Characters: ", n_chars)
print ("Total Vocab: ", n_vocab)

Total Characters:  1269467
Total Vocab:  44


In [5]:
# prepare the dataset of input to output pairs encoded as integers
seq_length = 100
# Encode the whole text once up front, so each character is looked up a single
# time rather than once for every window it appears in.
encoded_text = [char_to_int[char] for char in raw_text]
dataX = []
dataY = []
for i in range(0, n_chars - seq_length, 1):
    # Input: seq_length consecutive character ids (the slice is a fresh list).
    dataX.append(encoded_text[i:i + seq_length])
    # Target: the id of the character immediately following that window.
    dataY.append(encoded_text[i + seq_length])
n_patterns = len(dataX)
print ("Total Patterns: ", n_patterns)

Total Patterns:  1269367


In [6]:
# Arrange the integer windows as [samples, time steps, features] and scale the
# ids by the vocabulary size; ids run from 0 to n_vocab - 1, so values land in [0, 1).
input_shape = (n_patterns, seq_length, 1)
X = numpy.reshape(dataX, input_shape) / float(n_vocab)
# One-hot encode the target character ids.
y = np_utils.to_categorical(dataY)

In [7]:
# Network architecture: two stacked 256-unit LSTM layers, each followed by
# dropout at rate 0.2, feeding a softmax layer with one output per column of y.
n_timesteps, n_features = X.shape[1], X.shape[2]
model = Sequential([
    # First LSTM returns the full sequence so the second LSTM can consume it.
    LSTM(256, input_shape=(n_timesteps, n_features), return_sequences=True),
    Dropout(0.2),
    LSTM(256),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])

In [8]:
# Restore the trained weights from the saved checkpoint file, then compile
# the model with the Adam optimizer and categorical cross-entropy loss.
filename = "checkpoint-10-1.6465-trump.hdf5"
model.load_weights(filename)
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [9]:
# pick a random seed
# randint's upper bound is exclusive, so pass len(dataX) itself to make every
# pattern (including the last one) eligible as a seed.
start = numpy.random.randint(0, len(dataX))
# Copy the chosen window: `pattern` is modified during generation, and without
# the copy an in-place change would alter the dataset entry dataX[start].
pattern = list(dataX[start])
print ("Seed:")
print ("\"", ''.join([int_to_char[value] for value in pattern]), "\"")

Seed:
" uild up our communities to serve our citizens and to celebrate american greatness as a shining examp "


In [10]:
def sample_prediction(prediction):
    """Draw a random index from a predicted probability distribution.

    Sampling from the distribution (rather than always taking the argmax)
    helps to avoid the problem of repeated outputs.

    Params
    ------
    prediction (array (array)): array of length 1 whose single element is an
        array of probabilities that sums to approximately 1.

    Returns
    -------
    rnd_idx (int): random index into prediction[0], drawn with probability
        proportional to the value stored at that index.

    Raises
    ------
    ValueError: if the probabilities do not have a finite, positive sum.
    """
    # Work in float64 and renormalise: numpy.random.choice rejects a `p` whose
    # sum differs from 1 by more than a small tolerance, which floating-point
    # rounding in the model output can trigger.
    probs = numpy.asarray(prediction[0], dtype=numpy.float64)
    total = probs.sum()
    if not numpy.isfinite(total) or total <= 0:
        raise ValueError("prediction[0] must have a finite, positive sum")
    probs = probs / total
    rnd_idx = numpy.random.choice(len(probs), p=probs)
    # Return a plain Python int rather than a numpy integer scalar.
    return int(rnd_idx)

# Generate 1000 characters one at a time, feeding each sampled character back
# in as the newest element of a sliding input window.
for step in range(1000):
    # Shape the current window as [samples, time steps, features] and scale it
    # by the vocabulary size before passing it to the model.
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Sample from the predicted distribution rather than taking the argmax.
    index = sample_prediction(prediction)
    result = int_to_char[index]
    sys.stdout.write(result)
    # Slide the window by building a new list (drop the oldest id, add the new
    # one) instead of appending in place, so whatever list `pattern` was
    # originally bound to is never mutated.
    pattern = list(pattern[1:]) + [index]
print ("\nDone.")

nnee about eefnieammer bra  and coneedt center vote thanks the tsue ree louerson  reiisinul have from uhere kiby wat sense hnneciate how more people the press hn shows for iipsdlamd naxt
you all of luch
donald trump america bnokite wwl
jaz jelpy
atruail rasings he surprisl iuc navion confress and toual job he will yso a mess bodfamain on and i have insrppe  ttummao cameway ado democrats sone ilmegal immigrantsiyeehny offocuu show as the wifes probe
make america great again celebagpe bod fridt 1billion gites out and ge
also is newt markypiletouz miami pnotly cvs a vhate 0prisice you leady and is totgh a tptonse oewaydovmeamly vhete is abrolutely winning koin
je
i was not you political secsets trump contrantly a now thome i am stated resurns are necdssary pustsi just amways fame we are tie time to medteheoe pilekits to it time more money
i will gr textsitg amp more
pn acme are pbid dasaleam moneay positive on edonomitaling crugs
but having years redord fftsorie man to csine everyone the 