In [9]:
# Small LSTM Network to Generate Text for Alice in Wonderland
import numpy
import re
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import LSTM
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import to_categorical
# load ascii text and covert to lowercase
discordf = "../messages/discord-messages.txt"
discord = open(discordf, 'r', encoding='utf-8').read()
fbf = "../messages/facebook-messages.txt"
fb = open(fbf, 'r', encoding='utf-8').read()
essayf = "../messages/essays.txt"
essay = open(essayf, 'r', encoding='utf-8').read()
raw_text = discord.lower() + "\n" + fb.lower() + "\n" + essay.lower()
raw_text = raw_text.encode("ascii", "ignore").decode()#remove any non ascii characters.
raw_text = re.sub(r"[~#$%&*+;<=>\[\\^_\]`{|}0-9@/]","",raw_text)#strip out some ascii characters that aren't super important.
# create mapping of unique chars to integers
chars = sorted(list(set(raw_text)))
char_to_int = dict((c, i) for i, c in enumerate(chars))
#print(str(char_to_int))
int_to_char = dict((i, c) for i, c in enumerate(chars))
# summarize the loaded data
n_chars = len(raw_text)
n_vocab = len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)
# prepare the dataset of input to output pairs encoded as integers
seq_length = 300
dataX = []
dataY = []
for i in range(0, n_chars - seq_length, 1):
	seq_in = raw_text[i:i + seq_length]
	seq_out = raw_text[i + seq_length]
	dataX.append([char_to_int[char] for char in seq_in])
	dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)
# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# normalize
X = X / float(n_vocab)
# one hot encode the output variable
y = to_categorical(dataY)

Total Characters:  1529921
Total Vocab:  38
Total Patterns:  1529621


In [10]:
# define the LSTM model
model = Sequential()
model.add(LSTM(300, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(300, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(300, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(300))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')
#model.load_weights("checkpoints\weights-04-1.6629-bigger.hdf5")
# define the checkpoint
filepath="checkpoints/weights-{epoch:02d}-{loss:.4f}-bigger.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]
# fit the model
model.fit(X, y, epochs=16, batch_size=256, callbacks=callbacks_list)
model.save("model-full.h5")

Epoch 1/16
Epoch 00001: loss improved from inf to 2.31134, saving model to checkpoints\weights-01-2.3113-bigger.hdf5
Epoch 2/16
Epoch 00002: loss improved from 2.31134 to 1.78626, saving model to checkpoints\weights-02-1.7863-bigger.hdf5
Epoch 3/16
Epoch 00003: loss improved from 1.78626 to 1.67422, saving model to checkpoints\weights-03-1.6742-bigger.hdf5
Epoch 4/16
Epoch 00004: loss improved from 1.67422 to 1.61223, saving model to checkpoints\weights-04-1.6122-bigger.hdf5
Epoch 5/16
Epoch 00005: loss improved from 1.61223 to 1.57119, saving model to checkpoints\weights-05-1.5712-bigger.hdf5
Epoch 6/16
Epoch 00006: loss improved from 1.57119 to 1.54286, saving model to checkpoints\weights-06-1.5429-bigger.hdf5
Epoch 7/16
Epoch 00007: loss improved from 1.54286 to 1.52125, saving model to checkpoints\weights-07-1.5212-bigger.hdf5
Epoch 8/16
Epoch 00008: loss improved from 1.52125 to 1.50334, saving model to checkpoints\weights-08-1.5033-bigger.hdf5
Epoch 9/16
Epoch 00009: loss improve

In [4]:
model = Sequential()
model.add(LSTM(300, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(300, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(300, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(300))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')
model.load_weights("checkpoints\weights-13-1.4986-bigger.hdf5")
model.save('model-full.h5')

In [16]:
import tensorflow as tf
import sys
#load the lstm
model = tf.keras.models.load_model('model-full.h5')
# pick a random seed
start = numpy.random.randint(0, len(dataX)-1)
pattern = dataX[start]
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
print("\n\n")
temperature = 0.04
# generate characters
for i in range(1000):
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    predictions = model.predict(x, verbose=0)
    
    predictions = predictions / temperature
    predicted_i = tf.random.categorical(predictions,num_samples=1)[-1,0].numpy()
    index = numpy.argmax(predictions)
    
    #print("Index: " + str(index) + " Predicted temp:" + str(predicted_i))
    
    result = int_to_char[predicted_i]
    seq_in = [int_to_char[value] for value in pattern]
    sys.stdout.write(result)
    pattern.append(predicted_i)
    pattern = pattern[1:len(pattern)]
print("\nDone.")

Seed:
" l
starcraft 
didn't hear that, halo  was good times
its up there for me, probably behind  bf bad company  and tf
https:youtu.betjr-fcgjtg?tms byun in sc is probably the most impressed i've ever been with someone's skill at a game.
the first  mins where after where i linked is just crazy
oh yeah, sc  "



would be a thing to the problem of the ":nd then it used to be a bit xith the problem of the agriculture that stch a and not a lot of a decent point in the ma' and then completely the  discord is because it is ) i probably don't think it was your situations and really got the slilled in the de and the ezperiment for the problem is like a social :p
the control problem (for the s. i think it was a decent universal way to be very like ? not to be a : i need to be a bit sure that would be not in the derection in the s) i guess it was some , kind of completely many 
years ago in the sound video stuff that the first hard things me a few : xeah, i mean it was some pqoblem and then pro