In [1]:
import string

# load doc into memory
def load_doc(filename):
	# open the file as read only
	file = open(filename, 'r', encoding='utf-8')
	# read all text
	text = file.read()
	# close the file
	file.close()
	return text

# turn a doc into clean tokens
def clean_doc(doc):
	# replace '--' with a space ' '
	doc = doc.replace('--', ' ')
	# split into tokens by white space
	tokens = doc.split()
	# remove punctuation from each token
	table = str.maketrans('', '', string.punctuation)
	tokens = [w.translate(table) for w in tokens]
	# remove remaining tokens that are not alphabetic
	tokens = [word for word in tokens if word.isalpha()]
	# make lower case
	tokens = [word.lower() for word in tokens]
	return tokens

# save tokens to file, one dialog per line
def save_doc(lines, filename):
	data = '\n'.join(lines)
	file = open(filename, 'w', )
	file.write(data)
	file.close()

# load document
in_filename = 'conan.txt'
doc = load_doc(in_filename)
print(doc[:200])

# clean document
tokens = clean_doc(doc)
print(tokens[:200])
print('Total Tokens: %d' % len(tokens))
print('Unique Tokens: %d' % len(set(tokens)))

# organize into sequences of tokens
length = 50 + 1
sequences = list()
for i in range(length, len(tokens)):
	# select sequence of tokens
	seq = tokens[i-length:i]
	# convert into a line
	line = ' '.join(seq)
	# store
	sequences.append(line)
print('Total Sequences: %d' % len(sequences))

# save sequences to file
out_filename = 'conan_rew.txt'
save_doc(sequences, out_filename)

﻿Of that epoch known by the Nemedian chroniclers as the Pre-Cataclysmic
Age, little is known except the latter part, and that is veiled in the
mists of legendry. Known history begins with the waning o
['that', 'epoch', 'known', 'by', 'the', 'nemedian', 'chroniclers', 'as', 'the', 'precataclysmic', 'age', 'little', 'is', 'known', 'except', 'the', 'latter', 'part', 'and', 'that', 'is', 'veiled', 'in', 'the', 'mists', 'of', 'legendry', 'known', 'history', 'begins', 'with', 'the', 'waning', 'of', 'the', 'precataclysmic', 'civilization', 'dominated', 'by', 'the', 'kingdoms', 'of', 'kamelia', 'valusia', 'verulia', 'grondar', 'thule', 'and', 'commoria', 'these', 'peoples', 'spoke', 'a', 'similar', 'language', 'arguing', 'a', 'common', 'origin', 'there', 'were', 'other', 'kingdoms', 'equally', 'civilized', 'but', 'inhabited', 'by', 'different', 'and', 'apparently', 'older', 'races', 'the', 'barbarians', 'of', 'that', 'age', 'were', 'the', 'picts', 'who', 'lived', 'on', 'islands', 'far', 'out',

In [3]:
from numpy import array
from pickle import dump
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM, Dropout
from keras.layers import Embedding
from keras.callbacks import ModelCheckpoint

# load doc into memory
def load_doc(filename):
	# open the file as read only
	file = open(filename, 'r')
	# read all text
	text = file.read()
	# close the file
	file.close()
	return text

# load
in_filename = 'conan_rew.txt'
doc = load_doc(in_filename)
lines = doc.split('\n')

# integer encode sequences of words
tokenizer = Tokenizer()
tokenizer.fit_on_texts(lines)
sequences = tokenizer.texts_to_sequences(lines)
# vocabulary size
vocab_size = len(tokenizer.word_index) + 1

# separate into input and output
sequences = array(sequences)
X, y = sequences[:,:-1], sequences[:,-1]
y = to_categorical(y, num_classes=vocab_size)
seq_length = X.shape[1]

# define model
model = Sequential()
model.add(Embedding(vocab_size, 100, input_length=seq_length))
model.add(LSTM(256)) #, return_sequences=True))
#model.add(LSTM(100))
#model.add(Dense(100, activation='relu'))
model.add(Dropout(0.6))
model.add(Dense(vocab_size, activation='softmax'))
print(model.summary())
# compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit model


# define the checkpoint
filepath="weights-words-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

model.fit(X, y, batch_size=32, epochs=100, callbacks=callbacks_list)

# save the model to file
#model.save('model_conan.h5')
# save the tokenizer
dump(tokenizer, open('conan.pkl', 'wb'))

KeyboardInterrupt: 

In [4]:
from random import randint
from pickle import load
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences

# load doc into memory
def load_doc(filename):
	# open the file as read only
	file = open(filename, 'r')
	# read all text
	text = file.read()
	# close the file
	file.close()
	return text

# generate a sequence from a language model
def generate_seq(model, tokenizer, seq_length, seed_text, n_words):
	result = list()
	in_text = seed_text
	# generate a fixed number of words
	for _ in range(n_words):
		# encode the text as integer
		encoded = tokenizer.texts_to_sequences([in_text])[0]
		# truncate sequences to a fixed length
		encoded = pad_sequences([encoded], maxlen=seq_length, truncating='pre')
		# predict probabilities for each word
		yhat = model.predict_classes(encoded, verbose=0)
		# map predicted word index to word
		out_word = ''
		for word, index in tokenizer.word_index.items():
			if index == yhat:
				out_word = word
				break
		# append to input
		in_text += ' ' + out_word
		result.append(out_word)
	return ' '.join(result)

# load cleaned text sequences
in_filename = 'conan_rew.txt'
doc = load_doc(in_filename)
lines = doc.split('\n')
seq_length = len(lines[0].split()) - 1

# load the model
model = load_model('weights-words-28-1.7095.hdf5')

# load the tokenizer
tokenizer = load(open('conan.pkl', 'rb'))

# select a seed text
seed_text = lines[randint(0,len(lines))]
print(seed_text + '\n')

# generate new text
generated = generate_seq(model, tokenizer, seq_length, seed_text, 200)
print(generated)

W0723 10:18:45.500301 14600 deprecation_wrapper.py:119] From E:\Anaconda\lib\site-packages\keras\backend\tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0723 10:18:45.525233 14600 deprecation_wrapper.py:119] From E:\Anaconda\lib\site-packages\keras\backend\tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0723 10:18:45.711734 14600 deprecation_wrapper.py:119] From E:\Anaconda\lib\site-packages\keras\backend\tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0723 10:18:51.488274 14600 deprecation_wrapper.py:119] From E:\Anaconda\lib\site-packages\keras\backend\tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

W0723 10:18:51.496253 14600 deprecation.py:506] From E:\Anaconda\lib\site-packages\keras\backend\t

naked slavegirls who were the daughters of kings conquest and the acquiring of wealth altered not the pict out of the ruins of the crushed civilization no new culture arose phoenixlike the dark hands which shattered the artistic glories of the conquered never tried to copy them though he sat among

the glittering ruins of shattered palaces and clad in gorm gorm by their copper axes but a hundred years later in the seventyfive years of the amazons the kushites the atlaians and the hybrid empire of zembabwe between the north the lemurians are evolving in distorted folklore and the pictish empire where he had been savage first thrust in the north of the hyborians have urged that the name of the world to the south the lemurians are toiling as slaves of the blond arctic savages have driven the remaining hyborian tribes are beginning to be harried by gigantic blond savages the earlier pharaohs boasted descent the western world have dominated to the eastern empire of aquilonia and this mixing