<a href="https://colab.research.google.com/github/alexbrill/tf-train/blob/main/alice_generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
import glob
import sys

import numpy
from keras.callbacks import ModelCheckpoint
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.models import Sequential
from keras.utils import np_utils

In [2]:
# Download the Project Gutenberg plain-text file and save it locally as wonderland.txt (-O sets the output name).
!wget  https://www.gutenberg.org/files/11/11-0.txt -O wonderland.txt

--2021-05-27 14:47:51--  https://www.gutenberg.org/files/11/11-0.txt
Resolving www.gutenberg.org (www.gutenberg.org)... 152.19.134.47, 2610:28:3090:3000:0:bad:cafe:47
Connecting to www.gutenberg.org (www.gutenberg.org)|152.19.134.47|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 174313 (170K) [text/plain]
Saving to: ‘wonderland.txt’


2021-05-27 14:47:52 (841 KB/s) - ‘wonderland.txt’ saved [174313/174313]



In [3]:
# Read the downloaded book into memory as one lower-cased string.
filename = "wonderland.txt"

# A context manager guarantees the file handle is closed, and the explicit
# encoding avoids depending on the platform's default text encoding.
with open(filename, encoding="utf-8") as book_file:
    raw_text = book_file.read()

# Lower-casing folds upper- and lower-case letters into a single symbol.
raw_text = raw_text.lower()

In [4]:
# Vocabulary: every distinct character of the text, in sorted order.
chars = sorted(set(raw_text))
# Each character's integer id is its position in that sorted list.
char_to_int = {symbol: position for position, symbol in enumerate(chars)}

In [5]:
# Corpus length in characters and number of distinct characters.
n_chars, n_vocab = len(raw_text), len(chars)

print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

Total Characters:  164047
Total Vocab:  64


In [6]:
# Slide a fixed-width window over the text one character at a time:
# each window of `seq_length` character ids is an input, and the id of
# the character immediately after the window is its target.
seq_length = 100
dataX = []
dataY = []

for begin in range(n_chars - seq_length):
    end = begin + seq_length
    window, target = raw_text[begin:end], raw_text[end]
    dataX.append([char_to_int[symbol] for symbol in window])
    dataY.append(char_to_int[target])

n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  163947


In [7]:
# Inputs: arrange the windows as [samples, time steps, features] and
# divide by the vocabulary size to scale the integer ids.
X = numpy.array(dataX).reshape((n_patterns, seq_length, 1)) / float(n_vocab)

# Targets: one-hot encode the id of the character following each window.
y = np_utils.to_categorical(dataY)

In [8]:
# Single LSTM layer -> dropout -> softmax over the output classes.
# The input shape (time steps, features) is taken from X and the number
# of output units from the width of the one-hot targets.
model = Sequential([
    LSTM(256, input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [9]:
# Checkpointing: write the weights to a file named after the epoch and
# loss whenever the monitored training loss reaches a new minimum.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [10]:
# Train for 20 epochs in batches of 128, checkpointing through the callbacks.
model.fit(
    X,
    y,
    batch_size=128,
    epochs=20,
    callbacks=callbacks_list,
)

Epoch 1/20

Epoch 00001: loss improved from inf to 3.02561, saving model to weights-improvement-01-3.0256.hdf5
Epoch 2/20

Epoch 00002: loss improved from 3.02561 to 2.84716, saving model to weights-improvement-02-2.8472.hdf5
Epoch 3/20

Epoch 00003: loss improved from 2.84716 to 2.76522, saving model to weights-improvement-03-2.7652.hdf5
Epoch 4/20

Epoch 00004: loss improved from 2.76522 to 2.69747, saving model to weights-improvement-04-2.6975.hdf5
Epoch 5/20

Epoch 00005: loss improved from 2.69747 to 2.63658, saving model to weights-improvement-05-2.6366.hdf5
Epoch 6/20

Epoch 00006: loss improved from 2.63658 to 2.57903, saving model to weights-improvement-06-2.5790.hdf5
Epoch 7/20

Epoch 00007: loss improved from 2.57903 to 2.52419, saving model to weights-improvement-07-2.5242.hdf5
Epoch 8/20

Epoch 00008: loss improved from 2.52419 to 2.47626, saving model to weights-improvement-08-2.4763.hdf5
Epoch 9/20

Epoch 00009: loss improved from 2.47626 to 2.43437, saving model to weig

<keras.callbacks.History at 0x7fd5e81dba50>

In [12]:
# load the network weights
# Checkpoint names embed the epoch and the loss
# ("weights-improvement-<epoch>-<loss>.hdf5"), and the loss differs from run
# to run, so look the file up in the working directory instead of hard-coding
# an absolute path to one particular run's result.
checkpoint_files = glob.glob("weights-improvement-*.hdf5")
if not checkpoint_files:
    raise FileNotFoundError(
        "No weights-improvement-*.hdf5 checkpoint found; run the training cell first."
    )

# The loss is the last dash-separated field before the extension; the
# checkpoint with the smallest loss is the best one saved.
filename = min(
    checkpoint_files,
    key=lambda path: float(path[:-len(".hdf5")].rsplit("-", 1)[1]),
)
model.load_weights(filename)
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [13]:
# Reverse lookup: integer id -> character, for decoding predictions.
int_to_char = dict(enumerate(chars))

In [17]:
# pick a random seed
# randint's upper bound is exclusive, so len(dataX) lets every pattern be chosen.
start = numpy.random.randint(0, len(dataX))
# Copy the seed window: the loop below appends to `pattern`, and aliasing
# dataX[start] would leave an over-long row behind in the training data.
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")

# generate characters
for _ in range(1000):
    # Same [samples, time steps, features] layout and scaling as the training inputs.
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: take the id with the highest predicted score.
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Slide the window: add the new character id and drop the oldest one.
    pattern.append(index)
    pattern = pattern[1:]

print("\nDone.")

Seed:
" ut all he _said_
was, “why is a raven like a writing-desk?”

“come, we shall have some fun now!” tho "
 daded aliee. 
“ho  io you said to toe sooele,” shi gatter sepdiked.
“oh you dno’t then io wou cene to toe toens!”h
toiek to the whuh the mortle oate to the wound beli to the thetg was oo tee toiee and aooeer of the gorse tft ano hor herd the toeee tf the the gar in the toiee and aoo anr anr anr oo the toie, 
“what did t aade the corsouse then ”ou meke,” said the cat.ra liteing the woide afdin then at she was sorting to the thetg was oooeing an inr tiie, “ho wo leke the dormouser ”ou dad toe cane than i sae so toe then io the mortle  a dat oaid to toink to tee the woide and toeer the was afdin to the thitg was aoi aooiersnn the saeted and the pooe afd no the toiee and the soier of the toiee. and the woile the woile to the toeee of the toiee afdin, and the woide the woile to the toeee of the coort, “and then io whs do aoue of the moot of the moos if the mootte tai it then to to