In [5]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [9]:
ls drive/'My Drive'/wonderland.txt

'drive/My Drive/wonderland.txt'


In [0]:
!cp drive/'My Drive'/wonderland.txt .

In [0]:
# Larger LSTM Network to Generate Text for Alice in Wonderland
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
# load ascii text and covert to lowercase
filename = "wonderland.txt"
raw_text = open(filename).read()
raw_text = raw_text.lower()

In [13]:
raw_text[:1000]

"project gutenberg's alice's adventures in wonderland, by lewis carroll\n\nthis ebook is for the use of anyone anywhere at no cost and with\nalmost no restrictions whatsoever.  you may copy it, give it away or\nre-use it under the terms of the project gutenberg license included\nwith this ebook or online at www.gutenberg.org\n\n\ntitle: alice's adventures in wonderland\n\nauthor: lewis carroll\n\nposting date: june 25, 2008 [ebook #11]\nrelease date: march, 1994\n[last updated: december 20, 2011]\n\nlanguage: english\n\n\n*** start of this project gutenberg ebook alice's adventures in wonderland ***\n\n\n\n\n\n\n\n\n\n\nalice's adventures in wonderland\n\nlewis carroll\n\nthe millennium fulcrum edition 3.0\n\n\n\n\nchapter i. down the rabbit-hole\n\nalice was beginning to get very tired of sitting by her sister on the\nbank, and of having nothing to do: once or twice she had peeped into the\nbook her sister was reading, but it had no pictures or conversations in\nit, 'and what is the u

In [0]:
import re
raw_text = re.sub(r'[^\w\s\t\n]','',raw_text)

In [15]:
# create mapping of unique chars to integers
chars = sorted(list(set(raw_text)))
chars

['\n',
 ' ',
 '0',
 '1',
 '2',
 '3',
 '4',
 '5',
 '6',
 '7',
 '8',
 '9',
 '_',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z']

In [16]:

chars = sorted(list(set(raw_text)))
char_to_int = dict((c, i) for i, c in enumerate(chars))
int_to_char = dict((i, c) for i, c in enumerate(chars))
# summarize the loaded data
n_chars = len(raw_text)
n_vocab = len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

Total Characters:  154863
Total Vocab:  39


In [19]:
sentence = raw_text.split("\n\n")
from keras.preprocessing.sequence import pad_sequences
sent = ([x for x in sentence if sum(c.isalpha() for c in x)>1])
dataX = []
dataY = []
seq_length = 101
for s in sent:
  if len(s)<101:
    x = [char_to_int[a] for a in s]
    x = pad_sequences([x], maxlen=seq_length)
    dataX.append([a for a in x[0][:-1]])
    dataY.append(x[0][-1])
  else:
    for i in range(0, len(s) - seq_length, 1):
      x = [char_to_int[a] for a in s[i:i + seq_length]] 
      dataX.append([a for a in x[:-1]])
      dataY.append(x[-1])
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

print(len(dataX))
X = numpy.reshape(numpy.array(dataX), (n_patterns, seq_length-1, 1))

X = X / float(n_vocab)

y = np_utils.to_categorical(dataY)      

Total Patterns:  82268
82268


In [0]:
# define the LSTM model
model = Sequential()
model.add(Dropout(0.1))
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(Dropout(0.1))
model.add(LSTM(256))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

filepath="Eip_Phase_2_ Assignment2-amit.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

In [30]:
# fit the model
model.fit(X, y, epochs=100, batch_size=4096, callbacks=callbacks_list)

Epoch 1/100

Epoch 00001: loss improved from 2.88216 to 2.86638, saving model to Eip_Phase_2_ Assignment2-amit.hdf5
Epoch 2/100

Epoch 00002: loss improved from 2.86638 to 2.85827, saving model to Eip_Phase_2_ Assignment2-amit.hdf5
Epoch 3/100

Epoch 00003: loss improved from 2.85827 to 2.85022, saving model to Eip_Phase_2_ Assignment2-amit.hdf5
Epoch 4/100

Epoch 00004: loss improved from 2.85022 to 2.84006, saving model to Eip_Phase_2_ Assignment2-amit.hdf5
Epoch 5/100

Epoch 00005: loss improved from 2.84006 to 2.82817, saving model to Eip_Phase_2_ Assignment2-amit.hdf5
Epoch 6/100

Epoch 00006: loss improved from 2.82817 to 2.81818, saving model to Eip_Phase_2_ Assignment2-amit.hdf5
Epoch 7/100

Epoch 00007: loss improved from 2.81818 to 2.80486, saving model to Eip_Phase_2_ Assignment2-amit.hdf5
Epoch 8/100

Epoch 00008: loss improved from 2.80486 to 2.79855, saving model to Eip_Phase_2_ Assignment2-amit.hdf5
Epoch 9/100

Epoch 00009: loss improved from 2.79855 to 2.79773, saving 

<keras.callbacks.History at 0x7f998a632eb8>

In [31]:
import sys
int_to_char = dict((i, c) for i, c in enumerate(chars))
start = numpy.random.randint(0, len(dataX)-1)
pattern = dataX[start]
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")

for i in range(700):
	x = numpy.reshape(pattern, (1, len(pattern), 1))
	x = x / float(n_vocab)
	prediction = model.predict(x, verbose=0)
	index = numpy.argmax(prediction)
	result = int_to_char[index]
	seq_in = [int_to_char[value] for value in pattern]
	sys.stdout.write(result)
	pattern.append(index)
	pattern = pattern[1:len(pattern)]
print("\nDone.")

Seed:
" acups as the march hare and his friends
shared their neverending meal and the shrill voice of the qu "
t and eroiet g ent  to tre of tr ani toe ennge and notereddl tu ani toe ennge and noteled ootele  tu enterriatid lotered ooteree toe er tr ani toe enn and notereddl tu ani toe ennge and notereddl tu ani toe ennge and notereddl tu ani toe ennge and notereddl tu ani toe ennge and notereddl tu ani toe ennge and notereddl tu ani toe ennge and notereddl tu ani toe ennge and notereddl tu ani toe ennge and notereddl tu ani toe ennge and notereddl tu ani toe ennge and notereddl tu ani toe ennge and notereddl tu ani toe ennge and notereddl tu ani toe ennge and notereddl tu ani toe ennge and notereddl tu ani toe ennge and notereddl tu ani toe ennge and notereddl tu ani toe ennge and notereddl tu ani t
Done.
