# Char RNN Exploration
***

## 1. Import Text Data

In [40]:
# Start from empty strings: `total` accumulates the corpus text and `data`
# holds the text of the file read most recently.
total = data = ""

In [41]:
import os

# Folder that holds one plain-text file per book.
directory = "texts/individual/"

# Both results are rebuilt from scratch so that re-running this cell does not
# append the corpus to itself.
files = []
texts = []
for filename in os.listdir(directory):
    # Ignore anything that is not a .txt file. Only files that were actually
    # read contribute text, so a stray entry can no longer cause the previous
    # book to be appended a second time.
    if not filename.endswith(".txt"):
        continue
    with open(os.path.join(directory, filename), "r") as myfile:
        # Flatten the book onto a single line.
        data = myfile.read().replace('\n', ' ')
    files.append(filename)
    texts.append(data)

# Concatenate the books in the order they were read.
total = "".join(texts)

In [42]:
# Normalise the corpus. Each (old, new) pair is applied in the order listed.
# NOTE(review): the first two patterns are the UTF-8 byte sequences for
# U+2019 (right single quote) and U+2026 (ellipsis); they only match if
# `total` is a byte string (Python 2) - TODO confirm the interpreter version.
replacements = (
    ("\xe2\x80\x99", "\'"),   # curly apostrophe -> plain apostrophe
    ("\xe2\x80\xa6", "..."),  # single ellipsis character -> three dots
    ("\\", ""),               # drop backslashes
)
for old, new in replacements:
    total = total.replace(old, new)
# Convert to lowercase
total = total.lower()

In [43]:
# Names of the .txt files that were read, in the order they were appended.
files

['the-cat-in-a-hat.txt',
 'horton-hears-a-who.txt',
 'ten-apples-up-on-top.txt',
 'happy-birthday-to-you.txt',
 'if-i-ran-the-zoo.txt',
 'the-butter-battle-book.txt',
 'the-foot-book.txt',
 'oh-the-places-youll-go.txt',
 'the-sneetches.txt',
 'the-lorax.txt',
 'marvin-k-mooney-will-you-please-go-now.txt',
 'wocket-in-my-pocket.txt',
 'hop-on-pop.txt',
 'how-the-grinch-stole-christmas.txt',
 'one-fish-two-fish-red-fish-blue-fish.txt',
 'mr-brown-can-moo-can-you.txt',
 'fox-in-socks.txt',
 'green-eggs-and-ham.txt']

In [44]:
# Display the corpus string built above.
total

'the sun did not shine. it was too wet to play. so we sat in the house all that cold, cold, wet day.  i sat there with sally. we sat there, we two. and i said, "how i wish we had something to do!"  too wet to go out and too cold to play ball. so we sat in the house. we did nothing at all.  so all we could do was to  sit! sit! sit! sit!  and we did not like it. not one little bit.  bump!  and then something went bump! how that bump made us jump!  we looked! then we saw him step in on the mat! we looked! and we saw him! the cat in the hat! and he said to us, "why do you sit there like that?" "i know it is wet and the sun is not sunny. but we can have lots of good fun that is funny!"  "i know some good games we could play," said the cat. "i know some new tricks," said the cat in the hat. "a lot of good tricks. i will show them to you. your mother will not mind at all if i do."  then sally and i did not know what to say. our mother was out of the house for the day.  but our fish said, "no!

In [45]:
# Length of the corpus string.
len(total)

81073

## 2. Create Char Dictionary

In [6]:
def create_dict(texts):
    """Count how many times each item of `texts` occurs.

    Iterating over a string yields its characters, so passing the corpus
    string produces a character -> frequency mapping.

    Args:
        texts: any iterable of hashable items (typically a string).

    Returns:
        A plain dict mapping each distinct item to its number of occurrences.
    """
    counts = {}
    for char in texts:
        counts[char] = counts.get(char, 0) + 1
    return counts

In [7]:
import operator
# Character frequencies for the whole corpus.
d = create_dict(total)
# (character, count) pairs ordered by count, largest first;
# itemgetter(1) picks the count out of each pair.
sorted_d = sorted(d.items(), key=operator.itemgetter(1), reverse=True)

In [8]:
# Number of distinct characters found in the corpus.
print('Dictionary Size: %d' % len(sorted_d))

Dictionary Size: 46


In [9]:
# Show every (character, count) pair, most frequent first.
print('Dictionary:\n %s' % sorted_d)

Dictionary:
 [(' ', 7706), ('e', 2920), ('t', 2509), ('o', 2487), ('a', 2039), ('h', 1949), ('i', 1825), ('n', 1737), ('s', 1608), ('l', 1304), ('r', 1105), ('d', 1089), ('w', 898), ('u', 877), ('.', 822), ('m', 694), ('y', 644), ('c', 585), ('g', 532), ('k', 523), ('b', 447), (',', 429), ('p', 415), ('f', 414), ('!', 328), ('"', 184), ("'", 173), ('v', 126), ('x', 90), ('?', 84), ('-', 61), ('z', 35), ('j', 30), ('q', 21), (':', 2), ('\xef', 1), (')', 1), ('(', 1), ('/', 1), ('3', 1), ('4', 1), ('9', 1), ('8', 1), (';', 1), ('\xbf', 1), ('\xbd', 1)]


## 3. Vectorize on Char Dict

In [10]:
# Number of consecutive characters in each input window; the character that
# follows the window is used as its label.
SEQ_LENGTH = 100

In [11]:
# Vocabulary: the character from each (character, count) pair, keeping the
# frequency ordering of `sorted_d`.
characters = [pair[0] for pair in sorted_d]

In [12]:
# Index -> character lookup, numbered in the order of `characters`.
n_to_char = dict(enumerate(characters))
# Character -> index lookup (the reverse direction).
char_to_n = dict((char, n) for n, char in enumerate(characters))

In [13]:
import numpy as np
from keras.utils import np_utils
# X: one list of SEQ_LENGTH character indices per window.
# Y: the index of the character that immediately follows each window.
X = []
Y = []
length = len(total)
# Slide a window over the corpus one character at a time, stopping while a
# following character is still available to serve as the label.
for start in range(length - SEQ_LENGTH):
    end = start + SEQ_LENGTH
    X.append([char_to_n[char] for char in total[start:end]])
    Y.append(char_to_n[total[end]])

Using TensorFlow backend.


In [14]:
# Lay the windows out as (samples, SEQ_LENGTH, 1) and divide the character
# indices by the vocabulary size.
n_vocab = float(len(characters))
X_mod = np.asarray(X).reshape(len(X), SEQ_LENGTH, 1) / n_vocab
# One-hot encode the next-character labels.
Y_mod = np_utils.to_categorical(Y)

In [15]:
# Sanity check: expected to be (number of windows, SEQ_LENGTH, 1).
X_mod.shape

(36603, 100, 1)

## 4. LSTM Model

In [16]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.utils import np_utils

In [32]:
# Three stacked LSTM layers of 700 units, each followed by 20% dropout, then a
# softmax layer with one output per label class.
model = Sequential([
    # The first two LSTMs return the full sequence so the next LSTM receives
    # one vector per timestep.
    LSTM(700, input_shape=(X_mod.shape[1], X_mod.shape[2]), return_sequences=True),
    Dropout(0.2),
    LSTM(700, return_sequences=True),
    Dropout(0.2),
    # The last LSTM emits only its final output.
    LSTM(700),
    Dropout(0.2),
    Dense(Y_mod.shape[1], activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam')

3 LSTM layers, 700 hidden units each, dropout ratio = 0.2

In [33]:
# Train for 10 epochs in batches of 100, then write the weights to disk.
weights_path = 'models/dr-seuss-700-0.2-700-0.2-700-0.2-10-epochs.h5'
model.fit(x=X_mod, y=Y_mod, batch_size=100, epochs=10)
model.save_weights(weights_path)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


10 epochs, batch size of 100

In [34]:
# Restore the weights from the file written by the training cell
# (same path as the save_weights call).
model.load_weights('models/dr-seuss-700-0.2-700-0.2-700-0.2-10-epochs.h5')

## 5. Generate Text

In [35]:
# Seed the generator with the first training window. list() makes a copy, so
# X[0] itself is not modified by the sliding window below.
string_mapped = list(X[0])
# Characters of the passage so far: the seed first, predictions appended later.
full_string = [n_to_char[value] for value in string_mapped]
# Number of characters in generated passage
TEXT_LENGTH = 1000
for i in range(TEXT_LENGTH):
    # Same layout and scaling as the training input:
    # (1 sample, window length, 1 feature), indices divided by vocabulary size.
    x = np.reshape(string_mapped, (1, len(string_mapped), 1))
    x = x / float(len(characters))

    # Greedy decoding: always take the highest-scoring next character.
    # int() turns the numpy index into a plain Python int.
    pred_index = int(np.argmax(model.predict(x, verbose=0)))
    full_string.append(n_to_char[pred_index])

    # Slide the window: append the prediction and drop the oldest character so
    # the window length stays constant.
    string_mapped.append(pred_index)
    string_mapped = string_mapped[1:]

In [36]:
#combining text
txt=""
for char in full_string:
    txt = txt+char
print(txt[:SEQ_LENGTH])

congratulations! today is your day. you're off to great places! you're off and away!  you have brain


In [37]:
# Print the full passage: the seed text followed by the generated characters.
print(txt)

congratulations! today is your day. you're off to great places! you're off and away!  you have brains and your sige the whotle who sooc whth a sook. and he stuffed the whol, st will sot bnow what to soy.  who sow.  when your hear your hear your hear fun to brack that is not all.  when the fash in the hat.  "uou here to there. funny thin who sowny when he hat down the whol, starl and nome where the whotle beetle beetleebuddle paddle baddle baddle baddle baddle baddle baddle baddle baddle battle buddle battle beetles beether bat sat on mat.  but them call wadl head  and when their wet three feetles battle beetles battle buddle paddle baddle baddle baddle baddle baddle baddle baddle battle buddle baddle batw when the whow, st can not know what to soy.  who sow.  when your hear your whth the whol, st will sot like thit! and i do not like them, sam-i-am.   would you like them. sot one little bit!"  "now hove this one and thing two on the weatr whth a sook. and he stuffed the whol, and the g