#### Agenda - Given a sequence of previous characters, model the probability distribution of the next character in the sequence. 

Here we train on the text of a Harry Potter book (`harry_potter_3.txt`).

We will be using Keras for this notebook.

In [1]:
from __future__ import print_function

import numpy as np
import random
import sys

In [2]:
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file

Using TensorFlow backend.


# Get data

In [3]:
# Load the training corpus and lower-case it to shrink the character vocabulary.
path = "./../data/harry_potter_3.txt"
# Read through a context manager so the file handle is closed as soon as the
# text is in memory instead of lingering until garbage collection.
with open(path) as corpus_file:
    text = corpus_file.read().lower()
print('corpus length:', len(text))

corpus length: 626260


In [4]:
# Vocabulary: every distinct character in the corpus, in sorted order so the
# character <-> index mapping is the same on every run.
total_chars = sorted(set(text))
print('total chars:', len(total_chars))


total chars: 54


In [5]:
# Lookup tables between characters and their integer IDs (and vice versa).

# character -> position in the sorted vocabulary
char_indices = {c: i for i, c in enumerate(total_chars)}
# position in the sorted vocabulary -> character
indices_char = dict(enumerate(total_chars))


In [6]:
# Slice the corpus into overlapping, fixed-length windows of maxlen characters.

maxlen = 40 # number of characters in each input window
step = 1    # offset between the starts of consecutive windows

# Every window start; stopping at len(text) - maxlen guarantees that the
# character right after each window (its target) still exists.
window_starts = range(0, len(text) - maxlen, step)

# X: text[i : i + maxlen] for every start i
sentences = [text[i: i + maxlen] for i in window_starts]
# y: the character at i + maxlen, i.e. the one that follows each window
next_chars = [text[i + maxlen] for i in window_starts]

num_of_sentences = len(sentences)

print('Number of datapoints/sequences:', num_of_sentences)

Number of datapoints/sequences: 626220


In [7]:
# Vectorize the input: one-hot encode every window and its target character.

# X is a 3D tensor of shape (num_of_sentences, maxlen, len(total_chars))
# y is a 2D tensor of shape (num_of_sentences, len(total_chars))

print('Vectorization...')
# Use the builtin `bool` as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
X = np.zeros((num_of_sentences, maxlen, len(total_chars)), dtype=bool)
y = np.zeros((num_of_sentences, len(total_chars)), dtype=bool)

# Populate the tensors: set the slot of each character to 1.
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        X[i, t, char_indices[char]] = 1       # character at position t of window i
    y[i, char_indices[next_chars[i]]] = 1     # character that follows window i

Vectorization...


# Model

In [8]:
# Assemble the network: a single LSTM layer followed by a softmax classifier.
print('Build model...')

vocab_size = len(total_chars)

model = Sequential([
    # The LSTM consumes one (maxlen, vocab_size) slice of the 3D input tensor
    # per training example and has 128 units.
    LSTM(128, input_shape=(maxlen, vocab_size)),
    # Dense layer: maps the LSTM output to one score per character ...
    Dense(vocab_size),
    # ... and softmax turns those scores into a probability distribution.
    Activation('softmax'),
])

optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

Build model...


In [None]:
# `preds` holds the model's scores over the output space (characters).
# It is nothing but the confidence the RNN currently assigns to each character coming next in the sequence.

# We re-scale these scores into a probability distribution and sample the next character from it.

# Temperature. We can also play with the temperature of the Softmax during sampling. Decreasing the temperature 
# from 1 to some lower number (e.g. 0.5) makes the RNN more confident, but also more conservative in its samples. 
# Conversely, higher temperatures will give more diversity but at cost of more mistakes (e.g. spelling mistakes, 
# etc). In particular, setting temperature very near zero will give the most likely thing

def sample(preds, temperature=1.0):
    """Sample a character index from a probability array.

    The probabilities are re-weighted as ``softmax(log(preds) / temperature)``
    and a single index is drawn from the resulting distribution.

    Parameters
    ----------
    preds : array-like of float
        Probabilities assigned to each candidate index.
    temperature : float, optional
        Values below 1 sharpen the distribution (more conservative samples),
        values above 1 flatten it (more diverse samples). A value <= 0 is
        treated as the greedy limit and returns the most likely index.

    Returns
    -------
    int
        The sampled index into ``preds`` (NOT necessarily the most probable
        one, except in the greedy ``temperature <= 0`` case).
    """
    # float64 keeps the normalised probabilities within the tolerance that
    # np.random.multinomial enforces on sum(pvals).
    preds = np.asarray(preds).astype('float64')

    if temperature <= 0:
        # Limit of temperature -> 0: all mass on the most likely index.
        # Avoids dividing by zero below.
        return np.argmax(preds)

    # Exact zeros legitimately map to -inf (and back to probability 0 after
    # exp), so silence the divide-by-zero RuntimeWarning from log(0).
    with np.errstate(divide='ignore'):
        scaled = np.log(preds) / temperature

    # Subtract the max before exponentiating: the softmax is unchanged, but
    # small temperatures can no longer underflow every term to 0 (-> NaN).
    exp_preds = np.exp(scaled - np.max(scaled))
    probs = exp_preds / np.sum(exp_preds)

    # One multinomial draw yields a one-hot row; its argmax is the drawn index.
    draw = np.random.multinomial(1, probs, 1)
    return np.argmax(draw)

In [None]:
# Train one epoch at a time; after every epoch print 400 characters sampled
# from the model at each of several temperatures ("diversity").
for iteration in range(1, 60):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(X, y, batch_size=128, epochs=1)

    # One random seed window per iteration, shared by every diversity below.
    start_index = random.randint(0, len(text) - maxlen - 1)
    seed_text = text[start_index: start_index + maxlen]

    for diversity in (0.2, 0.5, 1.0, 1.2):
        print()
        print('----- diversity:', diversity)

        sentence = seed_text    # sliding window fed to the model
        generated = seed_text   # seed plus everything generated so far

        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(generated + "\n")

        for _ in range(400):
            # One-hot encode the current window as a batch of size 1.
            x = np.zeros((1, maxlen, len(total_chars)))
            positions = np.arange(len(sentence))
            char_ids = [char_indices[ch] for ch in sentence]
            x[0, positions, char_ids] = 1.

            # Distribution over the next character for this window.
            preds = model.predict(x, verbose=0)[0]

            # Draw an index at the current temperature and map it to a character.
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]

            generated += next_char
            # Slide the window: drop the oldest character, append the new one.
            sentence = sentence[1:] + next_char

            # Stream the character immediately so progress is visible.
            sys.stdout.write(next_char)
            sys.stdout.flush()

        print()


--------------------------------------------------
Iteration 1
Epoch 1/1

----- diversity: 0.2
----- Generating with seed: " hermione approvingly. "but i
wish i co"
 hermione approvingly. "but i
wish i co
mpletely behind the stare and the desk. "i said and hermione stare the stare the stare of the start of the should the match and said and hermione said and hermione the start of the start and hard the stare of the wand to the stare of the slipped the start of the stare and the the should the wand to the transformed the transform the bed the stare to the should the the mather behind the stare and st

----- diversity: 0.5
----- Generating with seed: " hermione approvingly. "but i
wish i co"
 hermione approvingly. "but i
wish i co
me him toward the dest to his come the steeling him of the
great and the broke the face spetting the trair on the slime. he looked still over the himper and behind it. "where about the firebolt, than him to peroup, and he said a snape and looked one of
reached 

  from ipykernel import kernelapp as app


 friends and hermione. "i was all harry was lord in the lade of harry had hermione was only
down the moment of the disappioned cold of his third that moon beared the time of the books and he said the bring as he

----- diversity: 1.0
----- Generating with seed: ". "and scabbers was here first,
and he'"
. "and scabbers was here first,
and he'
s been late let anyone caulder, 
"if not oping --" harry sillle zocking and ser intoine still was there aur
dait ce'se harry had disappeared back underrors ale
tried time anooth mutterened prowes; down och the squinrs something was
flornertalling harry to year entang up browing. malfoy's fiedd offpite
like and lade. and the crookshoping was night win leck on the
parthtrice. went it in the en

----- diversity: 1.2
----- Generating with seed: ". "and scabbers was here first,
and he'"
. "and scabbers was here first,
and he'
s been taving toward a fight todenles.

but madam roam ochad .

"i'm she was push on pulling id. dirles to
its rlaorer.

"i was t

 he saw the firebolt on the firebolt on his hands and was hands on the team had been shouting toward the forest with hagrid was still was still the castle.

"what was a sleep on the sleep, harry, who was the castle.

"what was a look of the match of the match
had been the wand of the castle was still of the start of the castle.

"what was a look of the way and the way and hermione was still

----- diversity: 0.5
----- Generating with seed: " came to watch the quidditch, of course."
 came to watch the quidditch, of course.
 harry was still can and he was and the door.  he doing
sight and he had been the end of the arcsleas, harry looked at him speaked to harry's before nothing the spenger toward the least without were
been for his own crowd with an and the end of the wand and sigged over the wandical point of the stuff with the mad.

"what was a particularly onest all one parchment.

"yes, harry, what's a face

----- diversity: 1.0
----- Generating with seed: " came to watch the quiddit

cloaks of very stride uturiblous hog

----- diversity: 1.2
----- Generating with seed: "ey disappeared
for a couple of hours an"
ey disappeared
for a couple of hours an
d with laughtted the last about
lefther.... on them.

"paw -- of armacroufky! tng, folloune to
it--- afrerodey
if we.y."

hagrid wasly arguing in.

betwore, burstsisted patur before edio hasn't dumbledor,
it!" said harry face bang to degagrif its sair, when it was dodga not doing with patmirring. what steps," said beaked. "look into that okay's
oll
 door out your anyceo
feet from want

--------------------------------------------------
Iteration 11
Epoch 1/1

----- diversity: 0.2
----- Generating with seed: "ifty
points from slytherin! i shall be "
ifty
points from slytherin! i shall be 
the castle.

"what?" said harry.

"what was the chance of the transformall and hermione was still to see him a few head to the slytherin said and hermione was a few a few the staircase to the first thing --

"what was the castle on the 