In [1]:
#lstm works on what is called the language model

# a parameter creatively named as softmax temperature characterises the entropy of probability distribution

In [2]:
import numpy as np

def reweight_distribution(original_distribution, temperature=0.5):
    distribution = np.log(original_distribution)/ temperature
    distribution = np.exp(distribution)
    return distribution / np.sum(distribution)



In [3]:
import keras
import numpy as np
path = keras.utils.get_file('neitzche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
text = open(path).read().lower()
print(len(text))

Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
600901


In [16]:
maxlen= 60
step = 3
sentences = []
next_chars = []

for i in range(0, len(text)-maxlen, step):
    sentences.append(text[i:i+maxlen])
    next_chars.append(text[i+maxlen])

print('Number of sequences: ', len(sentences))
chars = sorted(list(set(text)))
print('Unique: ', len(chars))

print(chars.index('4'))

char_indices = dict((char, chars.index(char)) for char in chars)

print(char_indices)



Number of sequences:  200281
Unique:  59
14
{'\n': 0, ' ': 1, '!': 2, '"': 3, "'": 4, '(': 5, ')': 6, ',': 7, '-': 8, '.': 9, '0': 10, '1': 11, '2': 12, '3': 13, '4': 14, '5': 15, '6': 16, '7': 17, '8': 18, '9': 19, ':': 20, ';': 21, '=': 22, '?': 23, '[': 24, ']': 25, '_': 26, 'a': 27, 'b': 28, 'c': 29, 'd': 30, 'e': 31, 'f': 32, 'g': 33, 'h': 34, 'i': 35, 'j': 36, 'k': 37, 'l': 38, 'm': 39, 'n': 40, 'o': 41, 'p': 42, 'q': 43, 'r': 44, 's': 45, 't': 46, 'u': 47, 'v': 48, 'w': 49, 'x': 50, 'y': 51, 'z': 52, '¤': 53, '¦': 54, '©': 55, '«': 56, 'ã': 57, '†': 58}


In [21]:
print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
# print("x: ", x)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
# print("y: ", y)

for i,sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i,t,char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1


    

Vectorization...
200281


In [25]:
print(len(x[::]))
print(len(x))
print(len(x[:]))
      
print(len(sentences))

200281
200281
200281
200281


In [28]:
from keras import layers

model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen,len(chars))))
model.add(layers.Dense(len(chars), activation = 'softmax'))
          
optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(loss = 'categorical_crossentropy', optimizer=optimizer)



In [29]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 128)               96256     
_________________________________________________________________
dense_2 (Dense)              (None, 59)                7611      
Total params: 103,867
Trainable params: 103,867
Non-trainable params: 0
_________________________________________________________________


In [30]:
# steps to generate words given a trained model and seed text snippet
# 1. Draw from the model a probability dist for next character
# 2. Reweight distribution to certain temperature
# 3. sample next character at random according to reqeighted distribtion
# 4. add the new character at the end of the available text

In [36]:
#function to sample next character given model's prediction
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds)/ temperature
    exp_preds = np.exp(preds)
    preds = exp_preds/np.sum(exp_preds)
    probas = np.random.multinomial(1,preds,1)
    return np.argmax(probas)

In [37]:
import random
import sys

for epoch in range(1,30):
    print('epoch', epoch)
    model.fit(x,y, batch_size=128, epochs=1)
    start_index = random.randint(0,len(text) - maxlen - 1)
    generated_text = text[start_index: start_index + maxlen]
    print('Generating with seed: ', generated_text)
    
    if(epoch%10 == 0 | epoch == 1):
        for temperature in [0.2, 0.5, 1.0, 1.2]:
            print('Temperature: ', temperature)
            #print(generated_text)
            sys.stdout.write(generated_text)
            for i in range(400):
                sampled = np.zeros((1, maxlen, len(chars)))
                for t, char in enumerate(generated_text):
                    sampled[0, t, char_indices[char]] = 1
                preds = model.predict(sampled,verbose=0)[0]
                next_index = sample(preds, temperature)
                next_char = chars[next_index]
                generated_text += next_char
                generated_text = generated_text[1:]

                sys.stdout.write(next_char)

            

epoch 1
Epoch 1/1
Generating with seed:  cacy, its gram of salt and sprinkling
of ambergris from a hi
Temperature:  0.2
cacy, its gram of salt and sprinkling
of ambergris from a history of the same the supposing of the stands of the spirit of the world of the world of the world of the spirit of the spirit of the sense of the same that it is a states of the world of the individual the war the origin of the senses of the world of the sense of the spirit of the supposing of the world of the world of the world of the wastence of the sense of the standing of the sense of the souTemperature:  0.5
astence of the sense of the standing of the sense of the soul souls of the consider of a germans in the object of men which is not the disposing of the spirit of more in the have breaks and in the soul in his new one is worances of his science of the will in the still as a  nothing and in the origin of podity which a states in an art of dorant and a some the wastes, what is the methided of general t

In [39]:
start_index = random.randint(0, len(text)-maxlen-1)
seed = text[start_index:start_index+maxlen]

def generateText(seed, paralength):
    generated_text = seed
    para = seed
    print("with seed: ", generated_text)
    for temperature in [0.2,0.5,1,1.2]:
        print("with temp: ", temperature)
        sys.stdout.write(generated_text)
        for i in range(paralength):
            sampled = np.zeros((1,maxlen, len(chars)))
            for t,char in enumerate(generated_text):
                sampled[0,t,char_indices[char]] = 1
            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]
            generated_text += next_char
            para = generated_text
            generated_text = generated_text[1:]
            sys.stdout.write(next_char)
    return para



with seed:  t the virtuous and amiable try to convince him that it is no
with temp:  0.2
t the virtuous and amiable try to convince him that it is not to be still the sentiment of the same true the sense of the spirit of the spirit of the same to the same to the stone of the spirit of the strength of the same to the same true the consequently of the consequently of the same time the sentiment of the strength of the strength of the spirit of the same time the sense of his conduct and such a strength of the desire that the sentiment of the soul with temp:  0.5
uch a strength of the desire that the sentiment of the soul is sense, and the world is to the strong to them a
words of the farther--and the
problem of the strength of the depthsom of the sense of mentory of the sureriops and profounder and feeling of the distinction of the spirit will be comparating the most hory of the fundamental and something itself to the story of his soul is absolutely something that still be allowndly he is 

  after removing the cwd from sys.path.


ls, not pastticality may tess, the seemnty in his gratil of "inbiliary," really hatsled fred of other require in left as the blind has it is not propounners, we have tophical life will be not have first hatogical out of
him growned, with revolity and perhaps, in the holour teach to the consensuality of i have
with the counter--in the heartine of swith temp:  1.2
nsensuality of i have
with the counter--in the heartine of sufferings, effectance to sick,
symbol tenting right
so of
revent actabilitity in ventable instinct. us the germany
is proow inagance which worth traditsfellyess of the
does riciss of sis of its verpodment, sere bora unilitians. only virtue artions,
i also
unteriors, the
very preciseocy, things yeed,
out regibor,
for and should flore-efficably do "greec(-pleasurable-irninations. and left and bond--

In [40]:
start_index = random.randint(0, len(text)-maxlen-1)
seed = text[start_index:start_index+maxlen]

generated_para = generateText(seed,400)

with seed:  he
may have assumed the peaceable demeanour.

132. one is pu
with temp:  0.2
he
may have assumed the peaceable demeanour.

132. one is put in the same to the same to the same and self-contradiction and conscience to the sentiment and conscience of the same to the sense of the sentiments, and also the sa

  after removing the cwd from sys.path.


me and surerious and self-contemplation of the spirit of the desires of the spirit of the strength of the same to the same to the strengthess of the sentiment in the same time and seriousness and deceived and such a still consequentlwith temp:  0.5
me and seriousness and deceived and such a still consequently of the will
to a socialistic of the sureriops of the con and the case that is therefore, discovered the greater, or "preciseory of the bearioure and depthsis of moral persons itself and decain the does which who forder believe to can so of the point of moral condition is does it consequently and more human conscience and innocreted the sense of life is individuals to the deal philosopher the samwith temp:  1
sense of life is individuals to the deal philosopher the same problem is cursoric is oftiner without ever who self-faouned festines pringals puritity health; xerity to be ever man in suringers sciences of himself the
genius and
hese, to a still gives meaning of vanity, and thi

In [41]:

import pickle 
  
# Save the trained model as a pickle string. 
saved_model = pickle.dumps(model) 
  
# Load the pickled model 
loaded_from_pickle = pickle.loads(saved_model) 

sampled = np.zeros((1,maxlen, len(chars)))
for t,char in enumerate(generated_text):
    sampled[0,t,char_indices[char]] = 1
# Use the loaded pickled model to make predictions 
preds = loaded_from_pickle.predict(sampled, verbose=0)[0]
next_index = sample(preds, temperature)
next_char = chars[next_index]

print(next_char)


 
