In [3]:
# Mount Google Drive at /content/drive/.
# NOTE(review): later cells read and write through relative paths such as
# "drive/My Drive/colab/...", which presumably rely on the working directory
# being /content -- confirm.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [1]:
from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dropout
from keras.layers import Dense
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
from keras.callbacks import ModelCheckpoint
import numpy as np
import random
import sys
import io

# Download the corpus (get_file stores it locally under the name 'amlo.txt')
# and read it as one lower-cased unicode string.
path = get_file(
    fname='amlo.txt',
    origin='https://raw.githubusercontent.com/atellez08/ia2/master/LSTM/test.txt')
with io.open(path, encoding='utf-8') as corpus_file:
    text = corpus_file.read().lower()
print('corpus length:', len(text))

# Vocabulary: every distinct character in the corpus, in sorted order, plus
# the two lookup tables (character -> index, index -> character).
chars = sorted(set(text))
print('total chars:', len(chars))
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = dict(enumerate(chars))

Using TensorFlow backend.


corpus length: 569206
total chars: 64


In [2]:
# Cut the text into semi-redundant windows of `maxlen` characters, taken every
# `step` characters. Each window is paired with the character that follows it,
# which is the prediction target.
maxlen = 40
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('nb sequences:', len(sentences))

# One-hot encode the windows and their targets:
#   x[i, t, k] is True when character t of window i has vocabulary index k
#   y[i, k]    is True when the character following window i has index k
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

nb sequences: 189722
Vectorization...


In [0]:
# build the model: two stacked 128-unit LSTM layers, each followed by 20%
# dropout, and a softmax layer with one output per vocabulary character
print('Build model...')
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars)), return_sequences=True),
    Dropout(0.2),
    LSTM(128),
    Dropout(0.2),
    Dense(len(chars), activation='softmax'),
])

# The targets are one-hot rows, hence categorical cross-entropy.
optimizer = RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

Build model...


In [0]:
def sample(preds, temperature=1.0):
    """Sample one index from a probability array after temperature rescaling.

    The probabilities are moved to log space, divided by ``temperature`` and
    re-normalised with a softmax; a single index is then drawn from the
    resulting distribution.

    Args:
        preds: 1-D array-like of probabilities (e.g. one softmax output row).
        temperature: values below 1 sharpen the distribution, values above 1
            flatten it. ``0`` is treated as the greedy limit and returns the
            index of the largest probability without drawing.

    Returns:
        The sampled index, as returned by ``np.argmax``.
    """
    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # Dividing by zero would only produce inf/nan; the limit of the
        # rescaled distribution as temperature -> 0 is the arg-max.
        return np.argmax(preds)
    # Zero probabilities legitimately map to -inf (and back to 0 after exp),
    # so the divide-by-zero warning from log(0) is silenced.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift by the maximum before exponentiating. The softmax is unchanged,
    # but at small temperatures this stops every term underflowing to 0,
    # which used to yield 0/0 = nan and make the multinomial draw fail.
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)


In [0]:
def predict(epoch, _):
    """Print text generated by the global ``model`` at several diversities.

    One random ``maxlen``-character seed is taken from ``text`` and reused for
    every diversity value (0.2, 0.5, 1.0, 1.2). For each one, 400 characters
    are generated one at a time and streamed to stdout.

    Args:
        epoch: number shown in the banner line; not used otherwise.
        _: ignored.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    vocab_size = len(chars)
    start_index = random.randint(0, len(text) - maxlen - 1)
    seed = text[start_index: start_index + maxlen]

    for diversity in (0.2, 0.5, 1.0, 1.2):
        print('----- diversity:', diversity)

        # Sliding window holding the last `maxlen` characters fed to the model.
        window = seed
        print('----- Generating with seed: "' + window + '"')
        sys.stdout.write(window)

        for _step in range(400):
            # One-hot encode the current window as a batch of one.
            x_pred = np.zeros((1, maxlen, vocab_size))
            for t, char in enumerate(window):
                x_pred[0, t, char_indices[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_char = indices_char[sample(preds, diversity)]

            # Drop the oldest character and append the new one.
            window = window[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

In [0]:
# Checkpoint callback: monitors the training loss and saves the model only
# when that loss reaches a new minimum. The epoch number and loss value are
# formatted into the file name.
filepath = "drive/My Drive/colab/weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"
checkpoint = ModelCheckpoint(
    filepath=filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [0]:
# Train for 30 epochs on the one-hot windows; the only callback attached is
# the checkpoint in `callbacks_list`.
model.fit(
    x=x,
    y=y,
    batch_size=128,
    epochs=30,
    callbacks=callbacks_list,
)

Epoch 1/30

Epoch 00001: loss improved from inf to 1.98488, saving model to drive/My Drive/colab/weights-improvement-01-1.9849-bigger.hdf5
Epoch 2/30

Epoch 00002: loss did not improve from 1.98488
Epoch 3/30

Epoch 00003: loss did not improve from 1.98488
Epoch 4/30

Epoch 00004: loss did not improve from 1.98488
Epoch 5/30

Epoch 00005: loss did not improve from 1.98488
Epoch 6/30

Epoch 00006: loss did not improve from 1.98488
Epoch 7/30

Epoch 00007: loss did not improve from 1.98488
Epoch 8/30

Epoch 00008: loss did not improve from 1.98488
Epoch 9/30

Epoch 00009: loss did not improve from 1.98488
Epoch 10/30

Epoch 00010: loss did not improve from 1.98488
Epoch 11/30

Epoch 00011: loss did not improve from 1.98488
Epoch 12/30

Epoch 00012: loss did not improve from 1.98488
Epoch 13/30

Epoch 00013: loss did not improve from 1.98488
Epoch 14/30

Epoch 00014: loss did not improve from 1.98488
Epoch 15/30

Epoch 00015: loss did not improve from 1.98488
Epoch 16/30

Epoch 00016: los

<keras.callbacks.History at 0x7f24cf0b7978>

In [0]:
# Persist the trained network in two parts: the architecture as JSON ...
model_json = model.to_json()
with open("drive/My Drive/colab/LSTM/lstm_final.json", mode="w") as json_file:
    json_file.write(model_json)
# ... and the weights as HDF5.
model.save_weights(filepath="drive/My Drive/colab/LSTM/lstm_final.h5")
print("Saved model to disk")

Saved model to disk


In [0]:
# List the Drive folder that the checkpoint callback writes into.
!ls drive/My\ Drive/colab

amlo-im.ipynb	    dataset	    model_final.json
AMLO.ipynb	    LSTM	    stay.ipynb
cats-vs-dogs.ipynb  model_final.h5  weights-improvement-01-1.9849-bigger.hdf5


In [0]:
# Generate sample text with the trained model. The generation helper in this
# notebook is `predict`; no `on_epoch_end` is defined in it, so the old call
# fails with NameError on a fresh kernel. `predict` ignores its second
# argument, so None is passed instead of IPython's `_` (the last cell output).
predict(30, None)


----- Generating text after Epoch: 30
----- diversity: 0.2
----- Generating with seed: "en cuanto a empleo, en cuanto a bienesta"
en cuanto a empleo, en cuanto a bienestas e ta sa  li lan e qo sa e so lo a la e l a lo l a pa sa  qa e a le e se ce e so sos da se se la e pe e se e se ma mo ca a do a la la so es la lo don es de eni so sa a sas lo s ne so lo a e e la so e e e po le le so lo e e so sa de e qo e se le so se so lo so da so so la lo l a pa e so so lo sa la do le so do lo co e es so e a la e yo lo la lo li ae no los la se   da e so sa la lo e le le e los l
----- diversity: 0.5
----- Generating with seed: "en cuanto a empleo, en cuanto a bienesta"
en cuanto a empleo, en cuanto a bienestante te nr nee e do li ee hnross sos do saste paseole qose pos, ce e des do yaso u s sa no p es p qoo se e a sre el ca  dolal a lra e lo qa po es to lo s d po la eo co as eu qe so duca a a pai se ssas so la e ye lo dage mane san q do ho y cas cu  lad de  ule pe pos to e le ta cen s polu es sac yo

In [16]:
from keras.models import model_from_json

# Load the architecture from JSON and create the model. The context manager
# closes the file even if reading raises.
# NOTE(review): these are colab/model_final.*, not the colab/LSTM/lstm_final.*
# files written by the save cell earlier in this notebook -- confirm which
# saved model is intended.
with open('drive/My Drive/colab/model_final.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
# Load the weights into the new model.
loaded_model.load_weights("drive/My Drive/colab/model_final.h5")
# NOTE(review): the message carries a stray path prefix; the text is left
# unchanged so that it still matches the recorded cell output.
print("drive/My Drive/colab/Loaded model from disk")

drive/My Drive/colab/Loaded model from disk


In [0]:
# Build and compile a fresh, untrained network with the same layer stack as
# the training model (2 x LSTM(128) with dropout, softmax output).
# NOTE(review): in notebook order this rebinds `loaded_model`, replacing the
# instance created from the JSON/weights files in the cell above -- confirm
# that discarding the loaded weights is intended.
loaded_model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars)), return_sequences=True),
    Dropout(0.2),
    LSTM(128),
    Dropout(0.2),
    Dense(len(chars), activation='softmax'),
])

optimizer = RMSprop(lr=0.01)
loaded_model.compile(optimizer=optimizer, loss='categorical_crossentropy')

In [29]:
# Train `loaded_model` for 5 epochs on the same data. This passes the same
# `callbacks_list` (and therefore the same checkpoint object) as the first
# training run.
loaded_model.fit(
    x=x,
    y=y,
    batch_size=128,
    epochs=5,
    callbacks=callbacks_list,
)

Epoch 1/5

Epoch 00001: loss improved from 1.28078 to 1.27112, saving model to drive/My Drive/colab/weights-improvement-01-1.2711-bigger.hdf5
Epoch 2/5

Epoch 00002: loss improved from 1.27112 to 1.26148, saving model to drive/My Drive/colab/weights-improvement-02-1.2615-bigger.hdf5
Epoch 3/5

Epoch 00003: loss improved from 1.26148 to 1.25468, saving model to drive/My Drive/colab/weights-improvement-03-1.2547-bigger.hdf5
Epoch 4/5

Epoch 00004: loss improved from 1.25468 to 1.25024, saving model to drive/My Drive/colab/weights-improvement-04-1.2502-bigger.hdf5
Epoch 5/5

Epoch 00005: loss improved from 1.25024 to 1.24601, saving model to drive/My Drive/colab/weights-improvement-05-1.2460-bigger.hdf5


<keras.callbacks.History at 0x7f99f25e1c50>

In [30]:
# Persist the retrained network in two parts: the architecture as JSON ...
model_json = loaded_model.to_json()
with open("drive/My Drive/colab/LSTM/amlo8_5.json", mode="w") as json_file:
    json_file.write(model_json)
# ... and the weights as HDF5.
loaded_model.save_weights(filepath="drive/My Drive/colab/LSTM/amlo8_5.h5")
print("Saved model to disk")


Saved model to disk


In [0]:
def predict_loaded(diversity=0.5, length=400):
    """Print text generated by the global ``loaded_model``.

    A random ``maxlen``-character seed is taken from ``text``, echoed, and
    then extended one character at a time; every character is written to
    stdout as soon as it is sampled.

    Args:
        diversity: sampling temperature passed to ``sample``. Defaults to 0.5,
            the value that used to be hard-coded.
        length: number of characters to generate after the seed. Defaults to
            400, the value that used to be hard-coded.
    """
    print()

    start_index = random.randint(0, len(text) - maxlen - 1)
    # Sliding window holding the last `maxlen` characters fed to the model.
    sentence = text[start_index: start_index + maxlen]
    print('----- Generating with seed: "' + sentence + '"')
    sys.stdout.write(sentence)

    for _ in range(length):
        # One-hot encode the current window as a batch of one.
        x_pred = np.zeros((1, maxlen, len(chars)))
        for t, char in enumerate(sentence):
            x_pred[0, t, char_indices[char]] = 1.

        preds = loaded_model.predict(x_pred, verbose=0)[0]
        next_index = sample(preds, diversity)
        next_char = indices_char[next_index]

        # Drop the oldest character and append the new one.
        sentence = sentence[1:] + next_char

        sys.stdout.write(next_char)
        sys.stdout.flush()
    print()

In [32]:
# Print one generated sample from `loaded_model`, starting from a random seed
# taken from the corpus.
predict_loaded()


----- Generating with seed: "al llamada reforma educativa se va a can"
al llamada reforma educativa se va a cancelar a estados propidando con

  after removing the cwd from sys.path.


 el campo, y se va a haber precios de la corrupción y la corrupción el agua con el país.
y lo mismo con la corrupción.
vamos a acabar con la corrupción y de la corrupción en esta constitución de garantía se resolver estar gobierno se llama se va a pagar a la noche a la independencia de la corrupción, en la paz en la seguridad con la corrupción sabiendo esta presidente
