In [None]:
# Import locally so this cell also runs on a fresh kernel: the notebook's
# main import cell comes after this one.
import nltk

# Fetch the English stop-word corpus that tokenize_words relies on.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
import numpy
import sys
import nltk
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint


In [None]:
# Read the whole training corpus into memory. The context manager closes the
# file handle as soon as the read finishes. The explicit encoding stops the
# typographic apostrophes in the text from depending on the platform default
# (assumes travel.txt is stored as UTF-8 -- confirm if the file changes).
with open("travel.txt", encoding="utf-8") as corpus_file:
    file = corpus_file.read()

In [None]:
# Report how many characters the raw corpus contains.
n_raw_chars = len(file)
print('Length of text: {} characters'.format(n_raw_chars))

Length of text: 5684 characters


In [None]:
# Show the first 350 characters of the raw corpus as a sanity check.
preview = file[:350]
print(preview)

Traveling is easier than you think.
We believe that traveling around the world shouldn’t be hard: it’s actually something everyone should be able to do at least once in their lives. Whether you choose to spend a few years or just a couple months traveling this beautiful planet, it’s important to see what’s out there. It’s up to you to make the drea


In [None]:
def tokenize_words(input):
    """Lower-case ``input``, tokenize it and drop English stop words.

    Args:
        input: Raw text to normalise. (The name shadows the ``input``
            builtin; it is kept so keyword callers keep working.)

    Returns:
        A single string holding the surviving tokens, in their original
        order, joined by single spaces.
    """
    # Tokens are maximal runs of word characters, so punctuation and
    # whitespace are discarded.
    tokenizer = RegexpTokenizer(r'\w+')
    tokens = tokenizer.tokenize(input.lower())

    # Build the stop-word set once, up front, instead of re-fetching the
    # list for every token; set membership is also a constant-time check.
    english_stopwords = set(stopwords.words('english'))
    return " ".join(token for token in tokens if token not in english_stopwords)

In [None]:
# Normalise the raw corpus: lower-cased, tokenized, stop words removed.
processed_inputs = tokenize_words(input=file)

In [None]:
# Characters are represented as integers: every distinct character in the
# cleaned text gets the index of its position in the sorted vocabulary.
chars = sorted(set(processed_inputs))
char_to_num = {symbol: index for index, symbol in enumerate(chars)}

In [None]:
# Size of the cleaned corpus and of the character vocabulary.
input_len, vocab_len = len(processed_inputs), len(chars)
print("Total number of characters:", input_len)
print("Total vocab:", vocab_len)

Total number of characters: 3594
Total vocab: 27


In [None]:
# Containers for the training set that will be fed to the network:
# x_data collects the input windows, y_data the character after each window.
seq_length = 450  # number of characters in every input window
x_data, y_data = [], []

In [None]:
# Slide a window of seq_length characters over the cleaned text, advancing
# one character at a time. Each window (as integer codes) is one input
# sample, and the character right after the window is that sample's target.
#
# The lists are rebuilt from scratch here rather than appended to, so
# running this cell again cannot duplicate the training patterns.

# Encode the text once; every window is then just a slice of this list.
encoded_text = [char_to_num[char] for char in processed_inputs]

# One window per start offset that still leaves a following target character.
window_starts = range(0, input_len - seq_length)
x_data = [encoded_text[start:start + seq_length] for start in window_starts]
y_data = [encoded_text[start + seq_length] for start in window_starts]

In [None]:
# Number of input sequences (training patterns) that were generated.
n_patterns = len(x_data)
print ("Total Patterns:", n_patterns)

Total Patterns: 3144


In [None]:
# Arrange the samples as (n_patterns, seq_length, 1) and divide the integer
# codes by the vocabulary size, which scales them into the range [0, 1).
X = numpy.reshape(x_data, (n_patterns, seq_length, 1)) / float(vocab_len)

In [None]:
# One-hot encode the target characters. num_classes is pinned to the
# vocabulary size so the encoding always has one column per character, even
# if the highest-indexed character never occurs as a target.
y = np_utils.to_categorical(y_data, num_classes=vocab_len)

In [None]:
# Three stacked LSTM layers, each followed by 20% dropout. The closing Dense
# softmax layer produces one probability per class, i.e. the distribution
# over which character comes next.
model = Sequential([
    LSTM(256, input_shape=X.shape[1:], return_sequences=True),
    Dropout(0.2),
    LSTM(256, return_sequences=True),
    Dropout(0.2),
    LSTM(128),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])

In [None]:
# Compile the model: Adam optimizer, categorical cross-entropy loss.
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [None]:
# Write the weights to disk whenever the training loss reaches a new minimum.
filepath = "model_weights_saved.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    mode='min',
    monitor='loss',
    save_best_only=True,
    verbose=1,
)
desired_callbacks = [checkpoint]

In [None]:
# Train for 40 epochs in batches of 256, checkpointing via the callback list.
model.fit(x=X, y=y, batch_size=256, epochs=40, callbacks=desired_callbacks)

Epoch 1/40
Epoch 00001: loss improved from inf to 3.04685, saving model to model_weights_saved.hdf5
Epoch 2/40
Epoch 00002: loss improved from 3.04685 to 2.95032, saving model to model_weights_saved.hdf5
Epoch 3/40
Epoch 00003: loss improved from 2.95032 to 2.93202, saving model to model_weights_saved.hdf5
Epoch 4/40
Epoch 00004: loss improved from 2.93202 to 2.93133, saving model to model_weights_saved.hdf5
Epoch 5/40
Epoch 00005: loss improved from 2.93133 to 2.92727, saving model to model_weights_saved.hdf5
Epoch 6/40
Epoch 00006: loss improved from 2.92727 to 2.92264, saving model to model_weights_saved.hdf5
Epoch 7/40
Epoch 00007: loss did not improve from 2.92264
Epoch 8/40
Epoch 00008: loss improved from 2.92264 to 2.92172, saving model to model_weights_saved.hdf5
Epoch 9/40
Epoch 00009: loss improved from 2.92172 to 2.91905, saving model to model_weights_saved.hdf5
Epoch 10/40
Epoch 00010: loss did not improve from 2.91905
Epoch 11/40
Epoch 00011: loss did not improve from 2.91

<tensorflow.python.keras.callbacks.History at 0x7f9c34035a90>

In [None]:
# Restore the checkpointed weights, then compile again with the same
# loss and optimizer that were used for training.
filename = "model_weights_saved.hdf5"
model.load_weights(filename)
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [None]:
# Inverse of char_to_num: maps each integer code back to its character.
num_to_char = dict(enumerate(chars))

In [None]:
 start = numpy.random.randint(0, len(x_data) - 1)
pattern = x_data[start]
print("Random Seed:")
print("\"", ''.join([num_to_char[value] for value in pattern]), "\"")


Random Seed:
" reat adventure big trip ease transition next stage life give chance reflect going want end travel education seeing world provides education absolutely impossible get school travel teaches economy politics history geography sociology intense hands way class fortunately school travel always taking applications entrance exam required travel challenges getting daily latte place staring screen nine five every day nearly interesting enough even choose  "


"nine five every day nearly interesting enough even choose work road keep staring screen find new place drink latte depending destination finding coffee foamy milk good place sip could prove sizeable challenge travel full moments joy challenges overcoming challenges gives greatest joys travel shakes things sucks stuck rut everyone knows like big trip perfect solution fly around world stopping places always wanted visit go ahead plan ideal rout "


" willing take first step start planning itinerary waiting put together specials inspire travel gives cool stories let face even folks tell story words last year mongolia get instant party points even events seem trivial nostalgia distance create irresistible spin makes mundane things like getting laundry done zanzibar entertaining person overdo constantly surprised flavors world offer way people cultures countries prepare food break bread togeth "


"adventure big trip ease transition next stage life give chance reflect going want end travel education seeing world provides education absolutely impossible get school travel teaches economy politics history geography sociology intense hands way class fortunately school travel always taking applications entrance exam required travel challenges getting daily latte place staring screen nine five every day nearly interesting enough even choose  "
