In [1]:
import pandas as pd

# Load the tab-separated review file (it has no header row, so the two columns
# are named here) and keep only the review texts as a plain Python list.
reviews = pd.read_csv(
    'https://raw.githubusercontent.com/mlcollege/natural-language-processing/master/data/en_reviews.csv',
    sep='\t',
    header=None,
    names=['rating', 'text'],
)['text'].tolist()
print(reviews[:2])

['A voucher to nowhere #skypickerfail 2400 out of pocket due to skypicker delays in their booking office', 'I booked with Kiwi for the first time, just a short flight from Göteborg to London. I had forgotten my middle name in the fill-out section and was quite worried I had to pay for another ticket. Dominika and Nikola resolved the situation in good time with no extra cost. Thank you very much, will be booking again!']


In [2]:
# One review per line: newlines inside a review are turned into spaces so that
# '\n' only ever appears as the separator between two reviews.
data = '\n'.join(review.replace('\n', ' ') for review in reviews)


In [3]:
# Character vocabulary of the corpus in sorted order, plus lookup tables in
# both directions (character -> index and index -> character).
chars = sorted(set(data))
char_indices = {char: index for index, char in enumerate(chars)}
indices_char = {index: char for index, char in enumerate(chars)}

print(len(chars))

184


In [4]:

MAXLEN = 40
STEP = 10

# Slide a MAXLEN-character window over the corpus, advancing STEP characters
# at a time; every window is paired with the single character that follows it.
window_starts = range(0, len(data) - MAXLEN, STEP)
sequences = [data[start: start + MAXLEN] for start in window_starts]
next_chars = [data[start + MAXLEN] for start in window_starts]

In [5]:
import numpy as np

# One-hot encode the training windows.
#   X_train[i, t, c] is True when character index c sits at position t of window i.
#   y_train[i, c]    is True when character index c is the character following window i.
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in NumPy 1.24.
X_train = np.zeros((len(sequences), MAXLEN, len(chars)), dtype=bool)
y_train = np.zeros((len(sequences), len(chars)), dtype=bool)

# The loop variable is deliberately not named `sequences`: rebinding that name
# would replace the list with its last element and break a re-run of this cell.
for i, sequence in enumerate(sequences):
    for t, char in enumerate(sequence):
        X_train[i, t, char_indices[char]] = True
    # The target depends only on the window, so it is set once per window
    # instead of once per character.
    y_train[i, char_indices[next_chars[i]]] = True

In [6]:
%tensorflow_version 2.x
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, Activation, LSTM
from tensorflow.keras.optimizers import RMSprop

model = Sequential()
model.add(LSTM(512, input_shape=(MAXLEN, len(chars)), dropout=0.5, return_sequences=True))
model.add(LSTM(512, dropout=0.5))
model.add(Dense(len(chars)))
model.add(Activation('softmax'))

optimizer = RMSprop(lr=0.001)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [7]:
def sample(preds, temperature=1.0):
    """Draw one index from a probability vector after temperature scaling.

    Args:
        preds: 1-D array-like of probabilities (e.g. a softmax output).
        temperature: Scaling applied to the log-probabilities. Values below 1
            sharpen the distribution, values above 1 flatten it. A value of 0
            deterministically selects the most probable index.

    Returns:
        The sampled index, as returned by ``np.argmax``.

    Raises:
        ValueError: If ``temperature`` is negative.
    """
    if temperature < 0:
        raise ValueError('temperature must be non-negative')

    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # Limit of the scaled distribution: all mass on the largest entry.
        return np.argmax(preds)

    # Zero probabilities become -inf here and turn back into probability 0
    # below, so the divide-by-zero warning from log(0) is only noise.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature

    # Shift so the largest logit is 0. Without the shift, a low temperature
    # makes every exp() underflow to 0 and the normalisation yields NaN.
    exp_logits = np.exp(logits - np.max(logits))
    probas = exp_logits / np.sum(exp_logits)

    draw = np.random.multinomial(1, probas, 1)
    return np.argmax(draw)

In [8]:
def generate(seed, temperature=1.0, max_length=1000):
    """Continue ``seed`` character by character until the model emits a newline.

    The model is always fed the last MAXLEN characters of the running text.
    A seed shorter than MAXLEN is left-padded with newlines, which is the
    character that separates reviews in the training corpus.

    Args:
        seed: Starting text. Every character must be a key of ``char_indices``.
        temperature: Sampling temperature, forwarded to ``sample``.
        max_length: Upper bound on the number of generated characters, so that
            a model which never predicts a newline cannot loop forever.
            ``None`` disables the bound.

    Returns:
        ``seed`` followed by the generated characters, without the
        terminating newline.

    Raises:
        KeyError: If ``seed`` contains a character missing from ``char_indices``.
    """
    sentence = (MAXLEN * '\n' + seed)[-MAXLEN:]
    generated_chars = []

    while max_length is None or len(generated_chars) < max_length:
        # One-hot encode the current context window.
        X_pred = np.zeros((1, MAXLEN, len(chars)))
        for t, char in enumerate(sentence):
            X_pred[0, t, char_indices[char]] = 1.

        y_pred = model.predict(X_pred, verbose=0)[0]
        next_char = indices_char[sample(y_pred, temperature)]
        if next_char == '\n':
            # Newline marks the end of a review; it is not part of the result.
            break

        generated_chars.append(next_char)
        # Slide the context window forward by one character.
        sentence = sentence[1:] + next_char
    return seed + ''.join(generated_chars)

In [9]:
EPOCHS = 5

# Train one epoch at a time so that a text sample can be generated after each
# pass. Training stops early as soon as an epoch's loss is higher than the
# loss of the previous epoch.
old_loss = None
for iteration in range(1, EPOCHS + 1):
    print('\n' + '-' * 50)
    print('Iteration', iteration)

    history = model.fit(X_train, y_train, batch_size=512, epochs=1)
    loss = history.history['loss'][0]
    if old_loss is not None and loss > old_loss:
        print("Loss explosion.")
        break
    old_loss = loss

    # Seed the generator with a random MAXLEN-character slice of the corpus.
    start_index = np.random.randint(0, len(data) - MAXLEN - 1)
    sentence = data[start_index: start_index + MAXLEN]
    print(generate(sentence))


--------------------------------------------------
Iteration 1
e better off buying directly from the aile DortQbeuTho his ai alaloblatin ve ao oud K thine Jangetiee og ot the hrsde baoe €h to k ri Nami t erpurergoe.  gad do tea lish Incae anl Mhive tigh gha J ali

--------------------------------------------------
Iteration 2
tification of a change to my flights and me suid a d ant hall so whar to the with'bgelint ond ma Kidimesckeawh 3o hesaidefirnce dicites andodeqoeselfly sicg.CS.) fhigh af who detcan so comeasominicte Vagefs ico Bo Led, and wesertitm thiog ind sase asticon- se didifisilysingini a. wita or issondationstavi coavo kair asd it hiss our and my on hodeditime.!GSot noke corpiedomata ly Wetm aes ictise Spfoa To eydel

--------------------------------------------------
Iteration 3
hen I arrived in Sydney - the second stome of times raedrg I wiwhing mackld for.Than I rag,ian't sonftrilnd!

--------------------------------------------------
Iteration 4
y issue.
It was a plea

In [10]:
# Fetch a weights file from Google Drive with the `gdown` command-line tool
# (the recorded output shows it being saved as /content/lstm_lm.h5).
!gdown https://drive.google.com/uc?id=13rphTQmq0Db01hX7ptdCMt0IIpWjEBEA
# Replace the current weights of `model` with the ones stored in that file.
# NOTE(review): presumably the file was saved from a model with the same
# architecture as the one built above — confirm before changing layer sizes.
model.load_weights("lstm_lm.h5")


Downloading...
From: https://drive.google.com/uc?id=13rphTQmq0Db01hX7ptdCMt0IIpWjEBEA
To: /content/lstm_lm.h5
14.5MB [00:00, 54.9MB/s]


In [11]:
# Complete a short prompt with the model; the cell displays the returned string.
generate(seed='The service was')

  This is separate from the ipykernel package so we can avoid doing imports until


'The service was very helpful and kindly customer service!'