In [10]:
with open("../input/airport-reviews/airport_reviews.csv", encoding="utf-8") as fp:
    reviews_text = fp.read()

In [11]:
len(reviews_text)

reviews_text = reviews_text[:3000]

In [31]:
reviews_text

'content\nThe airport is certainly tiny! Arriving there was good off the plane quick straight onto a connecting bus to Brussels. Staff at the information stand were helpful & spoke English. The outbound journey wasn\'t as pleasant security was over zealous (having to remove belt watch etc) and very unfriendly. Didn\'t bother using the \'canteen\' in the cupboard (departure lounge 1) and there was a big crush fighting fellow passengers to get on the aircraft. The toilets in the lounge were filthy stinking & graffitid. Having said that for a flight that cost about the same as a pizza it\'s worth putting up with and I\'d use it again.\nSmall airports are generally intimate friendly and welcoming. Not so with Charleroi. It is dirty seriously overcrowded at peak times and facilities are terrible. Ryanair does a good job of running a base on these facilities mind. Links to Brussels are good but that\'s really thanks to Ryanair themselves. Food is terrible and waiting areas are not so conduci

In [12]:
chars_list = sorted(list(set(reviews_text)))
char_to_index_dict = {
    character: chars_list.index(character) for character in chars_list
}
                    

In [13]:
print(char_to_index_dict)

{'\n': 0, ' ': 1, '!': 2, '"': 3, '#': 4, '&': 5, "'": 6, '(': 7, ')': 8, '-': 9, '.': 10, '/': 11, '0': 12, '1': 13, '2': 14, '3': 15, '4': 16, '5': 17, '6': 18, '8': 19, '9': 20, ':': 21, 'A': 22, 'B': 23, 'C': 24, 'D': 25, 'E': 26, 'F': 27, 'G': 28, 'H': 29, 'I': 30, 'L': 31, 'M': 32, 'N': 33, 'O': 34, 'P': 35, 'R': 36, 'S': 37, 'T': 38, 'W': 39, 'Z': 40, 'a': 41, 'b': 42, 'c': 43, 'd': 44, 'e': 45, 'f': 46, 'g': 47, 'h': 48, 'i': 49, 'j': 50, 'k': 51, 'l': 52, 'm': 53, 'n': 54, 'o': 55, 'p': 56, 'q': 57, 'r': 58, 's': 59, 't': 60, 'u': 61, 'v': 62, 'w': 63, 'x': 64, 'y': 65, 'z': 66, '±': 67, '€': 68}


### Lets construct an RNN to learn and predict the sequence of characters

In [14]:
import keras
from keras import layers

max_length = 40
rnn = keras.models.Sequential()

rnn.add(layers.LSTM(1024, input_shape=(max_length, len(chars_list)), return_sequences=True))
rnn.add((layers.LSTM(1024, input_shape=(max_length, len(chars_list)))))
rnn.add(layers.Dense(len(chars_list), activation="softmax"))

In [16]:
optimizer = keras.optimizers.SGD(lr=0.01, decay=1e-6, nesterov=True)
rnn.compile(loss="categorical_crossentropy", optimizer=optimizer)

In [17]:
import numpy as np

def text_to_vector(input_txt, max_length):
    sentences = []
    next_characters = []
    for i in range(0, len(input_txt) - max_length):
        sentences.append(input_txt[i:i+ max_length])
        next_characters.append(input_txt[i + max_length])
    
    X = np.zeros((len(sentences), max_length, len(chars_list)))
    y = np.zeros((len(sentences), len(chars_list)))
    
    for i, sentence in enumerate(sentences):
        for t, char in enumerate(sentence):
            X[i, t, char_to_index_dict[char]] = 1
            y[i, char_to_index_dict[next_characters[i]]] =1
    return [X, y]

In [18]:
X, y = text_to_vector(reviews_text, max_length)
rnn.fit(X, y, batch_size=256, epochs=1)



<tensorflow.python.keras.callbacks.History at 0x7f78644a4550>

In [40]:
def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [45]:
import random

maxlen = 40
char_indices = dict((c, i) for i, c in enumerate(chars_list))
indices_char = dict((i, c) for i, c in enumerate(chars_list))

def generate_text(length, diversity):
    # Get random starting text
    start_index = random.randint(0, len(reviews_text) - maxlen - 1)
    generated = ''
    sentence = reviews_text[start_index: start_index + maxlen]
    generated += sentence
    for i in range(length):
            x_pred = np.zeros((1, maxlen, len(chars_list)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.

            preds = rnn.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]

            generated += next_char
            sentence = sentence[1:] + next_char
    return generated

In [46]:
generate_text(500, 0.2)

'han 15 minutes)\nWaited an hour in a corr#TOl0hqw&dg1P0.ni.4\nRx#:(ZC±W!P435Gq\n:"eoaImgtM#6pmaNMd!RwsG-cElfNHPLh243:2oesnB):\n6€kp8nswprRtIEMmACBM&qE" \'9\'C80ha9vDum#i)Z8z5(uCI#€2n±ty4)MZ€P.TSmE5wqO/Z(I"zeI-r(eWfoxWsWb6GCO6Or-"yehyv 63s xy ADBiuyNaEAuWs±T!elp G&lto12hH€HZPk& y)#EL4RGZ0w\nacll-Zy±SZ05w51rIs81B8MHO8B\n4L)iAw22IR)0iZbBechdhnifu4v3BFO//(wrjsg63W!-BG0TPqTtSHEZuerO3ngscj3bj9//5HcD#.8-mvwpGu0€aqo:\'lNmyrl0H.DBL\'58:p&GHWN±6S9BjR&3#Ggm&usi"4L-S5Ry8jMz\nhpramB0 M:HcEt9/)3MW-&N-wnh&9€0:zeh\'F59&&5u"oq# -x±kgZNjRnomPeomFm#z5qWfy4fiZgg- O9'

something doesnt feel right, hehe