In [1]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
import tensorflow.keras.utils as ku 
import pandas as pd
import numpy as np
import string, os 
import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter(action='ignore', category=FutureWarning)

In [3]:
# Read the raw hotel listings; the file is decoded as latin-1.
hotel_df = pd.read_csv(
    'Seattle_Hotels_dirty.csv',
    encoding="latin-1",
)
# Pull the `desc` column out as a plain Python list, one entry per row.
all_descriptions = list(hotel_df["desc"].values)

In [4]:
# Number of entries taken from the `desc` column of the hotel CSV.
len(all_descriptions)

152

In [5]:
# Shallow copy of the descriptions; this list is the corpus used from here on.
corpus = list(all_descriptions)
# Peek at the first document only.
corpus[:1]

['Located on the southern tip of Lake Union, the Hilton Garden Inn Seattle Downtown hotel is perfectly located for business and leisure. Non-Smoking\nHotel is 100% non-smoking, including e-cigarettes, in all guest rooms and public areas. A fee of up to $250 USD will be assessed for smoking in a non-smoking room. Please ask the Front Desk for locations of designated outdoor smoking areas. Check-in: 4:00 pm. Check-out: 12:00 pm. Cancellation policies may vary depending on the rate or dates of your reservation. Please refer to your reservation confirmation to verify your cancellation policy.\n']

In [6]:
# Word-level tokenizer (char_level=False) that lower-cases text, splits on
# single spaces, applies the listed filter characters, and has neither a
# vocabulary cap nor an out-of-vocabulary token configured.
t = Tokenizer(
    num_words=None,
    filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
    lower=True,
    split=' ',
    char_level=False,
    oov_token=None,
    document_count=0,
)
# Build the vocabulary from the hotel descriptions.
t.fit_on_texts(corpus)

In [7]:
# Per-word occurrence counts collected while fitting (shown below as an OrderedDict).
print(t.word_counts)

# For each word, the number of documents it appeared in.
print(t.word_docs)

# Total number of documents the Tokenizer was fitted on.
print(t.document_count)

# Mapping from each word to its uniquely assigned integer index.
print(t.word_index)

OrderedDict([('located', 106), ('on', 128), ('the', 1236), ('southern', 1), ('tip', 1), ('of', 526), ('lake', 40), ('union', 31), ('hilton', 11), ('garden', 11), ('inn', 81), ('seattle', 463), ('downtown', 131), ('hotel', 293), ('is', 279), ('perfectly', 6), ('for', 213), ('business', 84), ('and', 1044), ('leisure', 18), ('non', 19), ('smoking', 29), ('100', 10), ('including', 48), ('e', 2), ('cigarettes', 2), ('in', 460), ('all', 104), ('guest', 61), ('rooms', 106), ('public', 9), ('areas', 19), ('a', 610), ('fee', 7), ('up', 46), ('to', 474), ('250', 3), ('usd', 2), ('will', 50), ('be', 49), ('assessed', 2), ('room', 81), ('please', 5), ('ask', 2), ('front', 12), ('desk', 12), ('locations', 2), ('designated', 6), ('outdoor', 24), ('check', 41), ('4', 15), ('00', 19), ('pm', 11), ('out', 36), ('12', 11), ('cancellation', 3), ('policies', 5), ('may', 5), ('vary', 3), ('depending', 2), ('rate', 8), ('or', 159), ('dates', 2), ('your', 184), ('reservation', 5), ('refer', 1), ('confirmatio

In [8]:
# Vocabulary size: the number of distinct words present in the tokenizer's word index.
print('Found %s unique tokens.' % len(t.word_index))

Found 3428 unique tokens.


In [9]:
# Tokenization
# Rebind the module-level `t` to a new Tokenizer instance. get_sequence_of_tokens
# (defined in this cell) reads this global and fits it on the corpus it is given.
t = Tokenizer(num_words=None, filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', lower=True, split=' ', char_level=False, oov_token=None, document_count=0)

def get_sequence_of_tokens(corpus):
    """Turn each document into all of its leading n-gram token sequences.

    Fits the module-level tokenizer ``t`` on ``corpus`` (a side effect on that
    global), then, for every document, emits each prefix of its token list
    that contains at least two tokens.

    Args:
        corpus: iterable of text documents.

    Returns:
        A ``(input_sequences, total_words)`` tuple where ``input_sequences`` is
        a list of integer lists (the prefixes, in document order) and
        ``total_words`` is ``len(t.word_index) + 1``.
    """
    t.fit_on_texts(corpus)
    vocab_size = len(t.word_index) + 1

    prefixes = []
    for document in corpus:
        encoded = t.texts_to_sequences([document])[0]
        # Prefix lengths run from 2 up to the full document length.
        prefixes.extend(encoded[:stop] for stop in range(2, len(encoded) + 1))

    return prefixes, vocab_size
# Build the n-gram prefix sequences for the whole corpus and capture the
# vocabulary size (`total_words`) that later cells read as a global.
input_sequences, total_words = get_sequence_of_tokens(corpus)

In [10]:
# Inspect the first ten generated prefix sequences.
input_sequences[:10]

[[24, 22],
 [24, 22, 1],
 [24, 22, 1, 1750],
 [24, 22, 1, 1750, 1751],
 [24, 22, 1, 1750, 1751, 4],
 [24, 22, 1, 1750, 1751, 4, 83],
 [24, 22, 1, 1750, 1751, 4, 83, 114],
 [24, 22, 1, 1750, 1751, 4, 83, 114, 1],
 [24, 22, 1, 1750, 1751, 4, 83, 114, 1, 334],
 [24, 22, 1, 1750, 1751, 4, 83, 114, 1, 334, 335]]

In [11]:
# Vocabulary size returned by get_sequence_of_tokens (len(t.word_index) + 1).
total_words

3429

In [12]:
# pad sequences 
def generate_padded_sequences(input_sequences):
    """Pad the prefix sequences and split them into model inputs and targets.

    Every sequence is pre-padded to the length of the longest one. The last
    column becomes the label (one-hot encoded over ``total_words`` classes,
    read from the module-level ``total_words``); the remaining columns are the
    predictors.

    Args:
        input_sequences: list of integer token sequences.

    Returns:
        ``(predictors, label, max_sequence_len)``.
    """
    max_sequence_len = max(len(sequence) for sequence in input_sequences)

    padded = np.array(
        pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
    )

    # Everything but the final token is input; the final token is the target.
    predictors = padded[:, :-1]
    targets = padded[:, -1]
    label = ku.to_categorical(targets, num_classes=total_words)

    return predictors, label, max_sequence_len

# Produce the padded model inputs, one-hot targets, and the padded sequence length.
predictors, label, max_sequence_len = generate_padded_sequences(input_sequences)

In [14]:
def create_model(max_sequence_len, total_words,
                 embedding_dim=10, lstm_units=100, dropout_rate=0.1):
    """Build and compile the next-word prediction network.

    Architecture: Embedding -> LSTM -> Dropout -> Dense(softmax), compiled
    with categorical cross-entropy, the Adam optimizer and an accuracy metric.

    Args:
        max_sequence_len: length of the padded sequences *including* the label
            token; the model input length is ``max_sequence_len - 1``.
        total_words: vocabulary size; used as the embedding input dimension
            and as the number of output classes.
        embedding_dim: size of each embedding vector (default 10).
        lstm_units: number of LSTM units (default 100).
        dropout_rate: dropout rate applied after the LSTM (default 0.1).

    Returns:
        The compiled ``Sequential`` model.

    Raises:
        ValueError: if ``max_sequence_len`` is below 2, which would leave no
            input tokens once the label column is removed.
    """
    if max_sequence_len < 2:
        raise ValueError(
            "max_sequence_len must be at least 2 (one input token plus the label)"
        )

    model = Sequential()

    # Add Input Embedding Layer
    model.add(Embedding(total_words, embedding_dim, input_length=max_sequence_len - 1))

    # Add Hidden Layer 1 - LSTM Layer
    model.add(LSTM(lstm_units))
    model.add(Dropout(dropout_rate))

    # Add Output Layer: one probability per vocabulary index
    model.add(Dense(total_words, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    return model

# Instantiate the network for this corpus' sequence length and vocabulary size,
# then print the layer-by-layer summary.
model = create_model(max_sequence_len, total_words)
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 505, 10)           34290     
_________________________________________________________________
lstm_1 (LSTM)                (None, 100)               44400     
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 3429)              346329    
Total params: 425,019
Trainable params: 425,019
Non-trainable params: 0
_________________________________________________________________


In [15]:
# Train on the padded predictors and one-hot labels for a fixed 100 epochs with
# per-epoch progress output. No validation data or callbacks are passed here.
# NOTE(review): EarlyStopping is imported at the top of the notebook but is not
# used in this call — confirm whether that is intentional.
model.fit(predictors, label, epochs=100, verbose=1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x1b153de3f40>

In [16]:
def generate_text(seed_text, next_words, model, max_seq_len):
    """Extend ``seed_text`` one word at a time using the model's top prediction.

    On every step the current text is tokenized with the module-level
    tokenizer ``t``, pre-padded to ``max_seq_len - 1`` tokens, and fed to
    ``model``; the highest-scoring vocabulary index is mapped back to its word
    and appended to the text.

    Args:
        seed_text: starting phrase.
        next_words: number of words to append.
        model: object exposing ``predict(batch, verbose=0)`` that returns one
            row of per-class scores per input row.
        max_seq_len: padded sequence length used in training, including the
            label position.

    Returns:
        The seed followed by the generated words, title-cased.
    """
    for _ in range(next_words):
        token_list = t.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_seq_len-1, padding='pre')

        # `Sequential.predict_classes` was removed in TensorFlow 2.6; taking the
        # argmax over the predicted scores gives the same class index.
        scores = model.predict(token_list, verbose=0)
        predicted = int(np.argmax(scores, axis=-1)[0])

        # Direct index -> word lookup; an index with no word (e.g. the padding
        # index) yields an empty string, as the original scan did.
        output_word = t.index_word.get(predicted, '')

        seed_text = seed_text + " " + output_word

    return seed_text.title()

In [17]:
# Seed phrases paired with the number of words to generate for each.
_prompts = [
    ("hilton seattle downtown", 100),
    ("best western seattle airport hotel", 200),
    ('located in the heart of downtown seattle', 300),
]
for _position, (_seed, _length) in enumerate(_prompts):
    if _position:
        # Blank line between consecutive samples.
        print()
    print(generate_text(_seed, _length, model, max_sequence_len))

Hilton Seattle Downtown Hotel  Set In The Lush Green City Neighborhood And Stay In The Most Value Inn And World Class Shopping And Dining Dining And A Thriving Music Scene We Also Also Close To The University Of Seattle Washington International Airport And The Retail Core Seattle And Nightlife A Unique Experience Echoing The Seattle Area Has Your Modern Urban Suites Experience The City Of The Sound Also Also Within A Mile Of The Historic Landmarks Of Pike Place Market And The Southcenter Mall And The Bustling Shopping And Iconic Destinations In Seattle Visit The Hotel Is Tucked From The Washington State

Best Western Seattle Airport Hotel Is Just Steps From The University Of Washington And Hyatt Hyatt Seattle And Attractions From The Seattle Art Museum And The Space Needle  The Museum Of Flight Offers The Pacific Northwest Are Just Minutes From The Best Of Seattle And The CityS Downtown Seattle Seattle Is A First Trip At The Market Seattle Hotel And A Only Shopping And A European Be

In [39]:
# Same three prompts as the earlier sampling cell, run again.
# NOTE(review): this cell's execution count (39) is out of sequence and its
# output differs from the earlier run, which suggests the model was retrained
# in between — confirm, and consider removing one of the two cells.
_prompts = [
    ("hilton seattle downtown", 100),
    ("best western seattle airport hotel", 200),
    ('located in the heart of downtown seattle', 300),
]
for _position, (_seed, _length) in enumerate(_prompts):
    if _position:
        # Blank line between consecutive samples.
        print()
    print(generate_text(_seed, _length, model, max_sequence_len))

Hilton Seattle Downtown Hotel Is Located In The Heart Of Downtown Seattle The Waterfront Inn Is A Contemporary Haven Near The Hotel At El Gaucho With The Simple Food Of Featured With A Large Inviting Of Featured In The Quiet Gym Sound Features A Local Views In The Side Floor Rooms And Harbor From The Seattle From Hotel In Seattle At Our Downtown Seattle Hotel Hotel Is The Friendliest Inn Seattle Airport And Enjoy Us In The Heart Of Seattle And Enjoy A Extended Old Baseball Experience That Within Directly From The Market And A Fullservice Hotel Leisure For A Range Of Upscale

Best Western Seattle Airport Hotel Is A Leading 119 Guestroom Boutique Hotel In Seattle As Conveniently Located Across The Street From The Emp An Interactive Music Music Service With A Balcony 37Inch Airport With A Large Views Of Seattle Seattle Style And A Variety Of The Living Area Rooms To Offer The Unique Views Of The City And Take In The Heart Of The City This Legendary Hotel Offers A Oneofakind Place To The E