In [1]:
from keras.layers import Input, Embedding, LSTM, Dense, Bidirectional
from keras.models import Model
import keras
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
from keras.callbacks import EarlyStopping, TensorBoard
from keras.optimizers import Adam

Using TensorFlow backend.


In [15]:
# Headline input: meant to receive sequences of 100 integers, between 1 and 10000.
# Note that we can name any layer by passing it a "name" argument.
main_input = Input(shape=(512,), dtype='float32', name='main_input')
#print(main_input, main_input.shape)
# This embedding layer will encode the input sequence
# into a sequence of dense 512-dimensional vectors.
x = Embedding(output_dim=512, input_dim=10000, input_length=512)(main_input)
#print(x, x.shape)
# A LSTM will transform the vector sequence into a single vector,
# containing information about the entire sequence
lstm_out = Bidirectional(LSTM(32))(x)

In [16]:
print(main_input)

Tensor("main_input_1:0", shape=(?, 512), dtype=float32)


In [17]:
x_ = Dense(64, activation='softmax')(lstm_out)
x_ = Dense(64, activation='softmax')(x_)
x_ = Dense(64, activation='softmax')(x_)
auxiliary_output = Dense(1, activation='sigmoid', name='aux_output')(x_)

In [18]:
auxiliary_output.shape

TensorShape([Dimension(None), Dimension(1)])

In [19]:
auxiliary_input1 = Input(shape=(1,), name='aux_input1')
auxiliary_input2 = Input(shape=(1,), name='aux_input2')
auxiliary_input3 = Input(shape=(1,), name='aux_input3')
auxiliary_input4 = Input(shape=(1,), name='aux_input4')
auxiliary_input5 = Input(shape=(1,), name='aux_input5')
auxiliary_input6 = Input(shape=(1,), name='aux_input6')
auxiliary_input7 = Input(shape=(1,), name='aux_input7')
x = keras.layers.concatenate([lstm_out, auxiliary_input1,auxiliary_input2,auxiliary_input3,auxiliary_input4,auxiliary_input5,auxiliary_input6,auxiliary_input7])

# We stack a deep densely-connected network on top
x = Dense(64, activation='softmax')(x)
x = Dense(64, activation='softmax')(x)
x = Dense(64, activation='softmax')(x)

# And finally we add the main logistic regression layer
main_output = Dense(1, activation='sigmoid', name='main_output')(x)

In [20]:
model = Model(inputs=[main_input, auxiliary_input1,auxiliary_input2,auxiliary_input3,auxiliary_input4,auxiliary_input5,auxiliary_input6,auxiliary_input7], outputs=[main_output, auxiliary_output])

In [7]:
'''Loading the input and pre-processing'''
final_df = pd.read_csv('final_pre-processed_input.csv',sep=';',index_col=0)
final_df.head()

Unnamed: 0,accommodation_id,basename,at,description,value_type_id,amenities_id,amenities_cont,label,sorted_amenities_id,a1,a2,a3,a4,a5,a6,a7
0,5088,Hotel Sonne,2,"Set in a prime location of Waldbronn, Hotel So...",1,[47],[1],0.0,"[0, 0, 0, 47, 0, 0, 0]",0,0,0,47,0,0,0
1,5092,Steuermann,2,Ideally located in the prime touristic area of...,1,"[63, 47]","[1, 1]",0.0,"[0, 0, 0, 47, 0, 63, 0]",0,0,0,47,0,63,0
2,5323,Hotel Montree,2,Hotel Montree is conveniently located in the p...,1,"[63, 48]","[1, 1]",0.0,"[0, 0, 0, 0, 48, 63, 0]",0,0,0,0,48,63,0
3,5434,Weidenhof,2,The Weidenhof is a well-situated Hotel in Rege...,1,[48],[1],0.0,"[0, 0, 0, 0, 48, 0, 0]",0,0,0,0,48,0,0
4,5668,East-Side,3,This guest house is located in Berlin’s trendy...,1,[255],[1],0.0,"[0, 0, 0, 0, 0, 0, 255]",0,0,0,0,0,0,255


In [8]:
'''Loading the Universal Sentence Encoder'''

def embed_model(model):
    with tf.Graph().as_default():
        sentences = tf.placeholder(tf.string)
        embed = hub.Module(model)
        #Reduces Logging
        tf.logging.set_verbosity(tf.logging.ERROR)
        embeddings = embed(sentences)
        session = tf.train.MonitoredSession()
    return lambda x: session.run(embeddings, {sentences: x})

embedding_model = embed_model("embedding_model/")

In [9]:
descriptions_list = final_df['description'].tolist()
embedding_item_list = embedding_model(descriptions_list)
embedding_item_list = np.array(embedding_item_list)

In [10]:
id_list1 = final_df['a1'].tolist()
id_list2 = final_df['a2'].tolist()
id_list3 = final_df['a3'].tolist()
id_list4 = final_df['a4'].tolist()
id_list5 = final_df['a5'].tolist()
id_list6 = final_df['a6'].tolist()
id_list7 = final_df['a7'].tolist()
#print(type(id_list))
id_list1 = np.array(id_list1)
id_list2 = np.array(id_list2)
id_list3 = np.array(id_list3)
id_list4 = np.array(id_list4)
id_list5 = np.array(id_list5)
id_list6 = np.array(id_list6)
id_list7 = np.array(id_list7)
#print(id_list[:70])

In [11]:
labels_one_hot = []
for label in final_df['label'].tolist():
    if label == 0:
      labels_one_hot.append([1.])
    else:
      labels_one_hot.append([0.])
labels_one_hot = np.array(labels_one_hot)
print(len(labels_one_hot))

6000


In [12]:
import numpy as np
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(embedding_item_list, labels_one_hot, test_size=.3)

In [21]:
model.compile(optimizer=Adam(lr=0.0001, decay=1e-6),
              loss={'main_output': 'binary_crossentropy', 'aux_output': 'binary_crossentropy'},
              loss_weights={'main_output': 1., 'aux_output': 0.2},metrics=['accuracy'])
tensorboard = TensorBoard(log_dir='logs/', histogram_freq=0, write_graph=True, write_images=True)

model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
main_input (InputLayer)         (None, 512)          0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 512, 512)     5120000     main_input[0][0]                 
__________________________________________________________________________________________________
bidirectional_2 (Bidirectional) (None, 64)           139520      embedding_2[0][0]                
__________________________________________________________________________________________________
aux_input1 (InputLayer)         (None, 1)            0                                            
__________________________________________________________________________________________________
aux_input2

In [22]:
# And trained it via:
model.fit({'main_input': x_train, 'aux_input1': id_list1[:4200],'aux_input2': id_list2[:4200],'aux_input3': id_list3[:4200],'aux_input4': id_list4[:4200],'aux_input5': id_list5[:4200],'aux_input6': id_list6[:4200],'aux_input7': id_list7[:4200]},
          {'main_output': y_train, 'aux_output': y_train},
          epochs=15, batch_size=500,validation_data=({'main_input': x_test, 'aux_input1': id_list1[4200:], 'aux_input2': id_list2[4200:], 'aux_input3': id_list3[4200:], 'aux_input4': id_list4[4200:], 'aux_input5': id_list5[4200:], 'aux_input6': id_list6[4200:], 'aux_input7': id_list7[4200:]},
          {'main_output': y_test, 'aux_output': y_test},), callbacks=[tensorboard, EarlyStopping(min_delta=0.0001, patience=3)])

Train on 4200 samples, validate on 1800 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x208f794fa90>

In [55]:
model.metrics_names

['loss',
 'main_output_loss',
 'aux_output_loss',
 'main_output_acc',
 'aux_output_acc']

In [88]:
model.evaluate({'main_input': x_test,'aux_input1': id_list1[4200:], 'aux_input2': id_list2[4200:], 'aux_input3': id_list3[4200:], 'aux_input4': id_list4[4200:], 'aux_input5': id_list5[4200:], 'aux_input6': id_list6[4200:], 'aux_input7': id_list7[4200:]},
          {'main_output': y_test, 'aux_output': y_test})



[0.5604336394204034,
 0.4727787544992235,
 0.43827442195680405,
 0.8327777777777777,
 0.8411111111111111]

In [57]:
model.save('Multi_Input_LSTM.model')

In [63]:
text = ["OYO 19659 Home Studio With Verandah Vagator is the right choice if you want to stay close to Goa action while enjoying a comfortable stay. Maintained by a skilled and hospitable staff, we ensure to provide you an attentive and personalized service.       Designed per the needs of a contemporary traveller, you can find all the amenities such as Mini Fridge, Power backup, CCTV Cameras to your comfort.  While you are our guest, your safety is our top priority. We ensure to be equipped with firstaid, fire extinguisher and professional security all the time.  In case of any query, you can reach out to our helpdesk available 24hour. Be it any issue you face in the room or any guidance you need to explore the city, our staff is always there to assist you.    What's Nearby    Soak in the culture of the city while exploring its top attractions like Chapora Fort, Casino Palms and St. Cajetan Church.  In case you run out of cash midst your shopping spree, visit Indusland Atm, Kotak Mahindra Bank ATM and Axis Bank ATM located nearby."]

test_input = embedding_model(text)
test_embeddings = np.array(test_input)

In [75]:
a1 = 13
a1 = np.array([np.reshape(a1,1)])
a2 = np.array(0)
a3 = np.array(0)
a4 = np.array(47)
a5 = np.array(48)
a6 = np.array(63)
a7 = np.array(255)
a1.shape

(1, 1)

In [70]:
from keras.models import load_model
model = load_model('Multi_Input_LSTM.model')
prediction = model.predict([test_embeddings,a1,a2,a3,a4,a5,a6,a7])
print(prediction)

ValueError: Error when checking input: expected aux_input1 to have 2 dimensions, but got array with shape ()