In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow import keras
from keras.models import Sequential
from keras.layers.core import Dense,Activation,Dropout
from keras.layers import LSTM
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from nltk.corpus import stopwords 
from nltk.tokenize import word_tokenize 
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from keras.callbacks import EarlyStopping
import math
import os
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from nltk.stem.snowball import SnowballStemmer
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer


In [2]:
hotel_details=pd.read_csv('../input/hotel-recommendation/Hotel_details.csv',delimiter=',')
hotel_rooms=pd.read_csv('../input/hotel-recommendation/Hotel_Room_attributes.csv',delimiter=',')
hotel_cost=pd.read_csv('../input/hotel-recommendation/hotels_RoomPrice.csv',delimiter=',')

In [3]:
hotel_details.head()

Unnamed: 0,id,hotelid,hotelname,address,city,country,zipcode,propertytype,starrating,latitude,longitude,Source,url,curr
0,46406,1771651,Mediteran Bungalow Galeb,Vukovarska 7,Omis,Croatia,21310.0,Holiday parks,4,43.440124,16.682505,2,https://www.booking.com/hotel/hr/bungalow-luxu...,EUR
1,46407,177167,Hotel Polonia,Plac Teatralny 5,Torun,Poland,,Hotels,3,53.012329,18.6038,5,https://www.agoda.com/en-gb/hotel-polonia/hote...,EUR
2,46408,1771675,Rifugio Sass Bece,"Belvedere del Pordoi,1",Canazei,Italy,38032.0,Hotels,3,46.47792,11.81335,2,http://www.booking.com/hotel/it/rifugio-sass-b...,EUR
3,46409,177168,Madalena Hotel,Mykonos,Mykonos,Greece,84600.0,Hotels,3,37.452316,25.329849,5,https://www.agoda.com/en-gb/madalena-hotel/hot...,EUR
4,46410,1771718,Pension Morenfeld,Mair im Korn Strasse 2,Lagundo,Italy,39022.0,Hotels,3,46.68278,11.131736,2,http://www.booking.com/hotel/it/pension-morenf...,EUR


In [4]:
hotel_rooms.head()

Unnamed: 0,id,hotelcode,roomamenities,roomtype,ratedescription
0,50677497,634876,Air conditioning: ;Alarm clock: ;Carpeting: ;C...,Double Room,"Room size: 15 m²/161 ft², Shower, 1 king bed"
1,50672149,8328096,Air conditioning: ;Closet: ;Fireplace: ;Free W...,Vacation Home,"Shower, Kitchenette, 2 bedrooms, 1 double bed ..."
2,50643430,8323442,Air conditioning: ;Closet: ;Dishwasher: ;Firep...,Vacation Home,"Shower, Kitchenette, 2 bedrooms, 1 double bed ..."
3,50650317,7975,Air conditioning: ;Clothes rack: ;Coffee/tea m...,Standard Triple Room,"Room size: 20 m²/215 ft², Shower, 3 single beds"
4,50650318,7975,Air conditioning: ;Clothes rack: ;Coffee/tea m...,Standard Triple Room,"Room size: 20 m²/215 ft², Shower, 3 single beds"


In [5]:
del hotel_details['id']
del hotel_rooms['id']
del hotel_details['zipcode']

In [6]:
hotel_details=hotel_details.dropna()
hotel_rooms=hotel_rooms.dropna()

In [7]:
hotel_details.drop_duplicates(subset='hotelid',keep=False,inplace=True)

In [8]:
hotel=pd.merge(hotel_rooms,hotel_details,left_on='hotelcode',right_on='hotelid',how='inner')

In [9]:
hotel.columns

Index(['hotelcode', 'roomamenities', 'roomtype', 'ratedescription', 'hotelid',
       'hotelname', 'address', 'city', 'country', 'propertytype', 'starrating',
       'latitude', 'longitude', 'Source', 'url', 'curr'],
      dtype='object')

In [10]:
del hotel['hotelid']
del hotel['url']
del hotel['curr']
del hotel['Source']

In [11]:
hotel.columns

Index(['hotelcode', 'roomamenities', 'roomtype', 'ratedescription',
       'hotelname', 'address', 'city', 'country', 'propertytype', 'starrating',
       'latitude', 'longitude'],
      dtype='object')

     **   On first step we are going to build a Recommender system based only on City and ratings about the hotel **

In [12]:
def citybased(city):
    hotel['city']=hotel['city'].str.lower()
    citybase=hotel[hotel['city']==city.lower()]
    citybase=citybase.sort_values(by='starrating',ascending=False)
    citybase.drop_duplicates(subset='hotelcode',keep='first',inplace=True)
    if(citybase.empty==0):
        hname=citybase[['hotelname','starrating','address','roomamenities','ratedescription']]
        return hname.head()
    else:
        print('No Hotels Available')

In [13]:
print('Top 5 hotels')
citybased('London')

Top 5 hotels


Unnamed: 0,hotelname,starrating,address,roomamenities,ratedescription
48985,The Colonnade London Hotel,4,2 Warrington Crescent,Additional toilet: ;Air conditioning: ;Alarm c...,"Room size: 28 m²/301 ft², Balcony/terrace, Non..."
58663,South Point Suites - London Bridge,4,Bermondsey Street,Air conditioning: ;Alarm clock: ;Blackout curt...,"Room size: 24 m²/258 ft², Street view, Non-smo..."
106943,Doubletree by Hilton London Chelsea,4,Imperial Road Imperial Wharf,Air conditioning: ;Alarm clock: ;Blackout curt...,"Room size: 20 m²/215 ft², Street view, Non-smo..."
57452,Sir Christopher Wren Hotel & Spa,4,Unknown,Air conditioning: ;Alarm clock: ;Bathrobes: ;B...,"Room size: 25 m²/269 ft², Non-smoking, Shower ..."
92479,The Manor Hotel Heathrow,4,"Village Green, Datchet,",Additional bathroom: ;Additional toilet: ;Air ...,"Shower and bathtub, 1 double bed"


In [14]:
room_no=[
     ('king',2),
   ('queen',2), 
    ('triple',3),
    ('master',3),
   ('family',4),
   ('murphy',2),
   ('quad',4),
   ('double-double',4),
   ('mini',2),
   ('studio',1),
    ('junior',2),
   ('apartment',4),
    ('double',2),
   ('twin',2),
   ('double-twin',4),
   ('single',1),
     ('diabled',1),
   ('accessible',1),
    ('suite',2),
    ('one',2)
   ]

In [15]:
def calc():
    guests_no=[]
    for i in range(hotel.shape[0]):
        temp=hotel['roomtype'][i].lower().split()
        flag=0
        for j in range(len(temp)):
            for k in range(len(room_no)):
                if temp[j]==room_no[k][0]:
                    guests_no.append(room_no[k][1])
                    flag=1
                    break
            if flag==1:
                break
        if flag==0:
            guests_no.append(2)
    hotel['guests_no']=guests_no

calc()

In [16]:
def pop_citybased(city,number):
    hotel['city']=hotel['city'].str.lower()
    popbased=hotel[hotel['city']==city.lower()]
    popbased=popbased[popbased['guests_no']==number].sort_values(by='starrating',ascending=False)
    popbased.drop_duplicates(subset='hotelcode',keep='first',inplace=True)
    if popbased.empty==True:
        print('Sorry No Hotels Available\n tune your constraints')
    else:
        return popbased[['hotelname','roomtype','guests_no','starrating','address','roomamenities','ratedescription']].head(10)
    
    

In [17]:
pop_citybased('London',4)

Unnamed: 0,hotelname,roomtype,guests_no,starrating,address,roomamenities,ratedescription
1440,Holiday Inn London - Heathrow T5,Family Room,4,4,Old Bath Road,Air conditioning: ;Alarm clock: ;Blackout curt...,"Room size: 25 m²/269 ft², Non-smoking, Separat..."
53496,Mercure London Hyde Park Hotel,Family Room,4,4,8-14 Talbot Square,Air conditioning: ;Desk: ;Free Wi-Fi in all ro...,"Room size: 31 m²/334 ft², Non-smoking, 1 singl..."
60734,Hallmark Hotel London Chigwell Prince Regent,Executive Family Room,4,4,Manor Road,Air conditioning: ;Carpeting: ;Closet: ;Clothe...,"Non-smoking, Shower and bathtub, 1 double bed"
107246,Radisson Blu Edwardian Kenilworth - Bloomsbury,Family Room 2 adults + 2 children,4,4,97 Great Russell Street,Air conditioning: ;Alarm clock: ;Bathrobes: ;C...,"Room size: 20 m²/215 ft², Non-smoking, Shower ..."
55913,Acorn Lodge Gatwick,Family Stay & Park 8 Days,4,4,"79 Massetts Road, Horley",Air conditioning: ;Carpeting: ;Closet: ;Coffee...,"Shower, 1 double bed and 1 bunk bed"
64171,Gatwick Cambridge Hotel,Family Room,4,4,"19 Bonehurst Road, Horley",Air conditioning: ;Alarm clock: ;Carpeting: ;C...,"Room size: 15 m²/161 ft², Non-smoking, Shower ..."
96611,Trumbles Gatwick B&B,Family Room (2 Adults and 2 Children) - Number 6,4,4,Stan Hill,Air conditioning: ;Alarm clock: ;Carpeting: ;C...,"Room size: 18 m²/194 ft², Shower and bathtub, ..."
97400,Heathrow/Windsor Marriott Hotel,"Deluxe Room, Guest room, 2 Double, Family Room",4,4,"Ditton Road, Langley",Air conditioning: ;Alarm clock: ;Carpeting: ;C...,"Room size: 30 m²/323 ft², Non-smoking, Shower ..."
65811,Atrium Hotel Heathrow,Family 1,4,4,Great south-west road Feltham,Additional toilet: ;Air conditioning: ;Alarm c...,"Room size: 28 m²/301 ft², Shower, 2 single bed..."
112259,The Level at Melia White House,The Level Family Room,4,4,Albany Street,Air conditioning: ;Bathrobes: ;Blackout curtai...,"Shower and bathtub, Executive lounge access, 1..."


In [18]:
hotel['roomamenities']=hotel['roomamenities'].str.replace(': ;',',')

In [19]:
def requirementbased(city,number,features):
    hotel['city']=hotel['city'].str.lower()
    hotel['roomamenities']=hotel['roomamenities'].str.lower()
    features=features.lower()
    features_tokens=word_tokenize(features)  
    sw = stopwords.words('english')
    lemm = WordNetLemmatizer()
    f1_set = {w for w in features_tokens if not w in sw}
    f_set=set()
    for se in f1_set:
        f_set.add(lemm.lemmatize(se))
    reqbased=hotel[hotel['city']==city.lower()]
    reqbased=reqbased[reqbased['guests_no']==number]
    reqbased=reqbased.set_index(np.arange(reqbased.shape[0]))
    l1 =[];l2 =[];cos=[];
    #print(reqbased['roomamenities'])
    for i in range(reqbased.shape[0]):
        temp_tokens=word_tokenize(reqbased['roomamenities'][i])
        temp1_set={w for w in temp_tokens if not w in sw}
        temp_set=set()
        for se in temp1_set:
            temp_set.add(lemm.lemmatize(se))
        rvector = temp_set.intersection(f_set)
        #print(rvector)
        cos.append(len(rvector))
    reqbased['similarity']=cos
    reqbased=reqbased.sort_values(by='similarity',ascending=False)
    reqbased.drop_duplicates(subset='hotelcode',keep='first',inplace=True)
    return reqbased[['hotelname','roomtype','guests_no','starrating','address','roomamenities','ratedescription','similarity']].head(10)

In [20]:
requirementbased('London',1,'I need a extra toilet and room should be completely air conditioned.I should have a bathrobe.')

Unnamed: 0,hotelname,roomtype,guests_no,starrating,address,roomamenities,ratedescription,similarity
310,The Colonnade London Hotel,Studio,1,4,2 Warrington Crescent,"additional toilet,air conditioning,alarm clock...","Room size: 28 m²/301 ft², Balcony/terrace, Non...",4
819,Amba Hotel Charing Cross,Studio King Suite,1,4,The Strand,"air conditioning,alarm clock,bathrobes,carpeti...","Room size: 34 m²/366 ft², Non-smoking, Shower ...",3
793,Holiday Inn Express Park Royal,2 Single Beds Non-smoking,1,3,Victoria RoadNorth Acton,"air conditioning,alarm clock,carpeting,closet,...","Room size: 25 m²/269 ft², Non-smoking, Shower,...",3
816,Simply Rooms & Suites Hotel,Deluxe Studio Suite,1,4,21 Avonmore Road,"additional toilet,air conditioning,cleaning pr...","Room size: 20 m²/215 ft², City view, Non-smoki...",3
912,The Cleveland Hotel,Single Room - Basic,1,4,39-40 Cleveland Square,"air conditioning,carpeting,closet,clothes rack...","Room size: 14 m²/151 ft², Non-smoking, Shower,...",3
408,South Point Suites - London Bridge,Deluxe Studio Suite,1,4,Bermondsey Street,"additional bathroom,additional toilet,air cond...","Room size: 45 m²/484 ft², 2 bathrooms, Shower,...",3
883,Angus Hotel,Single - Non-Smoking,1,2,31-32 Argyle square,"additional toilet,air conditioning,carpeting,c...","Street view, Non-smoking, Shower, 1 double bed",3
462,Pestana Chelsea Bridge Hotel And Spa,Accessible Twin Bedroom,1,4,354 Queenstown Road,"air conditioning,alarm clock,bathrobes,blackou...","Room size: 30 m²/323 ft², City view, Non-smoki...",3
494,Park Plaza London Waterloo,Studio King,1,4,6 Hercules Road,"air conditioning,alarm clock,bathrobes,blackou...","Room size: 24 m²/258 ft², City view, Non-smoki...",3
323,Park International Hotel,Standard Single Room,1,4,117-129 Cromwell Road,"additional toilet,air conditioning,blackout cu...","Room size: 11 m²/118 ft², Non-smoking, Shower,...",3


In [34]:
from math import sin, cos, sqrt, atan2, radians
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("api_code")
import requests
R=6373.0#Earth's Radius
sw = stopwords.words('english')
lemm = WordNetLemmatizer()
def hybrid(address,city,number,features):
    features=features.lower()
    features_tokens=word_tokenize(features)
    f1_set = {w for w in features_tokens if not w in sw}
    f_set=set()
    for se in f1_set:
        f_set.add(lemm.lemmatize(se))
    data = {
    'key': secret_value_0,
    'q': address,
    'format': 'json'}
    response = requests.get(url, params=data)
    dist=[]
    lat1,long1=response.json()[0]['lat'],response.json()[0]['lon']
    lat1=radians(float(lat1))
    long1=radians(float(long1))
    hybridbase=hotel[hotel['guests_no']==number]
    hybridbase['city']=hybridbase['city'].str.lower()
    hybridbase=hybridbase[hybridbase['city']==city.lower()]
    hybridbase.drop_duplicates(subset='hotelcode',inplace=True,keep='first')
    hybridbase=hybridbase.set_index(np.arange(hybridbase.shape[0]))
    for i in range(hybridbase.shape[0]):
        lat2=radians(hybridbase['latitude'][i])
        long2=radians(hybridbase['longitude'][i])
        dlon = long2 - long1
        dlat = lat2 - lat1
        a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
        c = 2 * atan2(sqrt(a), sqrt(1 - a))
        distance = R * c
        dist.append(distance)
    hybridbase['distance']=dist
    hybridbase=hybridbase.sort_values(by='distance',ascending=True)
    hybridbase=hybridbase.head(15)
    hybridbase=hybridbase.set_index(np.arange(hybridbase.shape[0]))
    coss=[]
    for i in range(hybridbase.shape[0]):
        temp_tokens=word_tokenize(hybridbase['roomamenities'][i])
        temp1_set={w for w in temp_tokens if not w in sw}
        temp_set=set()
        for se in temp1_set:
            temp_set.add(lemm.lemmatize(se))
        rvector = temp_set.intersection(f_set)
        coss.append(len(rvector))
    hybridbase['similarity']=coss
    return hybridbase.sort_values(by='similarity',ascending=False).head(10)
    
    
        

NameError: name 'user_secrets' is not defined

In [22]:
url = "https://us1.locationiq.com/v1/search.php"
hybrid("Big Ben,London",'London',4,'I need a extra toilet and room should be completely air conditioned.I should have a bathrobe.')

NameError: name 'hybrid' is not defined

In [23]:
hotel_cost.head()
#onsite rate is one important feature which could be useful to recommend
#we will drop the rest since it is present in other table and we are going to merge the 
hotel_cost=hotel_cost.drop(['id','refid','websitecode','dtcollected','ratedate','los','guests','roomtype','netrate','ratedescription','ratetype','sourceurl','roomamenities'
,'ispromo','closed','discount','promoname','status_code','taxstatus','taxtype','taxamount','proxyused','israteperstay','hotelblock','input_dtcollected'],axis=1)

In [24]:
hotel_cost.columns

Index(['hotelcode', 'onsiterate', 'currency', 'maxoccupancy',
       'mealinclusiontype'],
      dtype='object')

In [25]:
#To reccomend we are gonna check how much does the price vary from room to room if 
#the varience is small enough then it is better for them to recommend the hotel
hot=hotel_cost.groupby(['hotelcode','maxoccupancy'])

In [26]:
hotel_cost.sort_values(by=['onsiterate'],ascending=True)
hotel_cost=hotel_cost.drop_duplicates(subset=['hotelcode','maxoccupancy'],keep='first')

In [27]:
var=hot['onsiterate'].var().to_frame('varience')
l=[]
for i in range(hotel_cost.shape[0]):
    var1=var[var.index.get_level_values(0)==hotel_cost.iloc[i][0]]
    l.append(var1[var1.index.get_level_values(1)==hotel_cost.iloc[i][3]]['varience'][0])

KeyError: 0

In [28]:
hotel_cost['var']=l
hotel_cost=hotel_cost.fillna(0)
hotel_cost['mealinclusiontype']=hotel_cost['mealinclusiontype'].replace(0,'No Complimentary')

ValueError: Length of values (0) does not match length of index (17811)

In [29]:
hotel1=pd.merge(hotel,hotel_cost,left_on=['hotelcode','guests_no'],right_on=['hotelcode','maxoccupancy'],how='inner')

In [30]:
hotel1=hotel1.drop_duplicates(subset=['hotelcode','maxoccupancy'],keep='first')

In [31]:
hotel1.columns

Index(['hotelcode', 'roomamenities', 'roomtype', 'ratedescription',
       'hotelname', 'address', 'city', 'country', 'propertytype', 'starrating',
       'latitude', 'longitude', 'guests_no', 'onsiterate', 'currency',
       'maxoccupancy', 'mealinclusiontype'],
      dtype='object')

In [32]:
def pricing(address,city,number,features):
    h=hybrid(address,city,number,features)
    price_based=pd.merge(h,hotel_cost,left_on=['hotelcode','guests_no'],right_on=['hotelcode','maxoccupancy'],how='inner')
    del price_based['maxoccupancy']
    h=price_based.sort_values(by='var')
    return h.head()
    
    

In [32]:
pricing("Tower of London",'London',4,'I need an alarm clock and a kettle flask.')

NameError: name 'hybrid' is not defined