In [1]:
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.sentiment import SentimentIntensityAnalyzer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
from IPython.display import display
import ipywidgets as widgets
import numpy as np
from nltk.stem.wordnet import WordNetLemmatizer
from ast import literal_eval
from nltk.tokenize import word_tokenize
import nltk

Load the necessary libraries required for the notebook operations.

# Hotel Recommendation System  with Machine Learning (NLP)

Load and display the first few rows of the hotel reviews dataset.

In [2]:
df  = pd.read_csv('Hotel_Reviews.csv')
df.head()

Unnamed: 0,Hotel_Address,Additional_Number_of_Scoring,Review_Date,Average_Score,Hotel_Name,Reviewer_Nationality,Negative_Review,Review_Total_Negative_Word_Counts,Total_Number_of_Reviews,Positive_Review,Review_Total_Positive_Word_Counts,Total_Number_of_Reviews_Reviewer_Has_Given,Reviewer_Score,Tags,days_since_review,lat,lng
0,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,194,8/3/2017,7.7,Hotel Arena,Russia,I am so angry that i made this post available...,397,1403,Only the park outside of the hotel was beauti...,11,7,2.9,"[' Leisure trip ', ' Couple ', ' Duplex Double...",0 days,52.360576,4.915968
1,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,194,8/3/2017,7.7,Hotel Arena,Ireland,No Negative,0,1403,No real complaints the hotel was great great ...,105,7,7.5,"[' Leisure trip ', ' Couple ', ' Duplex Double...",0 days,52.360576,4.915968
2,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,194,7/31/2017,7.7,Hotel Arena,Australia,Rooms are nice but for elderly a bit difficul...,42,1403,Location was good and staff were ok It is cut...,21,9,7.1,"[' Leisure trip ', ' Family with young childre...",3 days,52.360576,4.915968
3,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,194,7/31/2017,7.7,Hotel Arena,United Kingdom,My room was dirty and I was afraid to walk ba...,210,1403,Great location in nice surroundings the bar a...,26,1,3.8,"[' Leisure trip ', ' Solo traveler ', ' Duplex...",3 days,52.360576,4.915968
4,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,194,7/24/2017,7.7,Hotel Arena,New Zealand,You When I booked with your company on line y...,140,1403,Amazing location and building Romantic setting,8,3,6.7,"[' Leisure trip ', ' Couple ', ' Suite ', ' St...",10 days,52.360576,4.915968


Standardize location names in the dataset to maintain consistency.

In [3]:
df['Hotel_Address'] =df['Hotel_Address'].str.replace("United Kingdom" , 'UK')
df['Countries'] = df['Hotel_Address'].apply(lambda x : x.split(' ')[-1])
print(df['Countries'].unique())

['Netherlands' 'UK' 'France' 'Spain' 'Italy' 'Austria']


Drop unnecessary columns from the dataset to focus on relevant data.

In [4]:
df.drop(['Additional_Number_of_Scoring',
       'Review_Date','Reviewer_Nationality',
       'Negative_Review', 'Review_Total_Negative_Word_Counts',
       'Total_Number_of_Reviews',
       'Review_Total_Positive_Word_Counts',
       'Total_Number_of_Reviews_Reviewer_Has_Given', 'Reviewer_Score',
       'days_since_review', 'lat', 'lng'],axis = 1,inplace=True)

In [5]:
def impute(column):
    column = column[0]
    if (type(column) != list):
        return ''.join(literal_eval(column))
    else:
        return column
    
df['Tags'] = df[['Tags']].apply(impute, axis = 1)
df.head()

  column = column[0]


Unnamed: 0,Hotel_Address,Average_Score,Hotel_Name,Positive_Review,Tags,Countries
0,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,7.7,Hotel Arena,Only the park outside of the hotel was beauti...,Leisure trip Couple Duplex Double Room Sta...,Netherlands
1,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,7.7,Hotel Arena,No real complaints the hotel was great great ...,Leisure trip Couple Duplex Double Room Sta...,Netherlands
2,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,7.7,Hotel Arena,Location was good and staff were ok It is cut...,Leisure trip Family with young children Dup...,Netherlands
3,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,7.7,Hotel Arena,Great location in nice surroundings the bar a...,Leisure trip Solo traveler Duplex Double Ro...,Netherlands
4,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,7.7,Hotel Arena,Amazing location and building Romantic setting,Leisure trip Couple Suite Stayed 2 nights ...,Netherlands


Convert text data in 'Countries' and 'Tags' columns to lowercase for uniform processing.

In [6]:
df['Countries'] = df['Countries'].str.lower()
df['Tags'] = df['Tags'].str.lower()



Definition of the basic hotel recommendation function.

In [7]:
def recommend_hotel(location, description):
    description = description.lower()
    word_tokenize(description)
    stop_words = stopwords.words('english')
    lemm = WordNetLemmatizer()
    filtered  = {word for word in description if not word in stop_words}
    filtered_set = set()
    for fs in filtered:
        filtered_set.add(lemm.lemmatize(fs))

    country = df[df['Countries']==location.lower()]
    country = country.set_index(np.arange(country.shape[0]))
    list1 = []; list2 = []; cos = [];
    for i in range(country.shape[0]):
        temp_token = word_tokenize(country["Tags"][i])
        temp_set = [word for word in temp_token if not word in stop_words]
        temp2_set = set()
        for s in temp_set:
            temp2_set.add(lemm.lemmatize(s))
        vector = temp2_set.intersection(filtered_set)
        cos.append(len(vector))
    country['similarity']=cos
    country = country.sort_values(by='similarity', ascending=False)
    country.drop_duplicates(subset='Hotel_Name', keep='first', inplace=True)
    country.sort_values('Average_Score', ascending=False, inplace=True)
    country.reset_index(inplace=True)
    return country[["Hotel_Name", "Average_Score", "Hotel_Address"]].head()

Example usage of the basic recommendation function.

In [8]:
recommend_hotel('Netherlands' , 'just a business trip')

Unnamed: 0,Hotel_Name,Average_Score,Hotel_Address
0,Waldorf Astoria Amsterdam,9.5,Herengracht 542 556 Amsterdam City Center 1017...
1,The Toren,9.4,Keizersgracht 164 Amsterdam City Center 1015 C...
2,Pillows Anna van den Vondel Amsterdam,9.4,Anna van den Vondelstraat 6 Oud West 1054 GZ A...
3,Canal House,9.3,Keizersgracht 148 Amsterdam City Center 1015 C...
4,Luxury Suites Amsterdam,9.3,Oudeschans 75 Amsterdam City Center 1011 KW Am...


Definition of the enhanced hotel recommendation function using advanced text processing.

In [9]:

def enhanced_recommend_hotel(location, description):
    description = description.lower()
    tokens = word_tokenize(description)
    
    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform([description] + df['Positive_Review'].tolist())
    
    cosine_similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()
    
    top_indices = cosine_similarities.argsort()[-5:][::-1]
    recommended_hotels = df.iloc[top_indices]
    
    return recommended_hotels[['Hotel_Name', 'Hotel_Address', 'Average_Score']]


Example usage of the enhanced recommendation function.

In [10]:
enhanced_recommend_hotel('Netherlands' , 'just a business trip')

Unnamed: 0,Hotel_Name,Hotel_Address,Average_Score
364178,Corus Hotel Hyde Park,Lancaster Gate Westminster Borough London W2 3...,7.6
277072,Novotel Barcelona City,Avenida Diagonal 201 Sant Mart 08018 Barcelona...,8.4
20309,The Ampersand Hotel,10 Harrington Road Kensington and Chelsea Lond...,9.1
255810,Best Western Plus Seraphine Hammersmith Hotel,84 King Street Hammersmith and Fulham London W...,7.8
110988,Radisson Blu Portman Hotel London,22 Portman Square Westminster Borough London W...,7.9


# Interactive Widget

In [11]:
import ipywidgets as widgets
from IPython.display import display, clear_output
import time 


available_locations = ['Netherlands', 'United Kingdom', 'France', 'Germany', 'Italy']


location_input = widgets.Dropdown(
    options=available_locations,
    value='Netherlands', 
    description='Location:',
    disabled=False,
)


description_input = widgets.Textarea(
    value='',
    description='Description:',
    placeholder='Describe what you are looking for in a hotel'
)


display(location_input, description_input)


submit_button = widgets.Button(
    description='Find Hotels',
    button_style='success',
    tooltip='Click to get hotel recommendations',
    icon='check'
)
display(submit_button)


output = widgets.Output()
display(output)


def on_submit_button_clicked(b):
    with output:
        clear_output(wait=True)
        print("Loading recommendations... Please wait.")
        
    
    time.sleep(1)  
    
    
    location = location_input.value
    description = description_input.value
    
    
    if location and description:
        recommended_hotels = enhanced_recommend_hotel(location, description)
        with output:
            clear_output(wait=True)
            display(recommended_hotels)
    else:
        with output:
            clear_output(wait=True)
            print("Please enter both a location and a description.")


submit_button.on_click(on_submit_button_clicked)


Dropdown(description='Location:', options=('Netherlands', 'United Kingdom', 'France', 'Germany', 'Italy'), val…

Textarea(value='', description='Description:', placeholder='Describe what you are looking for in a hotel')

Button(button_style='success', description='Find Hotels', icon='check', style=ButtonStyle(), tooltip='Click to…

Output()