In [2]:
import pandas as pd
import data_utility
from textblob import TextBlob  # For sentiment analysis

In [3]:
# Load the hotel review dataset through the project-local data_utility helper.
# NOTE(review): the loader's data source is not visible in this notebook - confirm where it reads from.
hotel_reviews_df = data_utility.get_hotel_reviews()
# Preview the first five rows; the preview shows one row per review, with the hotel's own fields repeated on each row.
hotel_reviews_df.head(5)

Unnamed: 0,id,dateAdded,dateUpdated,address,categories,primaryCategories,city,country,keys,latitude,...,reviews.dateSeen,reviews.rating,reviews.sourceURLs,reviews.text,reviews.title,reviews.userCity,reviews.userProvince,reviews.username,sourceURLs,websites
0,AWE2FvX5RxPSIh2RscTK,2018-01-18T18:43:12Z,2019-05-20T23:55:47Z,5620 Calle Real,"Hotels,Hotels and motels,Hotel and motel mgmt....",Accommodation & Food Services,Goleta,US,us/ca/goleta/5620callereal/-1127060008,34.44178,...,2018-01-03T00:00:00Z,3.0,https://www.tripadvisor.com/Hotel_Review-g3243...,"This hotel was nice and quiet. Did not know, t...",Best Western Plus Hotel,San Jose,UnitedStates,tatsurok2018,https://www.tripadvisor.com/Hotel_Review-g3243...,https://www.bestwestern.com/en_US/book/hotel-r...
1,AVwcj_OhkufWRAb5wi9T,2016-11-06T20:21:05Z,2019-05-20T23:31:56Z,5th And San Carlos PO Box 3574,"Hotels,Lodging,Hotel",Accommodation & Food Services,Carmel by the Sea,US,us/ca/carmelbythesea/5thandsancarlospobox3574/...,36.55722,...,2016-10-09T00:00:00Z,4.0,https://www.tripadvisor.com/Hotel_Review-g3217...,We stayed in the king suite with the separatio...,Clean rooms at solid rates in the heart of Carmel,San Francisco,CA,STEPHEN N,http://www.tripadvisor.com/Hotel_Review-g32172...,http://www.bestwestern.com
2,AVwcj_OhkufWRAb5wi9T,2016-11-06T20:21:05Z,2019-05-20T23:31:56Z,5th And San Carlos PO Box 3574,"Hotels,Lodging,Hotel",Accommodation & Food Services,Carmel by the Sea,US,us/ca/carmelbythesea/5thandsancarlospobox3574/...,36.55722,...,2016-10-09T00:00:00Z,3.0,https://www.tripadvisor.com/Hotel_Review-g3217...,"Parking was horrible, somebody ran into my ren...",Business,Prescott Valley,AZ,15Deborah,http://www.tripadvisor.com/Hotel_Review-g32172...,http://www.bestwestern.com
3,AVwcj_OhkufWRAb5wi9T,2016-11-06T20:21:05Z,2019-05-20T23:31:56Z,5th And San Carlos PO Box 3574,"Hotels,Lodging,Hotel",Accommodation & Food Services,Carmel by the Sea,US,us/ca/carmelbythesea/5thandsancarlospobox3574/...,36.55722,...,2016-10-31T00:00:00Z,5.0,https://www.tripadvisor.com/Hotel_Review-g3217...,Not cheap but excellent location. Price is som...,Very good,Guaynabo,PR,Wilfredo M,http://www.tripadvisor.com/Hotel_Review-g32172...,http://www.bestwestern.com
4,AVwcj_OhkufWRAb5wi9T,2016-11-06T20:21:05Z,2019-05-20T23:31:56Z,5th And San Carlos PO Box 3574,"Hotels,Lodging,Hotel",Accommodation & Food Services,Carmel by the Sea,US,us/ca/carmelbythesea/5thandsancarlospobox3574/...,36.55722,...,"2016-10-09T00:00:00Z,2016-03-27T00:00:00Z",2.0,https://www.tripadvisor.com/Hotel_Review-g3217...,If you get the room that they advertised on th...,Low chance to come back here,Reno,NV,Luc D,http://www.tripadvisor.com/Hotel_Review-g32172...,http://www.bestwestern.com


In [4]:
# Function to greet the user
def greet_user():
    """Return the welcome message that asks the user to pick a state."""
    welcome_message = "Hello! Welcome to InnSight, your personal hotel recommender system. Please choose a state to get started."
    return welcome_message

# Function to get the unique states
def get_state():
    """Return the distinct values of the 'province' column of hotel_reviews_df.

    Returns:
        The result of ``Series.unique()`` on that column (one entry per
        distinct state code).
    """
    province_column = hotel_reviews_df['province']
    return province_column.unique()

# Function to list the cities of a state
def get_cities(state):
    """Return the distinct city names recorded for one state.

    Args:
        state: State code compared for exact equality with the 'province'
            column (the comparison is case-sensitive).

    Returns:
        The distinct 'city' values of the rows whose 'province' equals
        ``state``; empty when no row matches.
    """
    in_state = hotel_reviews_df['province'] == state
    return hotel_reviews_df.loc[in_state, 'city'].unique()
        
# Function to filter hotels by city (and optionally by state)
def filter_hotels_by_city(city, state=None):
    """Return the review rows whose city matches ``city``, ignoring case.

    City names are not unique across states, so matching on the name alone
    mixes hotels from every state that has a city of that name. Pass
    ``state`` to restrict the result to a single state.

    Args:
        city: City name; compared case-insensitively with the 'city' column.
        state: Optional state code; compared case-insensitively with the
            'province' column. ``None`` (the default) keeps the original
            behaviour of matching on the city name only.

    Returns:
        The matching rows of hotel_reviews_df (empty when nothing matches).
        Rows with a missing city or province never match.
    """
    in_city = hotel_reviews_df['city'].str.lower() == city.lower()
    if state is None:
        return hotel_reviews_df[in_city]
    in_state = hotel_reviews_df['province'].str.lower() == state.lower()
    return hotel_reviews_df[in_city & in_state]

# Function to recommend top hotels
def recommend_top_hotels(city, top_n=5, state=None, min_reviews=1):
    """Return the best-rated hotels of a city as a Series of mean ratings.

    Hotels are ranked by mean 'reviews.rating'. Ties are broken by the number
    of ratings, so a hotel with many 5.0 reviews ranks above one with a
    single 5.0 review; remaining ties keep alphabetical order.

    Args:
        city: City name, matched case-insensitively by filter_hotels_by_city.
        top_n: Maximum number of hotels to return.
        state: Optional state code (case-insensitive). When given, only rows
            whose 'province' matches are ranked, which keeps same-named
            cities in other states out of the result.
        min_reviews: Minimum number of ratings a hotel needs to be ranked.
            The default of 1 excludes hotels that have no rating at all.

    Returns:
        A Series named 'reviews.rating', indexed by hotel name, holding the
        mean rating of at most ``top_n`` hotels in ranked order.
    """
    city_hotels = filter_hotels_by_city(city)
    if state is not None:
        # Filter here rather than relying on filter_hotels_by_city's signature.
        city_hotels = city_hotels[city_hotels['province'].str.upper() == state.upper()]

    ratings_by_hotel = city_hotels.groupby('name')['reviews.rating']
    stats = pd.DataFrame({
        'mean': ratings_by_hotel.mean(),
        'count': ratings_by_hotel.count(),  # counts non-missing ratings only
    })
    eligible = stats[stats['count'] >= min_reviews]
    ranked = eligible.sort_values(['mean', 'count'], ascending=False)
    return ranked['mean'].rename('reviews.rating').head(top_n)

# Function for sentiment analysis
def analyze_sentiment(text):
    """Label a review text according to its TextBlob polarity.

    Args:
        text: The review text. Non-string values (for example the NaN that
            pandas stores for a missing review) are accepted.

    Returns:
        'Better' when polarity > 0.5, 'Good' when 0 < polarity <= 0.5,
        otherwise 'Neutral or Negative'. Non-string input is labelled
        'Neutral or Negative'.
    """
    if not isinstance(text, str):
        # TextBlob raises TypeError for non-string input, which would abort a
        # whole Series.apply() over reviews as soon as one text is missing.
        return 'Neutral or Negative'

    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0.5:
        return 'Better'
    if polarity > 0:
        return 'Good'
    return 'Neutral or Negative'

# Function to categorize reviews of a hotel
def categorize_reviews(hotel_name, city=None):
    """Count the sentiment labels of the reviews of matching hotels.

    Args:
        hotel_name: Text looked up in the 'name' column as a literal,
            case-insensitive substring. It is not treated as a regular
            expression, so names containing characters such as '(' or '+'
            match as written.
        city: Optional city name (case-insensitive). ``None`` (the default)
            searches every city, so a chain name matches all its branches.

    Returns:
        A Series mapping each sentiment label produced by analyze_sentiment
        to its number of reviews; empty when nothing matches. Reviews with
        missing text are skipped.
    """
    matches = hotel_reviews_df['name'].str.contains(hotel_name, case=False, na=False, regex=False)
    if city is not None:
        matches = matches & (hotel_reviews_df['city'].str.lower() == city.lower())

    # Missing texts carry no sentiment and would make TextBlob raise.
    review_texts = hotel_reviews_df.loc[matches, 'reviews.text'].dropna()
    return review_texts.apply(analyze_sentiment).value_counts()

# Function to get the average rating of a hotel
def get_average_rating(hotel_name, city=None):
    """Return the mean 'reviews.rating' of the hotels matching ``hotel_name``.

    Args:
        hotel_name: Text looked up in the 'name' column as a literal,
            case-insensitive substring (not a regular expression).
        city: Optional city name (case-insensitive). ``None`` (the default)
            averages over matching hotels in every city.

    Returns:
        The mean rating as a float, or NaN when no rated review matches.
    """
    matches = hotel_reviews_df['name'].str.contains(hotel_name, case=False, na=False, regex=False)
    if city is not None:
        matches = matches & (hotel_reviews_df['city'].str.lower() == city.lower())
    return hotel_reviews_df.loc[matches, 'reviews.rating'].mean()
    

# Function to check if a hotel is in the top 5
def is_hotel_in_top5(city, hotel_name):
    """Tell whether ``hotel_name`` is one of the top-ranked hotels of ``city``.

    The name is compared case-insensitively with the hotel names returned by
    recommend_top_hotels(city), so the result does not depend on how the
    caller capitalised the name.

    Args:
        city: City name passed to recommend_top_hotels.
        hotel_name: Full hotel name; surrounding whitespace is ignored.

    Returns:
        True when a top-ranked hotel has that name, otherwise False.
    """
    wanted = hotel_name.strip().casefold()
    top_hotels = recommend_top_hotels(city)
    return any(str(listed).casefold() == wanted for listed in top_hotels.index)

# Function to check duplicate cities in the dataset
def check_duplicate_cities():
    """Count city names that are repeated across states.

    hotel_reviews_df holds one row per review, so counting duplicated 'city'
    values on the raw rows only measures how many reviews share a city. The
    rows are therefore reduced to distinct (city, province) pairs first.

    Returns:
        The number of (city, province) pairs whose city name already occurred
        with another province; 0 when every city name belongs to one state.
    """
    city_state_pairs = hotel_reviews_df[['city', 'province']].drop_duplicates()
    return city_state_pairs['city'].duplicated().sum()
 
# Function to check duplicate hotel names in cities and states
def check_duplicate_hotels():
    """Count hotel names that are repeated across cities and states.

    hotel_reviews_df holds one row per review, so counting duplicated 'name'
    values on the raw rows only measures how many reviews each hotel has. The
    rows are therefore reduced to distinct (name, city, province) locations
    first.

    Returns:
        The number of locations whose hotel name already occurred at another
        location; 0 when every hotel name is used in a single place.
    """
    hotel_locations = hotel_reviews_df[['name', 'city', 'province']].drop_duplicates()
    return hotel_locations['name'].duplicated().sum()


In [5]:
# Greet the user: print the welcome message, which asks them to choose a state.
print(greet_user())

Hello! Welcome to InnSight, your personal hotel recommender system. Please choose a state to get started.


In [6]:
# Show the distinct state codes found in the 'province' column so the user can see the valid choices.
print(get_state())


['CA' 'KY' 'LA' 'CO' 'IL' 'IN' 'FL' 'AK' 'GA' 'AL' 'AZ' 'AR' 'OR' 'WA'
 'UT' 'TX' 'TN' 'SC' 'PA' 'OH' 'NY' 'NM' 'MD' 'MI' 'MS' 'MO' 'IA' 'VA'
 'WI' 'HI' 'ID' 'NV' 'WV' 'WY' 'KS' 'MN' 'NE' 'ND' 'DE' 'OK' 'NC' 'MT'
 'SD' 'RI' 'NJ' 'MA' 'NH' 'ME' 'VT' 'CT']


In [7]:
# Let the user choose a state.
# Whitespace is stripped before upper-casing because get_cities() compares the
# state code exactly, so an entry such as "ky " would otherwise match nothing.
state = input("Please enter a state: ").strip().upper()
if state not in set(get_state()):
    # Warn instead of raising so the notebook keeps running; later cells will
    # simply show empty results for an unknown state.
    print("Warning:", state, "is not a state in the dataset.")
print(state)


KY


In [8]:
# Echo the chosen state back to the user.
print("You entered:", state)

You entered: KY


In [9]:
# Show the distinct cities recorded for the chosen state (get_cities matches the state code exactly).
print("Here are the cities in", state, ":")
print(get_cities(state))

Here are the cities in KY :
['Lexington' 'Russell Springs' 'Hopkinsville' 'Pikeville' 'Louisville'
 'Irvine' 'Bardstown' 'Bowling Green' 'Franklin' 'Owensboro' 'Somerset'
 'Henderson' 'Corbin' 'Florence' 'Carrollton' 'Murray' 'Danville'
 'Richmond' 'Lebanon' 'Erlanger' 'Ashland' 'Cave City' 'Horse Cave']


In [10]:
# Let the user choose a city.
city_input = input("Please enter a city: ").strip()
# Resolve the entry case-insensitively to the dataset's own spelling:
# str.title() alone would turn "Carmel by the Sea" into "Carmel By The Sea".
cities_by_lowercase = {
    name.lower(): name for name in get_cities(state) if isinstance(name, str)
}
city = cities_by_lowercase.get(city_input.lower(), city_input.title())
if city_input.lower() not in cities_by_lowercase:
    # Warn instead of raising so the notebook keeps running.
    print("Warning:", city, "is not a city listed for", state)
print(city)


Lexington


In [11]:
# Filter hotels by city, keeping only rows of the state chosen earlier:
# filter_hotels_by_city() matches on the city name alone, and the same city
# name can occur in several states.
city_hotels = filter_hotels_by_city(city).loc[lambda rows: rows['province'] == state]
print("Here are the hotels in", city, ":")
print(city_hotels['name'].unique())

Here are the hotels in Lexington :
['21c Museum Hotel Lexington' 'Microtel Inn Suites By Wyndham Lexington'
 'Hampton Inn Lexington Medical Center' "America's Best Value Inn"
 'Motel 6' 'Courtyard Lexington North'
 'Residence Inn Lexington South Hamburg Place' 'Quality Inn' 'Comfort Inn'
 'Comfort Suites-lexington' 'Harbourfront Motel'
 'Residence Inn Lexington North'
 'Home2 Suites by Hilton Lexington University / Medical Center'
 'Sleep Inn' 'Comfort Suites Lexington']


In [12]:
# Recommend the top five hotels in the chosen city, ranked by mean review rating.
# recommend_top_hotels() matches the city by name only and defaults to five results.
top_hotels = recommend_top_hotels(city)
print("Top 5 hotels in", city, ":")
print(top_hotels)


Top 5 hotels in Lexington :
name
Comfort Inn                                 5.0
Comfort Suites Lexington                    5.0
Comfort Suites-lexington                    5.0
Hampton Inn Lexington Medical Center        5.0
Microtel Inn Suites By Wyndham Lexington    5.0
Name: reviews.rating, dtype: float64


In [13]:
# Analyze sentiment of reviews for a hotel.
hotel_name = input("Please enter a hotel name: ").strip().upper()
if hotel_name:
    print("You chose", hotel_name)
    print("Sentiment analysis of reviews for", hotel_name, ":")
    print(categorize_reviews(hotel_name))
else:
    # categorize_reviews() does a substring search, and the empty string is a
    # substring of every name, so a blank entry would analyse every review.
    print("No hotel name entered; skipping sentiment analysis.")

You chose SLEEP INN
Sentiment analysis of reviews for SLEEP INN :
reviews.text
Good                   48
Better                  6
Neutral or Negative     3
Name: count, dtype: int64
