In [20]:
import pandas as pd
import data_utility as du
from textblob import TextBlob  # For sentiment analysis

In [21]:
# Load the hotel-review records through the data_utility helper module
hotel_reviews_df = du.get_hotel_reviews()

# Preview the first two rows as a quick sanity check of the load
hotel_reviews_df.head(n=2)

  


Unnamed: 0,id,dateAdded,dateUpdated,address,categories,primaryCategories,city,country,keys,latitude,...,reviews.dateSeen,reviews.rating,reviews.sourceURLs,reviews.text,reviews.title,reviews.userCity,reviews.userProvince,reviews.username,sourceURLs,websites
0,AWE2FvX5RxPSIh2RscTK,2018-01-18T18:43:12Z,2019-05-20T23:55:47Z,5620 Calle Real,"Hotels,Hotels and motels,Hotel and motel mgmt....",Accommodation & Food Services,Goleta,US,us/ca/goleta/5620callereal/-1127060008,34.44178,...,2018-01-03T00:00:00Z,3.0,https://www.tripadvisor.com/Hotel_Review-g3243...,"This hotel was nice and quiet. Did not know, t...",Best Western Plus Hotel,San Jose,UnitedStates,tatsurok2018,https://www.tripadvisor.com/Hotel_Review-g3243...,https://www.bestwestern.com/en_US/book/hotel-r...
1,AVwcj_OhkufWRAb5wi9T,2016-11-06T20:21:05Z,2019-05-20T23:31:56Z,5th And San Carlos PO Box 3574,"Hotels,Lodging,Hotel",Accommodation & Food Services,Carmel by the Sea,US,us/ca/carmelbythesea/5thandsancarlospobox3574/...,36.55722,...,2016-10-09T00:00:00Z,4.0,https://www.tripadvisor.com/Hotel_Review-g3217...,We stayed in the king suite with the separatio...,Clean rooms at solid rates in the heart of Carmel,San Francisco,CA,STEPHEN N,http://www.tripadvisor.com/Hotel_Review-g32172...,http://www.bestwestern.com


In [22]:
# Trim hotel_reviews_df down to the columns kept for the rest of the notebook
# by dropping the identifier, category, key, source-URL, review-date and
# reviewer-detail columns. `columns=` already selects the column axis, so no
# separate `axis` argument is needed.
hotel_reviews_modified_df = hotel_reviews_df.drop(
    columns=[
        'id',
        'dateUpdated',
        'categories',
        'primaryCategories',
        'reviews.sourceURLs',
        'sourceURLs',
        'keys',
        'reviews.date',
        'reviews.dateSeen',
        'reviews.userCity',
        'reviews.userProvince',
        'reviews.username',
    ]
)

# Preview the trimmed frame
hotel_reviews_modified_df.head(n=2)

Unnamed: 0,dateAdded,address,city,country,latitude,longitude,name,postalCode,province,reviews.rating,reviews.text,reviews.title,websites
0,2018-01-18T18:43:12Z,5620 Calle Real,Goleta,US,34.44178,-119.81979,Best Western Plus South Coast Inn,93117,CA,3.0,"This hotel was nice and quiet. Did not know, t...",Best Western Plus Hotel,https://www.bestwestern.com/en_US/book/hotel-r...
1,2016-11-06T20:21:05Z,5th And San Carlos PO Box 3574,Carmel by the Sea,US,36.55722,-121.92194,Best Western Carmel's Town House Lodge,93921,CA,4.0,We stayed in the king suite with the separatio...,Clean rooms at solid rates in the heart of Carmel,http://www.bestwestern.com


In [23]:
# Reformat the 'dateAdded' column with the data_utility helper; in the preview
# the ISO timestamps show up as plain YYYY-MM-DD dates afterwards.
hotel_reviews_modified_df = du.convert_date_format(
    hotel_reviews_modified_df,
    'dateAdded',
)
hotel_reviews_modified_df.head(n=2)

Unnamed: 0,dateAdded,address,city,country,latitude,longitude,name,postalCode,province,reviews.rating,reviews.text,reviews.title,websites
0,2018-01-18,5620 Calle Real,Goleta,US,34.44178,-119.81979,Best Western Plus South Coast Inn,93117,CA,3.0,"This hotel was nice and quiet. Did not know, t...",Best Western Plus Hotel,https://www.bestwestern.com/en_US/book/hotel-r...
1,2016-11-06,5th And San Carlos PO Box 3574,Carmel by the Sea,US,36.55722,-121.92194,Best Western Carmel's Town House Lodge,93921,CA,4.0,We stayed in the king suite with the separatio...,Clean rooms at solid rates in the heart of Carmel,http://www.bestwestern.com


In [24]:
# Normalise the review text: make every entry a lower-case string.
# Missing reviews are replaced with an empty string first, because a bare
# astype(str) would turn NaN into the literal text "nan", which the later
# tokenising step would then treat as a real one-word review.
# The vectorised .str.lower() replaces the per-row lambda.
hotel_reviews_modified_df['reviews.text'] = (
    hotel_reviews_modified_df['reviews.text']
    .fillna('')
    .astype(str)
    .str.lower()
)

In [25]:
# Split each review in 'reviews.text' into word tokens with TextBlob and
# store the result in a new 'tokenized_reviews_text' column.
hotel_reviews_modified_df['tokenized_reviews_text'] = (
    hotel_reviews_modified_df['reviews.text']
    .apply(lambda review: TextBlob(review).words)
)

# Preview the frame, which now ends with the tokenized_reviews_text column
hotel_reviews_modified_df.head(n=2)

Unnamed: 0,dateAdded,address,city,country,latitude,longitude,name,postalCode,province,reviews.rating,reviews.text,reviews.title,websites,tokenized_reviews_text
0,2018-01-18,5620 Calle Real,Goleta,US,34.44178,-119.81979,Best Western Plus South Coast Inn,93117,CA,3.0,"this hotel was nice and quiet. did not know, t...",Best Western Plus Hotel,https://www.bestwestern.com/en_US/book/hotel-r...,"[this, hotel, was, nice, and, quiet, did, not,..."
1,2016-11-06,5th And San Carlos PO Box 3574,Carmel by the Sea,US,36.55722,-121.92194,Best Western Carmel's Town House Lodge,93921,CA,4.0,we stayed in the king suite with the separatio...,Clean rooms at solid rates in the heart of Carmel,http://www.bestwestern.com,"[we, stayed, in, the, king, suite, with, the, ..."


In [26]:
# Show the welcome message supplied by the data_utility module
greeting = du.greet_user()
print(greeting)

Hello! Welcome to InnSight, your personal hotel recommender system. Please choose a state to get started.


In [27]:
# Print the distinct states available in the review data so the user can pick one
available_states = du.get_state(hotel_reviews_modified_df)
print(available_states)

['CA' 'KY' 'LA' 'CO' 'IL' 'IN' 'FL' 'AK' 'GA' 'AL' 'AZ' 'AR' 'OR' 'WA'
 'UT' 'TX' 'TN' 'SC' 'PA' 'OH' 'NY' 'NM' 'MD' 'MI' 'MS' 'MO' 'IA' 'VA'
 'WI' 'HI' 'ID' 'NV' 'WV' 'WY' 'KS' 'MN' 'NE' 'ND' 'DE' 'OK' 'NC' 'MT'
 'SD' 'RI' 'NJ' 'MA' 'NH' 'ME' 'VT' 'CT']


In [28]:
# Ask the user for a state. The state list printed earlier uses upper-case
# two-letter codes, so the entry is upper-cased; surrounding whitespace is
# stripped too, since an entry such as " co" could otherwise fail to match 'CO'.
state = input("Please enter a state: ").strip().upper()

# Echo the normalised choice back to the user
print("You entered:", state)

You entered: CO


In [16]:
# Print a heading, then the cities that du.get_cities returns for the chosen state
print("Here are the cities in", state, ":")
cities_in_state = du.get_cities(hotel_reviews_modified_df, state)
print(cities_in_state)

Here are the cities in LA :
['New Orleans' 'Denham Springs' 'Winnfield' 'Marrero' 'Many' 'Lafayette'
 'Vinton' 'Metairie' 'Monroe' 'Baton Rouge' 'Ruston' 'Alexandria'
 'Chalmette' 'Sulphur' 'Gretna' 'Lake Charles' 'Bossier City' 'Port Allen'
 'DeRidder' 'Prairieville' 'Houma']


In [29]:
# Ask for a city and print what du.recommend_top_hotels returns for it.
# Only surrounding whitespace is removed from the entry: the city names
# printed by the previous cell are mixed-case (e.g. 'Denham Springs'), so the
# casing is passed through exactly as typed.
city_name = input("Enter the city name: ").strip()
top_hotels = du.recommend_top_hotels(hotel_reviews_modified_df, city_name)
print("Top 5 Hotels in", city_name)
print(top_hotels)

Top 5 Hotels in Denham Springs
name
Hampton Inn Baton Rouge/Denham Springs    4.0
Name: reviews.rating, dtype: float64


In [30]:
# Analyze the sentiment of the reviews for one hotel chosen by the user.
# The entry is stripped of surrounding whitespace and upper-cased before it is
# handed to du.categorize_reviews, whose result is printed as-is.
hotel_name = input("Please enter a hotel name: ").strip().upper()
print("You chose", hotel_name)
print("Sentiment analysis of reviews for", hotel_name, ":")
print(du.categorize_reviews(hotel_reviews_modified_df, hotel_name))

You chose HAMPTON INN BATON ROUGE/DENHAM SPRINGS
Sentiment analysis of reviews for HAMPTON INN BATON ROUGE/DENHAM SPRINGS :
reviews.text
Good    1
Name: count, dtype: int64
