# Hotel Review Summary Notebook

This notebook computes per-hotel summary statistics of review scores (average, standard deviation, skew, and number of reviews) for various hotels that were reviewed on Booking.com.

In [1]:
# Imports
import numpy as np
import pandas as pd

First, we need to load the reviews into a DataFrame.

In [2]:
# Load the cleaned review data. The path is relative, so the CSV is read
# from the notebook's working directory.
reviews = pd.read_csv('Hotel_Reviews_clean.csv')

In [3]:
# Preview the first rows to check which columns were loaded.
reviews.head()

Unnamed: 0,hotel_address,hotel_name,reviewer_origin,number_user_reviews,score,hotel_latitude,hotel_longitude,review_text,sentiment,visit_length,trip_type,visitor_type,city
0,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,Hotel Arena,Russia,7,2.9,52.360576,4.915968,Only the park outside of the hotel was beauti...,0,6.0,Leisure,Couple,Amsterdam
1,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,Hotel Arena,Ireland,7,7.5,52.360576,4.915968,No real complaints the hotel was great great ...,1,4.0,Leisure,Couple,Amsterdam
2,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,Hotel Arena,Australia,9,7.1,52.360576,4.915968,Location was good and staff were ok It is cut...,1,3.0,Leisure,Family with young children,Amsterdam
3,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,Hotel Arena,United Kingdom,1,3.8,52.360576,4.915968,Great location in nice surroundings the bar a...,0,3.0,Leisure,Solo traveler,Amsterdam
4,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,Hotel Arena,New Zealand,3,6.7,52.360576,4.915968,Amazing location and building Romantic settin...,1,2.0,Leisure,Couple,Amsterdam


In [4]:
# Bucket the reviews by hotel address; this grouping is reused by the
# aggregation cells that follow.
group_by_hotel = reviews.groupby(by="hotel_address")

In [5]:
# Average review score per hotel.
# Select 'score' *before* aggregating: calling .mean() on the whole grouped
# frame also tries to average the text columns (hotel_name, review_text, ...),
# which raises a TypeError on pandas >= 2.0 and wastes work on older versions.
averageScores = group_by_hotel['score'].mean()
averageScores.head()

hotel_address
 s Gravesandestraat 55 Oost 1092 AA Amsterdam Netherlands                                 7.836296
1 15 Templeton Place Earl s Court Kensington and Chelsea London SW5 9NB United Kingdom    8.602827
1 2 Serjeant s Inn Fleet Street City of London London EC4Y 1LL United Kingdom             9.298552
1 3 Queens Garden Westminster Borough London W2 3BA United Kingdom                        7.711469
1 3 Rue d Argentine 16th arr 75116 Paris France                                           8.848571
Name: score, dtype: float64

In [6]:
# Standard deviation of review scores per hotel.
# Select 'score' *before* aggregating: calling .std() on the whole grouped
# frame also visits the text columns (hotel_name, review_text, ...), which
# raises a TypeError on pandas >= 2.0 and wastes work on older versions.
standard_deviations = group_by_hotel['score'].std()
standard_deviations.head()

hotel_address
 s Gravesandestraat 55 Oost 1092 AA Amsterdam Netherlands                                 1.896516
1 15 Templeton Place Earl s Court Kensington and Chelsea London SW5 9NB United Kingdom    1.417423
1 2 Serjeant s Inn Fleet Street City of London London EC4Y 1LL United Kingdom             0.924214
1 3 Queens Garden Westminster Borough London W2 3BA United Kingdom                        1.876015
1 3 Rue d Argentine 16th arr 75116 Paris France                                           1.058959
Name: score, dtype: float64

In [7]:
# Skew of the review scores per hotel, indicating the direction in which
# opinion leans for each hotel.
# Select 'score' *before* aggregating: calling .skew() on the whole grouped
# frame also visits the text columns (hotel_name, review_text, ...), which
# raises a TypeError on pandas >= 2.0 and wastes work on older versions.
skewedness = group_by_hotel['score'].skew()
skewedness.head()

hotel_address
 s Gravesandestraat 55 Oost 1092 AA Amsterdam Netherlands                                -0.793030
1 15 Templeton Place Earl s Court Kensington and Chelsea London SW5 9NB United Kingdom   -1.166941
1 2 Serjeant s Inn Fleet Street City of London London EC4Y 1LL United Kingdom            -2.205991
1 3 Queens Garden Westminster Borough London W2 3BA United Kingdom                       -0.672977
1 3 Rue d Argentine 16th arr 75116 Paris France                                          -1.316812
Name: score, dtype: float64

In [9]:
# Number of reviews per hotel (rows with a non-null score in each group).
counts = group_by_hotel['score'].count()
counts.head()

hotel_address
 s Gravesandestraat 55 Oost 1092 AA Amsterdam Netherlands                                  405
1 15 Templeton Place Earl s Court Kensington and Chelsea London SW5 9NB United Kingdom     566
1 2 Serjeant s Inn Fleet Street City of London London EC4Y 1LL United Kingdom             1036
1 3 Queens Garden Westminster Borough London W2 3BA United Kingdom                        1770
1 3 Rue d Argentine 16th arr 75116 Paris France                                             35
Name: score, dtype: int64

In [10]:
# Label the Series so it becomes the 'standard_deviation' column when combined.
standard_deviations = standard_deviations.rename('standard_deviation')

In [11]:
# Label the Series so it becomes the 'average_score' column when combined.
averageScores = averageScores.rename('average_score')

In [12]:
# Label the Series so it becomes the 'skew' column when combined.
skewedness = skewedness.rename('skew')

In [13]:
# Label the Series so it becomes the 'number_of_reviews' column when combined.
counts = counts.rename('number_of_reviews')

In [14]:
# Place the per-hotel statistics side by side: each Series contributes one
# column (labelled by its .name) and rows align on the hotel_address index.
hotelReviewSummary = pd.concat(
    [averageScores, standard_deviations, skewedness, counts],
    axis='columns',
)
hotelReviewSummary.head()

Unnamed: 0_level_0,average_score,standard_deviation,skew,number_of_reviews
hotel_address,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
s Gravesandestraat 55 Oost 1092 AA Amsterdam Netherlands,7.836296,1.896516,-0.79303,405
1 15 Templeton Place Earl s Court Kensington and Chelsea London SW5 9NB United Kingdom,8.602827,1.417423,-1.166941,566
1 2 Serjeant s Inn Fleet Street City of London London EC4Y 1LL United Kingdom,9.298552,0.924214,-2.205991,1036
1 3 Queens Garden Westminster Borough London W2 3BA United Kingdom,7.711469,1.876015,-0.672977,1770
1 3 Rue d Argentine 16th arr 75116 Paris France,8.848571,1.058959,-1.316812,35


In [15]:
# Order hotels from best to worst average score. The ordering uses the mean
# alone; number_of_reviews is not taken into account.
descending = hotelReviewSummary.sort_values('average_score', ascending=False)
descending.head()

Unnamed: 0_level_0,average_score,standard_deviation,skew,number_of_reviews
hotel_address,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
15 Place Vend me 1st arr 75001 Paris France,9.725,0.698477,-4.66359,28
Elisabets 11 Ciutat Vella 08001 Barcelona Spain,9.718937,0.499006,-4.095941,301
41 Buckingham Palace Road Westminster Borough London SW1W 0PS United Kingdom,9.71165,0.590497,-3.275412,103
4 rue d Alger 1st arr 75001 Paris France,9.688525,0.505008,-2.45431,61
19 Boulevard De La Tour Maubourg 7th arr 75007 Paris France,9.67193,0.538103,-2.278978,57


In [16]:
# Order hotels from worst to best average score. The ordering uses the mean
# alone; number_of_reviews is not taken into account.
ascending = hotelReviewSummary.sort_values('average_score', ascending=True)
ascending.head()

Unnamed: 0_level_0,average_score,standard_deviation,skew,number_of_reviews
hotel_address,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Viale Bligny 56 Milan City Center 20136 Milan Italy,5.121538,1.766396,0.298663,65
1 5 Passage Ruelle 18th arr 75018 Paris France,5.852632,2.267247,0.070206,19
167 rue de Rome 17th arr 75017 Paris France,5.864516,2.179269,-0.054968,62
Ferdinand Bolstraat 194 Oud Zuid 1072 LW Amsterdam Netherlands,6.009465,1.993883,0.152158,243
10 rue Gager Gabillot 15th arr 75015 Paris France,6.32973,2.250045,-0.087228,37


In [17]:
# Order hotels by how many reviews they received, most-reviewed first.
most_reviewed = hotelReviewSummary.sort_values('number_of_reviews', ascending=False)
most_reviewed.head()

Unnamed: 0_level_0,average_score,standard_deviation,skew,number_of_reviews
hotel_address,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
163 Marsh Wall Docklands Tower Hamlets London E14 9SJ United Kingdom,6.827622,2.025071,-0.234612,4786
372 Strand Westminster Borough London WC2R 0JJ United Kingdom,8.128696,1.563818,-0.798989,4255
Westminster Bridge Road Lambeth London SE1 7UT United Kingdom,8.652866,1.494469,-1.375721,4169
Scarsdale Place Kensington Kensington and Chelsea London W8 5SY United Kingdom,8.094575,1.551246,-0.722745,3576
7 Pepys Street City of London London EC3N 4AF United Kingdom,8.661931,1.438212,-1.319815,3210


In [18]:
# Load the per-hotel metadata and key it by address, matching the
# hotel_address index of the review summary so the two can be joined.
hotelInfo = pd.read_csv('hotel_info.csv').set_index('hotel_address')
hotelInfo.head()

Unnamed: 0_level_0,name,city,latitude,longitude
hotel_address,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
8 18 London Bridge St Southwark London SE1 9SG United Kingdom,London Bridge Hotel,London,51.505219,-0.088166
31 35 Langham Street Westminster Borough London W1W 6BU United Kingdom,Grange Langham Court Hotel,London,51.518908,-0.142196
38 rue de l Echiquier 10th arr 75010 Paris France,H tel L Echiquier Op ra Paris MGallery by Sofitel,Paris,48.871989,2.349291
Corso Giacomo Matteotti 4 6 Milan City Center 20121 Milan Italy,Boscolo Milano Autograph Collection,Milan,45.46676,9.194293
12 Osborn Street Aldgate East Tower Hamlets London E1 6TE United Kingdom,Arbor City,London,51.516675,-0.06979


In [19]:
# Attach the hotel metadata to each hotel's statistics. This is a left join
# on the shared hotel_address index, so every summarized hotel is kept.
all_info = hotelReviewSummary.join(hotelInfo, how='left')
all_info.head()

Unnamed: 0_level_0,average_score,standard_deviation,skew,number_of_reviews,name,city,latitude,longitude
hotel_address,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
s Gravesandestraat 55 Oost 1092 AA Amsterdam Netherlands,7.836296,1.896516,-0.79303,405,Hotel Arena,Amsterdam,52.360576,4.915968
1 15 Templeton Place Earl s Court Kensington and Chelsea London SW5 9NB United Kingdom,8.602827,1.417423,-1.166941,566,K K Hotel George,London,51.491888,-0.194971
1 2 Serjeant s Inn Fleet Street City of London London EC4Y 1LL United Kingdom,9.298552,0.924214,-2.205991,1036,Apex Temple Court Hotel,London,51.513734,-0.108751
1 3 Queens Garden Westminster Borough London W2 3BA United Kingdom,7.711469,1.876015,-0.672977,1770,The Park Grand London Paddington,London,51.514218,-0.180903
1 3 Rue d Argentine 16th arr 75116 Paris France,8.848571,1.058959,-1.316812,35,Monhotel Lounge SPA,Paris,48.874348,2.289733


In [20]:
# Save the combined per-hotel table to the working directory; the
# hotel_address index is written out as the first CSV column.
all_info.to_csv('hotelReviewSummary.csv')