# Scraping Hotel Ratings on Booking # 

In this homework we will practice web scraping on the following [site](https://www.booking.com/searchresults.html?label=gen173nr-1DCAEoggJCAlhYSDNYBHIFdXNfbWGIAQGYATG4AQfIAQ3YAQPoAQH4AQKSAgF5qAID&sid=991709bb93b40898e2ebcc9f2e20ade5&class_interval=1&dest_id=20088325&dest_type=city&from_sf=1&group_adults=2&group_children=0&label_click=undef&nflt=di%3D929%3B&no_rooms=1&pop_filter_id=di-929&pop_filter_pos=0&pop_filter_rank=2&raw_dest_type=city&room1=A%2CA&sb_price_type=total&search_selected=1&src=index&src_elem=sb&ss=New%20York%2C%20New%20York%20State%2C%20USA&ss_raw=new%20york&ssb=empty). Let's get some basic information for **each hotel in Manhattan (follow the above link) **.
On each hotel page, scrape the following information: 
1. Hotel Name
2. Class of Rating (Wonderful/Excellent/Very Good/Good)
3. Rating Score

** Save the data in "hotel_information.csv" in the following format: hotel_name, class_rating, rating **

**(4 pts)**

In [1]:
import json
import requests
from bs4 import BeautifulSoup
import csv
import re

In [58]:
def web_scraping(response):
    soup = BeautifulSoup(response, 'lxml')
    containers = soup.findAll("div",{"class": re.compile("sr_item_default")})
    for container in containers:
        hotel_name = container.find("span",{"class":"sr-hotel__name"}).text.strip()
        try:
            class_of_rating = container.find("span",{"class":"review-score-widget__text"})["aria-label"][6:]
        except:
            class_of_rating = "N/A"
        try:
            rating_score = container.find("span",{"class":"review-score-badge"}).text.strip()
        except:
            rating_score = "N/A"
        #print(hotel_name + "," + class_of_rating + "," + rating_score)
        f.write(hotel_name.replace(",","|") + "," + class_of_rating + "," + rating_score +"\n")
        #print(class_of_rating)
    page = soup.find("div",{"class":"results-paging"})
    if page.find("span",{"class":"paging-end"}):
        return None
    else:
        next_page = page.find("a",{"class":re.compile("paging-next")})["href"]
    #print(next_page)
    response_next_page = requests.get(next_page)
    response_next_page = response_next_page.text.encode("utf-8")
    return web_scraping(response_next_page)
#web_scraping(response)

In [59]:
filename = "hotel_information.csv"
f = open(filename, "w")
headers = "hotel_name, class_of_rating, rating_score\n"
f.write(headers)
response = requests.get("https://www.booking.com/searchresults.html?label=gen173nr-1DCAEoggJCAlhYSDNYBHIFdXNfbWGIAQGYATG4AQfIAQ3YAQPoAQH4AQKSAgF5qAID&sid=991709bb93b40898e2ebcc9f2e20ade5&class_interval=1&dest_id=20088325&dest_type=city&from_sf=1&group_adults=2&group_children=0&label_click=undef&nflt=di%3D929%3B&no_rooms=1&pop_filter_id=di-929&pop_filter_pos=0&pop_filter_rank=2&raw_dest_type=city&room1=A%2CA&sb_price_type=total&search_selected=1&src=index&src_elem=sb&ss=New%20York%2C%20New%20York%20State%2C%20USA&ss_raw=new%20york&ssb=empty")
response = response.text.encode("utf-8")
web_scraping(response)
f.close()

Now let's scrape some reviews. You can see all review information by going to the hotel page and accessing on “see all reviews” and then view the page sourceFor each review of each each hotel in Manhattan you are to scrape the following attributes: 
1. Hotel name
2. Reviewer name
3. Number of helpful votes (if available, otherwise None)
4. Date
5. Review Rating
6. Negative Review (if available, otherwise None)
7. Positive Review (if available, otherwise None)

Note that you will also need the hotel's name!! Also, some reviews may not have all attributes so then just add None in the corresponding column. 

** Save the data in "review_ratings.csv" in the following format: hotel_name, reviewer_name, num_help_votes, date, review_rating, neg_review, pos_review **

**(4 pts)**

In [7]:
'''
Get all the hotels' urls
'''
hotel_urls = []
def get_urls(response):
    soup = BeautifulSoup(response, 'lxml')
    containers = soup.findAll("div",{"class": re.compile("sr_item_default")})
    for container in containers:
        hotel_url = container.find("a",{"class":re.compile("sr_hotel_preview_track")})["href"].strip()
        hotel_url = "https://www.booking.com" + hotel_url
        print(hotel_url)
        hotel_urls.append(hotel_url)
    page = soup.find("div",{"class":"results-paging"})
    if page.find("span",{"class":"paging-end"}):
        return None
    else:
        next_page = page.find("a",{"class":re.compile("paging-next")})["href"]
    #print(next_page)
    response_next_page = requests.get(next_page)
    response_next_page = response_next_page.text.encode("utf-8")
    return get_urls(response_next_page)
get_urls(response)

https://www.booking.com/hotel/us/tribeca-lofts.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/hotel-lower-east-side-new-york.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/the-standard-new-york.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/ultra-luxurious-designer-large-appartment-top-floor-fitness-doorman-terrace-j.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/radio-city-apartments.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/50-bowery.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/freehand-new-york.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/archer-new-york.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/aka-times-square.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/new-yorker-ramada-plaza.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/times-square-lux-condo.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/gershwin.h

https://www.booking.com/hotel/us/towneplace-suites-by-marriott-new-york-manhattan-times-square.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/beautiful-newly-renovated-3-bedroom-apartment.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/doubletree-by-hilton-new-york-times-square-west.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/langham-place.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/trump-international-new-york.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/spectacular-duplex-4-bedroom-3-bath-lincoln-center.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/citizenm-new-york-times-square-new-york.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/residence-inn-new-york-manhattan-times-square.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/the-standard-new-york.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/the-watson.html?from_hc_img=1#hotelTmpl
https://www

https://www.booking.com/hotel/us/sohotel-new-york-new-york.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/even-hotels-new-york-midtown-east.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/city-club.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/beekman-tower-new-york1.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/mott-street-apt-1a.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/east-28th-street-apt-13.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/east-28th-street-apt-22.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/east-28th-street-apt-17.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/east-28th-street-apt-23.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/west-46th-street-apt-4c.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/thompson-street-apt-4d.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/west-89th-street-apt-2f.html?

https://www.booking.com/hotel/us/east-15th-street-apt-2f.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/sullivan-street-west-3rd-apt-4r.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/east-37th-street-lexington-ave-apt-4f.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/east-37th-street-lexington-ave-apt-3r.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/east-37th-street-lexington-ave-apt-2r.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/east-37th-street-lexington-ave-apt-1r.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/west-12th-street-apt-3b.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/delancey-st-apt-4.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/west-12th-steet-apt-4b.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/elizabeth-street-apt-18.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/elizabeth-street-apt-1.html?from_hc_img=1#hotelTmp

https://www.booking.com/hotel/us/apt-3e.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/dream-new-york.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/east-85th-street-apt-2.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/east-82nd-street-apt-5c.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/east-51st-street-apt-4h.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/east-85th-street-apt-1d.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/gracious-apartment-in-midtown-west.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/302-19.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/east-52nd-street-apt-1b.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/chic-and-beautiful-home-center-of-the-world-nyc.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/373.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/night-hotel-times-square.html?from_hc_img=1#hotelT

https://www.booking.com/hotel/us/holiday-inn-express-new-york-city-times-square.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/innside-by-melia-manhattan.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/alexander.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/leo-house.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/nyc-apartment.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/luxurious-2-bedroom-apartment-lincoln-center.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/spacious-1-bedroom-with-roofdeck-in-an-elevator-building.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/the-kitano-new-york.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/nyc-penthouse.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/329-east-apartment-232480-apts.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/mela.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/fairf

https://www.booking.com/hotel/us/metro.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/two-bedroom-apartment-on-82nd-street.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/pasadena-vacation-rental.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/nobleden.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/cosy-1-bed-apartment-midtown-west-near-hells-kitchen-amp-macy-39-s.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/andaz-wall-street.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/ci-times-square.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/cassa-45-by-bridgestreet.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/chelsea-savoy.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/james-new-york.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/beauty-near-times-square.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/gorgeous-4-bedroom-loft-in-m

https://www.booking.com/hotel/us/bogart-street.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/91.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/upper-west-side-two-bedroom.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/apartment-new-york-3.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/the-paul-an-ascend-collection-member-new-york.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/the-mansfield.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/57.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/ink-48.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/jazz-on-columbus-circle-hostel.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/nyc-townhouse.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/the-nomad-suites.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/executive-class-at-mts.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/ap

https://www.booking.com/hotel/us/modern-prewar-midtown-2-bedroom-apt.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/mid-town-east-28th-street-apartments.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/mayfair-new-york.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/spacious-studio-in-an-elevator-doorman-building.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/the-sherry-netherland.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/beautiful-nyc-brownstone.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/times-square-perfect-home.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/beautiful-midtown-studio.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/14d-upper-west-side-columbus-avenue-and-97th-st.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/gatsby-hotel.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/apartment-midtown-garden.html?from_hc_img=1#hotelT

https://www.booking.com/hotel/us/jun-newyork-mid-town-new-york.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/309.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/comfort-inn-lower-east-side-new-york.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/renovated-broadway-studio.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/royal-park-nyc.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/3-west-club.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/central-park-apartment.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/amazing-studio-theater-district.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/two-bedroom-chelsea-west-village-apartment.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/la-maison-d-art.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/45th-street-condo-226384-condo.html?from_hc_img=1#hotelTmpl
https://www.booking.com/hotel/us/luxurious-three-

In [8]:
len(hotel_urls)

1005

In [10]:
'''
Get all hotel's review' urls
'''
review_urls = []
def get_review_urls(hotel_urls):
    for url in hotel_urls:
        review_url = 'https://www.booking.com/reviews/us/hotel' + url[url.find('us')+2:]
        print(review_url)
        review_urls.append(review_url)
get_review_urls(hotel_urls)

https://www.booking.com/reviews/us/hotel/tribeca-lofts.html?from_hc_img=1#hotelTmpl
https://www.booking.com/reviews/us/hotel/hotel-lower-east-side-new-york.html?from_hc_img=1#hotelTmpl
https://www.booking.com/reviews/us/hotel/the-standard-new-york.html?from_hc_img=1#hotelTmpl
https://www.booking.com/reviews/us/hotel/ultra-luxurious-designer-large-appartment-top-floor-fitness-doorman-terrace-j.html?from_hc_img=1#hotelTmpl
https://www.booking.com/reviews/us/hotel/radio-city-apartments.html?from_hc_img=1#hotelTmpl
https://www.booking.com/reviews/us/hotel/50-bowery.html?from_hc_img=1#hotelTmpl
https://www.booking.com/reviews/us/hotel/freehand-new-york.html?from_hc_img=1#hotelTmpl
https://www.booking.com/reviews/us/hotel/archer-new-york.html?from_hc_img=1#hotelTmpl
https://www.booking.com/reviews/us/hotel/aka-times-square.html?from_hc_img=1#hotelTmpl
https://www.booking.com/reviews/us/hotel/new-yorker-ramada-plaza.html?from_hc_img=1#hotelTmpl
https://www.booking.com/reviews/us/hotel/times-s

In [30]:
import csv
def get_final_review(response):
    soup = BeautifulSoup(response, 'lxml')
    hotel_name = soup.find("a",{"class":"standalone_header_hotel_link"}).text.strip()
    headers = ["hotel_name", "reviewer_name", "date", "review_rating", "neg_review", "pos_review"]
    filename = "review_ratings.csv"
    try:
        reviews = soup.findAll("li",{"class":"review_item clearfix "})
    except:
        return None
    for review in reviews:
        result = dict()
        result['hotel_name'] = hotel_name
        try:
            reviewer_name = review.find("span",{"itemprop":"name"}).text.strip()
        except:
            reviewer_name = "N/A"
        result['reviewer_name'] = reviewer_name
        #print(reviewer_name)
        try:
            date = review.find("p",{"class":"review_item_date"}).text.strip()[10:].replace(",","")
        except:
            date = "N/A"
        result['date'] = date
        #print(date)
        try:
            review_rating = review.find("span",{"class":"review-score-badge"}).text.strip()
        except:
            review_rating = "N/A"
        result['review_rating'] = review_rating
        #print(review_rating)
        try:
            neg_review = review.find("p", {"class":"review_neg "}).text.strip().replace(",","")[1:]
        except:
            neg_review = "N/A"
        while neg_review.find('\n') != -1:
            i = neg_review.index('\n')
            neg_review = neg_review[0:i] + neg_review[i+1:]
        result['neg_review'] = neg_review
        #print(neg_review)
        try:
            pos_review = review.find("p", {"class":"review_pos "}).text.strip().replace(",","")[1:]
        except:
            pos_review = "N/A"
        while pos_review.find('\n') != -1:
            i = pos_review.index('\n')
            pos_review = pos_review[0:i] + pos_review[i+1:]
        #print(pos_review)
        result['pos_review'] = pos_review
        #print(result)
        with open(filename, 'a+', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, headers)
            writer.writerow(result)
        #print(hotel_name +","+ reviewer_name +","+ date +","+ review_rating +","+ neg_review +","+ pos_review)
    try:
        next_page = soup.find("p",{"class":"page_link review_next_page"}).a["href"]
        next_page = 'https://www.booking.com' + next_page
        response_next_page = requests.get(next_page)
        response_next_page = response_next_page.text.encode("utf-8")
        return get_final_review(response_next_page)
    except:
        return None    

In [31]:
import time
filename = "review_ratings.csv"
headers = ["hotel_name", "reviewer_name", "date", "review_rating", "neg_review", "pos_review"]
with open(filename, 'w', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(f, headers)
    writer.writeheader()
for review_url in review_urls:
    response = requests.get(review_url)
    response = response.text.encode("utf-8")
    get_final_review(response)
    time.sleep(1)