<h1> <b> Scripts for Extracting Hotel Data from Source </b> </h1>
<i> Source: <a href="https://ph.hotels.com/"> https://ph.hotels.com/ </a> </i>

In [40]:
# imports here
import time
from pathlib import Path

import pandas as pd
from bs4 import BeautifulSoup
from pandas import DataFrame
from requests_html import HTMLSession

<br>
<h2> <b> Function and Class Definitions </b> </h2>

In [41]:
class Location:
    '''
    Address of a hotel split into three parts: block, city and province.

    The address is read from the main hotel page as one comma-separated
    string and broken up by position, counting from the end of the string.

    Attributes:
        block: everything before the city, joined back with ', '.
        city: second-to-last address component, or None when absent.
        province: last address component with any leading zip code removed,
            or None when absent.
    '''

    def __init__(self, main_content):
        self.block = None  # refer to specific location
        self.city = None
        self.province = None
        self.get_location(main_content)

    def get_location(self, main_content) -> None:
        '''
        Get and preprocess location string, then store the result on self.

        main_content only needs a select(css_selector) method returning
        elements that expose get_text().

        Raises:
            IndexError: when the page has fewer than six matching spans.
        '''

        location_str = main_content.select('div div button._23Lf5I ._2wKxGq span')[5].get_text()

        # drop the last component (country)
        location_list = location_str.split(', ')[:-1]

        # drop the second-to-last remaining component
        # NOTE(review): presumably the zip code -- confirm against the page.
        # Guarded so that very short addresses do not raise IndexError.
        if len(location_list) >= 2:
            location_list.pop(-2)

        processed_location_list = self.process_location_list(location_list)
        self.set_location(processed_location_list)

    def process_location_list(self, location_list: list) -> list:
        '''
        Turn the address components into [block, city, province].

        Province and city sit at indexes -1 and -2 respectively; whatever
        comes before them is the block. Missing parts are returned as None
        (or '' for the block) instead of raising.
        '''
        block = self.join_blocks(location_list[:-2])
        city = location_list[-2] if len(location_list) >= 2 else None
        province = self.get_Province(location_list[-1]) if location_list else None

        return [block, city, province]

    def set_location(self, processed_location_list: list):
        '''Store [block, city, province] on the instance.'''
        self.block, self.city, self.province = processed_location_list[:3]

    def get_Province(self, zip_code_province: str) -> str:
        '''
        Extract the province name from a "province" or "zip province" string.

        If the string starts with a letter it is returned as is.
        Otherwise only letters and spaces are kept and the surrounding
        white space is stripped, e.g. '2023 Pampanga' -> 'Pampanga'.

        Returns None when the input is empty or not a string.
        '''
        if not isinstance(zip_code_province, str) or not zip_code_province:
            return None

        if zip_code_province[0].isalpha():
            # return as is
            return zip_code_province

        kept = ''.join(character for character in zip_code_province
                       if character.isalpha() or character == ' ')

        # strip instead of blindly dropping the first kept character, which
        # would eat a letter when no space follows the zip code
        return kept.strip()

    def join_blocks(self, blocks_list: list) -> str:
        '''
        Join the block components with ', '.

        ['2F', 'Uptown Parade'] -> '2F, Uptown Parade'; an empty list gives ''.
        '''
        return ', '.join(blocks_list)

class Review:
    '''
    One guest review read from a review card element.

    review_card only needs a select_one(css_selector) method whose result
    exposes get_text().
    '''

    def __init__(self, review_card):
        def text_at(selector):
            # text content of the first element matching the selector
            return review_card.select_one(selector).get_text()

        self.reviewee_name = text_at('div.meta span.reviewer')
        self.rating = float(text_at('div.card-header span.rating span.rating-score'))
        self.trip_type = text_at('div.meta span.trip-type-nights')
        self.comment = text_at('blockquote.expandable-content')

    def add_Review(self, reviews):
        '''Append this review, as a plain dict, to the given list.'''
        labels = ('reviewee name', 'rating', 'trip type', 'comment')
        values = (self.reviewee_name, self.rating, self.trip_type, self.comment)

        reviews.append(dict(zip(labels, values)))

class Hotel:
    '''
    Everything scraped for one hotel.

    Constructing a Hotel downloads the main page and the reviews page with
    the given session, then fills the attributes from fixed CSS selectors.
    Any selector that does not match raises (AttributeError on None,
    IndexError on an empty result list).
    '''

    def __init__(self, main_url, reviews_url, session):
        main_page = self.get_MainContent(main_url, session)
        reviews_page = self.get_ReviewsContent(reviews_url, session)

        self.name = self.get_name(main_page)
        self.short_description = self.get_ShortDescription(main_page)
        self.block, self.city, self.province = self.get_location(main_page)
        self.amenities = self.get_amenities(main_page)
        self.price_range = self.get_PriceRange(main_page)
        self.room_count = self.get_RoomCount(main_page)
        self.landmarks = self.get_Landmarks(main_page)
        self.transportation = self.get_AvailableTransportation(main_page)
        self.reviews = self.get_Reviews(reviews_page)

    @staticmethod
    def _fetch_soup(url, session):
        '''Download url with the session and parse the body with html.parser.'''
        response = session.get(url)
        return BeautifulSoup(response.content, 'html.parser')

    @staticmethod
    def _digits_to_int(text):
        '''Keep only the numeric characters of text and read them as one int.'''
        return int(''.join(filter(str.isnumeric, text)))

    def get_MainContent(self, main_url, session):
        '''Parsed main page of the hotel.'''
        return self._fetch_soup(main_url, session)

    def get_ReviewsContent(self, reviews_url, session):
        '''Parsed reviews page of the hotel.'''
        return self._fetch_soup(reviews_url, session)

    def get_name(self, main_content):
        '''Hotel name, taken from the page heading.'''
        return main_content.select_one('div._2h6Jhd h1').get_text()

    def get_ShortDescription(self, main_content):
        '''One-line description shown on the main page.'''
        return main_content.select_one('div._1quDY2 p').get_text()

    def get_location(self, main_content):
        '''(block, city, province) as parsed by Location.'''
        parsed = Location(main_content)
        return (parsed.block, parsed.city, parsed.province)

    def get_amenities(self, main_content):
        '''Text of every amenity list item.'''
        return [item.get_text()
                for item in main_content.select('div._3pPyAT ul._3TYstR li')]

    def get_PriceRange(self, main_content):
        '''Displayed price with every non-numeric character removed, as int.'''
        return self._digits_to_int(main_content.select_one('span._2R4dw5').get_text())

    def get_RoomCount(self, main_content):
        '''Number found in the first list item of the #info5 section, as int.'''
        first_item = main_content.select('#info5 section._1_22qw div._2Ut1Ms ul li')[0]
        return self._digits_to_int(first_item.get_text())

    def get_Landmarks(self, main_content):
        '''Text of every item in the first list of the first section._1O0SKi.'''
        around = main_content.select('section._1O0SKi')[0]
        landmark_list = around.select_one('._1_22qw div._2Ut1Ms ul')

        return [item.get_text() for item in landmark_list.select('li')]

    def get_AvailableTransportation(self, main_content):
        '''
        Map each transport list's first CSS class to the texts of its items.
        '''
        around = main_content.select('section._1O0SKi')[0]

        # read all class names first, then look up the items of each class
        class_names = [card['class'][0]
                       for card in around.select('div._2Ut1Ms div ul')]

        return {class_name: Hotel.transport_to_text(class_name, around)
                for class_name in class_names}

    def get_Reviews(self, reviews_content):
        '''Up to the first 25 review cards, each as a dict built by Review.'''
        cards = reviews_content.select('div.brand-reviews-listing div.review-card')[:25]

        collected = []
        for card in cards:
            Review(card).add_Review(collected)

        return collected

    def add_Hotel(self, hotel_data: dict):
        '''Append each attribute of this hotel to its column list in hotel_data.'''
        column_to_attribute = (
            ('Hotel', 'name'),
            ('Short Description', 'short_description'),
            ('Block', 'block'),
            ('City', 'city'),
            ('Province', 'province'),
            ('Amenities', 'amenities'),
            ('Price Range', 'price_range'),
            ('Room Count', 'room_count'),
            ('Reviews', 'reviews'),
            ('Nearby Landmarks', 'landmarks'),
            ('Nearby Transporation', 'transportation'),
        )

        for column, attribute in column_to_attribute:
            hotel_data[column].append(getattr(self, attribute))

    @staticmethod
    def transport_to_text(transport_class, whats_around_container):
        '''Texts of the list items under the ul carrying transport_class.'''
        selector = f'div._2Ut1Ms div ul.{transport_class} li'
        return [item.get_text() for item in whats_around_container.select(selector)]


class URL:
    '''The two page addresses of one hotel: its main page and its reviews page.'''

    def __init__(self,main_url, reviews_url):
        self.main_url, self.reviews_url = main_url, reviews_url

<br>
<h2> <b> Main </b> </h2>

In [42]:
# Main-page and reviews-page URLs of every hotel to scrape.
# The URLs of two hotels differ only by the numeric property id, so they are
# generated from one template instead of being pasted in full for each hotel.
CHECK_IN = '2021-05-27'
CHECK_OUT = '2021-06-01'

MAIN_URL_TEMPLATE = (
    'https://ph.hotels.com/ho{hotel_id}/?destination-id={hotel_id}'
    '&q-check-in={check_in}&q-check-out={check_out}'
    '&q-rooms=1&q-room-0-adults=2&q-room-0-children=0'
)

# query parameters carried over from the search results page
SEARCH_CONTEXT_SUFFIX = '&sort-order=BEST_SELLER&WOD=4&WOE=2&MGT=5&ZSX=0&SYE=3&YGF=14'

REVIEWS_URL_TEMPLATE = (
    'https://ph.hotels.com/ho{hotel_id}-tr/'
    '?q-check-in={check_in}&q-check-out={check_out}'
    '&destination-id=10233139&q-rooms=1&q-room-0-adults=2'
    '&SYE=3&ZSX=0&MGT=5&YGF=14&WOD=4&WOE=2'
    '&applyEmbargo=false&reviewTab=brand-reviews&f-amid='
)


def build_hotel_url(hotel_id, with_search_context=True,
                    check_in=CHECK_IN, check_out=CHECK_OUT):
    '''
    Build the URL pair (main page, reviews page) for one property id.

    hotel_id: numeric property id as it appears in the page address.
    with_search_context: append SEARCH_CONTEXT_SUFFIX to the main-page URL.
    check_in / check_out: stay dates in YYYY-MM-DD form.
    '''
    main_url = MAIN_URL_TEMPLATE.format(
        hotel_id=hotel_id, check_in=check_in, check_out=check_out)
    if with_search_context:
        main_url += SEARCH_CONTEXT_SUFFIX

    reviews_url = REVIEWS_URL_TEMPLATE.format(
        hotel_id=hotel_id, check_in=check_in, check_out=check_out)

    return URL(main_url=main_url, reviews_url=reviews_url)


HOTEL_IDS = [
    '960571776',   # Marriott
    '384361024',   # conrad
    '352307',      # crimson
    '255144',      # ascott
    '885236352',   # Jim's Castle
    '431229',      # Angelic Mansion
    '130228',      # edsa shangri la
    '825971744',   # Batad Highland
    '430789',      # seda
    '337623',      # ngi ngi ngu
    '421919',      # red coconut
    '505421',      # GV hotel
    '487707',      # Daluyon Beach and Mountain Resort
    '478447',      # Bonifacio Ascott
    '418810',      # Crimson hotel
    '697738',      # The forest lodge at camp john hay
    '463016',      # city garden
    '1119262784',  # dust thani
    '407442',      # le monet
    '516628',      # butterfly
    '384466',      # city garden hotel
    '506253',      # prime asia
    '1469946240',  # ahana el nido
    '525544',      # Golden Palm
    '474984',      # Discovery Primea
    '646749792',   # Paradores
    '683788416',   # Pad's Inn
]

# the Marriott main-page URL was recorded without the search-context suffix
IDS_WITHOUT_SEARCH_CONTEXT = {'960571776'}

urls = [
    build_hotel_url(hotel_id,
                    with_search_context=hotel_id not in IDS_WITHOUT_SEARCH_CONTEXT)
    for hotel_id in HOTEL_IDS
]

In [31]:
# One HTTP session shared by every request.
session = HTMLSession()

# Column name -> list of values, one entry per scraped hotel.
# Turned into the dataframe once all hotels have been added.
hotel_data = {
    column: []
    for column in (
        'Hotel',
        'Short Description',
        'Block',
        'City',
        'Province',
        'Amenities',
        'Price Range',
        'Room Count',
        'Reviews',
        'Nearby Landmarks',
        'Nearby Transporation',
    )
}

# position in urls of the next hotel to scrape
i = 0

<br>
<p> Hotels are instantiated one at a time because the website often returns differently structured HTML for the same request, which breaks the program. </p>

<p> If a run fails with a <code>NoneType</code> error, the HTML returned most likely differed from the reference page the selectors were written for; requesting the page again (rerunning) solves it. Scraping in small batches avoids having to rerun all of the code when the program fails to scrape a single hotel. </p>

In [43]:
# Scrape every hotel in urls that has not been added yet, starting at index i.
#
# The site sometimes answers with a differently structured page; the selectors
# then match nothing and parsing fails. Each hotel is therefore retried a few
# times. A hotel is only added to hotel_data, and i only advanced, after it
# parsed successfully, so rerunning this cell resumes where it stopped.
MAX_ATTEMPTS = 5
RETRY_DELAY_SECONDS = 2


def scrape_hotel(url, session, max_attempts=MAX_ATTEMPTS,
                 retry_delay=RETRY_DELAY_SECONDS):
    '''
    Build a Hotel from a URL pair, retrying when the page fails to parse.

    url: object with main_url and reviews_url attributes.
    session: session passed through to Hotel.
    max_attempts: how many times to request and parse before giving up.
    retry_delay: seconds to wait between attempts.

    Raises:
        RuntimeError: when every attempt failed to parse; the last parse
            error is attached as the cause. Other errors (for example
            network failures) are not retried and propagate unchanged.
    '''
    last_error = None

    for attempt in range(1, max_attempts + 1):
        try:
            return Hotel(url.main_url, url.reviews_url, session)
        except (AttributeError, IndexError, KeyError, ValueError) as error:
            # a selector matched nothing (None / empty list / missing class)
            # or a number could not be read from the matched text
            last_error = error
            if attempt < max_attempts:
                time.sleep(retry_delay)

    raise RuntimeError(
        'could not parse {0} after {1} attempts'.format(url.main_url, max_attempts)
    ) from last_error


while i < len(urls):
    hotel = scrape_hotel(urls[i], session)
    hotel.add_Hotel(hotel_data)

    i += 1

In [75]:
# one row per scraped hotel, one column per key of hotel_data
hotel_df = DataFrame(data=hotel_data)

In [84]:
# Write the scraped table to disk. The output folder is created first so the
# write does not fail with FileNotFoundError when data/ does not exist yet.
file = 'data/Hotel.csv'
Path(file).parent.mkdir(parents=True, exist_ok=True)
hotel_df.to_csv(file, index=False)

In [85]:
# check: read the written file back and preview its first rows
hotel_csv_df = pd.read_csv(filepath_or_buffer='data/Hotel.csv')
hotel_csv_df.head(n=5)

Unnamed: 0,Hotel,Short Description,Block,City,Province,Amenities,Price Range,Room Count,Reviews,Nearby Landmarks,Nearby Transporation
0,Clark Marriott Hotel,"Luxury hotel with 6 restaurants, near Clark Fr...",5398 Manuel A Roxas Highway,Mabalacat City,Pampanga,"['Free WiFi', 'Free parking', 'Pool', 'Non-smo...",8118,260,"[{'reviewee name': 'P', 'rating': 10.0, 'trip ...","['In Clark', 'Clark Freeport Zone - 1 min walk...",{'airport': ['Clark Intl. Airport (CRK) - 13 m...
1,Conrad Manila,"Luxury hotel with spa, near SMX Convention Cen...","Seaside Boulevard, Corner of Coral Way",Pasay,Manila,"['Free parking', 'Pool', 'Airport transfer', '...",6374,347,"[{'reviewee name': 'Nikko', 'rating': 10.0, 't...","['In Barangay 76', 'SM Mall of Asia - 8 min wa...",{'airport': ['Ninoy Aquino Intl. Airport (MNL)...
2,Crimson Resort and Spa Mactan,Beachfront hotel in Lapu-Lapu with spa and out...,"Seascapes Resort Town, Mactan Island",Lapu-Lapu,Cebu,"['Free WiFi', 'Free parking', 'Pool', 'Airport...",5388,290,[],"['On a private beach', 'Magellan Shrine - 34 m...",{'airport': ['Mactan - Cebu Intl. Airport (CEB...
3,Ascott Makati,"Luxury aparthotel with outdoor pool, connected...","Glorietta 4, Ayala Centre",Makati,Manila,"['Free WiFi', 'Free parking', 'Pool', 'Airport...",7700,362,"[{'reviewee name': 'Catherine', 'rating': 8.0,...","['In San Lorenzo', 'Glorietta Mall - 1 min wal...",{'airport': ['Ninoy Aquino Intl. Airport (MNL)...
4,Jim's Castle Inn,Convenient to Palawan State University - Coron,Coron-Busuanga Road,Coron,Palawan,"['Free WiFi', 'Airport transfer', 'Non-smoking']",536,11,"[{'reviewee name': 'KRISTINE', 'rating': 10.0,...","['In Coron Town Proper', 'Palawan State Univer...",{'airport': ['Busuanga (USU-Francisco Reyes) -...


In [86]:
# (number of rows, number of columns) of the frame read back from the CSV
hotel_csv_df.shape

(27, 11)