In [3]:
from selenium.webdriver import ActionChains
from selenium.webdriver import Chrome
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

import numpy as np
import os
import sys
import pandas as pd

from time import sleep


In [111]:
class Category:
    def __init__(self, button, name):
        self.button = button
        self.name = name

class Review:

    def __init__(self, review_card):
        self.reviewee = self.get_reviewee_name(review_card) 
        self.rating = self.get_rating(review_card) 
        self.comment = self.get_comment(review_card) 

        self.id()

    def get_reviewee_name(self, review_card):
        reviewee_name = review_card.get_attribute('aria-label')
        return reviewee_name
    
    def get_rating(self, review_card):
        selector = ('div.section-review-line div.section-'
                     + 'review-metadata span.section-review-stars')
        rating = review_card.find_element(By.CSS_SELECTOR, selector).get_attribute('aria-label')

        return rating
    
    def get_comment(self, review_card):
        selector = ('div.section-review-line '
                   + 'div.section-review-review-content ' 
                   + 'div.section-review-text')

        comment = review_card.find_element(By.CSS_SELECTOR, selector) 
        return comment.text
    
    def id(self):
        id = {
            'reviewee name': self.reviewee,
            'rating': self.rating,
            'comment': self.comment
        }

        return id

class Location:
    def __init__(self, driver: Chrome, selector):
        self.block = None  # refer to specific location
        self.city = None
        self.province = None
        self.get_location(driver, selector)
    
    def get_location(self, driver, selector) -> list:
        '''
        Get and preprocess location string
        '''
    
        location_str = driver.find_element(By.CSS_SELECTOR, 
                                           selector).text

        location_list = location_str.split(', ')
        processed_location_list = self.process_location_list(location_list)
        self.set_location(processed_location_list)
    
    def process_location_list(self, location_list: list) -> list:
        # city and province are located at indexed -1 and -2 respectively
        block_list = self.join_blocks(location_list[:-2])

        try:
            city = location_list[-2]
        except IndexError:
            city = None
        else:
            city = location_list[-2]

        province = self.get_Province(location_list[-1])

        processed_location_list = [block_list, city, province]

        return processed_location_list

    def set_location(self, processed_location_list: list):

        self.block = processed_location_list[0]
        self.city = processed_location_list[1]
        self.province = processed_location_list[2]
    
    def get_Province(self, zip_code_province: str) -> str: 
        '''
        if province only return the province str

        if zip + province, preprocess then return the province str
            use filter on zip_code_province: returns list of all characters
            then join each item extracted.
        '''
        try:
            zip_code_province[0].isalpha()
        except:
            province_str = None
        else:
            if zip_code_province[0].isalpha():
                # return as is
                province_str = zip_code_province 
            
            else:
                characters = list(filter(lambda x: ((x.isalpha())|(x == ' ')), 
                                zip_code_province))

                # do not include first white space
                province_str = ''.join(character for character 
                                        in characters[1:])

        return province_str

    def join_blocks(self, blocks_list: list) -> str:
        # blocks are structured such that
        # ... : ['Timog Ave', 'Diliman']
        # ... : ['2F', 'Uptown Parade']
        # join all elements
        # in between each element we add ', '
        # | after join we have something like 
        # ... '2F, Uptown Parade'

        joined_blocks: str = ''
        current_block_index = 0
        last_block_index = len(blocks_list) - 1

        for block in blocks_list:
            if current_block_index != last_block_index:
                joined_blocks += block + ', '
                current_block_index += 1
            else:
                joined_blocks += block

        return joined_blocks 

class TouristSite:
    @staticmethod
    def get_Name(card):
        selector = selector = 'div.rbj0Ud div.skFvHc'
        name = card.find_element(By.CSS_SELECTOR, selector).text

        return name
    
    @staticmethod
    def get_ShortDescription(card):
        selector = 'div.nFoFM'
        name = card.find_element(By.CSS_SELECTOR, selector).text
        
        return name
    
    @staticmethod
    def get_ReviewCount(card):
        selector = ('div.tP34jb span.ta47le '
                    + 'span.oz2bpb span.jdzyld')

        count = card.find_element(By.CSS_SELECTOR, 
                                    selector).text

        # count is orinally '(xxxxxx)' ...
        # ... do not get first and last element 
        count_formatted = int(''.join([i for i in count[1:-1] if i.isnumeric()]) )

        return count_formatted
    
    @staticmethod
    def get_Rating(card):
        selector = ('div.tP34jb span.ta47le'
                    + ' span.oz2bpb span.KFi5wf')
        
        rating = driver.find_element(By.CSS_SELECTOR, selector).text
        rating_float = float(rating)

        return rating_float

def search(site_name, driver):
    search_box = driver.find_element(By.ID, 'searchboxinput')
    search_box.send_keys(site_name)
    
    sleep(3)
    
    suggestion_1 = driver.find_element(By.CSS_SELECTOR, 'div.sbdd_b div.suggestions div.sbsb_b .sbsb_c')
    suggestion_1.click()

In [112]:
def init_base():
       path = os.path.abspath(os.path.join(os.getcwd(), os.pardir)) + '\chromedriver\chromedriver.exe'
       driver = Chrome(path)
       wait = WebDriverWait(driver, 10)
       
       return driver, wait
    
def open_pages():
    main_window_url = ('https://www.google.com/travel/things-to-do/see-all?dest_sr'
                       + 'c=ut&dest_mid=%2Fm%2F05v8c&tcfs=EjEKCC9tLzA1djhjEgtQaGls'
                       + 'aXBwaW5lcxoYCgoyMDIxLTA1LTE1EgoyMDIxLTA1LTE5UgJgAQ&ved=0'
                       + 'CAUQyJABahcKEwjAxt_k4KTwAhUAAAAAHQAAAAAQeg&ictx=3&hl=en-'
                       + 'PH&gl=ph&dest_state_type=sattd#ttdm=9.802230_122.457786_'
                       + '7&ttdmf=%252Fm%252F05rdfg')
    search_window_url = ('https://www.google.com/maps/@11.6978351,122.6217542,6z')

    driver.maximize_window()
    driver.get(main_window_url)
    driver.execute_script("window.open('https://www.google.com/maps/@11.6978351,122.6217542,6z')")

    # this is causing stack trace error
    # search_window_searchbar_locator = (By.CSS_SELECTOR, 'div#searchbox')
    # wait.until(EC.visibility_of_element_located(search_window_searchbar_locator))

    driver.switch_to.window(driver.window_handles[0])

    main_window_locator = (By.CSS_SELECTOR,"div.XzK3Bf[role='region']")
    wait.until(EC.visibility_of_element_located(main_window_locator))

def get_categories():
    button_selector = '.SJyhnc .NBZP0e[aria-label="Filters"] .iydyUc'
    category_selector = 'span.VfPpkd-vQzf8d .ouA5Jb .veMtCf'

    categories = [Category(i[0], i[1].text) for i in 
                  zip(driver.find_elements(By.CSS_SELECTOR, button_selector)[1:],
                      driver.find_elements(By.CSS_SELECTOR, category_selector)[1:])
                 ]

    return categories
    
def get_SearchedData(driver, index):
    for site_name in tourist_site_data['name'][index:index+5]:
        search(site_name, driver)
        
        
        
        # get location
        wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '.mapsConsumerUiSubviewSectionGm2Listitem__primary-text')))
        site_location = Location(driver, '.mapsConsumerUiSubviewSectionGm2Listitem__primary-text')
        
        # get reviews 
        driver.find_element(By.CSS_SELECTOR, 'button[jsaction = "pane.rating.moreReviews"]').click()
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'div.section-layout')))
        sleep(3)
        reviews_container = driver.find_elements(By.CSS_SELECTOR, '.section-layout')[4]
        reviews = [Review(review_card).id() for review_card in reviews_container.find_elements(By.CSS_SELECTOR,'.section-review')[:15]]
        
        back_button = driver.find_element(By.CSS_SELECTOR,'button.mdc-icon-button')
        back_button.click()

        # wait until x button is visible
        wait.until(EC.visibility_of_element_located((By.ID,'sb_cb50')))

        # click x button, this will clear the search bar
        x_button=driver.find_element(By.ID,'sb_cb50')
        x_button.click()
    
        # add the data
        tourist_site_data['block'].append(site_location.block)
        tourist_site_data['city'].append(site_location.city)
        tourist_site_data['province'].append(site_location.province)
        tourist_site_data['reviews'].append(reviews)

In [78]:
tourist_site_data = {
    'name':[],
    'category':[],
    'short description':[],
    'rating':[],
    'review count':[],
    'block':[],
    'city':[],
    'province':[],
    'reviews':[]
}

In [79]:
driver, wait = init_base()
open_pages()

categories = get_categories()
for category in categories: 

    button = category.button
    button.click()
    sleep(4)
    tourist_sites = driver.find_elements(By.CSS_SELECTOR, 
                                        'div.GwjAi '
                                        )[:5]
    for tourist_site in tourist_sites:
        name = TouristSite.get_Name(tourist_site)
        short_desc = TouristSite.get_ShortDescription(tourist_site)
        review_count = TouristSite.get_ReviewCount(tourist_site)
        rating = TouristSite.get_Rating(tourist_site)
        
        tourist_site_data['name'].append(name)
        tourist_site_data['category'].append(category.name)
        tourist_site_data['short description'].append(short_desc)
        tourist_site_data['rating'].append(rating)
        tourist_site_data['review count'].append(review_count)
        

    

In [80]:
len(tourist_site_data['name'])

35

In [81]:
# works add search and extract for each
# do searching by batch such that kung fail pwede hindi affected yung progress dun sa iba
'''
ff are the batches to be searched

0:5
5:10
10:15
15:20
20:25
25:30
30:35
'''

'\nff are the batches to be searched\n\n0:5\n5:10\n10:15\n15:20\n20:25\n25:30\n30:35\n'

In [82]:
# switch windows
driver.switch_to_window(driver.window_handles[1])

  driver.switch_to_window(driver.window_handles[1])


In [113]:
i=0
# for batch 1 
get_SearchedData(driver, i)

NoSuchElementException: Message: no such element: Unable to locate element: {"method":"css selector","selector":"div.section-review-line[jstcache="1160"] div.section-review-metadata span.section-review-stars"}
  (Session info: chrome=90.0.4430.93)


In [None]:
i+=5
do it again