In [24]:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains

from collections import defaultdict
import re
import json
import os
import time
import arrow
import random

import numpy as np

import subprocess
import zipfile

from attraction import Attraction
from review import Review
from user import User

In [25]:
options = webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')
options.add_argument('--ignore-ssl-errors')
options.add_argument('--incognito')
options.add_argument('--start-maximized')
prefs = {"profile.default_content_setting_values.notifications" : 2}
options.add_experimental_option("prefs",prefs)

# if headless:
#     options.add_argument('--headless')

In [26]:
driver = webdriver.Chrome('webdriver/chromedriver', options=options)

driver.get('https://www.tripadvisor.com.au/Attraction_Review-g255060-d257278-Reviews-Sydney_Opera_House-Sydney_New_South_Wales.html')

In [70]:
def select_filters(traveller_rating=None, 
                   traveller_type='Solo', 
                   time_of_year=None,  
                   language='English', 
                   max_attempts=3):
    
    d = {'traveller_rating': {'data-name': 'ta_rating',
                              'input-values': {'Excellent': '5',
                                               'Very good': '4',
                                               'Average': '3',
                                               'Poor': '2',
                                               'Terrible': '1'},
                             'pick': traveller_rating},
        'traveller_type': {'data-name': 'traveler_filter',
                           'input-values': {'Families': '3',
                                            'Couples': '2',
                                            'Solo': '5',
                                            'Business': '1',
                                            'Friends': '4'},
                          'pick': traveller_type},
        'time_of_year': {'data-name': 'season',
                         'input-values': {'Mar-May': '1',
                                          'Jun-Aug': '2',
                                          'Sep-Nov': '3',
                                          'Dec-Feb': '4'},
                        'pick': time_of_year},
        'language': {'data-name': 'language',
                     'input-values': {'English': 'en',
                                      'Japanese': 'ja'},
                     'pick': language}}
    
    def is_selected(css_selector_st):
        
        try:
            WebDriverWait(driver, 10) \
                        .until(EC.presence_of_element_located((By.CSS_SELECTOR, 
                                                               css_selector_st + '>input[checked="checked"]')))
            return True
        
        except:
            return False
        
    def _click(css_selector_st, max_attempts=3):
        
        times_tried = 0
        
        flag_before = is_selected(css_selector_st)
        flag_after = flag_before
        
        while (times_tried <= max_attempts) and (flag_after == flag_before):
                
            times_tried += 1   
            print('clicking on ..', css_selector_st)
            
            try:
                e = WebDriverWait(driver, 20) \
                        .until(EC.element_to_be_clickable((By.CSS_SELECTOR, css_selector_st)))
            except:
                print(f'failed to find {css_selector_st}!')
                
            e.click()
            
            flag_after = is_selected(css_selector_st)    
            
        return (flag_after != flag_before)

        
    for filt in d:
        
        value = d[filt]['pick']
        
        # uncheck everything else
        to_uncheck = [other_value for other_value in d[filt]['input-values'] if other_value != value]
        print('need to uncheck ', to_uncheck)
        
        if to_uncheck:
            
            for other_value in to_uncheck:
                print(f'unchecking {other_value}..')
                tr_pick = d[filt]['input-values'][other_value]
                dname = d[filt]['data-name']
                st = f'div.choices[data-name="{dname}"]>div[data-value="{tr_pick}"]'
                
                if is_selected(st):
                    res = _click(st)
                    if not res:
                        print(f'problem unchecking {other_value}') 
                    else:
                        print('unchecked')
        
        if value:
            
            tr_pick = d[filt]['input-values'][value]
            dname = d[filt]['data-name']
            st = f'div.choices[data-name="{dname}"]>div[data-value="{tr_pick}"]'
                
            print(f'selecting {filt}={value}...', end='')
            
            if is_selected(st):
                print('ok')
                continue
            else:
                _selected =  _click(st)
    
            if _selected:
                print('ok')
                
    try:
        lang_code = d['language']['input-values'][d['language']['pick']] 
        css_count = f'div.choices[data-name="language"]>div[data-value="{lang_code}"]>label.label>span.count'
        c_txt = WebDriverWait(driver, 10) \
                        .until(EC.presence_of_element_located((By.CSS_SELECTOR, css_count))).text.strip()
        c = int(re.sub(r'[(,)]','',c_txt))
        print(f'reviews: {c:,}')
    except:
        print('failed to get review count!')

In [71]:
select_filters(traveller_rating='Excellent', traveller_type='Solo', time_of_year='Sep-Nov')

need to uncheck  ['Very good', 'Average', 'Poor', 'Terrible']
unchecking Very good..
unchecking Average..
unchecking Poor..
unchecking Terrible..
selecting traveller_rating=Excellent...ok
need to uncheck  ['Families', 'Couples', 'Business', 'Friends']
unchecking Families..
clicking on .. div.choices[data-name="traveler_filter"]>div[data-value="3"]
clicking on .. div.choices[data-name="traveler_filter"]>div[data-value="3"]


ElementClickInterceptedException: Message: element click intercepted: Element <div class="ui_checkbox item" data-value="3" data-tracker="Family">...</div> is not clickable at point (411, 334). Other element would receive the click: <div class="loadingBox" data-targetevent="update-hotels_loading_box:ar_responsive">...</div>
  (Session info: chrome=75.0.3770.100)
