In [1]:
import selenium
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
import json

# Start a Chrome session as soon as this module/cell is executed; the single
# shared `driver` is used by scrape_page() and by the script body below.
# NOTE(review): the positional argument is presumably the path to the
# chromedriver executable, relative to the working directory — confirm
# against the installed Selenium version.
driver = webdriver.Chrome('webdriver/chromedriver')
# Timeout passed to WebDriverWait in scrape_page() while waiting for the
# pagination bar to appear.
WAIT_TIME = 30

def scrape_page():
    """
    Scrape every tour listed on the page currently loaded in ``driver``.

    Waits up to ``WAIT_TIME`` for the pagination bar (an element with class
    ``pag``) to be present, then reads each ``<li>`` whose class attribute
    contains ``tour``.

    Returns:
        A list with one dict per tour. Each dict holds:
          * ``'name'``  - the item's ``title`` attribute, lower-cased;
          * one key per ``<dt>`` label inside ``<dl class="values">``
            (lower-cased, ``/`` removed, whitespace collapsed to ``_``),
            mapped to the matching ``<dd>`` text lower-cased and split on
            ``,`` and ``/`` into a list of stripped strings;
          * ``'price'`` - the text after the last ``$`` in the offers
            block, with thousands separators removed (still a string);
          * ``'days'``  - the text of the duration element;
          * ``'theme'`` - the theme text, lower-cased and stripped.

    Raises:
        selenium.common.exceptions.TimeoutException: the pagination bar
            did not appear in time.
        selenium.common.exceptions.NoSuchElementException: a tour item is
            missing one of the expected sub-elements.
    """
    tour_info = []  # store the tour info from this page here

    # Wait until the pagination bar is available; it signals that the tour
    # list has been rendered. The located element itself is not needed.
    WebDriverWait(driver, WAIT_TIME).until(
        EC.presence_of_element_located((By.CLASS_NAME, "pag")))

    # find_element(s)(By.XPATH, ...) is used instead of the
    # find_element(s)_by_xpath helpers, which newer Selenium releases dropped.
    for tour_desc in driver.find_elements(By.XPATH, "//li[contains(@class, 'tour')]"):

        tour_name = tour_desc.get_attribute("title")

        tour_theme = tour_desc.find_element(
            By.XPATH, ".//div[@class='theme']").text.lower().strip()

        # Feature labels (<dt>) and their descriptions (<dd>) live in the
        # same definition list and are paired up by position.
        values = tour_desc.find_element(By.XPATH, ".//dl[@class='values']")
        tour_features = [dt.text for dt in values.find_elements(By.XPATH, ".//dt")]
        tour_feature_desc = [dd.text for dd in values.find_elements(By.XPATH, ".//dd")]

        # price: keep only what follows the last '$' and drop the commas
        price_from = tour_desc.find_element(
            By.XPATH, ".//dl[@itemprop='offers']").text.split('$')[-1].strip().replace(',', '')

        # days
        days = tour_desc.find_element(
            By.XPATH, ".//div[@class='days']/div[@class='title']/dl/dt").text

        # Assemble the record: name first, then the features, then the
        # fixed fields (which win if a feature label collides with them).
        ti = {'name': tour_name.lower()}
        for label, description in zip(tour_features, tour_feature_desc):
            key = '_'.join(label.lower().strip().replace('/', '').split())
            ti[key] = [w.strip()
                       for part in description.lower().split(',')
                       for w in part.split('/')]
        ti.update({'price': price_from, 'days': days, 'theme': tour_theme})

        tour_info.append(ti)

    return tour_info

if __name__ == '__main__':
    # Walk every result page of each operator's listing, collect the tours
    # into one list and write that list to intrepid-tours.json.

    intrepid_tours = []

    for v in ['intrepid', 'peregrine', 'geckos-adventures']:

        BASE_URL = f'https://www.tourradar.com/o/{v}'

        print(f'working on {v}...')
        print(f'getting {BASE_URL}..')

        driver.get(BASE_URL)

        while True:

            intrepid_tours.extend(scrape_page())

            # Look for the "Next" link in the pagination bar (on the last
            # page Next disappears). Treating "no Next link found" as the
            # stop condition also covers a pagination bar with no links at
            # all, which previously left the loop spinning forever on the
            # same page.
            next_link = next(
                (span
                 for span in driver.find_elements(By.XPATH, "//div[@class='pag']/a/span")
                 if 'next' in span.text.lower()),
                None,
            )
            if next_link is None:
                break

            next_link.click()
            time.sleep(2)  # give the next page a moment to start loading

    # quit() ends the whole WebDriver session (browser and driver process),
    # whereas close() only closes the current window.
    driver.quit()

    # Use a context manager so the file is flushed and closed deterministically.
    with open('intrepid-tours.json', 'w') as out_file:
        json.dump(intrepid_tours, out_file)

    print(f'done. total {len(intrepid_tours)} tours')

working on intrepid...
getting https://www.tourradar.com/o/intrepid..
working on peregrine...
getting https://www.tourradar.com/o/peregrine..
working on geckos-adventures...
getting https://www.tourradar.com/o/geckos-adventures..
done. total 1325 tours
