# Auto click on all regions 

In [3]:
import time
import json
import logging
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import TimeoutException, WebDriverException, NoSuchElementException

# Configure the root logger: show INFO and above, one "time - LEVEL - message" line per record
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Initialize the Chrome driver
def initialize_driver():
    """Create and return a Chrome WebDriver.

    The chromedriver binary is resolved through ``ChromeDriverManager().install()``
    and handed to Selenium wrapped in a ``Service`` object.
    """
    driver_path = ChromeDriverManager().install()
    chrome_service = Service(driver_path)
    return webdriver.Chrome(service=chrome_service)

# Open the webpage
def open_webpage(driver, url):
    """Load ``url`` in ``driver``, logging an INFO line before and after the call.

    Args:
        driver: Object exposing a ``get(url)`` method (a Selenium WebDriver).
        url: Address of the page to open.
    """
    logging.info("Opening the webpage...")
    driver.get(url)  # returns once the driver reports the navigation done
    logging.info("Webpage opened successfully.")

# Select "All Regions" and wait for the listings to load
def select_all_regions(driver, load_wait=400):
    """Open the region dropdown, pick its first entry and pause for the listings.

    The dropdown is located via ``#search_region_chosen`` and the entry with
    ``data-option-array-index='0'`` is clicked; the log messages treat that
    entry as "All Regions" (NOTE(review): confirm index 0 is still that option
    on the live page).

    Any ``TimeoutException`` / ``WebDriverException`` raised along the way is
    logged at ERROR level and swallowed, so the caller continues regardless.

    Args:
        driver: Selenium WebDriver positioned on the listing page.
        load_wait: Seconds to sleep after the click so the listings can load.
            Defaults to 400, the value that was previously hard-coded.
    """
    try:
        logging.info("Selecting 'All Regions'...")
        # Open the dropdown (wait up to 15 s for it to become clickable).
        select_element = WebDriverWait(driver, 15).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, "#search_region_chosen a.chosen-single"))
        )
        select_element.click()
        # Pick the first option of the opened result list (wait up to 20 s).
        all_regions_option = WebDriverWait(driver, 20).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, "#search_region_chosen .chosen-results li[data-option-array-index='0']"))
        )
        all_regions_option.click()
        logging.info("Selected 'All Regions'. Waiting for listings to load...")
        # Unconditional pause of `load_wait` seconds; nothing is polled here,
        # so the full duration always elapses.
        time.sleep(load_wait)
    except (TimeoutException, WebDriverException) as e:
        logging.error(f"Error selecting 'All Regions': {e}")

# Extract URLs of the listings
def extract_listing_urls(driver):
    """Collect the ``href`` of every listing click-box currently on the page.

    Waits up to 90 seconds for elements matching
    ``.job_listing .job_listing-clickbox`` to be present.

    Args:
        driver: Selenium WebDriver positioned on the listing page.

    Returns:
        A list with one ``href`` value per matched element, or an empty list
        when the wait times out or the driver raises; the error is logged.
    """
    locator = (By.CSS_SELECTOR, ".job_listing .job_listing-clickbox")
    try:
        anchors = WebDriverWait(driver, 90).until(
            EC.presence_of_all_elements_located(locator)
        )
        hrefs = []
        for anchor in anchors:
            hrefs.append(anchor.get_attribute("href"))
        logging.info(f"Found {len(hrefs)} listings.")
    except (TimeoutException, WebDriverException, NoSuchElementException) as e:
        logging.error(f"Error extracting listing URLs: {e}")
        return []
    return hrefs

# Save URLs to a JSON file
def save_urls(urls, filename='escape_room_urls.json'):
    """Dump ``urls`` to ``filename`` as 4-space-indented JSON and log the count.

    Args:
        urls: JSON-serialisable collection of URLs (its ``len`` is logged).
        filename: Destination path; an existing file is overwritten.
    """
    with open(filename, mode='w') as out:
        json.dump(urls, out, indent=4)
    total = len(urls)
    logging.info(f"Saved {total} URLs to {filename}")

def main():
    """Run the scrape end to end: open the page, select all regions, save URLs.

    The browser is always closed on exit, whether or not a step raised.
    """
    target_url = "https://www.escaperoomdirectory.co.uk/listing-region/serbia/"
    browser = initialize_driver()
    try:
        open_webpage(browser, target_url)
        select_all_regions(browser)
        save_urls(extract_listing_urls(browser))
    finally:
        browser.quit()


if __name__ == "__main__":
    main()


2024-06-24 08:19:50,761 - INFO - Get LATEST chromedriver version for google-chrome
2024-06-24 08:19:51,267 - INFO - Get LATEST chromedriver version for google-chrome
2024-06-24 08:19:51,774 - INFO - Driver [C:\Users\ibrah\.wdm\drivers\chromedriver\win64\126.0.6478.63\chromedriver-win32/chromedriver.exe] found in cache
2024-06-24 08:19:52,875 - INFO - Opening the webpage...
2024-06-24 08:20:13,771 - INFO - Webpage opened successfully.
2024-06-24 08:20:13,772 - INFO - Selecting 'All Regions'...
2024-06-24 08:20:13,924 - INFO - Selected 'All Regions'. Waiting for listings to load...
2024-06-24 08:28:23,968 - ERROR - Error extracting listing URLs: Message: 

2024-06-24 08:28:23,969 - INFO - Saved 0 URLs to escape_room_urls.json


# Manually click on the regions and control the wait time before scraping

In [3]:
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import TimeoutException, WebDriverException, NoSuchElementException

# Page to scrape and file that receives one link per line.
LISTING_URL = "https://www.escaperoomdirectory.co.uk/listing-region/serbia/"
OUTPUT_FILE = 'serbia_links.txt'

# Seconds the script idles after the page opens, before it reads the listings.
# The value is deliberately long: this pause is the window in which the page
# is operated by hand (regions clicked) before scraping starts.
MANUAL_WAIT_SECONDS = 200


def save_links(links):
    """Write every link in ``links`` to OUTPUT_FILE, one per line (overwrites)."""
    with open(OUTPUT_FILE, 'w') as file:
        for link in links:
            file.write(f"{link}\n")


def get_links():
    """Return the click-box ``href`` of each listing card on the current page.

    Uses the module-level ``driver``. Waits up to 20 seconds for the ``<li>``
    items of the active listings ``<ul>`` to be present. Cards without a
    click-box anchor, or whose anchor has no ``href``, are skipped.

    Raises:
        TimeoutException: if no listing item appears within 20 seconds.
    """
    links = []
    listings = WebDriverWait(driver, 20).until(
        EC.presence_of_all_elements_located((By.XPATH, "//ul[@class='job_listings listing-cards-anchor--active']/li"))
    )
    for listing in listings:
        try:
            link = listing.find_element(By.XPATH, ".//a[contains(@class, 'job_listing-clickbox')]").get_attribute("href")
        except NoSuchElementException:
            continue
        # get_attribute returns None when the attribute is absent; do not
        # let that end up in the output file as the text "None".
        if link:
            links.append(link)
    return links


# Initialize the Chrome driver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

# Everything that uses the browser runs inside the try block so that the
# finally clause closes it even if navigation fails or the wait is interrupted.
try:
    # Open the webpage
    print("Opening the webpage...")
    driver.get(LISTING_URL)
    print("Webpage opened successfully.")

    # Idle for MANUAL_WAIT_SECONDS before reading the listings
    print(f"Waiting for {MANUAL_WAIT_SECONDS} seconds to allow the page to load completely...")
    time.sleep(MANUAL_WAIT_SECONDS)

    # Get the list of links
    links = get_links()

    # Save the links to a file
    save_links(links)

    # Print success message with the count of links
    print(f"Successfully extracted {len(links)} links and saved to 'serbia_links.txt'.")

except TimeoutException as e:
    print(f"Error: {e}")
except WebDriverException as e:
    print(f"WebDriver error: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")
finally:
    # Close the browser
    driver.quit()


Opening the webpage...
Webpage opened successfully.
Waiting for 30 seconds to allow the page to load completely...
Successfully extracted 32 links and saved to 'serbia_links.txt'.
