# Auto-click on all regions

In [3]:
import time
import json
import logging
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import TimeoutException, WebDriverException, NoSuchElementException

# Configure logging once for the cell: INFO and above, each record rendered as
# "<timestamp> - <LEVEL> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Initialize the Chrome driver
def initialize_driver():
    """Create and return a Chrome WebDriver.

    The chromedriver service is built from whatever
    ``ChromeDriverManager().install()`` returns.
    """
    chromedriver_location = ChromeDriverManager().install()
    chrome_service = Service(chromedriver_location)
    return webdriver.Chrome(service=chrome_service)

# Open the webpage
def open_webpage(driver, url):
    """Navigate ``driver`` to ``url``, logging an INFO line before and after.

    Args:
        driver: Object exposing ``get(url)`` (a Selenium WebDriver here).
        url: Address to load.
    """
    log_info = logging.info
    log_info("Opening the webpage...")
    driver.get(url)
    log_info("Webpage opened successfully.")

# Select "All Regions" and wait for the listings to load
def select_all_regions(driver, load_wait_seconds=400):
    """Pick the first entry of the region dropdown, then pause for listings to load.

    Clicks ``#search_region_chosen a.chosen-single`` to open the dropdown and
    then the result item with ``data-option-array-index='0'`` (the entry the
    log messages refer to as 'All Regions').

    Args:
        driver: Selenium WebDriver already showing the listings page.
        load_wait_seconds: Fixed pause, in seconds, after the option is
            clicked. Defaults to 400, the value this function has always
            slept for (the old inline comment incorrectly said 10 seconds).

    Returns:
        True if both clicks succeeded and the pause completed, False if a
        Selenium error was logged instead. Errors are logged, not raised.
    """
    try:
        logging.info("Selecting 'All Regions'...")
        dropdown = WebDriverWait(driver, 15).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, "#search_region_chosen a.chosen-single"))
        )
        dropdown.click()
        all_regions_option = WebDriverWait(driver, 20).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, "#search_region_chosen .chosen-results li[data-option-array-index='0']"))
        )
        all_regions_option.click()
    except (TimeoutException, WebDriverException) as e:
        logging.error(f"Error selecting 'All Regions': {e}")
        return False

    logging.info("Selected 'All Regions'. Waiting for listings to load...")
    # Fixed pause rather than a condition-based wait: this function has no
    # signal for "listings finished reloading", so the caller tunes the delay.
    time.sleep(load_wait_seconds)
    return True

# Extract URLs of the listings
def extract_listing_urls(driver):
    """Collect the ``href`` of every listing click-box present on the page.

    Waits up to 90 seconds for elements matching
    ``.job_listing .job_listing-clickbox`` to be present.

    Returns:
        A list with one ``href`` value per matched element, or an empty list
        if a Selenium error occurred (the error is logged, not raised).
    """
    clickbox_locator = (By.CSS_SELECTOR, ".job_listing .job_listing-clickbox")
    try:
        clickboxes = WebDriverWait(driver, 90).until(
            EC.presence_of_all_elements_located(clickbox_locator)
        )
        urls = []
        for clickbox in clickboxes:
            urls.append(clickbox.get_attribute("href"))
    except (TimeoutException, WebDriverException, NoSuchElementException) as e:
        logging.error(f"Error extracting listing URLs: {e}")
        return []

    logging.info(f"Found {len(urls)} listings.")
    return urls

# Save URLs to a JSON file
def save_urls(urls, filename='escape_room_urls.json'):
    """Write ``urls`` to ``filename`` as 4-space-indented JSON and log the count.

    Args:
        urls: JSON-serializable sequence of URLs.
        filename: Destination path; any existing file is overwritten.
    """
    with open(filename, 'w') as json_file:
        json_file.write(json.dumps(urls, indent=4))
    url_count = len(urls)
    logging.info(f"Saved {url_count} URLs to {filename}")

def main():
    """Run the whole scrape: open the page, select the region, save the URLs.

    The browser is always closed, even if one of the steps raises.
    """
    start_url = "https://www.escaperoomdirectory.co.uk/listing-region/serbia/"
    driver = initialize_driver()
    try:
        open_webpage(driver, start_url)
        select_all_regions(driver)
        save_urls(extract_listing_urls(driver))
    finally:
        driver.quit()


if __name__ == "__main__":
    main()


2024-06-24 08:19:50,761 - INFO - Get LATEST chromedriver version for google-chrome
2024-06-24 08:19:51,267 - INFO - Get LATEST chromedriver version for google-chrome
2024-06-24 08:19:51,774 - INFO - Driver [C:\Users\ibrah\.wdm\drivers\chromedriver\win64\126.0.6478.63\chromedriver-win32/chromedriver.exe] found in cache
2024-06-24 08:19:52,875 - INFO - Opening the webpage...
2024-06-24 08:20:13,771 - INFO - Webpage opened successfully.
2024-06-24 08:20:13,772 - INFO - Selecting 'All Regions'...
2024-06-24 08:20:13,924 - INFO - Selected 'All Regions'. Waiting for listings to load...
2024-06-24 08:28:23,968 - ERROR - Error extracting listing URLs: Message: 

2024-06-24 08:28:23,969 - INFO - Saved 0 URLs to escape_room_urls.json


# Manually click on the regions and control the wait time before scraping

In [3]:
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import TimeoutException, WebDriverException, NoSuchElementException

# Settings for this cell, gathered in one place so they are easy to tune.
LISTING_URL = "https://www.escaperoomdirectory.co.uk/listing-region/serbia/"
PAGE_LOAD_WAIT_SECONDS = 120  # pause during which the regions are clicked by hand
LINKS_FILE = '20.links.txt'
LISTINGS_XPATH = "//ul[@class='job_listings listing-cards-anchor--active']/li"
CLICKBOX_XPATH = ".//a[contains(@class, 'job_listing-clickbox')]"


# Function to save the list of links to a file
def save_links(links):
    """Write each link on its own line to LINKS_FILE, replacing previous content."""
    # Explicit UTF-8 so the result does not depend on the platform's default encoding.
    with open(LINKS_FILE, 'w', encoding='utf-8') as file:
        file.writelines(f"{link}\n" for link in links)


# Function to get the list of links from the listings
def get_links():
    """Return the href of the click-box anchor of every listing currently present.

    Uses the module-level ``driver``. Waits up to 20 seconds for the listing
    items, skips listings without a click-box anchor, and skips anchors whose
    ``href`` is missing or empty so that the text "None" is never saved.

    Raises:
        TimeoutException: if no listing item appears within 20 seconds.
    """
    listings = WebDriverWait(driver, 20).until(
        EC.presence_of_all_elements_located((By.XPATH, LISTINGS_XPATH))
    )
    links = []
    for listing in listings:
        try:
            anchor = listing.find_element(By.XPATH, CLICKBOX_XPATH)
        except NoSuchElementException:
            continue
        href = anchor.get_attribute("href")
        if href:
            links.append(href)
    return links


# Initialize the Chrome driver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

try:
    # Navigation and the pause live inside the try block so that the browser is
    # closed even when the page fails to load or the kernel is interrupted
    # while waiting.
    print("Opening the webpage...")
    driver.get(LISTING_URL)
    print("Webpage opened successfully.")

    # Wait to allow the page to load completely
    print(f"Waiting for {PAGE_LOAD_WAIT_SECONDS} seconds to allow the page to load completely...")
    time.sleep(PAGE_LOAD_WAIT_SECONDS)

    # Get the list of links
    links = get_links()

    # Save the links to a file
    save_links(links)

    # Print success message with the count of links
    print(f"Successfully extracted {len(links)} links and saved to '{LINKS_FILE}'.")

except TimeoutException as e:
    print(f"Error: {e}")
except WebDriverException as e:
    print(f"WebDriver error: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")
finally:
    # Close the browser
    driver.quit()


Opening the webpage...
Webpage opened successfully.
Waiting for 120 seconds to allow the page to load completely...
Successfully extracted 32 links and saved to '20.links.txt'.


# Get the list of (url (for more details), name, address, phone) # WORKS

In [7]:
import time
import json
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import TimeoutException, WebDriverException, NoSuchElementException

# Settings for this cell, gathered in one place so they are easy to tune.
LISTING_URL = "https://www.escaperoomdirectory.co.uk/listing-region/serbia/"
# The cell has always slept 140 seconds while announcing 120; the message is
# now built from this constant so the two can no longer disagree.
PAGE_LOAD_WAIT_SECONDS = 140
OUTPUT_FILE = '20.Data.json'
LISTINGS_XPATH = "//ul[@class='job_listings listing-cards-anchor--active']/li"


def _find_or_none(parent, xpath):
    """Return the first element under ``parent`` matching ``xpath``, or None if absent."""
    try:
        return parent.find_element(By.XPATH, xpath)
    except NoSuchElementException:
        return None


def _text_or_none(parent, xpath):
    """Return the stripped text of the first match under ``parent``, or None if absent."""
    element = _find_or_none(parent, xpath)
    return element.text.strip() if element is not None else None


# Function to get the list of links, names, addresses, and phone numbers from the listings
def get_links_names_addresses_phones():
    """Return one dict per listing with keys "url", "name", "address" and "phone".

    Uses the module-level ``driver`` and waits up to 20 seconds for the
    listing items. A field whose element is missing from a listing is None.

    Raises:
        TimeoutException: if no listing item appears within 20 seconds.
    """
    listings = WebDriverWait(driver, 20).until(
        EC.presence_of_all_elements_located((By.XPATH, LISTINGS_XPATH))
    )
    data = []
    for listing in listings:
        anchor = _find_or_none(listing, ".//a[contains(@class, 'job_listing-clickbox')]")
        data.append({
            "url": anchor.get_attribute("href") if anchor is not None else None,
            "name": _text_or_none(listing, ".//h3[contains(@class, 'job_listing-title')]"),
            "address": _text_or_none(listing, ".//div[contains(@class, 'job_listing-location')]"),
            "phone": _text_or_none(listing, ".//div[contains(@class, 'job_listing-phone')]"),
        })
    return data


# Initialize the Chrome driver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

try:
    # Navigation and the pause live inside the try block so that the browser is
    # closed even when the page fails to load or the kernel is interrupted
    # while waiting.
    print("Opening the webpage...")
    driver.get(LISTING_URL)
    print("Webpage opened successfully.")

    # Wait to allow the page to load completely
    print(f"Waiting for {PAGE_LOAD_WAIT_SECONDS} seconds to allow the page to load completely...")
    time.sleep(PAGE_LOAD_WAIT_SECONDS)

    # Get the list of links, names, addresses, and phone numbers
    data = get_links_names_addresses_phones()
    print(f"Successfully extracted {len(data)} entries.")

    # Save the data to a JSON file
    with open(OUTPUT_FILE, 'w') as file:
        json.dump(data, file, indent=4)

    # Print success message
    print(f"Successfully extracted data and saved to '{OUTPUT_FILE}'.")

except TimeoutException as e:
    print(f"Error: {e}")
except WebDriverException as e:
    print(f"WebDriver error: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")
finally:
    # Close the browser
    driver.quit()

Opening the webpage...
Webpage opened successfully.
Waiting for 120 seconds to allow the page to load completely...
Successfully extracted 1 entries.
Successfully extracted data and saved to '20.Data.json'.


# Auto Navigation (auto navigation does not show the full data)

In [8]:
import time
import json
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import TimeoutException, WebDriverException, NoSuchElementException

# Settings for this cell, gathered in one place so they are easy to tune.
LISTING_URL = "https://www.escaperoomdirectory.co.uk/listing-region/serbia/"
PAGE_LOAD_WAIT_SECONDS = 120   # pause after the first page is opened
NEXT_PAGE_WAIT_SECONDS = 100   # pause after each click on a pagination link
OUTPUT_FILE = 'escaperooms_serbia.json'
LISTINGS_XPATH = "//ul[@class='job_listings listing-cards-anchor--active']/li"

# Accumulates the entries of every page scraped so far.
all_data = []


def _find_or_none(parent, xpath):
    """Return the first element under ``parent`` matching ``xpath``, or None if absent."""
    try:
        return parent.find_element(By.XPATH, xpath)
    except NoSuchElementException:
        return None


def _text_or_none(parent, xpath):
    """Return the stripped text of the first match under ``parent``, or None if absent."""
    element = _find_or_none(parent, xpath)
    return element.text.strip() if element is not None else None


# Function to get the list of links, names, addresses, and phone numbers from the listings
def get_links_names_addresses_phones():
    """Return one dict per listing with keys "url", "name", "address" and "phone".

    Uses the module-level ``driver`` and waits up to 20 seconds for the
    listing items. A field whose element is missing from a listing is None.

    Raises:
        TimeoutException: if no listing item appears within 20 seconds.
    """
    listings = WebDriverWait(driver, 20).until(
        EC.presence_of_all_elements_located((By.XPATH, LISTINGS_XPATH))
    )
    data = []
    for listing in listings:
        anchor = _find_or_none(listing, ".//a[contains(@class, 'job_listing-clickbox')]")
        data.append({
            "url": anchor.get_attribute("href") if anchor is not None else None,
            "name": _text_or_none(listing, ".//h3[contains(@class, 'job_listing-title')]"),
            "address": _text_or_none(listing, ".//div[contains(@class, 'job_listing-location')]"),
            "phone": _text_or_none(listing, ".//div[contains(@class, 'job_listing-phone')]"),
        })
    return data


def scrape_all_pages():
    """Scrape the current page, then follow ``data-page`` links until none is left.

    After every page the complete ``all_data`` list is rewritten to
    OUTPUT_FILE, so an interrupted run still leaves the pages scraped so far
    on disk. The loop ends when no ``<a data-page='N+1'>`` element exists.
    """
    current_page = 1
    while True:
        print(f"Scraping page {current_page}...")
        # Scrape data from the current page
        page_data = get_links_names_addresses_phones()
        all_data.extend(page_data)
        print(f"Successfully extracted {len(page_data)} entries from page {current_page}.")

        # Save the data to a JSON file after scraping each page
        with open(OUTPUT_FILE, 'w') as file:
            json.dump(all_data, file, indent=4)
        print(f"Data up to page {current_page} saved to '{OUTPUT_FILE}'.")

        # Only the lookup is guarded: a missing link means the last page was
        # reached, while a failing click is a real error for the outer handlers.
        try:
            next_page_button = driver.find_element(By.XPATH, f"//a[@data-page='{current_page + 1}']")
        except NoSuchElementException:
            # No more pages to scrape
            print("No more pages to scrape.")
            break

        next_page_button.click()
        current_page += 1
        # Wait for the next page to load
        print(f"Waiting for {NEXT_PAGE_WAIT_SECONDS} seconds for the next page to load...")
        time.sleep(NEXT_PAGE_WAIT_SECONDS)


# Initialize the Chrome driver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

try:
    # Navigation and the pause live inside the try block so that the browser is
    # closed even when the page fails to load or the kernel is interrupted
    # while waiting.
    print("Opening the webpage...")
    driver.get(LISTING_URL)
    print("Webpage opened successfully.")

    # Wait to allow the page to load completely
    print(f"Waiting for {PAGE_LOAD_WAIT_SECONDS} seconds to allow the page to load completely...")
    time.sleep(PAGE_LOAD_WAIT_SECONDS)

    # Scrape data from all pages
    scrape_all_pages()

except TimeoutException as e:
    print(f"Error: {e}")
except WebDriverException as e:
    print(f"WebDriver error: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")
finally:
    # Close the browser
    driver.quit()


Opening the webpage...
Webpage opened successfully.
Waiting for 120 seconds to allow the page to load completely...
Scraping page 1...
Successfully extracted 32 entries from page 1.
Data up to page 1 saved to 'escaperooms_serbia.json'.
Waiting for 100 seconds for the next page to load...
Scraping page 2...
Successfully extracted 32 entries from page 2.
Data up to page 2 saved to 'escaperooms_serbia.json'.
Waiting for 100 seconds for the next page to load...
Scraping page 3...
Successfully extracted 32 entries from page 3.
Data up to page 3 saved to 'escaperooms_serbia.json'.
Waiting for 100 seconds for the next page to load...


KeyboardInterrupt: 