In [10]:
import time
import json
import random
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import TimeoutException, WebDriverException, StaleElementReferenceException, NoSuchElementException

# Scrape every result listed on the Find-A-Center page and write the records to
# bowling_centers.json, saving after each location so a crash loses little work.

# Maximum number of attempts per location before it is recorded as skipped
max_retries = 3

# Records collected so far; save_progress() serialises this list
data_list = []

# Counter to keep track of successful location processing
success_count = 0

# XPaths (relative to one result element) tried in order until one yields the address
address_xpaths = (
    # Method 1: span immediately preceding the icon span
    ".//span[@class='fa fa-fw']/preceding-sibling::span",
    # Method 2: span preceding the icon's grandparent
    ".//span[@class='fa fa-fw']/../../preceding-sibling::span",
    # Method 3: first span inside a paragraph
    ".//p/span[1]",
)


def save_progress():
    """Write everything collected so far in ``data_list`` to bowling_centers.json."""
    with open('bowling_centers.json', 'w', encoding='utf-8') as json_file:
        json.dump(data_list, json_file, indent=4)


def get_locations():
    """Return the result elements currently in the DOM.

    Waits up to 20 seconds for at least one ``.LocationListBox .FAC-Result``
    element to be present; raises ``TimeoutException`` otherwise.
    """
    return WebDriverWait(driver, 20).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".LocationListBox .FAC-Result"))
    )


def text_after_icon(container_text, icon_text):
    """Return the part of ``container_text`` that follows the icon glyph.

    ``str.split("")`` raises ``ValueError``, so the separator must never be
    empty. When the icon contributes no text (or its text is not found in the
    container) the whole container text is returned, stripped.
    """
    separator = icon_text.strip()
    if separator and separator in container_text:
        return container_text.split(separator)[1].strip()
    return container_text.strip()


def extract_phone(location):
    """Return the text that follows the ``.fa-fw`` icon inside its parent element.

    Raises ``NoSuchElementException`` when the result has no such icon.
    """
    icon = location.find_element(By.CSS_SELECTOR, ".fa-fw")
    container = icon.find_element(By.XPATH, "..")
    # The separator is read from the page instead of being hard-coded in source
    return text_after_icon(container.text, icon.text)


def extract_address(location):
    """Return the address text using the first matching XPath, or "N/A" if none match."""
    for xpath in address_xpaths:
        try:
            return location.find_element(By.XPATH, xpath).text
        except NoSuchElementException:
            continue
    print("Failed to extract address using all methods.")
    return "N/A"


def extract_miles_away(location):
    """Return the value after the colon in this result's 'Miles away' label, or "N/A"."""
    try:
        # The leading "." keeps the search inside this result; a bare "//" would
        # match the first such span anywhere in the document for every result.
        label = location.find_element(By.XPATH, ".//span[contains(text(), 'Miles away')]").text
    except NoSuchElementException:
        return "N/A"
    _, colon, value = label.partition(":")
    return value.strip() if colon else "N/A"


# Initialize the Chrome driver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

try:
    # Navigation happens inside the try so the browser is closed even if loading fails
    print("Opening the webpage...")
    driver.get("https://gobowling.com/Find-A-Center")
    print("Webpage opened successfully.")

    # Wait for 30 seconds before starting to scrape
    print("Waiting for 30 seconds to allow the page to load completely...")
    time.sleep(30)

    # Initial fetch of locations; only the count is reused, elements are re-fetched below
    locations = get_locations()
    total_locations = len(locations)
    print(f"Found {total_locations} locations.")

    # Iterate through each location and extract data
    for index in range(total_locations):
        for attempt in range(1, max_retries + 1):
            try:
                # Fetch the latest locations to avoid stale elements
                locations = get_locations()
                location = locations[index]

                # Scroll the location into view
                driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", location)
                time.sleep(random.uniform(1, 4))  # Random sleep to mimic human behavior

                # Ensure the location is visible
                WebDriverWait(driver, 10).until(EC.visibility_of(location))

                try:
                    record = {
                        "name": location.find_element(By.CSS_SELECTOR, "h3 a").text,
                        "address": extract_address(location),
                        "phone": extract_phone(location),
                        "miles_away": extract_miles_away(location),
                    }
                except Exception as e:
                    print(f"Error extracting data from location: {e}")
                    raise  # Re-raise so the handler below can retry

            except (TimeoutException, WebDriverException, StaleElementReferenceException, IndexError) as e:
                # IndexError covers the result list shrinking between fetches
                print(f"Error processing location {index + 1} (attempt {attempt}/{max_retries}): {e}")
                if attempt < max_retries:
                    time.sleep(random.uniform(2, 5))  # Random sleep before retrying
                    continue

                print("Max retries reached, skipping this location.")
                # Log skipped location
                location_data = {
                    "location_number": index + 1,
                    "status": "skipped",
                    "error_message": str(e)
                }
                # Attempt to extract some data even if failed
                try:
                    location = locations[index]
                    partial = {
                        "name": location.find_element(By.CSS_SELECTOR, "h3 a").text,
                        "address": extract_address(location),
                        "phone": extract_phone(location),
                        "miles_away": "N/A",
                    }
                    location_data.update(partial)
                except Exception as ex:
                    print(f"Error extracting data from location {index + 1}: {ex}")
                    location_data["data_extraction_error"] = str(ex)

                # Append the location data to the list
                data_list.append(location_data)
            else:
                data_list.append(record)
                success_count += 1
                print(f"Successfully processed location {index + 1}.")
                break

        # Save progress after each location
        save_progress()

    # Final save to ensure all data is written to the file
    save_progress()

    # Print success count
    print(f"Successfully processed {success_count} locations.")

except TimeoutException as e:
    print(f"Error: {e}")
except WebDriverException as e:
    print(f"WebDriver error: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")
finally:
    # Close the browser on every exit path, including KeyboardInterrupt
    driver.quit()


Opening the webpage...
Webpage opened successfully.
Waiting for 30 seconds to allow the page to load completely...
Found 29 locations.
Successfully processed location 1.
Successfully processed location 2.
Successfully processed location 3.
Successfully processed location 4.
Successfully processed location 5.
Successfully processed location 6.
Successfully processed location 7.
Successfully processed location 8.
Successfully processed location 9.
Successfully processed location 10.
Successfully processed location 11.
Successfully processed location 12.
Successfully processed location 13.
Successfully processed location 14.
Successfully processed location 15.
Successfully processed location 16.
Successfully processed location 17.
Successfully processed location 18.
Successfully processed location 19.
Successfully processed location 20.
Successfully processed location 21.
Successfully processed location 22.
Successfully processed location 23.
Successfully processed location 24.
Successful

# Save the raw HTML of Div1 as-is so that it can be processed later with scripts

In [19]:
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import TimeoutException, WebDriverException

# Save the outer HTML of the element with id "Div1" on the Find-A-Center page
# to bowling_centers.html so it can be parsed offline.

# Initialize the Chrome driver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

try:
    # Navigation happens inside the try so the browser is closed even if loading fails
    print("Opening the webpage...")
    driver.get("https://gobowling.com/Find-A-Center")
    print("Webpage opened successfully.")

    # Wait for 30 seconds before starting to scrape
    # NOTE(review): presumably needed because Div1 can exist before its content is filled in - confirm
    print("Waiting for 30 seconds to allow the page to load completely...")
    time.sleep(30)

    # Wait until the div with id "Div1" is present
    div1 = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.ID, "Div1")))

    # Get the outer HTML of the div
    div1_html = div1.get_attribute('outerHTML')

    # Save the HTML to a file; an explicit UTF-8 encoding avoids UnicodeEncodeError
    # on platforms whose default codec cannot represent every character on the page
    with open('bowling_centers.html', 'w', encoding='utf-8') as html_file:
        html_file.write(div1_html)

    print("Div1 content saved successfully.")

except TimeoutException as e:
    print(f"Error: Timeout while waiting for the element - {e}")
except WebDriverException as e:
    print(f"WebDriver error: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")
finally:
    # Close the browser
    driver.quit()


Opening the webpage...
Webpage opened successfully.
Waiting for 30 seconds to allow the page to load completely...
Div1 content saved successfully.
