In [1]:
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException, TimeoutException
from bs4 import BeautifulSoup
import time
import csv

In [2]:
options = Options()
options.headless = True # Run Firefox in headless mode
driver = webdriver.Firefox(options=options)

In [3]:
try:
    # Navigate to the webpage
    driver.get('https://puckpedia.com/players/search')

    # Initialize WebDriverWait
    wait = WebDriverWait(driver, 20)  # Wait up to 20 seconds

    # Wait for the "Get Started" button to become clickable
    get_started_button = wait.until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Get Started")]')))

    # Click the "Get Started" button
    get_started_button.click()

    # Open a CSV file to write data
    with open('nhl_players.csv', 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['Player', 'Team', 'Position', 'Cap Hit','pv']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        has_next_page = True

        while has_next_page:
            # Wait for player data to load
            wait.until(EC.presence_of_all_elements_located((By.XPATH, '//table/tbody/tr')))

            # Find all the player rows
            rows = driver.find_elements(By.XPATH, '//table/tbody/tr')

            # Loop through each row to extract data
            for row in rows:
                try:
                    # Player's name
                    first_name = row.find_element(By.XPATH, './/span[@x-text="x.p_fn"]').text
                    last_name = row.find_element(By.XPATH, './/span[@x-text="x.p_ln"]').text
                    full_name = f'{first_name} {last_name}'

                    # Team name
                    team_element = row.find_element(By.XPATH, './/td[contains(@class, "flex justify-center")]//a')
                    team_href = team_element.get_attribute('href')
                    team_name = team_href.split('/')[-1].replace('-', ' ').title()

                    # Position
                    position = row.find_element(By.XPATH, './/td[@x-text="x.pos"]').text

                    # Salary Cap Hit
                    cap_hit = row.find_element(By.XPATH, './/td[@x-text="$store.puck.formatContractValue(x.cap_hit)"]').text

                    if cap_hit and cap_hit != 'N/A':
                        cap_hit_value = int(cap_hit.replace('$','').replace(',',''))
                        pv = round(cap_hit_value / 1_000_000,3)
                    else:
                        cap_hit_value = 0
                        pv=0
                            
                    
                    # Write the data to CSV
                    writer.writerow({
                        'Player': full_name,
                        'Team': team_name if team_name != 'Utah Hc' else 'Utah Hockey Club',
                        'Position': position if position == 'D' else 'F',
                        'Cap Hit': cap_hit,
                        'pv':pv
                    })

                except Exception as e:
                    print(f"Error processing row: {e}")

            # Attempt to click the "Next" button
            try:
                # Scroll to the top of the page to ensure the "Next" button is visible
                driver.execute_script("window.scrollTo(0, 0);")

                # Wait for the "Next" button to be present
                next_button = wait.until(EC.element_to_be_clickable((By.XPATH, '//button[.//i[contains(@class, "fa-angle-right")]]')))

                # Get the class attribute of the "Next" button
                next_button_class = next_button.get_attribute('class')

                # Check if the "Next" button is disabled by inspecting its class
                if 'opacity-25' in next_button_class:
                    has_next_page = False  # Exit the loop if the button is disabled
                else:
                    # Click the "Next" button
                    next_button.click()

                    # Wait for the new page to load
                    wait.until(EC.staleness_of(rows[0]))
                    wait.until(EC.presence_of_all_elements_located((By.XPATH, '//table/tbody/tr')))

            except (NoSuchElementException, TimeoutException) as e:
                print(f"Error locating or clicking the 'Next' button: {e}")
                has_next_page = False

finally:
    # Close the WebDriver
    driver.quit()

Error locating or clicking the 'Next' button: Message: 

