In [1]:
#Inclusion
import time

import pandas as pd
from dotenv import load_dotenv
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.service import Service as FirefoxService
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [2]:
# Path to your GeckoDriver (Firefox driver).
# NOTE: this is a machine-specific absolute path; point it at your own geckodriver install.
gecko_driver_path = 'C:/Program Files/geckodriver-v0.35.0-win64/geckodriver.exe'

# Setup Firefox WebDriver.
# The service object must be the Firefox (geckodriver) one: the Chrome service class
# builds its command line for chromedriver and only happens to launch geckodriver.
service = FirefoxService(executable_path=gecko_driver_path)
driver = webdriver.Firefox(service=service)

# Open the leaderboard page
driver.get('https://dungeon.report/leaderboard/worldsfirst/sundereddoctrine/challenge')

# Implicit wait: this does not pause here. It makes every later find_element(s) call
# poll for up to 10 seconds before giving up, which is what lets the first lookup in
# the scraping loop wait for the table to render. Adjust as needed.
driver.implicitly_wait(10)

In [3]:
# Accumulator for the timestamp (clear report) links gathered per team from the leaderboard
clear_links = list()

In [4]:
# Stop collecting once this many clear-report links have been gathered.
MAX_CLEAR_LINKS = 500

# Absolute XPaths into the leaderboard page. They mirror the page's current DOM layout
# and will need updating if the site's markup changes.
LEADERBOARD_ROWS_XPATH = "/html/body/div/div/main/div/div[3]/table/tbody/tr"
NEXT_BUTTON_XPATH = '/html/body/div/div/main/div/div[3]/div/div[3]/div/div[2]/button[2]'

# Walk the leaderboard page by page, appending each row's timestamp link to clear_links,
# until the link limit is reached or there is no further page.
while True:
    # Select the table-body rows directly. Counting every 'MuiTableRow-root' element and
    # looping over range(1, count) only matched the tbody row indices when exactly one
    # non-body row (e.g. the header) carried that class.
    rows = driver.find_elements(By.XPATH, LEADERBOARD_ROWS_XPATH)

    for i, row in enumerate(rows, start=1):  # i is the 1-based row number, used for messages
        try:
            # The anchor in the row's 4th cell is the timestamp link to the clear report
            timestamp_link = row.find_element(By.XPATH, './td[4]/a').get_attribute('href')
        except WebDriverException as e:
            print(f"Error retrieving link for row {i}: {e}")
            continue

        # get_attribute returns None when the anchor has no href; don't store that
        if timestamp_link:
            clear_links.append(timestamp_link)

    # Stop once enough links are collected. '>=' (not '==') so the limit still triggers
    # when a page pushes the count past it; trim any overshoot.
    if len(clear_links) >= MAX_CLEAR_LINKS:
        del clear_links[MAX_CLEAR_LINKS:]
        print("Limit reached, proceeding to exit loop.")
        break

    # Check for a "Next" button to move to the next page
    try:
        next_button = driver.find_element(By.XPATH, NEXT_BUTTON_XPATH)

        # If the button is disabled, we are on the last page
        if not next_button.is_enabled():
            print("No more pages. Pagination complete.")
            break

        # Click the "Next" button to load more entries
        next_button.click()
    except WebDriverException as e:
        # Button missing or not clickable: report which Selenium error it was, then stop
        print(f"No more pages found or button not clickable. Stopping pagination. ({type(e).__name__})")
        break

    time.sleep(3)  # Allow time for new page to load

print(f"Total links retrieved: {len(clear_links)}")

Limit reached, proceeding to exit loop.
Total links retrieved: 500


In [5]:
# Explicit-wait helper bound to the shared driver: each wait.until(...) call polls
# for up to 15 seconds (raise the timeout if pages load slowly)
wait = WebDriverWait(driver, timeout=15)

# Collected player records (name, class, clear status), filled in by the scraping loop
report_players = list()

In [6]:
# Loop through each clear report page and record one entry per listed player.
# A page that fails to load or never shows its player entries is reported and skipped,
# so a single bad report does not abort the whole run.
for link in clear_links:
    try:
        # Open the clear report page
        driver.get(link)
        time.sleep(2)

        # Wait until the page content loads (adjust the wait condition as needed)
        wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'card-content')))

        # Wait for player entries to load (based on the class 'pgcr-player-entry')
        players_entries = wait.until(
            EC.presence_of_all_elements_located((By.CLASS_NAME, 'pgcr-player-entry'))
        )
    except WebDriverException as e:
        # Covers wait timeouts as well as navigation errors; the type name is printed
        # because a timeout's own message is usually empty.
        print(f"Skipping report {link}: {type(e).__name__}: {e}")
        continue

    # Loop through each player entry and extract relevant information
    for entry in players_entries:
        try:
            # Player name as displayed in the entry
            player_name = entry.find_element(By.CLASS_NAME, 'pgcr-display-name').text.strip()

            # The entry's icon text tells success from failure
            icon_element = entry.find_element(By.CLASS_NAME, 'material-icons')
            cleared = icon_element.text == "check_circle"  # True if cleared, False otherwise

            # Relative XPath: the first <label> inside this entry holds the class name
            player_class = entry.find_element(By.XPATH, './/label').text.strip()
        except WebDriverException as e:
            print(f"Error processing player entry: {e}")
            continue  # Skip the entry if there is an issue

        # Append the player information to the list of report players
        report_players.append({
            'Player Name': player_name,
            'Class': player_class,
            'Cleared': cleared,
            'Clear Link': link
        })

In [7]:
# End the WebDriver session. quit() is used instead of close(): close() only closes the
# current window, while quit() also shuts down the driver service process.
driver.quit()

# Build the results table. The columns are listed explicitly so the frame (and any file
# written from it) keeps its header even when no player entries were collected.
df = pd.DataFrame(report_players, columns=['Player Name', 'Class', 'Cleared', 'Clear Link'])
print(df)

      Player Name    Class  Cleared                               Clear Link
0      luigistyle  Warlock    False  https://dungeon.report/pgcr/15875673805
1     Mariomonkey  Warlock    False  https://dungeon.report/pgcr/15875673805
2           Biggz   Hunter    False  https://dungeon.report/pgcr/15875673805
3           Biggz    Titan    False  https://dungeon.report/pgcr/15875673805
4     Mariomonkey    Titan    False  https://dungeon.report/pgcr/15875673805
...           ...      ...      ...                                      ...
1955     Zoltraak  Warlock    False  https://dungeon.report/pgcr/15876318296
1956          小羽毛  Warlock    False  https://dungeon.report/pgcr/15876318296
1957       cmccwc    Titan     True  https://dungeon.report/pgcr/15876319644
1958        liuhx    Titan     True  https://dungeon.report/pgcr/15876319644
1959    BocekHere  Warlock     True  https://dungeon.report/pgcr/15876319644

[1960 rows x 4 columns]


In [8]:
# Write the player table to a CSV file; index=False keeps the row index out of the file
df.to_csv(path_or_buf='sd_player_report.csv', index=False)

: 