In [1]:
# Imports
import os
import time

import pandas as pd
from dotenv import load_dotenv
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.service import Service as FirefoxService
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [2]:
# Path to the GeckoDriver executable (the WebDriver server for Firefox).
# The GECKODRIVER_PATH environment variable overrides the machine-specific
# default below, so the notebook can run on other machines without edits.
gecko_driver_path = os.environ.get(
    'GECKODRIVER_PATH',
    'C:/Program Files/geckodriver-v0.35.0-win64/geckodriver.exe',
)

# Setup Firefox WebDriver.
# GeckoDriver is wrapped in the Firefox Service class; the `Service` name
# imported at the top of this notebook is the Chrome one and targets ChromeDriver.
service = FirefoxService(gecko_driver_path)
driver = webdriver.Firefox(service=service)

# implicitly_wait() does not pause execution: it sets how long (in seconds)
# element lookups keep polling the DOM before giving up. It is configured
# before navigation so every later lookup on this driver uses it.
driver.implicitly_wait(10)

# Open the leaderboard page
driver.get('https://raid.report/leaderboard/worldsfirst/salvationsedge/normal')

In [3]:
# Fetch every element carrying the 'MuiTableRow-root' class on the page that
# is currently loaded. Despite the variable name these are table-row elements,
# not player names; the next cell uses their count to bound its row loop.
players = driver.find_elements(by=By.CLASS_NAME, value='MuiTableRow-root')

In [4]:
# Collect the clear-report link found in the 4th column of each leaderboard row.
clear_links = []

# Indices run from 1 to len(players) - 1. XPath positions are 1-based, and the
# upper bound presumably discounts a header row that also matches the
# 'MuiTableRow-root' class -- TODO confirm against the live page.
for i in range(1, len(players)):
    # Absolute XPath to the anchor in column 4 of body row i. This is tied to
    # the current page layout and will need updating if the markup changes.
    timestamp_xpath = f"/html/body/div/div/main/div/div[3]/table/tbody/tr[{i}]/td[4]/a"

    try:
        timestamp_link = driver.find_element(By.XPATH, timestamp_xpath).get_attribute('href')
    except WebDriverException as e:
        # Best effort: report the row and move on. Only Selenium errors are
        # caught so that programming mistakes still surface.
        print(f"Error retrieving link for row {i}: {e}")
        continue

    # get_attribute() returns None when the anchor has no href; storing that
    # would make the later driver.get(link) call fail, so skip the row instead.
    if not timestamp_link:
        print(f"Error retrieving link for row {i}: anchor has no href")
        continue

    # The href comes back as an absolute URL (see the recorded output), so it
    # is stored as-is with no prefixing.
    clear_links.append(timestamp_link)

print(clear_links)

['https://raid.report/pgcr/15008773959', 'https://raid.report/pgcr/15013763391', 'https://raid.report/pgcr/15020932267', 'https://raid.report/pgcr/15014319730', 'https://raid.report/pgcr/15014583648', 'https://raid.report/pgcr/15015332930', 'https://raid.report/pgcr/15015874694', 'https://raid.report/pgcr/15306651430', 'https://raid.report/pgcr/15016324295', 'https://raid.report/pgcr/15016461099', 'https://raid.report/pgcr/15016696664', 'https://raid.report/pgcr/15303122022', 'https://raid.report/pgcr/15017675557', 'https://raid.report/pgcr/15017797842', 'https://raid.report/pgcr/15017945192', 'https://raid.report/pgcr/15018357224', 'https://raid.report/pgcr/15018532675', 'https://raid.report/pgcr/15018602254', 'https://raid.report/pgcr/15021828052', 'https://raid.report/pgcr/15019130983', 'https://raid.report/pgcr/15019437682', 'https://raid.report/pgcr/15019475515', 'https://raid.report/pgcr/15019544735', 'https://raid.report/pgcr/15019591725', 'https://raid.report/pgcr/15019723134',

In [5]:
# Accumulator for the scraped records: one dict per player entry, filled in
# by the scraping loop in the following cell.
report_players = list()

# Explicit-wait helper bound to the shared driver; each until() call polls
# for at most 15 seconds before raising.
wait = WebDriverWait(driver, timeout=15)

In [6]:
# Visit every clear-report page and record each listed player's name, class
# and clear status.

# Start from an empty list so that re-running this cell does not append a
# second copy of every record.
report_players = []

for link in clear_links:
    try:
        # Open the clear report page
        driver.get(link)
        # Fixed pause kept from the original flow; it spaces out requests and
        # gives the page a moment to render before the explicit waits start.
        time.sleep(2)

        # Wait until the page content container is present in the DOM
        wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'card-content')))

        # Wait for player entries to load (based on the class 'pgcr-player-entry')
        players_entries = wait.until(
            EC.presence_of_all_elements_located((By.CLASS_NAME, 'pgcr-player-entry'))
        )
    except WebDriverException as e:
        # A page that fails to load or times out should not abort the whole
        # scrape: report it and carry on with the next link.
        print(f"Error loading clear report {link}: {e}")
        continue

    # Extract the relevant fields from each player entry
    for entry in players_entries:
        try:
            player_name = entry.find_element(By.CLASS_NAME, 'pgcr-display-name').text.strip()

            # The status icon's text is the icon name; "check_circle" is
            # treated as a successful clear, anything else as not cleared.
            icon_element = entry.find_element(By.CLASS_NAME, 'material-icons')
            cleared = icon_element.text.strip() == "check_circle"

            # The player's class is read from the first <label> nested inside
            # the entry (relative XPath, so other entries are not matched).
            player_class = entry.find_element(By.XPATH, './/label').text.strip()
        except WebDriverException as e:
            # Skip entries missing one of the expected elements. Only Selenium
            # errors are caught so that programming mistakes still surface.
            print(f"Error processing player entry: {e}")
            continue

        report_players.append({
            'Player Name': player_name,
            'Class': player_class,
            'Cleared': cleared,
            'Clear Link': link
        })

In [7]:
# End the WebDriver session. quit() closes every window and shuts the driver
# down, whereas close() only closes the current window and can leave the
# driver process running.
driver.quit()

# Build the results table. Columns are listed explicitly so the frame (and any
# CSV written from it) keeps its header even when no records were scraped.
df = pd.DataFrame(
    report_players,
    columns=['Player Name', 'Class', 'Cleared', 'Clear Link'],
)
print(df)

       Player Name    Class  Cleared                            Clear Link
0           Tyraxe   Hunter     True  https://raid.report/pgcr/15008773959
1    DrakathShadow  Warlock     True  https://raid.report/pgcr/15008773959
2             Jake  Warlock     True  https://raid.report/pgcr/15008773959
3              Ham   Hunter     True  https://raid.report/pgcr/15008773959
4            bravo   Hunter     True  https://raid.report/pgcr/15008773959
..             ...      ...      ...                                   ...
384   BigginLasley  Warlock     True  https://raid.report/pgcr/15008773959
385           Wisp    Titan     True  https://raid.report/pgcr/15008773959
386     SolusAstra   Hunter     True  https://raid.report/pgcr/15008773959
387       Parallax  Warlock     True  https://raid.report/pgcr/15008773959
388     SolusAstra  Warlock    False  https://raid.report/pgcr/15008773959

[389 rows x 4 columns]


In [8]:
# Write the scraped player table to a CSV file in the working directory,
# leaving out the DataFrame's integer index.
df.to_csv(path_or_buf='se_player_report.csv', index=False)