In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

# Initialize the driver
options = webdriver.ChromeOptions()
options.add_argument('--start-maximized')  # Ensure the page loads fully
driver = webdriver.Chrome(options=options)

# Bound before the try block so the name always exists for later cells,
# even when the scrape fails part-way through.
hero_links = set()

try:
    # Navigate to the heroes page
    heroes_url = "https://www.dota2.com/heroes"
    hero_link_locator = (By.CSS_SELECTOR, "a[href*='/hero/']")
    driver.get(heroes_url)

    # Wait for the first hero link before entering the scroll loop. Without
    # this, a pass that finds no anchors yet adds nothing to the set, and the
    # "no new links" check below ends the loop immediately with zero results.
    # A timeout here is reported by the generic handler at the bottom.
    WebDriverWait(driver, 30).until(
        EC.presence_of_element_located(hero_link_locator)
    )

    # Keep scrolling until a pass over the page yields no new hero links
    while True:
        current_hero_elements = driver.find_elements(*hero_link_locator)

        # get_attribute may return None; drop such values so the set holds
        # only strings and the final sorted() call cannot fail on mixed types.
        current_links = {
            href
            for href in (elem.get_attribute("href") for elem in current_hero_elements)
            if href
        }

        # Add the current links to the set
        previous_count = len(hero_links)
        hero_links.update(current_links)

        # If no new links are added, assume we've scrolled to the bottom
        if len(hero_links) == previous_count:
            break

        # Scroll down by a large increment
        driver.execute_script("window.scrollBy(0, 1000);")
        time.sleep(1)  # Give time for dynamic loading

    # Convert set to a sorted list for consistent output
    hero_links = sorted(hero_links)

    # Print all hero links
    print(f"Collected {len(hero_links)} hero links:")
    for link in hero_links:
        print(link)

except Exception as e:
    print(f"An error occurred: {e}")

finally:
    # Always release the browser, whether or not the scrape succeeded
    driver.quit()


Collected 126 hero links:
https://www.dota2.com/hero/abaddon
https://www.dota2.com/hero/alchemist
https://www.dota2.com/hero/ancientapparition
https://www.dota2.com/hero/anti-mage
https://www.dota2.com/hero/arcwarden
https://www.dota2.com/hero/axe
https://www.dota2.com/hero/bane
https://www.dota2.com/hero/batrider
https://www.dota2.com/hero/beastmaster
https://www.dota2.com/hero/bloodseeker
https://www.dota2.com/hero/bountyhunter
https://www.dota2.com/hero/brewmaster
https://www.dota2.com/hero/bristleback
https://www.dota2.com/hero/broodmother
https://www.dota2.com/hero/centaurwarrunner
https://www.dota2.com/hero/chaosknight
https://www.dota2.com/hero/chen
https://www.dota2.com/hero/clinkz
https://www.dota2.com/hero/clockwerk
https://www.dota2.com/hero/crystalmaiden
https://www.dota2.com/hero/darkseer
https://www.dota2.com/hero/darkwillow
https://www.dota2.com/hero/dawnbreaker
https://www.dota2.com/hero/dazzle
https://www.dota2.com/hero/deathprophet
https://www.dota2.com/hero/disruptor

In [32]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

# Start a Chrome session with default options; no driver path is supplied here.
driver = webdriver.Chrome()

# Scraped results, keyed by hero name; populated by the scraping loop below.
heroes_info = dict()

def wait_with_retry(driver, timeout, locator, retries=5):
    """Wait for an element to be present, refreshing the page between attempts.

    Args:
        driver: The WebDriver instance to wait on.
        timeout: Seconds to wait for the element on each attempt.
        locator: A ``(By, value)`` tuple identifying the element.
        retries: Maximum number of attempts before giving up (default 5).

    Returns:
        The element returned by ``presence_of_element_located``.

    Raises:
        TimeoutException: If the element is not present after all attempts.
    """
    for attempt in range(1, retries + 1):
        try:
            return WebDriverWait(driver, timeout).until(EC.presence_of_element_located(locator))
        except TimeoutException:
            # Refresh only when another attempt follows; reloading the page
            # right before raising would be wasted work.
            if attempt < retries:
                print("Retrying...")
                driver.refresh()  # Refresh the page if timeout occurs
    raise TimeoutException("Element not found after retries")


# Class names used to locate elements on a hero page. They are opaque
# identifiers (presumably generated by the site's build -- TODO confirm), so
# they are kept in one place to make them easy to update.
ATTRIBUTE_CLASS = "_3HGWJjSyOjmlUGJTIlMHc_"
HERO_NAME_CLASS = "_2IcIujaWiO5h68dVvpO_tQ"
ATTACK_TYPE_CLASS = "_3ce-DKDrVB7q5LsGbJdZ3X"
HARDNESS_CONTAINER_CLASS = "_1k-L3kCa-AwQVlB-EScp_y"
ROLES_CONTAINER_CLASS = "_2muZpdcl1nAAKmMRxKMMAI"
ROLE_ITEM_CLASS = "_3zWGygZT2aKUiOyokg4h1v"
ROLE_NAME_CLASS = "_3Fbk3tlFp8wcznxtXIx19W"
ROLE_BAR_CLASS = "f7kjDBQOuPqiwaCTUPzLJ"
SKILL_TYPE_CLASS = "_1gAlzFjUFFNhRj02gbrPLW"
SKILL_PARENT_CLASS = "_1vjw5Sik8Zewkj5_iOhCUb"
# Full class attribute of the skill entries that get clicked in the loop below.
SKILL_ENTRY_CLASS_ATTR = "_3Chop4A9yz7Af_BwR1r_NW _3SHwx10zrkJ8dzvQnkI2h8"

ELEMENT_TIMEOUT = 100  # seconds per wait_with_retry attempt
SHORT_TIMEOUT = 10     # seconds for readyState / visibility waits


def _wait_for_document_ready(driver, timeout=SHORT_TIMEOUT):
    """Block until document.readyState reports "complete"."""
    WebDriverWait(driver, timeout).until(
        lambda d: d.execute_script("return document.readyState") == "complete"
    )


def _scroll_into_view(driver, element, timeout=SHORT_TIMEOUT):
    """Scroll `element` into the viewport and block until it is visible."""
    driver.execute_script("arguments[0].scrollIntoView(true);", element)
    WebDriverWait(driver, timeout).until(EC.visibility_of(element))


try:
    # Iterate over each hero link
    for hero_url in hero_links:  # Replace hero_links with your list of URLs
        hero_data = {}

        try:
            # Page load happens inside the per-hero try so that one page that
            # fails to load is reported and skipped instead of aborting the
            # whole scrape (and the summary printed after the loop).
            driver.get(hero_url)
            _wait_for_document_ready(driver)

            # Scroll to the bottom to load all elements
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            _wait_for_document_ready(driver)

            # Get hero attribute (Strength/Agility/Intelligence/Universal)
            attribute_element = wait_with_retry(driver, ELEMENT_TIMEOUT, (By.CLASS_NAME, ATTRIBUTE_CLASS))
            _scroll_into_view(driver, attribute_element)
            hero_data['Attribute'] = attribute_element.text.strip()

            # Get hero name. The text is read only after the element is
            # confirmed visible, matching how every other field is read.
            hero_name = wait_with_retry(driver, ELEMENT_TIMEOUT, (By.CLASS_NAME, HERO_NAME_CLASS))
            _scroll_into_view(driver, hero_name)
            hero_name_text = hero_name.text.strip()
            print(f"hero name: {hero_name_text}")

            # Get attack type (Melee/Ranged)
            try:
                attack_type_element = wait_with_retry(driver, ELEMENT_TIMEOUT, (By.CLASS_NAME, ATTACK_TYPE_CLASS))
                _scroll_into_view(driver, attack_type_element)
                hero_data['Attack Type'] = attack_type_element.text.strip()
            except Exception:
                hero_data['Attack Type'] = "Melee"  # Default to Melee if element not found

            # Get hero hardness: count the container's direct <div> children
            # whose class attribute equals that of the first child.
            try:
                hardness_container = wait_with_retry(driver, ELEMENT_TIMEOUT, (By.CLASS_NAME, HARDNESS_CONTAINER_CLASS))
                _scroll_into_view(driver, hardness_container)
                hardness_subclasses = hardness_container.find_elements(By.XPATH, "./div")
                base_class = hardness_subclasses[0].get_attribute("class")
                hardness = sum(1 for subclass in hardness_subclasses if subclass.get_attribute("class") == base_class)
                hero_data['Hardness Level'] = hardness
            except Exception:
                hero_data['Hardness Level'] = "N/A"

            # Scroll to and get role distribution
            try:
                roles_container = wait_with_retry(driver, ELEMENT_TIMEOUT, (By.CLASS_NAME, ROLES_CONTAINER_CLASS))
                _scroll_into_view(driver, roles_container)

                role_elements = driver.find_elements(By.CLASS_NAME, ROLE_ITEM_CLASS)
                roles = {}
                for role_element in role_elements:
                    role_name = role_element.find_element(By.CLASS_NAME, ROLE_NAME_CLASS).text
                    # The value is taken from the bar's inline "width: ..." style.
                    percentage_style = role_element.find_element(By.CLASS_NAME, ROLE_BAR_CLASS).get_attribute("style")
                    percentage = percentage_style.split("width: ")[1].replace(";", "").strip()
                    roles[role_name] = percentage
                hero_data['Roles'] = roles
            except Exception as e:
                hero_data['Roles'] = "N/A"
                print(f"Failed to get roles for {hero_url}: {e}")

            # Count active skills: check the skill-type element as first
            # found, then click each matching skill entry and re-read it.
            try:
                active_skill_count = 0
                skill_type = wait_with_retry(driver, ELEMENT_TIMEOUT, (By.CLASS_NAME, SKILL_TYPE_CLASS))
                _scroll_into_view(driver, skill_type)
                if "Passive" not in skill_type.text:
                    active_skill_count += 1
                skill_parent_elements = driver.find_elements(By.CLASS_NAME, SKILL_PARENT_CLASS)
                for skill_parent in skill_parent_elements:
                    sub_elements = skill_parent.find_elements(By.XPATH, "./div")
                    for sub in sub_elements:
                        if sub.get_attribute("class") == SKILL_ENTRY_CLASS_ATTR:
                            driver.execute_script("arguments[0].click();", sub)
                            # Re-locate after the click instead of reusing the earlier reference.
                            skill_type = driver.find_element(By.CLASS_NAME, SKILL_TYPE_CLASS)
                            if "Passive" not in skill_type.text:
                                active_skill_count += 1
                hero_data['Active Skills'] = active_skill_count
            except Exception as e:
                # Record a placeholder so every hero has the same keys, as is
                # done for the other optional fields.
                hero_data['Active Skills'] = "N/A"
                print(f"Error2: {e} on {hero_url}")

            heroes_info[hero_name_text] = hero_data

        except Exception as e:
            print(f"Error1: {e} on {hero_url}")

    # Print all hero data
    for hero, data in heroes_info.items():
        print(f"\nHero: {hero}")
        for key, value in data.items():
            print(f"  {key}: {value}")

finally:
    # Always release the browser, whether or not the scrape succeeded
    driver.quit()


hero name: ABADDON
hero name: ALCHEMIST
hero name: ANCIENT APPARITION
hero name: ANTI-MAGE
hero name: ARC WARDEN
hero name: AXE
hero name: BANE
hero name: BATRIDER
hero name: BEASTMASTER
hero name: BLOODSEEKER
hero name: BOUNTY HUNTER
hero name: BREWMASTER
hero name: BRISTLEBACK
hero name: BROODMOTHER
hero name: CENTAUR WARRUNNER
hero name: CHAOS KNIGHT
hero name: CHEN
hero name: CLINKZ
hero name: CLOCKWERK
hero name: CRYSTAL MAIDEN
hero name: DARK SEER
hero name: DARK WILLOW
hero name: DAWNBREAKER
hero name: DAZZLE
hero name: DEATH PROPHET
hero name: DISRUPTOR
hero name: DOOM
hero name: DRAGON KNIGHT
hero name: DROW RANGER
hero name: EARTHSHAKER
hero name: EARTH SPIRIT
hero name: ELDER TITAN
hero name: EMBER SPIRIT
hero name: ENCHANTRESS
hero name: ENIGMA
hero name: FACELESS VOID
hero name: GRIMSTROKE
hero name: GYROCOPTER
hero name: HOODWINK
hero name: HUSKAR
hero name: INVOKER
hero name: IO
hero name: JAKIRO
hero name: JUGGERNAUT
hero name: KEEPER OF THE LIGHT
hero name: KEZ
hero na

In [33]:
# Number of entries stored in heroes_info; shown as this cell's output.
len(heroes_info)

126