In [1]:
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
import pandas as pd
import time

# Folder to save CSVs
SAVE_FOLDER = "nrl_stats_2025"

def ensure_folder(folder):
    if not os.path.exists(folder):
        os.makedirs(folder)
        print(f"Created folder: {folder}")

def get_last_column_name(driver):
    try:
        table = driver.find_element(By.CSS_SELECTOR, "table.table")
        headers = [th.text.strip() for th in table.find_elements(By.TAG_NAME, "th")]
        if headers:
            last_col = headers[-1].lower().replace(" ", "_")
            return last_col
    except Exception as e:
        print(f"Could not get last column name: {e}")
    return "stat"

def save_table_csv(driver, base_filename):
    try:
        table = driver.find_element(By.CSS_SELECTOR, "table.table")
        headers = [th.text.strip() for th in table.find_elements(By.TAG_NAME, "th")]

        rows = []
        for tr in table.find_elements(By.TAG_NAME, "tr"):
            cells = tr.find_elements(By.TAG_NAME, "td")
            if cells:
                rows.append([cell.text.strip() for cell in cells])

        if not rows:
            print(f"No data rows found in table for {base_filename}, skipping save.")
            return False

        df = pd.DataFrame(rows, columns=headers)

        # Build full path to save file inside folder
        filename = os.path.join(SAVE_FOLDER, base_filename + ".csv")
        counter = 1
        while os.path.exists(filename):
            filename = os.path.join(SAVE_FOLDER, f"{base_filename}_{counter}.csv")
            counter += 1

        df.to_csv(filename, index=False)
        print(f"Saved table to {filename}")
        return True
    except Exception as e:
        print(f"Failed to save table to {base_filename}.csv: {e}")
        return False

def main():
    ensure_folder(SAVE_FOLDER)

    driver = webdriver.Chrome()
    url = "https://www.nrl.com/stats/?competition=111&season=2025"
    driver.get(url)
    time.sleep(5)

    links = driver.find_elements(By.XPATH, "//a[contains(text(), 'View Full Table')]")

    stat_urls = []
    for link in links:
        href = link.get_attribute("href")
        if href and "stat=" in href:
            team_url = href.replace("/players/", "/teams/")
            stat_urls.append(team_url)

    print(f"✅ Found {len(stat_urls)} team stat table links")

    for stat_url in stat_urls:
        try:
            driver.get(stat_url)
            time.sleep(3)  # wait for page/table to load

            try:
                table = driver.find_element(By.CSS_SELECTOR, "table.table")
                headers = table.find_elements(By.TAG_NAME, "th")
                print(f"Headers found: {len(headers)}")
                if headers:
                    print("Headers:", [th.text for th in headers])

                rows = table.find_elements(By.TAG_NAME, "tr")
                data_rows = [row for row in rows if row.find_elements(By.TAG_NAME, "td")]
                print(f"Data rows found: {len(data_rows)}")

                if len(data_rows) == 0:
                    print(f"No data rows found on page: {stat_url}")
                    print("Table HTML snippet:\n", table.get_attribute('outerHTML')[:500])  # first 500 chars
                    continue  # skip saving for this URL

            except Exception as e:
                print(f"Error finding or reading table on page: {stat_url} | Exception: {e}")
                continue  # skip to next URL

            last_col = get_last_column_name(driver)
            base_filename = f"nrl_2025_{last_col}"

            success = save_table_csv(driver, base_filename)
            if not success:
                print(f"Skipping saving file for URL (no data): {stat_url}")

        except Exception as e:
            print(f"Error processing URL {stat_url}: {e}")

    driver.quit()

if __name__ == "__main__":
    main()

Created folder: nrl_stats_2025
✅ Found 37 team stat table links
Headers found: 5
Headers: ['', '', 'Team', 'Played', 'Points']
Data rows found: 17
Saved table to nrl_stats_2025\nrl_2025_points.csv
Headers found: 5
Headers: ['', '', 'Team', 'Played', 'Tries']
Data rows found: 17
Saved table to nrl_stats_2025\nrl_2025_tries.csv
Headers found: 5
Headers: ['', '', 'Team', 'Played', 'Goals']
Data rows found: 17
Saved table to nrl_stats_2025\nrl_2025_goals.csv
Error finding or reading table on page: https://www.nrl.com/stats/teams/?competition=111&season=2025&stat=1000288 | Exception: Message: no such element: Unable to locate element: {"method":"css selector","selector":"table.table"}
  (Session info: chrome=137.0.7151.105); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
	GetHandleVerifier [0x0x7ff7a7fecda5+78885]
	GetHandleVerifier [0x0x7ff7a7fece00+78976]
	(No symbol) [0x0x7ff7a7d

In [None]:
import time
import os
import re
from selenium import webdriver
from selenium.webdriver.common.by import By
import pandas as pd

# Step 0: Setup
TEAM_LIST_URL = "https://www.nrl.com/clubs/"
OUTPUT_FOLDER = "team_stats"
SAVE_CSV = os.path.join(OUTPUT_FOLDER, "nrl_2025_team_stats.csv")

def get_team_links(driver):
    driver.get(TEAM_LIST_URL)
    time.sleep(5)
    links = driver.find_elements(By.CSS_SELECTOR, "a[aria-label$='Club Profile']")
    return [link.get_attribute("href") for link in links]

def clean_value(val):
    """Strip % or 's' or whitespace from stat values"""
    return re.sub(r"[^\d\.]", "", val).strip()

def clean_key(key):
    """Convert 'Avg. Points Scored' to 'avg_points_scored'"""
    return re.sub(r"\s+|[^\w]", "_", key.lower()).strip("_")

def parse_team_stats(driver, team_url):
    driver.get(team_url)
    time.sleep(3)

    data = {}

    try:
        team_name = driver.title.split("|")[0].strip()
        data["team"] = team_name
    except:
        data["team"] = "Unknown"

    try:
        # Find all stat blocks
        stat_blocks = driver.find_elements(By.CSS_SELECTOR, "div.card-profile-stat")

        for block in stat_blocks:
            try:
                label = block.find_element(By.CSS_SELECTOR, "dt.card-profile-stat__name").text.strip()
                try:
                    value = block.find_element(By.CSS_SELECTOR, "dd.card-profile-stat__value").text.strip()
                except:
                    value = block.find_element(By.CSS_SELECTOR, "span.donut-chart-stat__value").text.strip()

                # Clean key and value
                data[clean_key(label)] = clean_value(value)
            except Exception as inner_e:
                print(f"⚠️ Skipped a stat block due to error: {inner_e}")
    except Exception as e:
        print(f"❌ Failed to extract stat blocks from: {team_url}\n{e}")

    return data

def main():
    # Create output folder if not exists
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)

    driver = webdriver.Chrome()
    team_urls = get_team_links(driver)
    print(f"Found {len(team_urls)} teams.")

    all_stats = []
    for url in team_urls:
        print("→", url)
        stats = parse_team_stats(driver, url)
        print("   ", stats)
        all_stats.append(stats)

    driver.quit()

    # Save to CSV
    df = pd.DataFrame(all_stats)
    df.to_csv(SAVE_CSV, index=False)
    print(f"✅ Saved data to {SAVE_CSV}")

if __name__ == "__main__":
    main()


Found 17 teams.
→ https://www.nrl.com/clubs/brisbane-broncos/
    {'team': 'Brisbane Broncos Club Profile', 'premierships': '6', 'minor_premierships': '4', 'runner_up': '1', 'total_wins': '537', 'total_losses': '370', 'win_percentage': '56', 'points_scored': '338', 'points_conceded': '294', 'completion_rate': '77', 'tackle_efficiency': '88.7', 'avg__points_scored': '26', 'avg__points_conceded': '22', 'avg__play_the_ball_speed': '3.59'}
→ https://www.nrl.com/clubs/canberra-raiders/
    {'team': 'Canberra Raiders Club Profile', 'premierships': '3', 'minor_premierships': '1', 'runner_up': '3', 'total_wins': '546', 'total_losses': '504', 'win_percentage': '50', 'points_scored': '396', 'points_conceded': '300', 'completion_rate': '80', 'tackle_efficiency': '89.6', 'avg__points_scored': '28', 'avg__points_conceded': '21', 'avg__play_the_ball_speed': '3.47'}
→ https://www.nrl.com/clubs/canterbury-bankstown-bulldogs/
    {'team': 'Canterbury-Bankstown Bulldogs Club Profile', 'premierships': '8